From 0fde46ec984ff76d15342a67163a0125779dda8b Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Fri, 8 May 2026 17:03:47 -0400 Subject: [PATCH 01/60] feat(e2e): introduce scenario-based setup matrix and runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorganize E2E around declarative setup scenarios, reusable expected-state configs, and suite sequences, while keeping all existing E2E workflows unchanged. Adds: - test/e2e/scenarios.yaml, expected-states.yaml, suites.yaml — declarative metadata for initial scenarios (Ubuntu cloud OpenClaw/Hermes, macOS, WSL, GPU local Ollama, Brev launchable, no-Docker negative), expected states, and suite sequences. - test/e2e/resolver/ — TypeScript resolver/validator/plan-printer/coverage report invoked via tsx. Uses js-yaml (already in root package.json). Unit-tested in the Vitest cli project. - test/e2e/lib/{context,env,install,onboard,gateway,sandbox,artifacts, cleanup,emit-context-from-plan}.sh — reusable shell helpers producing a normalized context at $E2E_CONTEXT_DIR (default .e2e/). Wraps existing test/e2e/lib/sandbox-teardown.sh and install-path-refresh.sh. - test/e2e/{run-scenario.sh, run-suites.sh, coverage-report.sh} — entry points: resolve + plan, execute suites in order, emit coverage matrix. --plan-only emits human-readable stdout and stable JSON at $E2E_CONTEXT_DIR/plan.json; --dry-run (E2E_DRY_RUN=1) gates destructive actions. - test/e2e/suites/{smoke,inference,credentials,local-ollama-inference, ollama-proxy,platform-macos,platform-wsl,hermes-specific}/*.sh — suite step scripts. - .github/workflows/e2e-scenarios.yaml — manual (workflow_dispatch) runner accepting a scenario id with optional plan_only and suite_filter inputs. Existing nightly-e2e / macos-e2e / wsl-e2e / ollama-proxy-e2e / e2e-branch-validation / sandbox-images-and-e2e workflows are unchanged. - test/e2e/README.md — documents entrypoints, scenarios, suites, and the plan-only contract. 
Tests (11 new files in the Vitest cli project, 55 scenarios): - e2e-scenario-schema.test.ts, e2e-scenario-resolver.test.ts, e2e-context-helper.test.ts, e2e-lib-helpers.test.ts, e2e-suite-runner.test.ts, e2e-scenario-first-migration.test.ts, e2e-scenarios-workflow.test.ts, e2e-expected-state-validator.test.ts, e2e-scenario-additional-families.test.ts, e2e-coverage-report.test.ts, e2e-metadata-final-hygiene.test.ts. Guards: - Resolver-time and runtime enforcement of suite `requires_state` vs scenario expected_state. - Schema guards rejecting array-form `expected_states` and premature introduction of `overrides` / `preflight-failure-no-sandbox` before their declared first consumers. - Metadata hygiene guard tests (final metadata shape, coverage gaps surfaced in report). Other: - AGENTS.md: note the scenario-based runner under test/e2e/. - .gitignore: add .e2e/ runtime context directory. Scenarios that require off-host infrastructure (cloud secrets, macOS/WSL runners, GPU runner, Brev) are wired via workflow_dispatch and validated in follow-up runs; full retirement of legacy test/e2e/test-*.sh scripts is intentionally deferred and tracked separately. 
Signed-off-by: Julie Yaunches --- .github/workflows/e2e-scenarios.yaml | 84 +++++++ .gitignore | 1 + AGENTS.md | 2 +- test/e2e-context-helper.test.ts | 121 +++++++++ test/e2e-coverage-report.test.ts | 87 +++++++ test/e2e-expected-state-validator.test.ts | 162 ++++++++++++ test/e2e-lib-helpers.test.ts | 121 +++++++++ test/e2e-metadata-final-hygiene.test.ts | 91 +++++++ test/e2e-scenario-additional-families.test.ts | 149 +++++++++++ test/e2e-scenario-first-migration.test.ts | 99 ++++++++ test/e2e-scenario-resolver.test.ts | 232 ++++++++++++++++++ test/e2e-scenario-schema.test.ts | 102 ++++++++ test/e2e-scenarios-workflow.test.ts | 59 +++++ test/e2e-suite-runner.test.ts | 155 ++++++++++++ test/e2e/README.md | 113 +++++++++ test/e2e/coverage-report.sh | 20 ++ test/e2e/expected-states.yaml | 98 ++++++++ test/e2e/lib/artifacts.sh | 50 ++++ test/e2e/lib/cleanup.sh | 29 +++ test/e2e/lib/context.sh | 151 ++++++++++++ test/e2e/lib/emit-context-from-plan.sh | 78 ++++++ test/e2e/lib/env.sh | 36 +++ test/e2e/lib/gateway.sh | 42 ++++ test/e2e/lib/install.sh | 55 +++++ test/e2e/lib/onboard.sh | 60 +++++ test/e2e/lib/sandbox.sh | 36 +++ test/e2e/resolver/coverage.ts | 97 ++++++++ test/e2e/resolver/index.ts | 172 +++++++++++++ test/e2e/resolver/js-yaml.d.ts | 11 + test/e2e/resolver/load.ts | 162 ++++++++++++ test/e2e/resolver/plan.ts | 170 +++++++++++++ test/e2e/resolver/schema.ts | 99 ++++++++ test/e2e/resolver/validator.ts | 123 ++++++++++ test/e2e/run-scenario.sh | 169 +++++++++++++ test/e2e/run-suites.sh | 132 ++++++++++ test/e2e/scenarios.yaml | 184 ++++++++++++++ test/e2e/suites.yaml | 96 ++++++++ .../credentials/00-credentials-present.sh | 28 +++ .../hermes-specific/00-hermes-health.sh | 27 ++ test/e2e/suites/inference/00-models-health.sh | 32 +++ .../suites/inference/01-chat-completion.sh | 33 +++ .../02-inference-local-from-sandbox.sh | 29 +++ .../00-ollama-models-health.sh | 24 ++ .../01-ollama-chat-completion.sh | 26 ++ .../suites/ollama-proxy/00-proxy-reachable.sh | 23 
++ .../suites/platform-macos/00-macos-smoke.sh | 31 +++ test/e2e/suites/platform-wsl/00-wsl-smoke.sh | 29 +++ test/e2e/suites/smoke/00-cli-available.sh | 31 +++ test/e2e/suites/smoke/01-gateway-health.sh | 20 ++ test/e2e/suites/smoke/02-sandbox-listed.sh | 20 ++ test/e2e/suites/smoke/03-sandbox-shell.sh | 32 +++ 51 files changed, 4032 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/e2e-scenarios.yaml create mode 100644 test/e2e-context-helper.test.ts create mode 100644 test/e2e-coverage-report.test.ts create mode 100644 test/e2e-expected-state-validator.test.ts create mode 100644 test/e2e-lib-helpers.test.ts create mode 100644 test/e2e-metadata-final-hygiene.test.ts create mode 100644 test/e2e-scenario-additional-families.test.ts create mode 100644 test/e2e-scenario-first-migration.test.ts create mode 100644 test/e2e-scenario-resolver.test.ts create mode 100644 test/e2e-scenario-schema.test.ts create mode 100644 test/e2e-scenarios-workflow.test.ts create mode 100644 test/e2e-suite-runner.test.ts create mode 100644 test/e2e/README.md create mode 100755 test/e2e/coverage-report.sh create mode 100644 test/e2e/expected-states.yaml create mode 100755 test/e2e/lib/artifacts.sh create mode 100755 test/e2e/lib/cleanup.sh create mode 100755 test/e2e/lib/context.sh create mode 100755 test/e2e/lib/emit-context-from-plan.sh create mode 100755 test/e2e/lib/env.sh create mode 100755 test/e2e/lib/gateway.sh create mode 100755 test/e2e/lib/install.sh create mode 100755 test/e2e/lib/onboard.sh create mode 100755 test/e2e/lib/sandbox.sh create mode 100644 test/e2e/resolver/coverage.ts create mode 100644 test/e2e/resolver/index.ts create mode 100644 test/e2e/resolver/js-yaml.d.ts create mode 100644 test/e2e/resolver/load.ts create mode 100644 test/e2e/resolver/plan.ts create mode 100644 test/e2e/resolver/schema.ts create mode 100644 test/e2e/resolver/validator.ts create mode 100755 test/e2e/run-scenario.sh create mode 100755 test/e2e/run-suites.sh create mode 100644 
test/e2e/scenarios.yaml create mode 100644 test/e2e/suites.yaml create mode 100755 test/e2e/suites/credentials/00-credentials-present.sh create mode 100755 test/e2e/suites/hermes-specific/00-hermes-health.sh create mode 100755 test/e2e/suites/inference/00-models-health.sh create mode 100755 test/e2e/suites/inference/01-chat-completion.sh create mode 100755 test/e2e/suites/inference/02-inference-local-from-sandbox.sh create mode 100755 test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh create mode 100755 test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh create mode 100755 test/e2e/suites/ollama-proxy/00-proxy-reachable.sh create mode 100755 test/e2e/suites/platform-macos/00-macos-smoke.sh create mode 100755 test/e2e/suites/platform-wsl/00-wsl-smoke.sh create mode 100755 test/e2e/suites/smoke/00-cli-available.sh create mode 100755 test/e2e/suites/smoke/01-gateway-health.sh create mode 100755 test/e2e/suites/smoke/02-sandbox-listed.sh create mode 100755 test/e2e/suites/smoke/03-sandbox-shell.sh diff --git a/.github/workflows/e2e-scenarios.yaml b/.github/workflows/e2e-scenarios.yaml new file mode 100644 index 0000000000..32f1175a84 --- /dev/null +++ b/.github/workflows/e2e-scenarios.yaml @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Scenario-based E2E. Runs a single setup scenario by id against the +# matching runner; can also validate resolution / coverage via --plan-only. +# +# Manual-only (workflow_dispatch) while scenario-based coverage migrates. +# Existing nightly-e2e / macos-e2e / wsl-e2e workflows remain unchanged. + +name: e2e-scenarios + +on: + workflow_dispatch: + inputs: + scenario: + description: "Scenario id (e.g. 
ubuntu-repo-cloud-openclaw)" + required: true + type: string + plan_only: + description: "Resolve and print plan only (no install/onboard/suites)" + required: false + default: "false" + type: choice + options: + - "true" + - "false" + suite_filter: + description: "Comma-separated suite ids to run (optional; defaults to the scenario's full suite list)" + required: false + default: "" + type: string + +permissions: + contents: read + +concurrency: + group: e2e-scenarios-${{ github.event.inputs.scenario }} + cancel-in-progress: false + +jobs: + run-scenario: + runs-on: ubuntu-latest + timeout-minutes: 45 + steps: + - uses: actions/checkout@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Render coverage report + run: | + mkdir -p .e2e + bash test/e2e/coverage-report.sh > .e2e/coverage.md + echo '## E2E scenario coverage' >> "$GITHUB_STEP_SUMMARY" + cat .e2e/coverage.md >> "$GITHUB_STEP_SUMMARY" + + - name: Show resolved plan + run: | + bash test/e2e/run-scenario.sh "${{ github.event.inputs.scenario }}" --plan-only + + - name: Run scenario + if: github.event.inputs.plan_only != 'true' + env: + NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + E2E_SUITE_FILTER: ${{ github.event.inputs.suite_filter }} + run: | + bash test/e2e/run-scenario.sh "${{ github.event.inputs.scenario }}" + + - name: Upload scenario artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: e2e-scenario-${{ github.event.inputs.scenario }} + path: | + .e2e/ + test/e2e/logs/ + if-no-files-found: warn + retention-days: 14 diff --git a/.gitignore b/.gitignore index 10836b7127..64a4026f61 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ secrets.json secrets.yaml service-account*.json token.json +.e2e/ diff --git a/AGENTS.md b/AGENTS.md index b259129c8c..655f602918 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -27,7 +27,7 @@ This repo ships agent skills 
under `.agents/skills/`, organized into three audie | `nemoclaw-blueprint/model-specific-setup/` | JSON | Agent-scoped model/provider compatibility registry | | `scripts/` | Bash/JS/TS | Install helpers, setup, automation, E2E tooling | | `test/` | JavaScript (ESM) | Root-level integration tests (Vitest) | -| `test/e2e/` | Bash/JS | End-to-end tests (Brev cloud instances) | +| `test/e2e/` | Bash/JS/TS | End-to-end tests, scenario-based runner (see `test/e2e/README.md`) | | `docs/` | Markdown (MyST) | User-facing docs (Sphinx) | ## Quick Reference diff --git a/test/e2e-context-helper.test.ts b/test/e2e-context-helper.test.ts new file mode 100644 index 0000000000..bac9d19c30 --- /dev/null +++ b/test/e2e-context-helper.test.ts @@ -0,0 +1,121 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync, type SpawnSyncReturns } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const CONTEXT_LIB = path.join(REPO_ROOT, "test/e2e/lib/context.sh"); +const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e/run-scenario.sh"); + +function runBash(script: string, env: Record = {}): SpawnSyncReturns { + return spawnSync("bash", ["-c", script], { + env: { ...process.env, ...env }, + encoding: "utf8", + cwd: REPO_ROOT, + }); +} + +describe("E2E context helper (lib/context.sh)", () => { + it("context_should_write_and_source_values", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-")); + try { + const script = ` + set -euo pipefail + . "${CONTEXT_LIB}" + export E2E_CONTEXT_DIR="${tmp}" + e2e_context_init + e2e_context_set E2E_SCENARIO ubuntu-repo-cloud-openclaw + e2e_context_set E2E_AGENT openclaw + # In a fresh shell, source the context and print the values. + bash -c 'set -euo pipefail; . 
"${tmp}/context.env"; echo "SCENARIO=$E2E_SCENARIO"; echo "AGENT=$E2E_AGENT"' + `; + const r = runBash(script); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toContain("SCENARIO=ubuntu-repo-cloud-openclaw"); + expect(r.stdout).toContain("AGENT=openclaw"); + expect(fs.existsSync(path.join(tmp, "context.env"))).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("context_require_should_fail_for_missing_value", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-")); + try { + const script = ` + set -euo pipefail + . "${CONTEXT_LIB}" + export E2E_CONTEXT_DIR="${tmp}" + e2e_context_init + e2e_context_require E2E_SANDBOX_NAME + `; + const r = runBash(script); + expect(r.status).not.toBe(0); + expect(r.stderr).toMatch(/E2E_SANDBOX_NAME/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("context_dump_should_redact_sensitive_values", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-")); + try { + const script = ` + set -euo pipefail + . 
"${CONTEXT_LIB}" + export E2E_CONTEXT_DIR="${tmp}" + e2e_context_init + e2e_context_set E2E_SCENARIO ubuntu-repo-cloud-openclaw + e2e_context_set NVIDIA_API_KEY super-secret-api-key-value + e2e_context_set OPENAI_API_TOKEN nothing-to-see-here-token + e2e_context_dump + `; + const r = runBash(script); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).not.toContain("super-secret-api-key-value"); + expect(r.stdout).not.toContain("nothing-to-see-here-token"); + expect(r.stdout).toMatch(/NVIDIA_API_KEY=.*REDACTED/); + expect(r.stdout).toContain("ubuntu-repo-cloud-openclaw"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("scenario_plan_execution_should_emit_context_under_dry_run", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-")); + try { + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"], + { + env: { ...process.env, E2E_CONTEXT_DIR: tmp }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(r.status, r.stderr).toBe(0); + const ctxPath = path.join(tmp, "context.env"); + expect(fs.existsSync(ctxPath), `context.env missing in ${tmp}`).toBe(true); + const ctx = fs.readFileSync(ctxPath, "utf8"); + for (const key of [ + "E2E_SCENARIO", + "E2E_PLATFORM_OS", + "E2E_INSTALL_METHOD", + "E2E_ONBOARDING_PATH", + "E2E_AGENT", + "E2E_PROVIDER", + "E2E_SANDBOX_NAME", + "E2E_GATEWAY_URL", + "E2E_INFERENCE_ROUTE", + ]) { + expect(ctx, `${key} missing from context.env`).toMatch(new RegExp(`^${key}=`, "m")); + } + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e-coverage-report.test.ts b/test/e2e-coverage-report.test.ts new file mode 100644 index 0000000000..cccf375ebd --- /dev/null +++ b/test/e2e-coverage-report.test.ts @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import path from "node:path"; + +import { loadMetadataFromDir, loadMetadataFromObjects } from "./e2e/resolver/load.ts"; +import { renderCoverageReport } from "./e2e/resolver/coverage.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const E2E_DIR = path.join(REPO_ROOT, "test/e2e"); + +describe("coverage report", () => { + it("should_render_single_coverage_table", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const md = renderCoverageReport(meta); + // Exactly one primary Scenario Coverage table. + const headers = md.match(/\|\s*Scenario\s*\|\s*Platform\s*\|\s*Install\s*\|\s*Runtime\s*\|\s*Onboarding\s*\|\s*Expected state\s*\|\s*Suites\s*\|/g); + expect(headers).toBeTruthy(); + expect(headers?.length).toBe(1); + // Every scenario should appear as a row. + for (const id of Object.keys(meta.scenarios.setup_scenarios)) { + expect(md).toContain(id); + } + // Rows should be sorted deterministically (alphabetically). + const rowOrder = Object.keys(meta.scenarios.setup_scenarios).sort(); + let pos = 0; + for (const id of rowOrder) { + const idx = md.indexOf(`| ${id} |`, pos); + expect(idx, `row ${id} not found in order. 
report:\n${md}`).toBeGreaterThanOrEqual(0); + pos = idx; + } + }); + + it("should_flag_scenarios_without_suites", () => { + const meta = loadMetadataFromObjects({ + scenarios: { + platforms: { p: {} }, + installs: { i: {} }, + runtimes: { r: {} }, + onboarding: { o: { agent: "openclaw", provider: "nvidia" } }, + setup_scenarios: { + "empty-suite-scenario": { + dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" }, + expected_state: "some-state", + suites: [], + }, + }, + }, + expectedStates: { expected_states: { "some-state": { gateway: { health: "healthy" } } } }, + suites: { suites: {} }, + }); + const md = renderCoverageReport(meta); + expect(md).toMatch(/## Gaps/); + expect(md).toMatch(/empty-suite-scenario.*no suites|no suites.*empty-suite-scenario/s); + }); + + it("should_flag_expected_states_not_used_by_any_scenario", () => { + const meta = loadMetadataFromObjects({ + scenarios: { + platforms: { p: {} }, + installs: { i: {} }, + runtimes: { r: {} }, + onboarding: { o: { agent: "openclaw", provider: "nvidia" } }, + setup_scenarios: { + s1: { + dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" }, + expected_state: "used-state", + suites: ["smoke"], + }, + }, + }, + expectedStates: { + expected_states: { + "used-state": { gateway: { health: "healthy" } }, + "unused-state": { gateway: { health: "healthy" } }, + }, + }, + suites: { + suites: { smoke: { steps: [{ id: "a", script: "suites/smoke/a.sh" }] } }, + }, + }); + const md = renderCoverageReport(meta); + expect(md).toMatch(/## Gaps/); + expect(md).toMatch(/unused-state/); + }); +}); diff --git a/test/e2e-expected-state-validator.test.ts b/test/e2e-expected-state-validator.test.ts new file mode 100644 index 0000000000..0c6fd111e8 --- /dev/null +++ b/test/e2e-expected-state-validator.test.ts @@ -0,0 +1,162 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + validateExpectedState, + type ProbeResults, +} from "./e2e/resolver/validator.ts"; +import type { ExpectedStateConfig, ResolvedSuite } from "./e2e/resolver/schema.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e/run-scenario.sh"); + +function cloudOpenclawReady(): ExpectedStateConfig { + return { + cli: { installed: true }, + gateway: { expected: "present", health: "healthy" }, + sandbox: { expected: "present", status: "running", agent: "openclaw" }, + inference: { + expected: "available", + provider: "nvidia", + route: "inference-local", + mode: "gateway-routed", + }, + credentials: { expected: "present", storage: "gateway-managed" }, + }; +} + +function passingProbes(): ProbeResults { + return { + "cli.installed": true, + "gateway.health": "healthy", + "gateway.expected": "present", + "sandbox.status": "running", + "sandbox.expected": "present", + "sandbox.agent": "openclaw", + "inference.expected": "available", + "inference.provider": "nvidia", + "inference.route": "inference-local", + "inference.mode": "gateway-routed", + "credentials.expected": "present", + "credentials.storage": "gateway-managed", + }; +} + +describe("expected state validator", () => { + it("should_validate_matching_state", () => { + const report = validateExpectedState({ + stateId: "cloud-openclaw-ready", + state: cloudOpenclawReady(), + probes: passingProbes(), + suites: [], + }); + expect(report.ok).toBe(true); + expect(report.checks.every((c) => c.ok)).toBe(true); + }); + + it("should_fail_when_gateway_expected_but_unhealthy", () => { + const probes = passingProbes(); + probes["gateway.health"] = "unhealthy"; + const report = validateExpectedState({ + stateId: "cloud-openclaw-ready", + 
state: cloudOpenclawReady(), + probes, + suites: [], + }); + expect(report.ok).toBe(false); + const failing = report.checks.find((c) => c.key === "gateway.health"); + expect(failing?.ok).toBe(false); + expect(failing?.expected).toBe("healthy"); + expect(failing?.actual).toBe("unhealthy"); + }); + + it("should_fail_when_sandbox_expected_but_absent", () => { + const probes = passingProbes(); + probes["sandbox.status"] = "absent"; + probes["sandbox.expected"] = "absent"; + const report = validateExpectedState({ + stateId: "cloud-openclaw-ready", + state: cloudOpenclawReady(), + probes, + suites: [], + }); + expect(report.ok).toBe(false); + expect(report.checks.some((c) => c.key === "sandbox.status" && !c.ok)).toBe(true); + }); + + it("should_fail_when_suite_requires_state_unmet_at_runtime", () => { + // Expected state claims inference.expected=available, but the probe + // reports unavailable; the smoke suite happens to pass but an inference + // suite's requires_state should trigger a runtime failure before + // execution. + const state = cloudOpenclawReady(); + const probes = passingProbes(); + probes["inference.expected"] = "unavailable"; + const inferenceSuite: ResolvedSuite = { + id: "inference", + requires_state: { "inference.expected": "available" }, + steps: [{ id: "models-health", script: "suites/inference/00-models-health.sh" }], + }; + const report = validateExpectedState({ + stateId: "cloud-openclaw-ready", + state, + probes, + suites: [inferenceSuite], + }); + expect(report.ok).toBe(false); + const msg = report.checks + .filter((c) => !c.ok) + .map((c) => `${c.key}=${c.actual ?? ""} (wanted ${c.expected})`) + .join("; "); + expect(msg).toMatch(/inference\.expected/); + expect(msg).toMatch(/available/); + expect(msg).toMatch(/unavailable/); + // Should also reference the suite that made the requirement. 
+ expect(report.checks.some((c) => c.suite === "inference" && !c.ok)).toBe(true); + }); +}); + +describe("runner_should_not_run_suites_when_expected_state_fails", () => { + it("runs expected-state validation and skips suites on failure", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-es-")); + try { + const trace = path.join(tmp, "trace.log"); + // Simulate gateway-unhealthy probe by setting an override env var. + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"], + { + env: { + ...process.env, + E2E_CONTEXT_DIR: tmp, + E2E_TRACE_FILE: trace, + // validator reads these overrides in dry-run mode to fake probes + E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "unhealthy", + E2E_VALIDATE_EXPECTED_STATE: "1", + }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + // Dry-run execution should now fail because the expected state + // validation runs and sees gateway.health=unhealthy. + expect(r.status).not.toBe(0); + // Validator must run (its report file should exist) but suites must not. + const reportPath = path.join(tmp, "expected-state-report.json"); + expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true); + const report = JSON.parse(fs.readFileSync(reportPath, "utf8")); + expect(report.ok).toBe(false); + expect(report.checks.some((c: { key: string; ok: boolean }) => c.key === "gateway.health" && !c.ok)).toBe(true); + // And the run's failure output should reference expected-state, not suites. + expect(`${r.stdout}${r.stderr}`).toMatch(/expected.state/i); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e-lib-helpers.test.ts b/test/e2e-lib-helpers.test.ts new file mode 100644 index 0000000000..dbb4485b76 --- /dev/null +++ b/test/e2e-lib-helpers.test.ts @@ -0,0 +1,121 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync, type SpawnSyncReturns } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const LIB = path.join(REPO_ROOT, "test/e2e/lib"); +const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e/run-scenario.sh"); + +function runBash(script: string, env: Record = {}): SpawnSyncReturns { + return spawnSync("bash", ["-c", script], { + env: { ...process.env, ...env }, + encoding: "utf8", + cwd: REPO_ROOT, + }); +} + +describe("E2E shell helpers", () => { + it("env_helper_should_set_standard_noninteractive_env", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/env.sh" + e2e_env_apply_noninteractive + echo "NEMOCLAW_NON_INTERACTIVE=\${NEMOCLAW_NON_INTERACTIVE:-}" + echo "DEBIAN_FRONTEND=\${DEBIAN_FRONTEND:-}" + echo "CI=\${CI:-}" + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toContain("NEMOCLAW_NON_INTERACTIVE=1"); + expect(r.stdout).toContain("DEBIAN_FRONTEND=noninteractive"); + }); + + it("artifact_helper_should_collect_known_logs_without_failing_when_missing", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-art-")); + const srcDir = path.join(tmp, "src"); + const dstDir = path.join(tmp, "out"); + fs.mkdirSync(srcDir); + fs.writeFileSync(path.join(srcDir, "present.log"), "hello\n"); + const r = runBash(` + set -euo pipefail + . 
"${LIB}/artifacts.sh" + e2e_artifact_collect_file "${srcDir}/present.log" "${dstDir}/present.log" + e2e_artifact_collect_file "${srcDir}/missing.log" "${dstDir}/missing.log" || true + ls "${dstDir}" + `); + expect(r.status, r.stderr).toBe(0); + expect(fs.existsSync(path.join(dstDir, "present.log"))).toBe(true); + expect(fs.existsSync(path.join(dstDir, "missing.log"))).toBe(false); + expect(r.stderr + r.stdout).toMatch(/missing\.log|not found|skipping/i); + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + it("gateway_helper_should_report_unhealthy_gateway_clearly", () => { + // Pick a port very unlikely to be bound. + const r = runBash(` + set -euo pipefail + . "${LIB}/gateway.sh" + e2e_gateway_assert_healthy "http://127.0.0.1:65531" + `); + expect(r.status).not.toBe(0); + expect(r.stderr).toMatch(/65531|gateway|unhealthy/i); + }); + + it("sandbox_helper_should_fail_for_missing_sandbox_name", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sb-")); + try { + // Initialise a context file without E2E_SANDBOX_NAME. + const r = runBash( + ` + set -euo pipefail + . "${LIB}/context.sh" + . 
"${LIB}/sandbox.sh" + e2e_context_init + e2e_context_set E2E_SCENARIO test + e2e_sandbox_assert_running + `, + { E2E_CONTEXT_DIR: tmp }, + ); + expect(r.status).not.toBe(0); + expect(r.stderr).toMatch(/E2E_SANDBOX_NAME/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("scenario_dry_run_should_trace_helper_sequence_in_order", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-trace-")); + try { + const trace = path.join(tmp, "trace.log"); + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"], + { + env: { + ...process.env, + E2E_CONTEXT_DIR: tmp, + E2E_TRACE_FILE: trace, + }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(r.status, r.stderr).toBe(0); + expect(fs.existsSync(trace), "trace log missing").toBe(true); + const contents = fs.readFileSync(trace, "utf8"); + const order = ["env:noninteractive", "install:", "onboard:", "gateway:check", "sandbox:check"]; + let pos = 0; + for (const marker of order) { + const idx = contents.indexOf(marker, pos); + expect(idx, `trace missing marker in order: ${marker}\nfull:\n${contents}`).toBeGreaterThanOrEqual(0); + pos = idx + marker.length; + } + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e-metadata-final-hygiene.test.ts b/test/e2e-metadata-final-hygiene.test.ts new file mode 100644 index 0000000000..e6b9c01f8b --- /dev/null +++ b/test/e2e-metadata-final-hygiene.test.ts @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Phase 11: Clean the House - final metadata and documentation hygiene. 
+ * + * These tests are intentionally conservative during the incremental + * migration: they guard the README, assert that every suite script + * referenced in suites.yaml exists and is executable, and assert that + * every scenario either has both an expected state and at least one + * suite or is explicitly marked as negative / disabled. + */ + +import { describe, it, expect } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; + +import { loadMetadataFromDir } from "./e2e/resolver/load.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const E2E_DIR = path.join(REPO_ROOT, "test/e2e"); +const README_PATH = path.join(E2E_DIR, "README.md"); + +describe("Phase 11 final hygiene", () => { + it("e2e_readme_should_document_scenario_runner", () => { + expect(fs.existsSync(README_PATH)).toBe(true); + const raw = fs.readFileSync(README_PATH, "utf8"); + // Key developer-facing concepts must be documented. + expect(raw).toMatch(/setup scenario/i); + expect(raw).toMatch(/expected state/i); + expect(raw).toMatch(/suite/i); + expect(raw).toMatch(/run-scenario\.sh/); + expect(raw).toMatch(/run-suites\.sh/); + // Adding-a-scenario guidance must exist. 
+ expect(raw).toMatch(/adding a new setup scenario|how to add/i); + }); + + it("all_suite_scripts_should_exist", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const missing: string[] = []; + for (const [suiteId, suite] of Object.entries(meta.suites.suites)) { + for (const step of suite.steps) { + const p = path.join(E2E_DIR, step.script); + if (!fs.existsSync(p)) { + missing.push(`${suiteId}/${step.id} -> ${step.script}`); + } else { + const mode = fs.statSync(p).mode; + // owner-executable bit must be set + if ((mode & 0o100) === 0) { + missing.push(`${suiteId}/${step.id} -> ${step.script} (not executable)`); + } + } + } + } + expect(missing, `missing/non-executable suite scripts:\n${missing.join("\n")}`).toEqual([]); + }); + + it("all_scenarios_should_have_expected_state_and_suites", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const problems: string[] = []; + for (const [id, sc] of Object.entries(meta.scenarios.setup_scenarios)) { + if (!sc.expected_state) { + problems.push(`${id}: missing expected_state`); + continue; + } + // Negative scenarios (preflight failures) intentionally have no suites. + const state = meta.expectedStates.expected_states[sc.expected_state] as { + failure?: { expected?: boolean }; + }; + const isNegative = state?.failure?.expected === true; + if (!Array.isArray(sc.suites)) { + problems.push(`${id}: suites must be an array`); + continue; + } + if (sc.suites.length === 0 && !isNegative) { + problems.push(`${id}: no suites and not a negative scenario`); + } + } + expect(problems, problems.join("\n")).toEqual([]); + }); + + it("should_not_reference_retired_e2e_entrypoints", () => { + // At this point we have not retired any entrypoints. This guard test + // asserts that `run-scenario.sh` and `run-suites.sh` are the canonical + // new entrypoints documented in the README, so that when old scripts + // are retired in a follow-up, the guard is ready to be tightened. 
+ const raw = fs.readFileSync(README_PATH, "utf8"); + expect(raw).toMatch(/run-scenario\.sh/); + expect(raw).toMatch(/run-suites\.sh/); + }); +}); diff --git a/test/e2e-scenario-additional-families.test.ts b/test/e2e-scenario-additional-families.test.ts new file mode 100644 index 0000000000..f35bfbd050 --- /dev/null +++ b/test/e2e-scenario-additional-families.test.ts @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Phase 9: Migrate Additional Scenario Families. + * Verifies metadata for new scenarios (macOS, WSL, GPU local Ollama, Brev + * launchable, Ubuntu cloud Hermes, and the no-docker negative preflight) + * plus the deferred schema concepts (scenario-level overrides, negative + * expected state). + */ + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { loadMetadataFromDir } from "./e2e/resolver/load.ts"; +import { resolveScenario } from "./e2e/resolver/plan.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const E2E_DIR = path.join(REPO_ROOT, "test/e2e"); +const RUN_SCENARIO = path.join(E2E_DIR, "run-scenario.sh"); + +/** + * Runs `run-scenario.sh <scenarioId> --plan-only` with E2E_CONTEXT_DIR pointed at a + * temporary directory that is removed afterwards. `plan` is the parsed plan.json, + * or an empty object when the runner did not write one. + */ +function planOnly(scenarioId: string): { stdout: string; stderr: string; status: number | null; plan: Record<string, unknown> } { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-p9-")); + try { + const r = spawnSync("bash", [RUN_SCENARIO, scenarioId, "--plan-only"], { + env: { ...process.env, E2E_CONTEXT_DIR: tmp }, + encoding: "utf8", + cwd: REPO_ROOT, + }); + let plan = {}; + const pj = path.join(tmp, "plan.json"); + if (fs.existsSync(pj)) { + plan = JSON.parse(fs.readFileSync(pj, "utf8")); + } + return { stdout: r.stdout, stderr: r.stderr, status: r.status, plan }; + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } +} + +describe("Phase 9: additional 
scenario families - metadata", () => { + it("resolver should resolve all new scenarios", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const ids = [ + "macos-repo-cloud-openclaw", + "wsl-repo-cloud-openclaw", + "gpu-repo-local-ollama-openclaw", + "brev-launchable-cloud-openclaw", + "ubuntu-repo-cloud-hermes", + "ubuntu-no-docker-preflight-negative", + ]; + for (const id of ids) { + const plan = resolveScenario(id, meta); + expect(plan.scenario_id).toBe(id); + expect(plan.expected_state.id).toBeTypeOf("string"); + expect(Array.isArray(plan.suites)).toBe(true); + } + }); +}); + +describe("Phase 9: macOS / WSL plan-only", () => { + it("macos scenario plan identifies macOS platform", () => { + const { status, plan } = planOnly("macos-repo-cloud-openclaw"); + expect(status).toBe(0); + const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions; + expect(dims.platform.profile.os).toBe("macos"); + }); + + it("wsl scenario plan identifies WSL platform", () => { + const { status, plan } = planOnly("wsl-repo-cloud-openclaw"); + expect(status).toBe(0); + const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions; + expect(dims.platform.profile.os).toBe("wsl"); + }); +}); + +describe("Phase 9: GPU local Ollama plan-only", () => { + it("runtime indicates GPU/CDI and provider is ollama", () => { + const { status, plan } = planOnly("gpu-repo-local-ollama-openclaw"); + expect(status).toBe(0); + const dims = (plan as { + dimensions: { + runtime: { profile: { gpu_runtime?: string } }; + onboarding: { profile: { provider?: string } }; + }; + }).dimensions; + expect(dims.runtime.profile.gpu_runtime).toBe("cdi"); + expect(dims.onboarding.profile.provider).toBe("ollama"); + }); +}); + +describe("Phase 9: Brev launchable scenario (overrides schema)", () => { + it("should_support_scenario_overrides_on_brev_launchable", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const plan = 
resolveScenario("brev-launchable-cloud-openclaw", meta); + expect(plan.overrides).toBeTruthy(); + const overrides = plan.overrides as { + onboarding?: { gateway?: { bind_address?: string } }; + }; + expect(overrides?.onboarding?.gateway?.bind_address).toBeTypeOf("string"); + expect(overrides?.onboarding?.gateway?.bind_address?.length).toBeGreaterThan(0); + }); + + it("plan shows remote target, launchable install, and gateway bind override", () => { + const { status, stdout, plan } = planOnly("brev-launchable-cloud-openclaw"); + expect(status).toBe(0); + const dims = (plan as { + dimensions: { + platform: { profile: { execution_target?: string } }; + install: { id: string }; + }; + }).dimensions; + expect(dims.platform.profile.execution_target).toBe("remote"); + expect(dims.install.id).toBe("launchable"); + expect(stdout).toMatch(/Overrides:/); + expect(stdout).toMatch(/bind_address/); + }); +}); + +describe("Phase 9: negative preflight", () => { + it("should_define_preflight_failure_no_sandbox_state", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const es = meta.expectedStates.expected_states["preflight-failure-no-sandbox"] as + | { + gateway?: { expected?: string }; + sandbox?: { expected?: string }; + failure?: { expected?: boolean }; + } + | undefined; + expect(es, "preflight-failure-no-sandbox should be defined").toBeTruthy(); + expect(es?.gateway?.expected).toBe("absent"); + expect(es?.sandbox?.expected).toBe("absent"); + expect(es?.failure?.expected).toBe(true); + }); + + it("negative scenario plan identifies docker missing and negative state", () => { + const { status, plan } = planOnly("ubuntu-no-docker-preflight-negative"); + expect(status).toBe(0); + const p = plan as { + dimensions: { runtime: { profile: { container_daemon?: string } } }; + expected_state: { id: string }; + }; + expect(p.dimensions.runtime.profile.container_daemon).toBe("missing"); + expect(p.expected_state.id).toBe("preflight-failure-no-sandbox"); + }); +}); diff --git 
a/test/e2e-scenario-first-migration.test.ts b/test/e2e-scenario-first-migration.test.ts new file mode 100644 index 0000000000..a295672bcf --- /dev/null +++ b/test/e2e-scenario-first-migration.test.ts @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Phase 6: Migrate First Scenario - ubuntu-repo-cloud-openclaw. + * Verifies resolver output, plan printout, and dry-run phase ordering. + */ + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { loadMetadataFromDir } from "./e2e/resolver/load.ts"; +import { resolveScenario } from "./e2e/resolver/plan.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const E2E_DIR = path.join(REPO_ROOT, "test/e2e"); +const RUN_SCENARIO = path.join(E2E_DIR, "run-scenario.sh"); + +describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => { + it("ubuntu_repo_cloud_openclaw_should_resolve_to_cloud_openclaw_ready", () => { + const meta = loadMetadataFromDir(E2E_DIR); + const plan = resolveScenario("ubuntu-repo-cloud-openclaw", meta); + expect(plan.expected_state.id).toBe("cloud-openclaw-ready"); + const suiteIds = plan.suites.map((s) => s.id); + expect(suiteIds).toContain("smoke"); + expect(suiteIds).toContain("inference"); + }); + + it("ubuntu_repo_cloud_openclaw_plan_should_include_setup_install_onboard", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-")); + try { + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--plan-only"], + { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", cwd: REPO_ROOT }, + ); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/install=repo-current/); + expect(r.stdout).toMatch(/runtime=docker-running/); + 
expect(r.stdout).toMatch(/onboarding=cloud-openclaw/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("ubuntu_repo_cloud_openclaw_dry_run_should_execute_phases_in_order", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-")); + try { + const trace = path.join(tmp, "trace.log"); + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"], + { + env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_TRACE_FILE: trace }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(r.status, r.stderr).toBe(0); + expect(fs.existsSync(trace)).toBe(true); + const contents = fs.readFileSync(trace, "utf8"); + const order = [ + "env:noninteractive", + "install:repo-current", + "onboard:cloud-openclaw", + "gateway:check", + "sandbox:check", + ]; + let pos = 0; + for (const marker of order) { + const idx = contents.indexOf(marker, pos); + expect(idx, `missing marker ${marker}. trace:\n${contents}`).toBeGreaterThanOrEqual(0); + pos = idx + marker.length; + } + // The run should also seed the context and produce plan.json. + expect(fs.existsSync(path.join(tmp, "context.env"))).toBe(true); + expect(fs.existsSync(path.join(tmp, "plan.json"))).toBe(true); + // After dry-run, suite runner should be able to execute the full + // suite sequence against the emitted context. 
+ const suites = spawnSync( + "bash", + [path.join(E2E_DIR, "run-suites.sh"), "smoke", "inference"], + { + env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(suites.status, `suite stderr:${suites.stderr}\nstdout:${suites.stdout}`).toBe(0); + expect(suites.stdout).toMatch(/PASS smoke\/cli-available/); + expect(suites.stdout).toMatch(/PASS inference\/models-health/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e-scenario-resolver.test.ts b/test/e2e-scenario-resolver.test.ts new file mode 100644 index 0000000000..a89bd29606 --- /dev/null +++ b/test/e2e-scenario-resolver.test.ts @@ -0,0 +1,232 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import yaml from "js-yaml"; + +import { resolveScenario, type ResolverInput } from "./e2e/resolver/plan.ts"; +import { loadMetadataFromDir, loadMetadataFromObjects } from "./e2e/resolver/load.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const E2E_DIR = path.join(REPO_ROOT, "test/e2e"); + +function realMetadata(): ResolverInput { + return loadMetadataFromDir(E2E_DIR); +} + +describe("E2E scenario resolver", () => { + it("should_resolve_valid_scenario", () => { + const meta = realMetadata(); + const plan = resolveScenario("ubuntu-repo-cloud-openclaw", meta); + expect(plan.scenario_id).toBe("ubuntu-repo-cloud-openclaw"); + expect(plan.dimensions.platform.id).toBe("ubuntu-local"); + expect(plan.dimensions.install.id).toBe("repo-current"); + expect(plan.dimensions.runtime.id).toBe("docker-running"); + expect(plan.dimensions.onboarding.id).toBe("cloud-openclaw"); + 
expect(plan.expected_state.id).toBe("cloud-openclaw-ready"); + const suiteIds = plan.suites.map((s) => s.id); + expect(suiteIds).toEqual(["smoke", "inference", "credentials"]); + // each suite should carry its ordered steps with resolved scripts + expect(plan.suites[0].steps.length).toBeGreaterThan(0); + for (const s of plan.suites) { + for (const step of s.steps) { + expect(step.id).toBeTypeOf("string"); + expect(step.script).toMatch(/\.sh$/); + } + } + }); + + it("should_fail_for_unknown_scenario", () => { + const meta = realMetadata(); + expect(() => resolveScenario("does-not-exist", meta)).toThrow(/does-not-exist/); + }); + + it("should_fail_for_missing_profile_reference", () => { + const meta = loadMetadataFromObjects({ + scenarios: yaml.load(` +platforms: + ubuntu-local: { os: ubuntu } +installs: + repo-current: { method: repo-checkout } +runtimes: + docker-running: { container_engine: docker } +onboarding: + cloud-openclaw: { path: cloud, agent: openclaw, provider: nvidia } +setup_scenarios: + broken: + dimensions: + platform: missing-platform + install: repo-current + runtime: docker-running + onboarding: cloud-openclaw + expected_state: some-state + suites: [smoke] +`) as object, + expectedStates: yaml.load(` +expected_states: + some-state: + gateway: { health: healthy } + sandbox: { status: running } +`) as object, + suites: yaml.load(` +suites: + smoke: + requires_state: + gateway.health: healthy + sandbox.status: running + steps: + - { id: step, script: suites/smoke/step.sh } +`) as object, + }); + expect(() => resolveScenario("broken", meta)).toThrow(/platform.*missing-platform/); + }); + + it("should_fail_for_missing_expected_state_reference", () => { + const meta = loadMetadataFromObjects({ + scenarios: yaml.load(` +platforms: { p: {} } +installs: { i: {} } +runtimes: { r: {} } +onboarding: { o: { agent: openclaw, provider: nvidia } } +setup_scenarios: + s: + dimensions: { platform: p, install: i, runtime: r, onboarding: o } + expected_state: ghost + 
suites: [smoke] +`) as object, + expectedStates: yaml.load(` +expected_states: + real: { gateway: { health: healthy } } +`) as object, + suites: yaml.load(` +suites: + smoke: + steps: + - { id: step, script: suites/smoke/step.sh } +`) as object, + }); + expect(() => resolveScenario("s", meta)).toThrow(/expected_state.*ghost/); + }); + + it("should_fail_for_missing_suite_reference", () => { + const meta = loadMetadataFromObjects({ + scenarios: yaml.load(` +platforms: { p: {} } +installs: { i: {} } +runtimes: { r: {} } +onboarding: { o: { agent: openclaw, provider: nvidia } } +setup_scenarios: + s: + dimensions: { platform: p, install: i, runtime: r, onboarding: o } + expected_state: real + suites: [smoke, phantom] +`) as object, + expectedStates: yaml.load(` +expected_states: + real: { gateway: { health: healthy } } +`) as object, + suites: yaml.load(` +suites: + smoke: + steps: + - { id: step, script: suites/smoke/step.sh } +`) as object, + }); + expect(() => resolveScenario("s", meta)).toThrow(/suite.*phantom/); + }); + + it("should_fail_when_suite_requires_state_incompatible_with_scenario_expected_state", () => { + const meta = loadMetadataFromObjects({ + scenarios: yaml.load(` +platforms: { p: {} } +installs: { i: {} } +runtimes: { r: {} } +onboarding: { o: { agent: openclaw, provider: nvidia } } +setup_scenarios: + s: + dimensions: { platform: p, install: i, runtime: r, onboarding: o } + expected_state: gw-unhealthy + suites: [smoke] +`) as object, + expectedStates: yaml.load(` +expected_states: + gw-unhealthy: + gateway: { health: unhealthy } + sandbox: { status: running } +`) as object, + suites: yaml.load(` +suites: + smoke: + requires_state: + gateway.health: healthy + steps: + - { id: step, script: suites/smoke/step.sh } +`) as object, + }); + expect(() => resolveScenario("s", meta)).toThrow( + /smoke.*gateway\.health.*healthy.*unhealthy/s, + ); + }); +}); + +describe("run-scenario.sh --plan-only", () => { + it("run_scenario_plan_only_should_print_plan", 
() => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-")); + try { + const result = spawnSync( + "bash", + [ + path.join(E2E_DIR, "run-scenario.sh"), + "ubuntu-repo-cloud-openclaw", + "--plan-only", + ], + { + env: { ...process.env, E2E_CONTEXT_DIR: tmp }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(result.status, result.stderr).toBe(0); + expect(result.stdout).toContain("ubuntu-repo-cloud-openclaw"); + expect(result.stdout).toContain("cloud-openclaw-ready"); + expect(result.stdout).toContain("smoke"); + expect(result.stdout).toContain("inference"); + const planJsonPath = path.join(tmp, "plan.json"); + expect(fs.existsSync(planJsonPath)).toBe(true); + const doc = JSON.parse(fs.readFileSync(planJsonPath, "utf8")); + expect(doc.scenario_id).toBe("ubuntu-repo-cloud-openclaw"); + expect(doc.expected_state.id).toBe("cloud-openclaw-ready"); + expect(Array.isArray(doc.suites)).toBe(true); + expect(doc.suites.map((s: { id: string }) => s.id)).toContain("smoke"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("run_scenario_plan_only_should_fail_for_unknown_scenario", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-")); + try { + const result = spawnSync( + "bash", + [ + path.join(E2E_DIR, "run-scenario.sh"), + "does-not-exist", + "--plan-only", + ], + { + env: { ...process.env, E2E_CONTEXT_DIR: tmp }, + encoding: "utf8", + cwd: REPO_ROOT, + }, + ); + expect(result.status).not.toBe(0); + expect(`${result.stderr}${result.stdout}`).toMatch(/does-not-exist/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e-scenario-schema.test.ts b/test/e2e-scenario-schema.test.ts new file mode 100644 index 0000000000..b7ad015a62 --- /dev/null +++ b/test/e2e-scenario-schema.test.ts @@ -0,0 +1,102 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; +import yaml from "js-yaml"; + +const E2E_DIR = path.join(import.meta.dirname, "e2e"); +const SCENARIOS_PATH = path.join(E2E_DIR, "scenarios.yaml"); +const STATES_PATH = path.join(E2E_DIR, "expected-states.yaml"); +const SUITES_PATH = path.join(E2E_DIR, "suites.yaml"); + +type AnyRecord = Record<string, unknown>; // loosely typed YAML mapping + +function loadYaml(p: string): AnyRecord { // parses the YAML file at p; throws unless the top-level node is a mapping + const raw = fs.readFileSync(p, "utf8"); + const doc = yaml.load(raw); + if (!doc || typeof doc !== "object" || Array.isArray(doc)) { + throw new Error(`YAML file ${p} did not parse to an object`); + } + return doc as AnyRecord; +} + +describe("E2E scenario metadata schema", () => { + it("should_parse_all_metadata_files", () => { + expect(fs.existsSync(SCENARIOS_PATH)).toBe(true); + expect(fs.existsSync(STATES_PATH)).toBe(true); + expect(fs.existsSync(SUITES_PATH)).toBe(true); + expect(() => loadYaml(SCENARIOS_PATH)).not.toThrow(); + expect(() => loadYaml(STATES_PATH)).not.toThrow(); + expect(() => loadYaml(SUITES_PATH)).not.toThrow(); + }); + + it("should_have_required_top_level_sections", () => { + const scenarios = loadYaml(SCENARIOS_PATH); + expect(scenarios).toHaveProperty("platforms"); + expect(scenarios).toHaveProperty("installs"); + expect(scenarios).toHaveProperty("runtimes"); + expect(scenarios).toHaveProperty("onboarding"); + expect(scenarios).toHaveProperty("setup_scenarios"); + + const states = loadYaml(STATES_PATH); + expect(states).toHaveProperty("expected_states"); + + const suites = loadYaml(SUITES_PATH); + expect(suites).toHaveProperty("suites"); + }); + + it("should_define_initial_required_scenarios", () => { + const scenarios = loadYaml(SCENARIOS_PATH); + const setup = scenarios.setup_scenarios as AnyRecord; + expect(setup).toBeTypeOf("object"); + expect(setup).toHaveProperty("ubuntu-repo-cloud-openclaw"); + expect(setup).toHaveProperty("ubuntu-repo-cloud-hermes"); + 
expect(setup).toHaveProperty("gpu-repo-local-ollama-openclaw"); + }); + + it("should_use_singular_expected_state_field", () => { + const scenarios = loadYaml(SCENARIOS_PATH); + const setup = scenarios.setup_scenarios as AnyRecord; + for (const [id, entry] of Object.entries(setup)) { + const s = entry as AnyRecord; + expect(s, `scenario ${id} missing expected_state`).toHaveProperty("expected_state"); + expect(typeof s.expected_state, `scenario ${id}.expected_state must be a string`).toBe( + "string", + ); + expect( + (s as AnyRecord).expected_states, + `scenario ${id} must not have array-style expected_states`, + ).toBeUndefined(); + } + }); + + it("should_define_initial_expected_states", () => { + const states = loadYaml(STATES_PATH); + const es = states.expected_states as AnyRecord; + // Initial three states must exist; Phase 9 adds additional states + // (e.g. preflight-failure-no-sandbox) alongside their first consumer. + for (const id of [ + "cloud-openclaw-ready", + "cloud-hermes-ready", + "local-ollama-openclaw-ready", + ]) { + expect(es, `expected state ${id} should be defined`).toHaveProperty(id); + } + }); + + it("should_define_initial_suites", () => { + const suites = loadYaml(SUITES_PATH); + const s = suites.suites as AnyRecord; + for (const id of [ + "smoke", + "inference", + "credentials", + "local-ollama-inference", + "ollama-proxy", + ]) { + expect(s, `suite ${id} should be defined`).toHaveProperty(id); + } + }); +}); diff --git a/test/e2e-scenarios-workflow.test.ts b/test/e2e-scenarios-workflow.test.ts new file mode 100644 index 0000000000..e06b44f4d8 --- /dev/null +++ b/test/e2e-scenarios-workflow.test.ts @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; +import yaml from "js-yaml"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const WORKFLOW_PATH = path.join(REPO_ROOT, ".github/workflows/e2e-scenarios.yaml"); + +type AnyRecord = Record<string, unknown>; // loosely typed YAML mapping + +function loadWorkflow(): AnyRecord { // asserts the workflow file exists, then parses it with js-yaml + expect(fs.existsSync(WORKFLOW_PATH), `workflow missing at ${WORKFLOW_PATH}`).toBe(true); + const raw = fs.readFileSync(WORKFLOW_PATH, "utf8"); + return yaml.load(raw) as AnyRecord; +} + +describe("e2e-scenarios workflow", () => { + it("e2e_scenarios_workflow_should_have_dispatch_inputs", () => { + const wf = loadWorkflow(); + // YAML `on:` parses as the literal key "true" in some parsers — handle both. + const on = (wf.on ?? wf[true as unknown as string]) as AnyRecord | undefined; + expect(on, "workflow missing 'on' trigger").toBeTruthy(); + const dispatch = on?.workflow_dispatch as AnyRecord | undefined; + expect(dispatch, "workflow missing workflow_dispatch").toBeTruthy(); + const inputs = dispatch?.inputs as AnyRecord | undefined; + expect(inputs).toBeTruthy(); + expect(inputs).toHaveProperty("scenario"); + expect(inputs).toHaveProperty("plan_only"); + expect(inputs).toHaveProperty("suite_filter"); + }); + + it("e2e_scenarios_workflow_should_call_run_scenario", () => { + const raw = fs.readFileSync(WORKFLOW_PATH, "utf8"); + expect(raw).toMatch(/test\/e2e\/run-scenario\.sh/); + }); + + it("e2e_scenarios_workflow_should_upload_artifacts", () => { + const raw = fs.readFileSync(WORKFLOW_PATH, "utf8"); + expect(raw).toMatch(/actions\/upload-artifact/); + // Artifact name should be scenario-scoped. + expect(raw).toMatch(/e2e-scenario-.*\$\{\{\s*(?:inputs|github\.event\.inputs)\.scenario\s*\}\}/); + // Uploads .e2e/ artifacts. + expect(raw).toMatch(/\.e2e\//); + }); + + it("e2e_scenarios_workflow_should_be_manual_only", () => { + const wf = loadWorkflow(); + const on = (wf.on ?? 
wf[true as unknown as string]) as AnyRecord | undefined; + expect(on).toBeTruthy(); + const keys = Object.keys(on ?? {}); + // Manual-only: must not trigger on push, pull_request, or schedule. + expect(keys).not.toContain("push"); + expect(keys).not.toContain("pull_request"); + expect(keys).not.toContain("schedule"); + }); +}); diff --git a/test/e2e-suite-runner.test.ts b/test/e2e-suite-runner.test.ts new file mode 100644 index 0000000000..c4611893fd --- /dev/null +++ b/test/e2e-suite-runner.test.ts @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +import { spawnSync, type SpawnSyncReturns } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const RUN_SUITES = path.join(REPO_ROOT, "test/e2e/run-suites.sh"); + +/** Invokes run-suites.sh through bash with the given arguments; `env` entries are layered over process.env. */ +function runSuites(args: string[], env: Record<string, string> = {}): SpawnSyncReturns<string> { + return spawnSync("bash", [RUN_SUITES, ...args], { + env: { ...process.env, ...env }, + encoding: "utf8", + cwd: REPO_ROOT, + }); +} + +/** Writes `values` as KEY=value lines to context.env inside `tmp`, creating the directory if needed. */ +function seedContext(tmp: string, values: Record<string, string>): void { + fs.mkdirSync(tmp, { recursive: true }); + const ctx = Object.entries(values) + .map(([k, v]) => `${k}=${v}`) + .join("\n"); + fs.writeFileSync(path.join(tmp, "context.env"), `${ctx}\n`); +} + +/** Stub context values for the ubuntu-repo-cloud-openclaw scenario, used to seed context.env in these tests. */ +function fullContext(): Record<string, string> { + return { + E2E_SCENARIO: "ubuntu-repo-cloud-openclaw", + E2E_PLATFORM_OS: "ubuntu", + E2E_EXECUTION_TARGET: "local", + E2E_INSTALL_METHOD: "repo-checkout", + E2E_CONTAINER_ENGINE: "docker", + E2E_CONTAINER_DAEMON: "running", + E2E_ONBOARDING_PATH: "cloud", + E2E_AGENT: "openclaw", + E2E_PROVIDER: "nvidia", + E2E_SANDBOX_NAME: "e2e-ubuntu-repo-cloud-openclaw", + E2E_GATEWAY_URL: "http://127.0.0.1:18789", + E2E_INFERENCE_ROUTE: "inference-local", + }; +} + +describe("run-suites.sh", () => { + 
it("run_suites_should_run_steps_in_declared_order", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-")); + try { + seedContext(tmp, fullContext()); + const r = runSuites(["smoke"], { + E2E_CONTEXT_DIR: tmp, + E2E_DRY_RUN: "1", + }); + expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0); + // Smoke order is: cli-available, gateway-health, sandbox-listed, sandbox-shell + const order = ["cli-available", "gateway-health", "sandbox-listed", "sandbox-shell"]; + let pos = 0; + for (const marker of order) { + const idx = r.stdout.indexOf(marker, pos); + expect(idx, `missing marker ${marker} after ${pos} in:\n${r.stdout}`).toBeGreaterThanOrEqual(0); + pos = idx + marker.length; + } + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("run_suites_should_fail_on_unknown_suite", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-")); + try { + seedContext(tmp, fullContext()); + const r = runSuites(["does-not-exist"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" }); + expect(r.status).not.toBe(0); + expect(`${r.stdout}${r.stderr}`).toMatch(/does-not-exist/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("run_suites_should_stop_on_first_failed_step", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-")); + try { + seedContext(tmp, fullContext()); + // Use a fixture suites file with a failing middle step. 
+ const fixtureSuites = path.join(tmp, "suites.yaml"); + const fixtureDir = path.join(tmp, "suites", "fixture"); + fs.mkdirSync(fixtureDir, { recursive: true }); + fs.writeFileSync(path.join(fixtureDir, "00-a.sh"), "#!/usr/bin/env bash\necho A-RAN\nexit 0\n"); + fs.writeFileSync(path.join(fixtureDir, "01-b.sh"), "#!/usr/bin/env bash\necho B-RAN\nexit 1\n"); + fs.writeFileSync(path.join(fixtureDir, "02-c.sh"), "#!/usr/bin/env bash\necho C-RAN\nexit 0\n"); + fs.chmodSync(path.join(fixtureDir, "00-a.sh"), 0o755); + fs.chmodSync(path.join(fixtureDir, "01-b.sh"), 0o755); + fs.chmodSync(path.join(fixtureDir, "02-c.sh"), 0o755); + fs.writeFileSync( + fixtureSuites, + `suites: + fixture: + steps: + - { id: a, script: suites/fixture/00-a.sh } + - { id: b, script: suites/fixture/01-b.sh } + - { id: c, script: suites/fixture/02-c.sh } +`, + ); + const r = runSuites(["fixture"], { + E2E_CONTEXT_DIR: tmp, + E2E_SUITES_FILE: fixtureSuites, + E2E_SUITES_DIR: tmp, + }); + expect(r.status).not.toBe(0); + expect(r.stdout).toContain("A-RAN"); + expect(r.stdout).toContain("B-RAN"); + expect(r.stdout).not.toContain("C-RAN"); + expect(`${r.stdout}${r.stderr}`).toMatch(/FAIL.*(fixture\/b|step=b)/i); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("smoke_suite_should_require_context", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-")); + try { + // No context.env written to tmp. 
+ const r = runSuites(["smoke"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" }); + expect(r.status).not.toBe(0); + expect(`${r.stderr}${r.stdout}`).toMatch(/context\.env|E2E_SCENARIO|missing/i); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("smoke_and_inference_run_with_stub_context", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-")); + try { + seedContext(tmp, fullContext()); + const r = runSuites(["smoke", "inference"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" }); + expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0); + for (const id of [ + "cli-available", + "gateway-health", + "sandbox-listed", + "sandbox-shell", + "models-health", + "chat-completion", + "sandbox-inference-local", + ]) { + expect(r.stdout).toContain(id); + } + // Summary should call out PASS for each step. + expect(r.stdout).toMatch(/PASS/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); diff --git a/test/e2e/README.md b/test/e2e/README.md new file mode 100644 index 0000000000..ae3d4a6ef1 --- /dev/null +++ b/test/e2e/README.md @@ -0,0 +1,113 @@ + + + +# E2E Setup Scenario Matrix + +This directory hosts NemoClaw's end-to-end tests, organized around **setup +scenarios** rather than per-workflow shell scripts. + +## Core model + +```text +setup scenario → expected state config → suite sequence +``` + +- A **setup scenario** describes how a user reaches a completed NemoClaw + environment: platform, install method, runtime prerequisites, and + onboarding choices. Defined in [`scenarios.yaml`](scenarios.yaml). +- An **expected state config** describes the observable contract the + completed environment should satisfy. Defined in + [`expected-states.yaml`](expected-states.yaml). Multiple scenarios can + share one expected state. +- A **functional suite** is an ordered list of validation scripts run + after setup completes and the expected state validates. 
Defined in + [`suites.yaml`](suites.yaml). Suites consume `.e2e/context.env` and do + not re-run install or onboarding. + +The runner resolves a scenario, prints a plan, runs setup/install/ +onboarding once, validates the expected state, and then runs the scenario's +ordered suites against the resulting environment. + +## Sparse matrix + +The initial matrix is deliberately sparse — three scenarios covering three +common setup paths: + +| Scenario | Platform | Install | Runtime | Onboarding | Expected state | +|---|---|---|---|---|---| +| `ubuntu-repo-cloud-openclaw` | `ubuntu-local` | `repo-current` | `docker-running` | `cloud-openclaw` | `cloud-openclaw-ready` | +| `ubuntu-repo-cloud-hermes` | `ubuntu-local` | `repo-current` | `docker-running` | `cloud-hermes` | `cloud-hermes-ready` | +| `gpu-repo-local-ollama-openclaw` | `gpu-runner` | `repo-current` | `gpu-docker-cdi` | `local-ollama-openclaw` | `local-ollama-openclaw-ready` | + +Additional scenarios (macOS, WSL, Brev/launchable, DGX Spark, negative +preflight) are migrated incrementally in later phases. The matrix is not +meant to be Cartesian — each scenario should exist because a real current +coverage path needs it. + +## Files + +```text +test/e2e/ + scenarios.yaml # platforms, installs, runtimes, onboarding, scenarios + expected-states.yaml # reusable expected state contracts + suites.yaml # ordered suite definitions + README.md # this file +``` + +Runner scripts live alongside the metadata: + +- `run-scenario.sh <scenario-id> [--plan-only|--dry-run]` resolves a scenario, + prints the plan, writes `${E2E_CONTEXT_DIR:-.e2e}/plan.json`, and (in + non-plan-only mode) drives setup → install → onboard → gateway check + → sandbox check → expected-state validation. In `--dry-run` mode each + helper short-circuits and emits a trace line to `E2E_TRACE_FILE` if + set — useful for integration tests and for reviewing scenario wiring. 
+- `run-suites.sh <suite-id>...` reads `.e2e/context.env` and runs one + or more suites' ordered step scripts, failing fast on the first + non-zero step and printing a PASS/FAIL summary. +- `coverage-report.sh` prints a Markdown coverage report. The + `e2e-scenarios` workflow appends the same report to + `GITHUB_STEP_SUMMARY`. + +The TypeScript resolver lives under `resolver/` and is invoked via +`tsx resolver/index.ts {plan|validate-state|coverage}`. Shell wrappers +call it so runners and CI need only `bash`. + +Overriding the artifact directory: set `E2E_CONTEXT_DIR=<dir>` so local +runs and tests do not clobber the repo-root `.e2e/`. The directory is +gitignored. + +## Adding a new setup scenario + +1. Pick (or add) profiles for platform, install, runtime, and onboarding + in `scenarios.yaml`. Reuse existing profiles when possible. +2. Add a scenario entry under `setup_scenarios:` with a kebab-case ID that + encodes the distinguishing dimensions. +3. Reference exactly one `expected_state` (singular; string key). +4. List the `suites` to run, in execution order. +5. If an appropriate expected state does not exist, add one to + `expected-states.yaml`. Keep keys structural, not behavioral. +6. If an appropriate suite does not exist, add one to `suites.yaml` and + land its scripts under `suites/<suite-id>/`. Suites must consume + `.e2e/context.env`, not rediscover scenario state. +7. Validate references with `bash test/e2e/run-scenario.sh <scenario-id> --plan-only` + (the resolver rejects unknown profile, expected-state, and suite references). + +## Adding a new expected state + +Add a new key under `expected_states:` in `expected-states.yaml`. Use +structural keys (e.g. `gateway.health`, `sandbox.status`, `inference.route`) +that suites can reference via `requires_state`. Negative / preflight states +are introduced only when a concrete scenario consumes them. + +## Adding a new suite + +Add a new key under `suites:` in `suites.yaml`: + +- `requires_state`: dotted paths into an expected state that must be + satisfied for the suite to run. 
+- `steps`: ordered list of `{ id, script }` entries with paths relative to + this directory. + +Keep suites narrowly scoped and idempotent. Suites must not install, +onboard, or otherwise mutate setup state. diff --git a/test/e2e/coverage-report.sh b/test/e2e/coverage-report.sh new file mode 100755 index 0000000000..f4ef473302 --- /dev/null +++ b/test/e2e/coverage-report.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Render the E2E scenario coverage report as Markdown to stdout. +# +# Usage: +# bash test/e2e/coverage-report.sh > coverage.md + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx" +if [[ -x "${TSX_BIN}" ]]; then + "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" coverage +else + (cd "${REPO_ROOT}" && npx --yes tsx "${SCRIPT_DIR}/resolver/index.ts" coverage) +fi diff --git a/test/e2e/expected-states.yaml b/test/e2e/expected-states.yaml new file mode 100644 index 0000000000..eed1ee994a --- /dev/null +++ b/test/e2e/expected-states.yaml @@ -0,0 +1,98 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Expected state configs. +# +# Each entry describes the observable contract that must be true after +# setup/install/onboarding completes for a given scenario. Expected states +# are reusable: multiple setup scenarios can resolve to the same expected +# state when they produce the same completed environment. +# +# Schema keys are intentionally small and structural. Deeper behavior lives +# in suites; expected states answer "is the environment in the shape we +# expect?" not "does every feature still work?". +# +# Negative/preflight expected states (e.g. 
`preflight-failure-no-sandbox`) +# are introduced in Phase 9 alongside their first consuming scenario. + +expected_states: + cloud-openclaw-ready: + cli: + installed: true + gateway: + expected: present + health: healthy + sandbox: + expected: present + status: running + agent: openclaw + inference: + expected: available + provider: nvidia + route: inference-local + mode: gateway-routed + credentials: + expected: present + storage: gateway-managed + security: + policy_engine: supported + shields: supported + + cloud-hermes-ready: + cli: + installed: true + gateway: + expected: present + health: healthy + sandbox: + expected: present + status: running + agent: hermes + inference: + expected: available + provider: nvidia + route: inference-local + mode: gateway-routed + credentials: + expected: present + storage: gateway-managed + security: + policy_engine: supported + shields: supported + + local-ollama-openclaw-ready: + cli: + installed: true + gateway: + expected: present + health: healthy + sandbox: + expected: present + status: running + agent: openclaw + inference: + expected: available + provider: ollama + route: inference-local + mode: gateway-routed + credentials: + expected: present + storage: gateway-managed + security: + policy_engine: supported + shields: supported + + # Negative preflight state. Introduced alongside its first consumer, + # `ubuntu-no-docker-preflight-negative` (deferred from Phase 1). + # Setup is expected to fail, and the runner must confirm that no + # gateway or sandbox ghost state was left behind. + preflight-failure-no-sandbox: + cli: + installed: true + gateway: + expected: absent + sandbox: + expected: absent + failure: + expected: true + stage: preflight diff --git a/test/e2e/lib/artifacts.sh b/test/e2e/lib/artifacts.sh new file mode 100755 index 0000000000..761e618d0a --- /dev/null +++ b/test/e2e/lib/artifacts.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Artifact collection helpers. Designed to be called from failure traps. +# All helpers are best-effort: missing sources are logged but do not abort. + +_E2E_ART_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# e2e_artifact_collect_file +# Copies a single file. Returns 0 on success or when src is missing. +e2e_artifact_collect_file() { + local src="${1:-}" + local dst="${2:-}" + if [[ -z "${src}" || -z "${dst}" ]]; then + echo "e2e_artifact_collect_file: missing src or dst" >&2 + return 2 + fi + if [[ ! -f "${src}" ]]; then + echo "e2e_artifact_collect_file: ${src} not found, skipping" >&2 + return 0 + fi + mkdir -p "$(dirname "${dst}")" + cp -f "${src}" "${dst}" +} + +# e2e_artifact_collect_dir +# Recursively copies a directory. No-op if missing. +e2e_artifact_collect_dir() { + local src="${1:-}" + local dst="${2:-}" + if [[ ! -d "${src}" ]]; then + echo "e2e_artifact_collect_dir: ${src} not found, skipping" >&2 + return 0 + fi + mkdir -p "${dst}" + cp -rf "${src}/." "${dst}/" +} + +# e2e_artifact_preserve_exit +# Intended for failure traps. Collects artifacts (caller-defined function +# `_e2e_collect_artifacts` if present) but always returns the provided exit +# code so it can be passed to `exit`. +e2e_artifact_preserve_exit() { + local rc="${1:-1}" + if declare -F _e2e_collect_artifacts >/dev/null 2>&1; then + _e2e_collect_artifacts || true + fi + return "${rc}" +} diff --git a/test/e2e/lib/cleanup.sh b/test/e2e/lib/cleanup.sh new file mode 100755 index 0000000000..8581e3c9e0 --- /dev/null +++ b/test/e2e/lib/cleanup.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Cleanup helpers. Wraps the existing sandbox-teardown.sh so scenario code +# gets a single, discoverable entrypoint. 
+ +_E2E_CLEAN_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# shellcheck source=sandbox-teardown.sh +. "${_E2E_CLEAN_LIB_DIR}/sandbox-teardown.sh" +# shellcheck source=context.sh +. "${_E2E_CLEAN_LIB_DIR}/context.sh" +# shellcheck source=env.sh +. "${_E2E_CLEAN_LIB_DIR}/env.sh" + +# e2e_cleanup_register_sandbox [name] +# Default to E2E_SANDBOX_NAME from context. +e2e_cleanup_register_sandbox() { + local name="${1:-}" + if [[ -z "${name}" ]]; then + name="$(e2e_context_get E2E_SANDBOX_NAME)" + fi + if [[ -z "${name}" ]]; then + echo "e2e_cleanup_register_sandbox: no sandbox name to register" >&2 + return 0 + fi + register_sandbox_for_teardown "${name}" +} diff --git a/test/e2e/lib/context.sh b/test/e2e/lib/context.sh new file mode 100755 index 0000000000..5160226e27 --- /dev/null +++ b/test/e2e/lib/context.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Normalized E2E context helper. +# +# Each scenario produces a `.e2e/context.env` file with normalized key/value +# pairs describing the completed environment. Downstream suites, expected- +# state validators, and artifact collection source this file instead of +# rediscovering scenario state. +# +# Standard keys (set by the scenario runner): +# E2E_SCENARIO scenario id +# E2E_PLATFORM_OS ubuntu|macos|wsl|... +# E2E_EXECUTION_TARGET local|remote +# E2E_INSTALL_METHOD repo-checkout|curl-install-script|... +# E2E_ONBOARDING_PATH cloud|local +# E2E_AGENT openclaw|hermes +# E2E_PROVIDER nvidia|ollama|openai-compatible +# E2E_SANDBOX_NAME unique sandbox identifier +# E2E_GATEWAY_URL gateway base URL +# E2E_CONTAINER_ENGINE docker +# E2E_CONTAINER_DAEMON running|missing +# E2E_INFERENCE_ROUTE inference-local|... +# +# Usage: +# . 
"$(dirname "${BASH_SOURCE[0]}")/lib/context.sh" +# e2e_context_init +# e2e_context_set E2E_SCENARIO ubuntu-repo-cloud-openclaw +# e2e_context_require E2E_SANDBOX_NAME +# e2e_context_dump + +# Resolve and export E2E_CONTEXT_DIR. If not set, default to /.e2e +_e2e_context_resolve_dir() { + if [[ -n "${E2E_CONTEXT_DIR:-}" ]]; then + return 0 + fi + local script_dir repo_root + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + repo_root="$(cd "${script_dir}/../../.." && pwd)" + export E2E_CONTEXT_DIR="${repo_root}/.e2e" +} + +e2e_context_init() { + _e2e_context_resolve_dir + mkdir -p "${E2E_CONTEXT_DIR}" + : >"${E2E_CONTEXT_DIR}/context.env" +} + +e2e_context_path() { + _e2e_context_resolve_dir + printf '%s\n' "${E2E_CONTEXT_DIR}/context.env" +} + +# e2e_context_set KEY VALUE +# Appends or updates a single key in context.env. Value is written literally; +# callers are responsible for not embedding newlines. +e2e_context_set() { + local key="${1:-}" + local value="${2:-}" + if [[ -z "${key}" ]]; then + echo "e2e_context_set: missing key" >&2 + return 2 + fi + _e2e_context_resolve_dir + local ctx="${E2E_CONTEXT_DIR}/context.env" + if [[ ! -f "${ctx}" ]]; then + mkdir -p "${E2E_CONTEXT_DIR}" + : >"${ctx}" + fi + # Remove any existing assignment for this key, then append. + local tmp + tmp="$(mktemp)" + grep -v "^${key}=" "${ctx}" >"${tmp}" || true + mv "${tmp}" "${ctx}" + printf '%s=%s\n' "${key}" "${value}" >>"${ctx}" +} + +# e2e_context_get KEY +# Prints the value of KEY (empty if missing). Does not fail. +e2e_context_get() { + local key="${1:-}" + _e2e_context_resolve_dir + local ctx="${E2E_CONTEXT_DIR}/context.env" + [[ -f "${ctx}" ]] || return 0 + local line + line="$(grep "^${key}=" "${ctx}" | tail -n1 || true)" + printf '%s' "${line#"${key}"=}" +} + +# e2e_context_require KEY [KEY ...] +# Exits non-zero if any required key is missing or empty. 
+e2e_context_require() { + _e2e_context_resolve_dir + local ctx="${E2E_CONTEXT_DIR}/context.env" + local missing=() + local key value + for key in "$@"; do + if [[ -f "${ctx}" ]]; then + value="$(grep "^${key}=" "${ctx}" | tail -n1 || true)" + value="${value#"${key}"=}" + else + value="" + fi + if [[ -z "${value}" ]]; then + missing+=("${key}") + fi + done + if ((${#missing[@]} > 0)); then + printf 'e2e context: missing required key(s): %s\n' "${missing[*]}" >&2 + printf 'e2e context: expected in %s\n' "${ctx}" >&2 + return 1 + fi +} + +# Internal: decide whether a key's value should be redacted. +_e2e_context_is_sensitive_key() { + local key="$1" + case "$key" in + *TOKEN* | *SECRET* | *PASSWORD* | *API_KEY* | *APIKEY* | *CREDENTIAL* | *PRIVATE*) + return 0 + ;; + *) + return 1 + ;; + esac +} + +# e2e_context_dump +# Print the context to stdout with sensitive values redacted. Safe to use in +# CI logs and artifact bundles. +e2e_context_dump() { + _e2e_context_resolve_dir + local ctx="${E2E_CONTEXT_DIR}/context.env" + if [[ ! -f "${ctx}" ]]; then + echo "e2e context: no context.env at ${ctx}" >&2 + return 1 + fi + echo "# E2E context (${ctx})" + local key rest + while IFS= read -r line || [[ -n "${line}" ]]; do + [[ -z "${line}" ]] && continue + key="${line%%=*}" + rest="${line#*=}" + if _e2e_context_is_sensitive_key "${key}"; then + printf '%s=%s\n' "${key}" "REDACTED" + else + printf '%s=%s\n' "${key}" "${rest}" + fi + done <"${ctx}" +} diff --git a/test/e2e/lib/emit-context-from-plan.sh b/test/e2e/lib/emit-context-from-plan.sh new file mode 100755 index 0000000000..268fa382f5 --- /dev/null +++ b/test/e2e/lib/emit-context-from-plan.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Emit a normalized .e2e/context.env from a resolved plan.json. 
+# +# Usage: +# test/e2e/lib/emit-context-from-plan.sh <plan.json> +# +# The script reads the plan with an inline `node -e` snippet (CommonJS `require`) so +# it doesn't depend on jq being available on every runner. It then calls +# lib/context.sh helpers to set keys. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=context.sh +. "${SCRIPT_DIR}/context.sh" + +PLAN_JSON="${1:-}" +if [[ -z "${PLAN_JSON}" || ! -f "${PLAN_JSON}" ]]; then + echo "emit-context-from-plan: plan.json not found: ${PLAN_JSON}" >&2 + exit 2 +fi + +# Extract a dotted-path field from the plan with node (already required by the resolver); prints an empty string when a segment is null or undefined. +read_plan_value() { + local key="$1" + node -e " + const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8')); + const parts = process.argv[2].split('.'); + let cur = p; + for (const part of parts) { + if (cur == null) { cur = ''; break; } + cur = cur[part]; + } + process.stdout.write(cur == null ? '' : String(cur)); + " "${PLAN_JSON}" "${key}" +} + +SCENARIO_ID="$(read_plan_value scenario_id)" +PLATFORM_OS="$(read_plan_value dimensions.platform.profile.os)" +EXECUTION_TARGET="$(read_plan_value dimensions.platform.profile.execution_target)" +INSTALL_METHOD="$(read_plan_value dimensions.install.profile.method)" +RUNTIME_ENGINE="$(read_plan_value dimensions.runtime.profile.container_engine)" +RUNTIME_DAEMON="$(read_plan_value dimensions.runtime.profile.container_daemon)" +ONBOARDING_PATH="$(read_plan_value dimensions.onboarding.profile.path)" +AGENT="$(read_plan_value dimensions.onboarding.profile.agent)" +PROVIDER="$(read_plan_value dimensions.onboarding.profile.provider)" +INFERENCE_ROUTE="$(read_plan_value dimensions.onboarding.profile.inference_route)" + +: "${PLATFORM_OS:=unknown}" +: "${EXECUTION_TARGET:=local}" +: "${INSTALL_METHOD:=unknown}" +: "${RUNTIME_ENGINE:=docker}" +: "${RUNTIME_DAEMON:=unknown}" +: "${ONBOARDING_PATH:=unknown}" +: "${AGENT:=unknown}" +: "${PROVIDER:=unknown}" +: "${INFERENCE_ROUTE:=inference-local}" +
+e2e_context_set E2E_SCENARIO "${SCENARIO_ID}" +e2e_context_set E2E_PLATFORM_OS "${PLATFORM_OS}" +e2e_context_set E2E_EXECUTION_TARGET "${EXECUTION_TARGET}" +e2e_context_set E2E_INSTALL_METHOD "${INSTALL_METHOD}" +e2e_context_set E2E_CONTAINER_ENGINE "${RUNTIME_ENGINE}" +e2e_context_set E2E_CONTAINER_DAEMON "${RUNTIME_DAEMON}" +e2e_context_set E2E_ONBOARDING_PATH "${ONBOARDING_PATH}" +e2e_context_set E2E_AGENT "${AGENT}" +e2e_context_set E2E_PROVIDER "${PROVIDER}" +e2e_context_set E2E_INFERENCE_ROUTE "${INFERENCE_ROUTE}" + +# Sandbox name and gateway URL are normally discovered/assigned by +# onboarding. Seed them here so dry-run consumers can exercise the suite +# plumbing without live onboarding. Real onboarding helpers will overwrite +# these via e2e_context_set in later phases. +e2e_context_set E2E_SANDBOX_NAME "e2e-${SCENARIO_ID}" +e2e_context_set E2E_GATEWAY_URL "http://127.0.0.1:18789" diff --git a/test/e2e/lib/env.sh b/test/e2e/lib/env.sh new file mode 100755 index 0000000000..1318221b1e --- /dev/null +++ b/test/e2e/lib/env.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Standardized non-interactive environment for E2E runs. +# +# Applies the same defaults historically set ad-hoc at the top of each +# `test/e2e/test-*.sh` script. Safe to source from any scenario runner. + +e2e_env_apply_noninteractive() { + export NEMOCLAW_NON_INTERACTIVE=1 + export DEBIAN_FRONTEND=noninteractive + export NEMOCLAW_ACCEPT_THIRD_PARTY_TERMS=1 + export NEMOCLAW_ACCEPT_LICENSES=1 + export NEMOCLAW_DISABLE_UPDATE_CHECK=1 + # CI is usually already set, but ensure downstream tools see it. + export CI="${CI:-1}" +} + +# e2e_env_trace [note ...] +# Append a trace line to $E2E_TRACE_FILE if set. Used by dry-run paths so +# tests can verify that helpers were invoked in the expected order without +# running real commands. 
+e2e_env_trace() { + local event="${1:-}" + shift || true + if [[ -n "${E2E_TRACE_FILE:-}" ]]; then + mkdir -p "$(dirname "${E2E_TRACE_FILE}")" + printf '%s %s\n' "${event}" "$*" >>"${E2E_TRACE_FILE}" + fi +} + +# e2e_env_is_dry_run: true if E2E_DRY_RUN=1 +e2e_env_is_dry_run() { + [[ "${E2E_DRY_RUN:-0}" == "1" ]] +} diff --git a/test/e2e/lib/gateway.sh b/test/e2e/lib/gateway.sh new file mode 100755 index 0000000000..a101e3ffff --- /dev/null +++ b/test/e2e/lib/gateway.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Gateway helpers. + +_E2E_GW_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=env.sh +. "${_E2E_GW_LIB_DIR}/env.sh" +# shellcheck source=context.sh +. "${_E2E_GW_LIB_DIR}/context.sh" + +# e2e_gateway_assert_healthy [url] +# Defaults to E2E_GATEWAY_URL from context; returns non-zero with a clear +# error if the gateway is unreachable / unhealthy. +e2e_gateway_assert_healthy() { + local url="${1:-}" + if [[ -z "${url}" ]]; then + url="$(e2e_context_get E2E_GATEWAY_URL)" + fi + if [[ -z "${url}" ]]; then + echo "e2e_gateway_assert_healthy: no URL provided and E2E_GATEWAY_URL is unset" >&2 + return 2 + fi + e2e_env_trace "gateway:check" "${url}" + if e2e_env_is_dry_run; then + echo "[dry-run] gateway check ${url} (skipped)" + return 0 + fi + # Prefer /health if available, otherwise just hit the base URL. 
+ local http_code # curl -w prints the status itself (000 when no response), even when -f makes curl exit non-zero + http_code="$(curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 "${url%/}/health" 2>/dev/null || true)" + if [[ "${http_code}" == "200" ]]; then + return 0 + fi + http_code="$(curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 "${url}" 2>/dev/null || true)" + if [[ "${http_code}" == "200" || "${http_code}" == "204" ]]; then + return 0 + fi + echo "e2e_gateway_assert_healthy: gateway at ${url} is unreachable or unhealthy (last http_code=${http_code:-000})" >&2 + return 1 +} diff --git a/test/e2e/lib/install.sh b/test/e2e/lib/install.sh new file mode 100755 index 0000000000..8adbc70596 --- /dev/null +++ b/test/e2e/lib/install.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Install helper: exposes a single `e2e_install` entrypoint that dispatches +# by install method and honours E2E_DRY_RUN. + +_E2E_INSTALL_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# shellcheck source=env.sh +. "${_E2E_INSTALL_LIB_DIR}/env.sh" +# Reuse the existing PATH-refresh helper to avoid duplicating its logic. +# shellcheck source=install-path-refresh.sh +.
"${_E2E_INSTALL_LIB_DIR}/install-path-refresh.sh" + +e2e_install() { + local method="${1:-}" + if [[ -z "${method}" ]]; then + echo "e2e_install: missing install method" >&2 + return 2 + fi + e2e_env_trace "install:${method}" + if e2e_env_is_dry_run; then + # dry-run: announce and skip real side effects + echo "[dry-run] install method=${method} (skipped)" + return 0 + fi + case "${method}" in + repo-checkout | repo-current) + e2e_install_from_repo_checkout + ;; + curl-install-script | public-installer) + e2e_install_from_public_curl + ;; + *) + echo "e2e_install: unsupported install method: ${method}" >&2 + return 2 + ;; + esac + nemoclaw_refresh_install_env +} + +e2e_install_from_repo_checkout() { + local repo_root + repo_root="$(cd "${_E2E_INSTALL_LIB_DIR}/../../.." && pwd)" + ( + cd "${repo_root}" || exit + npm install + npm link + ) +} + +e2e_install_from_public_curl() { + curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh | bash +} diff --git a/test/e2e/lib/onboard.sh b/test/e2e/lib/onboard.sh new file mode 100755 index 0000000000..0b3bd63e2c --- /dev/null +++ b/test/e2e/lib/onboard.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Onboard helper. Dispatches by onboarding profile id and honors dry-run. + +_E2E_ONBOARD_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=env.sh +. "${_E2E_ONBOARD_LIB_DIR}/env.sh" +# shellcheck source=context.sh +. 
"${_E2E_ONBOARD_LIB_DIR}/context.sh" + +e2e_onboard() { + local profile="${1:-}" + if [[ -z "${profile}" ]]; then + echo "e2e_onboard: missing onboarding profile id" >&2 + return 2 + fi + e2e_env_trace "onboard:${profile}" + if e2e_env_is_dry_run; then + echo "[dry-run] onboard profile=${profile} (skipped)" + return 0 + fi + case "${profile}" in + cloud-openclaw) + e2e_onboard_cloud_openclaw + ;; + cloud-hermes) + e2e_onboard_cloud_hermes + ;; + local-ollama-openclaw) + e2e_onboard_local_ollama_openclaw + ;; + *) + echo "e2e_onboard: unsupported onboarding profile: ${profile}" >&2 + return 2 + ;; + esac +} + +e2e_onboard_cloud_openclaw() { + local sandbox_name + sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)" + : "${sandbox_name:=e2e-cloud-openclaw}" + nemoclaw onboard --agent openclaw --provider nvidia --sandbox "${sandbox_name}" --yes +} + +e2e_onboard_cloud_hermes() { + local sandbox_name + sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)" + : "${sandbox_name:=e2e-cloud-hermes}" + nemoclaw onboard --agent hermes --provider nvidia --sandbox "${sandbox_name}" --yes +} + +e2e_onboard_local_ollama_openclaw() { + local sandbox_name + sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)" + : "${sandbox_name:=e2e-local-ollama-openclaw}" + nemoclaw onboard --agent openclaw --provider ollama --sandbox "${sandbox_name}" --yes +} diff --git a/test/e2e/lib/sandbox.sh b/test/e2e/lib/sandbox.sh new file mode 100755 index 0000000000..52ffbb934c --- /dev/null +++ b/test/e2e/lib/sandbox.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Sandbox helpers. + +_E2E_SB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=env.sh +. "${_E2E_SB_LIB_DIR}/env.sh" +# shellcheck source=context.sh +. "${_E2E_SB_LIB_DIR}/context.sh" + +# e2e_sandbox_assert_running +# Requires E2E_SANDBOX_NAME in context. 
Real implementation queries +# `nemoclaw list` and matches the name as a whole whitespace-delimited word; honors E2E_DRY_RUN. +e2e_sandbox_assert_running() { + if ! e2e_context_require E2E_SANDBOX_NAME; then + return 1 + fi + local name + name="$(e2e_context_get E2E_SANDBOX_NAME)" + e2e_env_trace "sandbox:check" "${name}" + if e2e_env_is_dry_run; then + echo "[dry-run] sandbox check ${name} (skipped)" + return 0 + fi + if ! command -v nemoclaw >/dev/null 2>&1; then + echo "e2e_sandbox_assert_running: nemoclaw CLI not on PATH" >&2 + return 1 + fi + if ! nemoclaw list 2>/dev/null | grep -q -E "(^|[[:space:]])${name}([[:space:]]|\$)"; then + echo "e2e_sandbox_assert_running: sandbox '${name}' not found in 'nemoclaw list'" >&2 + return 1 + fi + return 0 +} diff --git a/test/e2e/resolver/coverage.ts b/test/e2e/resolver/coverage.ts new file mode 100644 index 0000000000..3553d038bb --- /dev/null +++ b/test/e2e/resolver/coverage.ts @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Render a Markdown coverage report for E2E setup scenarios. + * + * Design (per the simplify pass): one primary table, one row per scenario. + * A `## Gaps` section flags scenarios without suites and expected states + * that no scenario references. Rows are sorted deterministically for + * stable CI diffs. + */ + +import type { ResolverInput } from "./load.ts"; + +export interface CoverageReportOptions { + /** Optional map of scenario id -> last known run status.
*/ + lastRunStatus?: Record<string, string>; +} + +export function renderCoverageReport( + meta: ResolverInput, + options: CoverageReportOptions = {}, +): string { + const { scenarios, expectedStates } = meta; + const scenarioIds = Object.keys(scenarios.setup_scenarios).sort(); + const lines: string[] = []; + lines.push("# E2E Setup Scenario Coverage"); + lines.push(""); + lines.push( + "_Generated from `test/e2e/{scenarios,expected-states,suites}.yaml`._", + ); + lines.push(""); + lines.push("## Scenarios"); + lines.push(""); + const hasStatus = options.lastRunStatus && Object.keys(options.lastRunStatus).length > 0; + const header = hasStatus + ? "| Scenario | Platform | Install | Runtime | Onboarding | Expected state | Suites | Last run |" + : "| Scenario | Platform | Install | Runtime | Onboarding | Expected state | Suites |"; + const sep = hasStatus + ? "|---|---|---|---|---|---|---|---|" + : "|---|---|---|---|---|---|---|"; + lines.push(header); + lines.push(sep); + for (const id of scenarioIds) { + const sc = scenarios.setup_scenarios[id]; + const suiteCell = sc.suites.length === 0 ? "_(none)_" : sc.suites.join(", "); + const row = [ + id, + sc.dimensions.platform, + sc.dimensions.install, + sc.dimensions.runtime, + sc.dimensions.onboarding, + sc.expected_state, + suiteCell, + ]; + if (hasStatus) { + row.push(options.lastRunStatus?.[id] ?? "_unknown_"); + } + lines.push(`| ${row.join(" | ")} |`); + } + lines.push(""); + + // Gaps section.
+ const scenariosWithoutSuites = scenarioIds.filter( + (id) => scenarios.setup_scenarios[id].suites.length === 0, + ); + const referencedStates = new Set( + scenarioIds.map((id) => scenarios.setup_scenarios[id].expected_state), + ); + const unusedStates = Object.keys(expectedStates.expected_states) + .filter((s) => !referencedStates.has(s)) + .sort(); + + lines.push("## Gaps"); + lines.push(""); + if (scenariosWithoutSuites.length === 0 && unusedStates.length === 0) { + lines.push("_No gaps detected._"); + } else { + if (scenariosWithoutSuites.length > 0) { + lines.push("### Scenarios with no suites"); + lines.push(""); + for (const id of scenariosWithoutSuites.sort()) { + lines.push(`- \`${id}\`: no suites configured`); + } + lines.push(""); + } + if (unusedStates.length > 0) { + lines.push("### Unused expected states"); + lines.push(""); + for (const id of unusedStates) { + lines.push(`- \`${id}\`: no scenario references this expected state`); + } + lines.push(""); + } + } + return lines.join("\n"); +} diff --git a/test/e2e/resolver/index.ts b/test/e2e/resolver/index.ts new file mode 100644 index 0000000000..e79d2932bb --- /dev/null +++ b/test/e2e/resolver/index.ts @@ -0,0 +1,172 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * CLI entrypoint for the E2E scenario resolver. + * + * Usage: + * tsx test/e2e/resolver/index.ts plan [--context-dir ] + * + * Writes `plan.json` under the context dir (default `.e2e/`) and prints a + * human-readable plan to stdout. Exits non-zero on any resolution error. 
+ */ + +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { loadMetadataFromDir } from "./load.ts"; +import { resolveScenario, formatPlan } from "./plan.ts"; +import { + validateExpectedState, + formatReport, + type ProbeResults, + type ProbeValue, +} from "./validator.ts"; +import { renderCoverageReport } from "./coverage.ts"; + +function parseArgs(argv: string[]): { + command: string; + scenarioId?: string; + contextDir: string; + metadataDir: string; +} { + const args = argv.slice(2); + const command = args.shift() ?? ""; + let scenarioId: string | undefined; + let contextDir = process.env.E2E_CONTEXT_DIR ?? ".e2e"; + const scriptDir = path.dirname(fileURLToPath(import.meta.url)); + // resolver/ lives under test/e2e/, so metadata dir is one level up. + let metadataDir = path.resolve(scriptDir, ".."); + while (args.length > 0) { + const a = args.shift(); + if (a === "--context-dir") { + const v = args.shift(); + if (!v) throw new Error("--context-dir requires a value"); + contextDir = v; + } else if (a === "--metadata-dir") { + const v = args.shift(); + if (!v) throw new Error("--metadata-dir requires a value"); + metadataDir = v; + } else if (a && !a.startsWith("-") && !scenarioId) { + scenarioId = a; // first positional token; "-h" must not be taken as an id + } else if (a === "--help" || a === "-h") { + // ignore; help handled by caller + } else if (a) { + throw new Error(`unexpected argument: ${a}`); + } + } + return { command, scenarioId, contextDir, metadataDir }; +} + +function main(): number { + let parsed: ReturnType<typeof parseArgs>; + try { + parsed = parseArgs(process.argv); + } catch (err) { + process.stderr.write(`resolver: ${(err as Error).message}\n`); + return 2; + } + const { command, scenarioId, contextDir, metadataDir } = parsed; + if (command === "coverage") { + try { + const meta = loadMetadataFromDir(metadataDir); + const md = renderCoverageReport(meta); + process.stdout.write(`${md}\n`); + return 0; + } catch (err) { + process.stderr.write(`resolver:
${(err as Error).message}\n`); + return 1; + } + } + if (!scenarioId) { + process.stderr.write("resolver: missing scenario id\n"); + return 2; + } + try { + const meta = loadMetadataFromDir(metadataDir); + const plan = resolveScenario(scenarioId, meta); + if (command === "plan") { + fs.mkdirSync(contextDir, { recursive: true }); + const planJsonPath = path.join(contextDir, "plan.json"); + fs.writeFileSync(planJsonPath, `${JSON.stringify(plan, null, 2)}\n`); + process.stdout.write(`${formatPlan(plan)}\n`); + process.stdout.write(`plan.json: ${planJsonPath}\n`); + return 0; + } + if (command === "validate-state") { + const probes = probesFromEnvAndState(plan.expected_state.config); + const report = validateExpectedState({ + stateId: plan.expected_state.id, + state: plan.expected_state.config, + probes, + suites: plan.suites, + }); + fs.mkdirSync(contextDir, { recursive: true }); + const reportPath = path.join(contextDir, "expected-state-report.json"); + fs.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}\n`); + process.stdout.write(`${formatReport(report)}\n`); + process.stdout.write(`expected-state-report: ${reportPath}\n`); + return report.ok ? 0 : 3; + } + process.stderr.write( + `resolver: unknown command '${command}' (expected: plan|validate-state <scenario-id>)\n`, + ); + return 2; + } catch (err) { + process.stderr.write(`resolver: ${(err as Error).message}\n`); + return 1; + } +} + +function flattenState( + obj: unknown, + prefix: string, + out: Record<string, ProbeValue>, +): void { + if (obj === null || typeof obj !== "object") { + out[prefix] = obj as ProbeValue; + return; + } + for (const [k, v] of Object.entries(obj as Record<string, unknown>)) { + const next = prefix ? `${prefix}.${k}` : k; + if (v !== null && typeof v === "object" && !Array.isArray(v)) { + flattenState(v, next, out); + } else { + out[next] = v as ProbeValue; + } + } +} + +/** + * Build a probe results map.
+ * + * In dry-run mode we do not probe real services; instead we default every + * expected-state leaf to its declared value so the validator passes, and + * then allow targeted overrides via E2E_PROBE_OVERRIDE_<KEY>=value, where <KEY> is the dotted key upper-cased with dots written as underscores. This + * lets tests simulate specific failure modes without spinning up a real + * gateway or sandbox. + */ +function probesFromEnvAndState(state: unknown): ProbeResults { + const probes: ProbeResults = {}; + flattenState(state, "", probes); + const prefix = "E2E_PROBE_OVERRIDE_"; + // Declared keys may themselves contain underscores (e.g. security.policy_engine), so match them before falling back to "_" -> ".". + const declared = new Map(Object.keys(probes).map((k): [string, string] => [k.replace(/\./g, "_").toLowerCase(), k])); + for (const [envKey, value] of Object.entries(process.env)) { + if (!envKey.startsWith(prefix) || value === undefined) continue; + const name = envKey.slice(prefix.length).toLowerCase(); + const key = declared.get(name) ?? name.replace(/_/g, "."); + probes[key] = coerceProbeValue(value); + } + return probes; +} + +function coerceProbeValue(v: string): ProbeValue { + if (v === "true") return true; + if (v === "false") return false; + if (/^-?\d+$/.test(v)) return parseInt(v, 10); + return v; +} + +process.exit(main()); diff --git a/test/e2e/resolver/js-yaml.d.ts b/test/e2e/resolver/js-yaml.d.ts new file mode 100644 index 0000000000..6ea52a82de --- /dev/null +++ b/test/e2e/resolver/js-yaml.d.ts @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Local type shim for js-yaml. The runtime package ships without +// TypeScript declarations; we only use `load` for YAML parsing. +declare module "js-yaml" { + export function load(input: string): unknown; + export function dump(obj: unknown, opts?: Record<string, unknown>): string; + const _default: { load: typeof load; dump: typeof dump }; + export default _default; +} diff --git a/test/e2e/resolver/load.ts b/test/e2e/resolver/load.ts new file mode 100644 index 0000000000..d287235de2 --- /dev/null +++ b/test/e2e/resolver/load.ts @@ -0,0 +1,162 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 + +/** + * Load and lightly-validate the E2E metadata files. + * + * The full reference check happens in `plan.ts` during scenario resolution. + * This module only asserts that each file exists and has the required + * top-level sections so callers get a clear error before touching scenarios. + */ + +import fs from "node:fs"; +import path from "node:path"; +import yaml from "js-yaml"; + +import type { + ScenariosFile, + ExpectedStatesFile, + SuitesFile, +} from "./schema.ts"; + +export interface ResolverInput { + scenarios: ScenariosFile; + expectedStates: ExpectedStatesFile; + suites: SuitesFile; + /** Optional source dir, used for resolving suite script paths. */ + sourceDir?: string; +} + +function readYaml(p: string): unknown { + const raw = fs.readFileSync(p, "utf8"); + return yaml.load(raw); +} + +function ensureObject(doc: unknown, file: string): Record { + if (!doc || typeof doc !== "object" || Array.isArray(doc)) { + throw new Error(`metadata file ${file} must parse to a YAML mapping`); + } + return doc as Record; +} + +function requireSections( + doc: Record, + file: string, + sections: string[], +): void { + for (const s of sections) { + if (!(s in doc)) { + throw new Error(`metadata file ${file} is missing required section: ${s}`); + } + } +} + +function validateScenarios(doc: Record, file: string): ScenariosFile { + requireSections(doc, file, [ + "platforms", + "installs", + "runtimes", + "onboarding", + "setup_scenarios", + ]); + const setup = doc.setup_scenarios as Record; + for (const [id, entry] of Object.entries(setup)) { + if (!entry || typeof entry !== "object") { + throw new Error(`scenario ${id} must be a mapping`); + } + const e = entry as Record; + if ("expected_states" in e) { + throw new Error( + `scenario ${id} uses array-form 'expected_states'; use singular 'expected_state'`, + ); + } + if (typeof e.expected_state !== "string") { + throw new Error(`scenario ${id} must declare a string 
'expected_state'`); + } + if (!Array.isArray(e.suites)) { + throw new Error(`scenario ${id} must declare a list of 'suites'`); + } + const dims = e.dimensions as Record | undefined; + if (!dims) { + throw new Error(`scenario ${id} must declare 'dimensions'`); + } + for (const key of ["platform", "install", "runtime", "onboarding"]) { + if (typeof dims[key] !== "string") { + throw new Error(`scenario ${id}.dimensions.${key} must be a string`); + } + } + } + return doc as unknown as ScenariosFile; +} + +/** + * Check that the expected-states document has an `expected_states` section + * and that the section is a mapping. An empty YAML section parses to null, + * which would otherwise only fail later, at scenario resolution, with an + * opaque TypeError. + */ +function validateExpectedStates( + doc: Record, + file: string, +): ExpectedStatesFile { + requireSections(doc, file, ["expected_states"]); + const states = doc.expected_states; + if (!states || typeof states !== "object" || Array.isArray(states)) { + throw new Error(`metadata file ${file} section 'expected_states' must be a mapping`); + } + return doc as unknown as ExpectedStatesFile; +} + +/** + * Check that the suites document has a `suites` mapping and that every suite + * declares a `steps` array whose entries are mappings with a string `id` and + * a string `script`. Throws an Error describing the first violation found. + */ +function validateSuites(doc: Record, file: string): SuitesFile { + requireSections(doc, file, ["suites"]); + const suites = doc.suites as Record; + /* Guard before Object.entries: a null section would throw a bare TypeError. */ + if (!suites || typeof suites !== "object" || Array.isArray(suites)) { + throw new Error(`metadata file ${file} section 'suites' must be a mapping`); + } + for (const [id, entry] of Object.entries(suites)) { + if (!entry || typeof entry !== "object") { + throw new Error(`suite ${id} must be a mapping`); + } + const e = entry as Record; + if (!Array.isArray(e.steps)) { + throw new Error(`suite ${id} must declare a 'steps' array`); + } + for (const step of e.steps) { + if (!step || typeof step !== "object") { + throw new Error(`suite ${id} has a non-mapping step`); + } + const s = step as Record; + if (typeof s.id !== "string" || typeof s.script !== "string") { + throw new Error(`suite ${id} has an invalid step (requires string id and script)`); + } + } + } + return doc as unknown as SuitesFile; +} + +export function loadMetadataFromDir(dir: string): ResolverInput { + const scenariosPath = path.join(dir, "scenarios.yaml"); + const statesPath = path.join(dir, "expected-states.yaml"); + const suitesPath = path.join(dir, "suites.yaml"); + const scenarios = validateScenarios( + ensureObject(readYaml(scenariosPath), scenariosPath), + scenariosPath, + ); + const expectedStates = validateExpectedStates( + ensureObject(readYaml(statesPath), statesPath), + statesPath, + ); + const suites = 
validateSuites( + ensureObject(readYaml(suitesPath), suitesPath), + suitesPath, + ); + return { scenarios, expectedStates, suites, sourceDir: dir }; +} + +export function loadMetadataFromObjects(input: { + scenarios: object; + expectedStates: object; + suites: object; + sourceDir?: string; +}): ResolverInput { + const scenarios = validateScenarios( + ensureObject(input.scenarios, ""), + "", + ); + const expectedStates = validateExpectedStates( + ensureObject(input.expectedStates, ""), + "", + ); + const suites = validateSuites( + ensureObject(input.suites, ""), + "", + ); + return { scenarios, expectedStates, suites, sourceDir: input.sourceDir }; +} diff --git a/test/e2e/resolver/plan.ts b/test/e2e/resolver/plan.ts new file mode 100644 index 0000000000..e3473f1ee1 --- /dev/null +++ b/test/e2e/resolver/plan.ts @@ -0,0 +1,170 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Resolve a setup scenario into a concrete, fully-referenced execution plan. + * + * The resolver: + * 1. looks up the scenario by id, + * 2. resolves each dimension profile, + * 3. resolves the expected state, + * 4. resolves each suite definition, + * 5. validates each suite's `requires_state` against the scenario's expected + * state (fail-fast if any key is missing or has an incompatible value). + * + * The resulting `ResolvedPlan` is serializable to JSON and forms the basis of + * the `.e2e/plan.json` artifact and the human-readable plan printout. 
+ */ + +import type { ResolverInput } from "./load.ts"; +import type { + ResolvedPlan, + ResolvedSuite, + SuiteDefinition, + ExpectedStateConfig, +} from "./schema.ts"; + +export type { ResolverInput } from "./load.ts"; +export type { ResolvedPlan } from "./schema.ts"; + +/** + * Return the profile stored under `name` in one dimension collection. + * + * Only own keys count: a bare `in` test also matches names inherited from + * Object.prototype (for example "constructor" or "toString"), which would + * hand back a built-in function instead of reporting an unknown reference. + * + * Throws an Error naming the scenario, the dimension kind and the available + * profile ids when `name` is not defined in the collection. + */ +function lookupProfile( + collection: Record, + kind: string, + name: string, + scenarioId: string, +): T { + if (!Object.prototype.hasOwnProperty.call(collection, name)) { + const available = Object.keys(collection).sort().join(", "); + throw new Error( + `scenario '${scenarioId}' references unknown ${kind} '${name}' (available: ${available || ""})`, + ); + } + return collection[name] as T; +} + +/** + * Walk `obj` along a dot-separated key path and return the value found, or + * undefined as soon as a segment is missing or the current value is not an + * object. Only own properties are followed, so inherited members such as + * "constructor" are treated as missing. + */ +function getByDottedPath(obj: unknown, dotted: string): unknown { + const parts = dotted.split("."); + let cur: unknown = obj; + for (const p of parts) { + if (cur === null || cur === undefined || typeof cur !== "object") { + return undefined; + } + cur = Object.prototype.hasOwnProperty.call(cur, p) + ? (cur as Record)[p] + : undefined; + } + return cur; +} + +function validateSuiteAgainstState( + suiteId: string, + suite: SuiteDefinition, + state: ExpectedStateConfig, + scenarioId: string, +): void { + const requires = suite.requires_state ?? 
{}; + for (const [key, expected] of Object.entries(requires)) { + const actual = getByDottedPath(state, key); + if (actual === undefined) { + throw new Error( + `scenario '${scenarioId}' selects suite '${suiteId}' which requires state key '${key}=${String(expected)}', but the expected state has no value at '${key}'`, + ); + } + if (actual !== expected) { + throw new Error( + `scenario '${scenarioId}' selects suite '${suiteId}' which requires '${key}=${String(expected)}', but the scenario's expected state has '${key}=${String(actual)}'`, + ); + } + } +} + +export function resolveScenario(scenarioId: string, meta: ResolverInput): ResolvedPlan { + const scenarios = meta.scenarios.setup_scenarios; + if (!(scenarioId in scenarios)) { + const available = Object.keys(scenarios).sort().join(", "); + throw new Error( + `unknown scenario '${scenarioId}' (available: ${available || ""})`, + ); + } + const sc = scenarios[scenarioId]; + const platform = lookupProfile( + meta.scenarios.platforms, + "platform", + sc.dimensions.platform, + scenarioId, + ); + const install = lookupProfile( + meta.scenarios.installs, + "install", + sc.dimensions.install, + scenarioId, + ); + const runtime = lookupProfile( + meta.scenarios.runtimes, + "runtime", + sc.dimensions.runtime, + scenarioId, + ); + const onboarding = lookupProfile( + meta.scenarios.onboarding, + "onboarding", + sc.dimensions.onboarding, + scenarioId, + ); + if (!(sc.expected_state in meta.expectedStates.expected_states)) { + const available = Object.keys(meta.expectedStates.expected_states).sort().join(", "); + throw new Error( + `scenario '${scenarioId}' references unknown expected_state '${sc.expected_state}' (available: ${available || ""})`, + ); + } + const stateConfig = meta.expectedStates.expected_states[sc.expected_state]; + const resolvedSuites: ResolvedSuite[] = []; + for (const suiteId of sc.suites) { + if (!(suiteId in meta.suites.suites)) { + const available = Object.keys(meta.suites.suites).sort().join(", "); + 
throw new Error( + `scenario '${scenarioId}' references unknown suite '${suiteId}' (available: ${available || ""})`, + ); + } + const def = meta.suites.suites[suiteId]; + validateSuiteAgainstState(suiteId, def, stateConfig, scenarioId); + resolvedSuites.push({ + id: suiteId, + requires_state: def.requires_state ?? {}, + steps: def.steps.map((s) => ({ id: s.id, script: s.script })), + }); + } + return { + scenario_id: scenarioId, + dimensions: { + platform: { id: sc.dimensions.platform, profile: platform }, + install: { id: sc.dimensions.install, profile: install }, + runtime: { id: sc.dimensions.runtime, profile: runtime }, + onboarding: { id: sc.dimensions.onboarding, profile: onboarding }, + }, + expected_state: { id: sc.expected_state, config: stateConfig }, + suites: resolvedSuites, + overrides: sc.overrides, + }; +} + +export function formatPlan(plan: ResolvedPlan): string { + const lines: string[] = []; + lines.push(`Scenario: ${plan.scenario_id}`); + lines.push("Dimensions:"); + lines.push(` platform=${plan.dimensions.platform.id}`); + lines.push(` install=${plan.dimensions.install.id}`); + lines.push(` runtime=${plan.dimensions.runtime.id}`); + lines.push(` onboarding=${plan.dimensions.onboarding.id}`); + lines.push(`Expected state: ${plan.expected_state.id}`); + lines.push("Suites:"); + for (const s of plan.suites) { + lines.push(` - ${s.id}`); + for (const step of s.steps) { + lines.push(` * ${step.id} (${step.script})`); + } + } + if (plan.overrides) { + lines.push("Overrides:"); + lines.push(` ${JSON.stringify(plan.overrides)}`); + } + return lines.join("\n"); +} diff --git a/test/e2e/resolver/schema.ts b/test/e2e/resolver/schema.ts new file mode 100644 index 0000000000..26ec7e5aef --- /dev/null +++ b/test/e2e/resolver/schema.ts @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Types for the E2E scenario metadata schema. 
+ * + * These mirror the shape of `scenarios.yaml`, `expected-states.yaml`, and + * `suites.yaml`. The resolver validates unknown references and returns a + * normalized `ResolvedPlan` suitable for the shell runner and JSON artifact. + */ + +export type AnyRecord = Record; + +export interface PlatformProfile extends AnyRecord { + os?: string; + execution_target?: string; +} +export type InstallProfile = AnyRecord; +export type RuntimeProfile = AnyRecord; +export interface OnboardingProfile extends AnyRecord { + path?: string; + agent?: string; + provider?: string; + inference_route?: string; +} + +export interface SetupScenario { + dimensions: { + platform: string; + install: string; + runtime: string; + onboarding: string; + }; + expected_state: string; + suites: string[]; + overrides?: AnyRecord; + /** + * Guard: the legacy array form `expected_states: [...]` must not reappear. + * If present, the loader fails. + */ + expected_states?: never; +} + +export interface ScenariosFile { + platforms: Record; + installs: Record; + runtimes: Record; + onboarding: Record; + setup_scenarios: Record; +} + +export type ExpectedStateConfig = AnyRecord; + +export interface ExpectedStatesFile { + expected_states: Record; +} + +export interface SuiteStep { + id: string; + script: string; +} + +export interface SuiteDefinition { + requires_state?: Record; + steps: SuiteStep[]; +} + +export interface SuitesFile { + suites: Record; +} + +export interface ResolvedDimension { + id: string; + profile: T; +} + +export interface ResolvedSuite { + id: string; + requires_state: Record; + steps: SuiteStep[]; +} + +export interface ResolvedExpectedState { + id: string; + config: ExpectedStateConfig; +} + +export interface ResolvedPlan { + scenario_id: string; + dimensions: { + platform: ResolvedDimension; + install: ResolvedDimension; + runtime: ResolvedDimension; + onboarding: ResolvedDimension; + }; + expected_state: ResolvedExpectedState; + suites: ResolvedSuite[]; + overrides?: 
AnyRecord; +} diff --git a/test/e2e/resolver/validator.ts b/test/e2e/resolver/validator.ts new file mode 100644 index 0000000000..7d91306e3b --- /dev/null +++ b/test/e2e/resolver/validator.ts @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Expected-state validator. + * + * Walks the expected state tree and compares each leaf to a probe result. + * Also validates per-suite `requires_state` entries at runtime, producing a + * single report whose `ok` field drives whether the runner proceeds to + * execute suites. + */ + +import type { ExpectedStateConfig, ResolvedSuite } from "./schema.ts"; + +export type ProbeValue = string | number | boolean | null; +export type ProbeResults = Record; + +export interface ValidatorInput { + stateId: string; + state: ExpectedStateConfig; + probes: ProbeResults; + suites: ResolvedSuite[]; +} + +export interface ValidatorCheck { + key: string; + expected: ProbeValue; + actual: ProbeValue | undefined; + ok: boolean; + origin: "state" | "suite"; + suite?: string; + message?: string; +} + +export interface ValidatorReport { + state_id: string; + ok: boolean; + checks: ValidatorCheck[]; +} + +function flatten( + obj: unknown, + prefix: string, + out: Record, +): void { + if (obj === null || typeof obj !== "object") { + out[prefix] = obj as ProbeValue; + return; + } + for (const [k, v] of Object.entries(obj as Record)) { + const next = prefix ? 
`${prefix}.${k}` : k; + if (v !== null && typeof v === "object" && !Array.isArray(v)) { + flatten(v, next, out); + } else { + out[next] = v as ProbeValue; + } + } +} + +function compare( + key: string, + expected: ProbeValue, + actual: ProbeValue | undefined, +): boolean { + if (actual === undefined) return false; + return expected === actual; +} + +export function validateExpectedState(input: ValidatorInput): ValidatorReport { + const checks: ValidatorCheck[] = []; + const flat: Record = {}; + flatten(input.state, "", flat); + + for (const [key, expected] of Object.entries(flat)) { + const actual = input.probes[key]; + const ok = compare(key, expected, actual); + checks.push({ + key, + expected, + actual, + ok, + origin: "state", + message: ok + ? undefined + : `expected '${key}=${String(expected)}' but got '${String(actual ?? "")}'`, + }); + } + + for (const suite of input.suites) { + const req = suite.requires_state ?? {}; + for (const [key, expected] of Object.entries(req)) { + const actual = input.probes[key]; + const ok = compare(key, expected as ProbeValue, actual); + checks.push({ + key, + expected: expected as ProbeValue, + actual, + ok, + origin: "suite", + suite: suite.id, + message: ok + ? undefined + : `suite '${suite.id}' requires '${key}=${String(expected)}' but got '${String(actual ?? "")}'`, + }); + } + } + + const ok = checks.every((c) => c.ok); + return { state_id: input.stateId, ok, checks }; +} + +export function formatReport(report: ValidatorReport): string { + const lines: string[] = []; + lines.push(`expected-state: ${report.state_id} ${report.ok ? "OK" : "FAILED"}`); + for (const c of report.checks) { + const status = c.ok ? "PASS" : "FAIL"; + const origin = c.origin === "suite" ? `[suite:${c.suite}]` : "[state]"; + lines.push( + ` ${status} ${origin} ${c.key} expected=${String(c.expected)} actual=${String(c.actual ?? 
"")}`, + ); + } + return lines.join("\n"); +} diff --git a/test/e2e/run-scenario.sh b/test/e2e/run-scenario.sh new file mode 100755 index 0000000000..cf4113086c --- /dev/null +++ b/test/e2e/run-scenario.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# E2E scenario runner entrypoint. +# +# Usage: +# bash test/e2e/run-scenario.sh [--plan-only] [--dry-run] +# +# Flags: +# --plan-only Resolve metadata and print the plan only. Writes +# ${E2E_CONTEXT_DIR:-.e2e}/plan.json for artifact upload. +# --dry-run (reserved) Run orchestration with real side effects +# replaced by trace-logged stubs. Sets E2E_DRY_RUN=1 for +# helpers. Full dry-run orchestration lands in later phases. +# +# Environment: +# E2E_CONTEXT_DIR Override the scenario artifact directory +# (default: /.e2e/). + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +SCENARIO_ID="" +PLAN_ONLY=0 +DRY_RUN=0 + +usage() { + cat >&2 <<'USAGE' +Usage: bash test/e2e/run-scenario.sh [--plan-only] [--dry-run] +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --plan-only) + PLAN_ONLY=1 + shift + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + -h | --help) + usage + exit 0 + ;; + --*) + echo "run-scenario: unknown flag: $1" >&2 + usage + exit 2 + ;; + *) + if [[ -z "${SCENARIO_ID}" ]]; then + SCENARIO_ID="$1" + else + echo "run-scenario: unexpected positional argument: $1" >&2 + usage + exit 2 + fi + shift + ;; + esac +done + +if [[ -z "${SCENARIO_ID}" ]]; then + echo "run-scenario: missing scenario id" >&2 + usage + exit 2 +fi + +export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}" +mkdir -p "${E2E_CONTEXT_DIR}" + +if [[ "${DRY_RUN}" -eq 1 ]]; then + export E2E_DRY_RUN=1 +fi + +# Prefer the locally-installed tsx if present, otherwise fall back to npx. 
+TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx" +if [[ ! -x "${TSX_BIN}" ]]; then + TSX_BIN="" +fi + +run_resolver() { + if [[ -n "${TSX_BIN}" ]]; then + "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" "$@" + else + (cd "${REPO_ROOT}" && npx --yes tsx "${SCRIPT_DIR}/resolver/index.ts" "$@") + fi +} + +run_resolver plan "${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}" + +if [[ "${PLAN_ONLY}" -eq 1 ]]; then + exit 0 +fi + +# Source the shared helper library so we can exercise the full +# setup → install → onboard → gateway/sandbox check sequence. In dry-run +# mode each helper short-circuits (and writes to E2E_TRACE_FILE if set). +# shellcheck source=lib/env.sh +. "${SCRIPT_DIR}/lib/env.sh" +# shellcheck source=lib/context.sh +. "${SCRIPT_DIR}/lib/context.sh" +# shellcheck source=lib/install.sh +. "${SCRIPT_DIR}/lib/install.sh" +# shellcheck source=lib/onboard.sh +. "${SCRIPT_DIR}/lib/onboard.sh" +# shellcheck source=lib/gateway.sh +. "${SCRIPT_DIR}/lib/gateway.sh" +# shellcheck source=lib/sandbox.sh +. "${SCRIPT_DIR}/lib/sandbox.sh" + +# Apply standard non-interactive env (and trace it). +e2e_env_apply_noninteractive +e2e_env_trace "env:noninteractive" + +# Emit normalized context from the resolved plan. +e2e_context_init +"${SCRIPT_DIR}/lib/emit-context-from-plan.sh" "${E2E_CONTEXT_DIR}/plan.json" + +# Extract the install method and onboarding profile from the plan so we can +# dispatch to the right helpers. +read_plan_string() { + local key="$1" + node -e " + const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8')); + const parts = process.argv[2].split('.'); + let cur = p; + for (const part of parts) { if (cur == null) { cur = ''; break; } cur = cur[part]; } + process.stdout.write(cur == null ? 
'' : String(cur)); + " "${E2E_CONTEXT_DIR}/plan.json" "${key}" +} + +INSTALL_ID="$(read_plan_string dimensions.install.id)" +INSTALL_METHOD="$(read_plan_string dimensions.install.profile.method)" +ONBOARDING_ID="$(read_plan_string dimensions.onboarding.id)" + +# Trace the dimension id so scenario-level assertions can identify the +# configured install (e.g. repo-current); e2e_install internally traces +# the resolved method. +e2e_env_trace "install:${INSTALL_ID}" +e2e_install "${INSTALL_METHOD}" +e2e_onboard "${ONBOARDING_ID}" +e2e_gateway_assert_healthy +e2e_sandbox_assert_running + +# Expected state validation. The validator reads E2E_PROBE_OVERRIDE_* env +# variables to simulate real probe outputs in dry-run/test contexts. +# In non-dry-run mode the validator currently also relies on those +# overrides; wiring real probes through the validator happens as +# scenarios migrate. +if [[ "${E2E_VALIDATE_EXPECTED_STATE:-0}" == "1" || "${DRY_RUN}" -ne 1 ]]; then + if ! run_resolver validate-state "${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}"; then + echo "run-scenario: expected-state validation failed; suites will NOT run" >&2 + exit 3 + fi +fi + +if [[ "${DRY_RUN}" -eq 1 ]]; then + echo "run-scenario: dry-run complete; context.env emitted under ${E2E_CONTEXT_DIR}" + exit 0 +fi + +echo "run-scenario: full suite execution is not implemented yet (Phase 9 migrates additional scenarios)" >&2 +exit 0 diff --git a/test/e2e/run-suites.sh b/test/e2e/run-suites.sh new file mode 100755 index 0000000000..bf03f0fa38 --- /dev/null +++ b/test/e2e/run-suites.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Run one or more functional suites against a completed E2E environment. +# +# Usage: +# bash test/e2e/run-suites.sh [ ...] +# +# Reads suite metadata from test/e2e/suites.yaml (or $E2E_SUITES_FILE). 
+# Each suite script receives .e2e/context.env via E2E_CONTEXT_DIR and is +# expected to source lib/context.sh if it needs specific keys. +# +# Environment: +# E2E_CONTEXT_DIR Directory containing context.env (default: /.e2e) +# E2E_SUITES_FILE Override suites metadata file (for tests) +# E2E_SUITES_DIR Override the directory that suite scripts are resolved +# against (default: test/e2e/) +# E2E_DRY_RUN When 1, suite scripts run in dry-run mode themselves. +# +# Exit code: 0 if all steps pass; non-zero at the first failing step. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +if (($# == 0)); then + echo "run-suites: at least one suite id required" >&2 + echo "Usage: bash test/e2e/run-suites.sh [ ...]" >&2 + exit 2 +fi + +export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}" +SUITES_FILE="${E2E_SUITES_FILE:-${SCRIPT_DIR}/suites.yaml}" +SUITES_DIR="${E2E_SUITES_DIR:-${SCRIPT_DIR}}" + +CTX_FILE="${E2E_CONTEXT_DIR}/context.env" +if [[ ! -f "${CTX_FILE}" ]]; then + echo "run-suites: missing ${CTX_FILE}; run-scenario.sh must emit context before running suites" >&2 + exit 1 +fi + +# Sanity-check that the baseline scenario key is present. +if ! grep -q '^E2E_SCENARIO=' "${CTX_FILE}"; then + echo "run-suites: ${CTX_FILE} is missing required key E2E_SCENARIO" >&2 + exit 1 +fi + +# Resolve the suite step list by reading the YAML via node. +resolve_suite() { + local suite_id="$1" + node -e " + const fs = require('fs'); + const path = process.argv[1]; + const wanted = process.argv[2]; + const raw = fs.readFileSync(path, 'utf8'); + // Minimal YAML reader: prefer js-yaml if available; else fall back. 
+ let yaml; + try { yaml = require('js-yaml'); } catch (_) { + process.stderr.write('run-suites: js-yaml required to parse suite metadata\n'); + process.exit(2); + } + const doc = yaml.load(raw); + if (!doc || !doc.suites || !doc.suites[wanted]) { + process.stderr.write('run-suites: unknown suite: ' + wanted + '\n'); + process.exit(3); + } + const steps = doc.suites[wanted].steps || []; + for (const s of steps) { + if (!s || typeof s.id !== 'string' || typeof s.script !== 'string') { + process.stderr.write('run-suites: malformed step in ' + wanted + '\n'); + process.exit(4); + } + process.stdout.write(s.id + '\t' + s.script + '\n'); + } + " "${SUITES_FILE}" "${suite_id}" +} + +declare -a FAILED_STEPS=() +declare -a PASSED_STEPS=() +OVERALL_STATUS=0 + +run_one_suite() { + local suite_id="$1" + echo "== suite: ${suite_id} ==" + local steps + if ! steps="$(resolve_suite "${suite_id}")"; then + OVERALL_STATUS=1 + return 1 + fi + if [[ -z "${steps}" ]]; then + echo " (no steps)" + return 0 + fi + while IFS=$'\t' read -r step_id script; do + [[ -z "${step_id}" ]] && continue + local full="${SUITES_DIR}/${script}" + echo " -> step: ${step_id} (${script})" + if [[ ! -f "${full}" ]]; then + echo " FAIL: script not found at ${full}" >&2 + FAILED_STEPS+=("${suite_id}/${step_id}") + OVERALL_STATUS=1 + return 1 + fi + if ! bash "${full}"; then + echo " FAIL: suite=${suite_id} step=${step_id}" >&2 + FAILED_STEPS+=("${suite_id}/${step_id}") + OVERALL_STATUS=1 + return 1 + fi + echo " PASS: ${step_id}" + PASSED_STEPS+=("${suite_id}/${step_id}") + done <<<"${steps}" +} + +for suite_id in "$@"; do + if ! 
run_one_suite "${suite_id}"; then + break + fi +done + +# Print the per-step summary. The ${ARR[@]+...} form expands to nothing when +# the array is empty; a plain "${ARR[@]}" aborts with "unbound variable" +# under `set -u` on bash older than 4.4 (e.g. the stock macOS bash 3.2). +echo +echo "== suite summary ==" +for p in ${PASSED_STEPS[@]+"${PASSED_STEPS[@]}"}; do + echo " PASS ${p}" +done +for f in ${FAILED_STEPS[@]+"${FAILED_STEPS[@]}"}; do + echo " FAIL ${f}" +done + +exit "${OVERALL_STATUS}" diff --git a/test/e2e/scenarios.yaml b/test/e2e/scenarios.yaml new file mode 100644 index 0000000000..91c9859324 --- /dev/null +++ b/test/e2e/scenarios.yaml @@ -0,0 +1,184 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# E2E setup scenario catalog. +# +# Reading order: +# 1. `platforms`, `installs`, `runtimes`, and `onboarding` define reusable +# profiles ("dimensions") that describe how a user reaches a completed +# NemoClaw environment. +# 2. `setup_scenarios` names concrete combinations by ID. Each scenario +# references profiles by key and pins exactly one `expected_state` +# from `expected-states.yaml`, along with an ordered list of `suites` +# from `suites.yaml`. +# +# Adding a new scenario: +# - Reuse existing profiles where possible. Add a new profile only when a +# dimension is genuinely new (e.g. a new platform runner). +# - Pick the expected_state that describes the completed environment. +# - List the suites to run against it, in the order they should execute. +# - Run `bash test/e2e/run-scenario.sh --plan-only` once the +# resolver lands to validate references. +# +# See `test/e2e/README.md` for the full reading guide and the sparse matrix +# design that drives the initial three scenarios. 
+ +platforms: + ubuntu-local: + os: ubuntu + execution_target: local + macos-local: + os: macos + execution_target: local + wsl-local: + os: wsl + execution_target: local + gpu-runner: + os: ubuntu + execution_target: local + gpu: nvidia + brev-launchable: + os: ubuntu + execution_target: remote + provider: brev + dgx-spark: + os: ubuntu + execution_target: local + hardware: dgx-spark + +installs: + repo-current: + method: repo-checkout + source: current-branch + public-curl: + method: curl-install-script + source: public-installer + launchable: + method: brev-launchable + source: launchable-image + release: + method: release-tarball + source: github-release + upgrade-from-version: + method: upgrade-in-place + source: prior-release + +runtimes: + docker-running: + container_engine: docker + container_daemon: running + gpu-docker-cdi: + container_engine: docker + container_daemon: running + gpu_runtime: cdi + docker-missing: + container_engine: docker + container_daemon: missing + +onboarding: + cloud-openclaw: + path: cloud + agent: openclaw + provider: nvidia + inference_route: inference-local + cloud-hermes: + path: cloud + agent: hermes + provider: nvidia + inference_route: inference-local + local-ollama-openclaw: + path: local + agent: openclaw + provider: ollama + inference_route: inference-local + openai-compatible-openclaw: + path: cloud + agent: openclaw + provider: openai-compatible + inference_route: inference-local + +setup_scenarios: + ubuntu-repo-cloud-openclaw: + dimensions: + platform: ubuntu-local + install: repo-current + runtime: docker-running + onboarding: cloud-openclaw + expected_state: cloud-openclaw-ready + suites: + - smoke + - inference + - credentials + + ubuntu-repo-cloud-hermes: + dimensions: + platform: ubuntu-local + install: repo-current + runtime: docker-running + onboarding: cloud-hermes + expected_state: cloud-hermes-ready + suites: + - smoke + - inference + - hermes-specific + + gpu-repo-local-ollama-openclaw: + dimensions: + 
platform: gpu-runner + install: repo-current + runtime: gpu-docker-cdi + onboarding: local-ollama-openclaw + expected_state: local-ollama-openclaw-ready + suites: + - smoke + - local-ollama-inference + - ollama-proxy + + macos-repo-cloud-openclaw: + dimensions: + platform: macos-local + install: repo-current + runtime: docker-running + onboarding: cloud-openclaw + expected_state: cloud-openclaw-ready + suites: + - smoke + - platform-macos + + wsl-repo-cloud-openclaw: + dimensions: + platform: wsl-local + install: repo-current + runtime: docker-running + onboarding: cloud-openclaw + expected_state: cloud-openclaw-ready + suites: + - smoke + - platform-wsl + + brev-launchable-cloud-openclaw: + dimensions: + platform: brev-launchable + install: launchable + runtime: docker-running + onboarding: cloud-openclaw + expected_state: cloud-openclaw-ready + # Remote gateway must bind to 0.0.0.0 so the GitHub runner can reach it + # after ssh port-forward. Scenario-level overrides land alongside their + # first real consumer (deferred from Phase 1). + overrides: + onboarding: + gateway: + bind_address: 0.0.0.0 + suites: + - smoke + - inference + + ubuntu-no-docker-preflight-negative: + dimensions: + platform: ubuntu-local + install: repo-current + runtime: docker-missing + onboarding: cloud-openclaw + expected_state: preflight-failure-no-sandbox + suites: [] diff --git a/test/e2e/suites.yaml b/test/e2e/suites.yaml new file mode 100644 index 0000000000..716e00f9ec --- /dev/null +++ b/test/e2e/suites.yaml @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Functional suite definitions. +# +# A suite is an ordered list of shell scripts that run after setup and +# expected state validation complete. Suites consume `.e2e/context.env` +# and MUST NOT perform install or onboarding themselves. 
+# +# `requires_state` declares the expected-state keys (dotted paths) that +# must be present with a matching value for a suite to run against a +# given scenario. The resolver validates these references at plan +# resolution time (Phase 2) and the runner validates actual probe +# results at runtime (Phase 8). +# +# Script paths are relative to this file's directory. Scripts are added +# incrementally; Phase 5 lands the first `smoke` and `inference` steps. + +suites: + smoke: + requires_state: + gateway.health: healthy + sandbox.status: running + steps: + - id: cli-available + script: suites/smoke/00-cli-available.sh + - id: gateway-health + script: suites/smoke/01-gateway-health.sh + - id: sandbox-listed + script: suites/smoke/02-sandbox-listed.sh + - id: sandbox-shell + script: suites/smoke/03-sandbox-shell.sh + + inference: + requires_state: + gateway.health: healthy + sandbox.status: running + inference.expected: available + steps: + - id: models-health + script: suites/inference/00-models-health.sh + - id: chat-completion + script: suites/inference/01-chat-completion.sh + - id: sandbox-inference-local + script: suites/inference/02-inference-local-from-sandbox.sh + + credentials: + requires_state: + credentials.expected: present + steps: + - id: credentials-present + script: suites/credentials/00-credentials-present.sh + + local-ollama-inference: + requires_state: + gateway.health: healthy + sandbox.status: running + inference.expected: available + steps: + - id: ollama-models-health + script: suites/local-ollama-inference/00-ollama-models-health.sh + - id: ollama-chat-completion + script: suites/local-ollama-inference/01-ollama-chat-completion.sh + + ollama-proxy: + requires_state: + gateway.health: healthy + sandbox.status: running + steps: + - id: proxy-reachable + script: suites/ollama-proxy/00-proxy-reachable.sh + + platform-macos: + requires_state: + gateway.health: healthy + sandbox.status: running + steps: + - id: macos-smoke + script: 
suites/platform-macos/00-macos-smoke.sh + + platform-wsl: + requires_state: + gateway.health: healthy + sandbox.status: running + steps: + - id: wsl-smoke + script: suites/platform-wsl/00-wsl-smoke.sh + + hermes-specific: + requires_state: + gateway.health: healthy + sandbox.status: running + sandbox.agent: hermes + steps: + - id: hermes-health + script: suites/hermes-specific/00-hermes-health.sh diff --git a/test/e2e/suites/credentials/00-credentials-present.sh b/test/e2e/suites/credentials/00-credentials-present.sh new file mode 100755 index 0000000000..5df36195b7 --- /dev/null +++ b/test/e2e/suites/credentials/00-credentials-present.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# credentials step: credentials-present + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "credentials:credentials-present" +e2e_context_require E2E_SCENARIO + +if e2e_env_is_dry_run; then + echo "[dry-run] would verify credentials are recorded in the gateway" + exit 0 +fi + +if ! command -v nemoclaw >/dev/null 2>&1; then + echo "credentials:credentials-present: nemoclaw CLI not on PATH" >&2 + exit 1 +fi +nemoclaw credentials list >/dev/null diff --git a/test/e2e/suites/hermes-specific/00-hermes-health.sh b/test/e2e/suites/hermes-specific/00-hermes-health.sh new file mode 100755 index 0000000000..c6306ca1da --- /dev/null +++ b/test/e2e/suites/hermes-specific/00-hermes-health.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# hermes-specific step: hermes-health +# Placeholder: real assertions migrate with the existing Hermes E2E scripts. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "hermes-specific:hermes-health" +e2e_context_require E2E_AGENT +if e2e_env_is_dry_run; then + echo "[dry-run] would run Hermes health checks" + exit 0 +fi +agent="$(e2e_context_get E2E_AGENT)" +if [[ "${agent}" != "hermes" ]]; then + echo "hermes-specific: E2E_AGENT should be 'hermes', got '${agent}'" >&2 + exit 1 +fi diff --git a/test/e2e/suites/inference/00-models-health.sh b/test/e2e/suites/inference/00-models-health.sh new file mode 100755 index 0000000000..31b998b161 --- /dev/null +++ b/test/e2e/suites/inference/00-models-health.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# inference step: models-health +# Checks that the gateway advertises at least one model via /models. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. 
"${LIB_DIR}/context.sh" + +echo "inference:models-health" +e2e_context_require E2E_GATEWAY_URL + +if e2e_env_is_dry_run; then + echo "[dry-run] would GET \${E2E_GATEWAY_URL}/models" + exit 0 +fi + +url="$(e2e_context_get E2E_GATEWAY_URL)" +body="$(curl -fsS --max-time 10 "${url%/}/v1/models" 2>/dev/null || curl -fsS --max-time 10 "${url%/}/models")" +if [[ -z "${body}" ]]; then + echo "inference:models-health: no response from models endpoint" >&2 + exit 1 +fi +echo "${body}" | head -c 512 +echo diff --git a/test/e2e/suites/inference/01-chat-completion.sh b/test/e2e/suites/inference/01-chat-completion.sh new file mode 100755 index 0000000000..316539a588 --- /dev/null +++ b/test/e2e/suites/inference/01-chat-completion.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# inference step: chat-completion + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. 
"${LIB_DIR}/context.sh" + +echo "inference:chat-completion" +e2e_context_require E2E_GATEWAY_URL + +if e2e_env_is_dry_run; then + echo "[dry-run] would POST a chat completion to \${E2E_GATEWAY_URL}/v1/chat/completions" + exit 0 +fi + +url="$(e2e_context_get E2E_GATEWAY_URL)" +payload='{"model":"default","messages":[{"role":"user","content":"say ok"}],"max_tokens":8}' +response="$(curl -fsS --max-time 30 -H 'Content-Type: application/json' \ + -d "${payload}" "${url%/}/v1/chat/completions")" +echo "${response}" | head -c 1024 +echo +if [[ -z "${response}" ]]; then + echo "inference:chat-completion: empty response" >&2 + exit 1 +fi diff --git a/test/e2e/suites/inference/02-inference-local-from-sandbox.sh b/test/e2e/suites/inference/02-inference-local-from-sandbox.sh new file mode 100755 index 0000000000..2a60a68325 --- /dev/null +++ b/test/e2e/suites/inference/02-inference-local-from-sandbox.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# inference step: sandbox-inference-local +# Verifies that the sandbox can reach the `inference-local` route. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. 
"${LIB_DIR}/context.sh" + +echo "inference:sandbox-inference-local" +e2e_context_require E2E_SANDBOX_NAME E2E_INFERENCE_ROUTE + +if e2e_env_is_dry_run; then + echo "[dry-run] would resolve inference-local from inside the sandbox" + exit 0 +fi + +name="$(e2e_context_get E2E_SANDBOX_NAME)" +route="$(e2e_context_get E2E_INFERENCE_ROUTE)" +nemoclaw shell "${name}" -- curl -fsS --max-time 10 "http://${route}/v1/models" \ + | head -c 512 +echo diff --git a/test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh b/test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh new file mode 100755 index 0000000000..2ee434a332 --- /dev/null +++ b/test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# local-ollama-inference step: ollama-models-health + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "local-ollama-inference:ollama-models-health" +e2e_context_require E2E_GATEWAY_URL +if e2e_env_is_dry_run; then + echo "[dry-run] would GET ollama /api/tags via gateway" + exit 0 +fi +url="$(e2e_context_get E2E_GATEWAY_URL)" +curl -fsS --max-time 10 "${url%/}/api/tags" | head -c 512 +echo diff --git a/test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh b/test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh new file mode 100755 index 0000000000..9707a9b00d --- /dev/null +++ b/test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# local-ollama-inference step: ollama-chat-completion + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "local-ollama-inference:ollama-chat-completion" +e2e_context_require E2E_GATEWAY_URL +if e2e_env_is_dry_run; then + echo "[dry-run] would POST chat completion via ollama-compatible route" + exit 0 +fi +url="$(e2e_context_get E2E_GATEWAY_URL)" +payload='{"model":"default","messages":[{"role":"user","content":"say ok"}],"max_tokens":8}' +curl -fsS --max-time 30 -H 'Content-Type: application/json' \ + -d "${payload}" "${url%/}/v1/chat/completions" | head -c 1024 +echo diff --git a/test/e2e/suites/ollama-proxy/00-proxy-reachable.sh b/test/e2e/suites/ollama-proxy/00-proxy-reachable.sh new file mode 100755 index 0000000000..107d8d87fa --- /dev/null +++ b/test/e2e/suites/ollama-proxy/00-proxy-reachable.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# ollama-proxy step: proxy-reachable + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. 
"${LIB_DIR}/context.sh" + +echo "ollama-proxy:proxy-reachable" +e2e_context_require E2E_SANDBOX_NAME +if e2e_env_is_dry_run; then + echo "[dry-run] would verify the Ollama auth proxy is reachable from the sandbox" + exit 0 +fi +name="$(e2e_context_get E2E_SANDBOX_NAME)" +nemoclaw shell "${name}" -- curl -fsS --max-time 10 "http://inference-local/api/tags" >/dev/null diff --git a/test/e2e/suites/platform-macos/00-macos-smoke.sh b/test/e2e/suites/platform-macos/00-macos-smoke.sh new file mode 100755 index 0000000000..eb9f2806a7 --- /dev/null +++ b/test/e2e/suites/platform-macos/00-macos-smoke.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# platform-macos step: macos-smoke +# Placeholder that asserts basic macOS-specific expectations post-onboarding +# (launchd helper present, no systemd leaks, Homebrew paths survive PATH +# refresh). Real probes land as macos-e2e coverage migrates. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "platform-macos:macos-smoke" +e2e_context_require E2E_PLATFORM_OS E2E_SANDBOX_NAME + +if e2e_env_is_dry_run; then + echo "[dry-run] would run macOS-specific smoke checks" + exit 0 +fi + +os="$(e2e_context_get E2E_PLATFORM_OS)" +if [[ "${os}" != "macos" ]]; then + echo "platform-macos: E2E_PLATFORM_OS should be 'macos', got '${os}'" >&2 + exit 1 +fi diff --git a/test/e2e/suites/platform-wsl/00-wsl-smoke.sh b/test/e2e/suites/platform-wsl/00-wsl-smoke.sh new file mode 100755 index 0000000000..538afb12cc --- /dev/null +++ b/test/e2e/suites/platform-wsl/00-wsl-smoke.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# platform-wsl step: wsl-smoke. Mirrors platform-macos; WSL-specific probes +# land as wsl-e2e coverage migrates. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "platform-wsl:wsl-smoke" +e2e_context_require E2E_PLATFORM_OS E2E_SANDBOX_NAME + +if e2e_env_is_dry_run; then + echo "[dry-run] would run WSL-specific smoke checks" + exit 0 +fi + +os="$(e2e_context_get E2E_PLATFORM_OS)" +if [[ "${os}" != "wsl" ]]; then + echo "platform-wsl: E2E_PLATFORM_OS should be 'wsl', got '${os}'" >&2 + exit 1 +fi diff --git a/test/e2e/suites/smoke/00-cli-available.sh b/test/e2e/suites/smoke/00-cli-available.sh new file mode 100755 index 0000000000..6f6c0cc369 --- /dev/null +++ b/test/e2e/suites/smoke/00-cli-available.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# smoke step: cli-available +# Verifies that the `nemoclaw` CLI is on PATH. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "smoke:cli-available" + +e2e_context_require E2E_SCENARIO + +if e2e_env_is_dry_run; then + echo "[dry-run] would check that nemoclaw CLI is on PATH" + exit 0 +fi + +if ! 
command -v nemoclaw >/dev/null 2>&1; then + echo "smoke:cli-available: nemoclaw CLI not on PATH" >&2 + exit 1 +fi + +nemoclaw --version diff --git a/test/e2e/suites/smoke/01-gateway-health.sh b/test/e2e/suites/smoke/01-gateway-health.sh new file mode 100755 index 0000000000..d29bb98847 --- /dev/null +++ b/test/e2e/suites/smoke/01-gateway-health.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# smoke step: gateway-health + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" +# shellcheck source=../../lib/gateway.sh +. "${LIB_DIR}/gateway.sh" + +echo "smoke:gateway-health" +e2e_context_require E2E_GATEWAY_URL +e2e_gateway_assert_healthy diff --git a/test/e2e/suites/smoke/02-sandbox-listed.sh b/test/e2e/suites/smoke/02-sandbox-listed.sh new file mode 100755 index 0000000000..9ad45d081c --- /dev/null +++ b/test/e2e/suites/smoke/02-sandbox-listed.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# smoke step: sandbox-listed + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" +# shellcheck source=../../lib/sandbox.sh +. 
"${LIB_DIR}/sandbox.sh" + +echo "smoke:sandbox-listed" +e2e_context_require E2E_SANDBOX_NAME +e2e_sandbox_assert_running diff --git a/test/e2e/suites/smoke/03-sandbox-shell.sh b/test/e2e/suites/smoke/03-sandbox-shell.sh new file mode 100755 index 0000000000..8e5186b726 --- /dev/null +++ b/test/e2e/suites/smoke/03-sandbox-shell.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# smoke step: sandbox-shell +# Verifies that `nemoclaw shell` can execute a trivial command inside the +# sandbox. Honors E2E_DRY_RUN. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +# shellcheck source=../../lib/env.sh +. "${LIB_DIR}/env.sh" +# shellcheck source=../../lib/context.sh +. "${LIB_DIR}/context.sh" + +echo "smoke:sandbox-shell" +e2e_context_require E2E_SANDBOX_NAME + +if e2e_env_is_dry_run; then + echo "[dry-run] would run: nemoclaw shell -- echo ok" + exit 0 +fi + +name="$(e2e_context_get E2E_SANDBOX_NAME)" +output="$(nemoclaw shell "${name}" -- echo ok 2>&1)" +echo "${output}" +if ! echo "${output}" | grep -q '^ok$'; then + echo "smoke:sandbox-shell: did not receive expected 'ok' from sandbox" >&2 + exit 1 +fi From e34826e638777b32f8b697123a822788ae956e72 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Fri, 8 May 2026 17:51:40 -0400 Subject: [PATCH 02/60] fix(e2e): make run-suites.sh summary loops safe under bash 3.2 (macOS) Under `set -u` on bash 3.2 (the default on macOS runners) "${arr[@]}" on an empty array raises "unbound variable" and fails the summary loops at the end of a successful run. Switch to the ${arr[@]+...} safe-expansion pattern so the loops expand to nothing when no steps were recorded. Unblocks macos-e2e (Scenario 9.1) on PR #3290. 
--- test/e2e/run-suites.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/e2e/run-suites.sh b/test/e2e/run-suites.sh index bf03f0fa38..6c1edb70db 100755 --- a/test/e2e/run-suites.sh +++ b/test/e2e/run-suites.sh @@ -122,10 +122,12 @@ done echo echo "== suite summary ==" -for p in "${PASSED_STEPS[@]}"; do +# bash 3.2 (macOS) fails on "${arr[@]}" when the array is empty under `set -u`; +# use the `${arr[@]+...}` guard to expand to nothing when empty. +for p in ${PASSED_STEPS[@]+"${PASSED_STEPS[@]}"}; do echo " PASS ${p}" done -for f in "${FAILED_STEPS[@]}"; do +for f in ${FAILED_STEPS[@]+"${FAILED_STEPS[@]}"}; do echo " FAIL ${f}" done From a52bcce2590a87389acc4273a3608df978b095c7 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Mon, 11 May 2026 10:39:46 -0400 Subject: [PATCH 03/60] refactor(e2e): reorganize lib/ and suites/ by scenario concern; address CodeRabbit review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reshape the scaffolding so the file system reflects the scenario organization informed by the UAT / NV QA bug hotspot analysis (446 issues traced to 213 fix PRs), and fold in CodeRabbit's 15 actionable review items on PR #3290. 
## Reorganization `test/e2e/lib/`: lib/ artifacts.sh cleanup.sh context.sh (generic scaffolding - unchanged) emit-context-from-plan.sh env.sh install-path-refresh.sh sandbox-teardown.sh (existing; preserved) setup/ <-- install.sh, onboard.sh (dimension dispatchers) assert/ <-- gateway-alive.sh (was gateway.sh), sandbox-alive.sh (was sandbox.sh) fixtures/ <-- roadmap README; fixtures land with first consumers `test/e2e/suites/` — grouped by functional area matching the bug hotspot buckets: suites/ smoke/ (unchanged; baseline) onboarding/ <-- hermes/ (was hermes-specific/) inference/ <-- cloud/ (was direct files), ollama-gpu/ (was local-ollama-inference/), ollama-auth-proxy/ (was ollama-proxy/) security/ <-- credentials/ (was at suites/credentials/) platform/ <-- macos/ (was platform-macos/), wsl/ (was platform-wsl/) lifecycle/ sandbox/ messaging/ (new dirs with roadmap READMEs) Each new directory ships with a README.md documenting the originating bug class, the legacy `test/e2e/test-*.sh` script (where one exists), and the planned coverage. Suite IDs in `suites.yaml` stay stable; only script paths move. ## CodeRabbit review items addressed 1. `.github/workflows/e2e-scenarios.yaml` — add a `resolve-runner` job that routes each scenario to the correct runner (macos-latest / windows-latest / self-hosted / ubuntu-latest) based on the scenario id prefix. Previously `runs-on: ubuntu-latest` was hard-coded for every scenario. 2. All test files — add `timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000)` to every `spawnSync` call so a stuck subprocess cannot block a Vitest worker. 3. `coverage-report.sh` — use `npx --no-install tsx` so the lockfile pins the version; fail closed if tsx is missing. 4. `lib/context.sh` — validate keys as POSIX identifiers before interpolating into grep regexes; reject newlines in values that would corrupt the line-oriented `context.env` format. 5. 
`lib/emit-context-from-plan.sh` — fail fast if `plan.json` is missing its `scenario_id` field rather than silently seeding empty. 6. `lib/setup/install.sh` — pin the installer source via `E2E_INSTALLER_URL` / `E2E_INSTALLER_SHA256` overrides; download to a temp file and sha256-verify before exec instead of streaming `curl | bash` over the network. 7. `lib/assert/sandbox-alive.sh` — fix regex that had an empty first alternative (`"^|..."`) and therefore always matched. Replace with `"(^|[[:space:]])name([[:space:]]|$)"` to properly detect "sandbox not found". 8. `test/e2e/README.md` — regenerate to reflect the current 7-scenario catalog, new directory layout, runner contracts, and post-reorg roadmap. 9. `resolver/index.ts` (`validate-state`) — require an explicit `--probes-from-state` flag to seed probes from the expected state. `run-scenario.sh` passes the flag in `--dry-run` mode only; live mode now fails closed when real probes are missing rather than silently self-validating. 10. `run-scenario.sh` resolver fallback — use `npx --no-install tsx` and fail closed with a clear message if tsx is not installed. 11. `run-scenario.sh` (non-dry-run) — exit 4 instead of 0 when full suite execution is not yet wired for the scenario. Silent-pass is now observable in CI. 12-15. `suites/inference/**` — replace `curl ... | head -c N` with `body="$(curl ...)"; printf '%s\n' "${body:0:N}"`. The pipe pattern was brittle under `pipefail`: `head` closing early could make successful requests appear failed. ## Test state 55/55 Vitest `cli` tests pass after reorg and fixes. `prek run --all-files` exits 0. 
Signed-off-by: Julie Yaunches --- .github/workflows/e2e-scenarios.yaml | 30 ++++- test/e2e-context-helper.test.ts | 2 + test/e2e-expected-state-validator.test.ts | 3 +- test/e2e-lib-helpers.test.ts | 4 +- test/e2e-scenario-additional-families.test.ts | 1 + test/e2e-scenario-first-migration.test.ts | 5 +- test/e2e-scenario-resolver.test.ts | 2 + test/e2e-suite-runner.test.ts | 1 + test/e2e/README.md | 121 +++++++++++++----- test/e2e/coverage-report.sh | 8 +- test/e2e/lib/assert/README.md | 22 ++++ .../{gateway.sh => assert/gateway-alive.sh} | 2 +- .../{sandbox.sh => assert/sandbox-alive.sh} | 7 +- test/e2e/lib/context.sh | 37 +++++- test/e2e/lib/emit-context-from-plan.sh | 7 + test/e2e/lib/fixtures/README.md | 24 ++++ test/e2e/lib/setup/README.md | 22 ++++ test/e2e/lib/{ => setup}/install.sh | 24 +++- test/e2e/lib/{ => setup}/onboard.sh | 2 +- test/e2e/resolver/index.ts | 45 ++++++- test/e2e/run-scenario.sh | 43 +++++-- test/e2e/suites.yaml | 20 +-- .../inference/{ => cloud}/00-models-health.sh | 2 +- .../{ => cloud}/01-chat-completion.sh | 7 +- .../02-inference-local-from-sandbox.sh | 9 +- .../ollama-auth-proxy}/00-proxy-reachable.sh | 2 +- .../ollama-gpu}/00-ollama-models-health.sh | 8 +- .../ollama-gpu}/01-ollama-chat-completion.sh | 10 +- test/e2e/suites/lifecycle/README.md | 24 ++++ test/e2e/suites/messaging/README.md | 24 ++++ test/e2e/suites/onboarding/README.md | 31 +++++ .../hermes}/00-hermes-health.sh | 2 +- .../macos}/00-macos-smoke.sh | 2 +- .../wsl}/00-wsl-smoke.sh | 2 +- test/e2e/suites/sandbox/README.md | 31 +++++ test/e2e/suites/security/README.md | 31 +++++ .../credentials/00-credentials-present.sh | 2 +- test/e2e/suites/smoke/01-gateway-health.sh | 4 +- test/e2e/suites/smoke/02-sandbox-listed.sh | 4 +- 39 files changed, 523 insertions(+), 104 deletions(-) create mode 100644 test/e2e/lib/assert/README.md rename test/e2e/lib/{gateway.sh => assert/gateway-alive.sh} (95%) rename test/e2e/lib/{sandbox.sh => assert/sandbox-alive.sh} (72%) create mode 
100644 test/e2e/lib/fixtures/README.md create mode 100644 test/e2e/lib/setup/README.md rename test/e2e/lib/{ => setup}/install.sh (58%) rename test/e2e/lib/{ => setup}/onboard.sh (95%) rename test/e2e/suites/inference/{ => cloud}/00-models-health.sh (94%) rename test/e2e/suites/inference/{ => cloud}/01-chat-completion.sh (81%) rename test/e2e/suites/inference/{ => cloud}/02-inference-local-from-sandbox.sh (70%) rename test/e2e/suites/{ollama-proxy => inference/ollama-auth-proxy}/00-proxy-reachable.sh (93%) rename test/e2e/suites/{local-ollama-inference => inference/ollama-gpu}/00-ollama-models-health.sh (70%) rename test/e2e/suites/{local-ollama-inference => inference/ollama-gpu}/01-ollama-chat-completion.sh (69%) create mode 100644 test/e2e/suites/lifecycle/README.md create mode 100644 test/e2e/suites/messaging/README.md create mode 100644 test/e2e/suites/onboarding/README.md rename test/e2e/suites/{hermes-specific => onboarding/hermes}/00-hermes-health.sh (93%) rename test/e2e/suites/{platform-macos => platform/macos}/00-macos-smoke.sh (94%) rename test/e2e/suites/{platform-wsl => platform/wsl}/00-wsl-smoke.sh (93%) create mode 100644 test/e2e/suites/sandbox/README.md create mode 100644 test/e2e/suites/security/README.md rename test/e2e/suites/{ => security}/credentials/00-credentials-present.sh (93%) diff --git a/.github/workflows/e2e-scenarios.yaml b/.github/workflows/e2e-scenarios.yaml index 32f1175a84..76d3b76970 100644 --- a/.github/workflows/e2e-scenarios.yaml +++ b/.github/workflows/e2e-scenarios.yaml @@ -38,8 +38,36 @@ concurrency: cancel-in-progress: false jobs: - run-scenario: + # Route the scenario to the correct runner. + # + # Scenario ids encode their target platform as the first segment + # (e.g. `macos-repo-cloud-openclaw`, `wsl-repo-cloud-openclaw`, + # `gpu-repo-local-ollama-openclaw`). 
The workflow previously pinned + # `runs-on: ubuntu-latest` for every scenario, which caused non-Ubuntu + # scenarios to fail on the wrong runner (CodeRabbit review item #1). + resolve-runner: runs-on: ubuntu-latest + outputs: + runner: ${{ steps.pick.outputs.runner }} + steps: + - id: pick + env: + SCENARIO: ${{ github.event.inputs.scenario }} + run: | + case "${SCENARIO}" in + macos-*) echo "runner=macos-latest" >> "$GITHUB_OUTPUT" ;; + wsl-*) echo "runner=windows-latest" >> "$GITHUB_OUTPUT" ;; + gpu-*) echo "runner=self-hosted" >> "$GITHUB_OUTPUT" ;; + ubuntu-*|brev-*) echo "runner=ubuntu-latest" >> "$GITHUB_OUTPUT" ;; + *) + echo "::error::Unknown scenario prefix for runner selection: ${SCENARIO}" >&2 + exit 1 + ;; + esac + + run-scenario: + needs: resolve-runner + runs-on: ${{ needs.resolve-runner.outputs.runner }} timeout-minutes: 45 steps: - uses: actions/checkout@v4 diff --git a/test/e2e-context-helper.test.ts b/test/e2e-context-helper.test.ts index bac9d19c30..4526787aa4 100644 --- a/test/e2e-context-helper.test.ts +++ b/test/e2e-context-helper.test.ts @@ -15,6 +15,7 @@ function runBash(script: string, env: Record = {}): SpawnSyncRet return spawnSync("bash", ["-c", script], { env: { ...process.env, ...env }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }); } @@ -94,6 +95,7 @@ describe("E2E context helper (lib/context.sh)", () => { { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 
60_000), cwd: REPO_ROOT, }, ); diff --git a/test/e2e-expected-state-validator.test.ts b/test/e2e-expected-state-validator.test.ts index 0c6fd111e8..6c93109e92 100644 --- a/test/e2e-expected-state-validator.test.ts +++ b/test/e2e-expected-state-validator.test.ts @@ -101,7 +101,7 @@ describe("expected state validator", () => { const inferenceSuite: ResolvedSuite = { id: "inference", requires_state: { "inference.expected": "available" }, - steps: [{ id: "models-health", script: "suites/inference/00-models-health.sh" }], + steps: [{ id: "models-health", script: "suites/inference/cloud/00-models-health.sh" }], }; const report = validateExpectedState({ stateId: "cloud-openclaw-ready", @@ -141,6 +141,7 @@ describe("runner_should_not_run_suites_when_expected_state_fails", () => { E2E_VALIDATE_EXPECTED_STATE: "1", }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }, ); diff --git a/test/e2e-lib-helpers.test.ts b/test/e2e-lib-helpers.test.ts index dbb4485b76..ee131a9d73 100644 --- a/test/e2e-lib-helpers.test.ts +++ b/test/e2e-lib-helpers.test.ts @@ -15,6 +15,7 @@ function runBash(script: string, env: Record = {}): SpawnSyncRet return spawnSync("bash", ["-c", script], { env: { ...process.env, ...env }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }); } @@ -73,7 +74,7 @@ describe("E2E shell helpers", () => { ` set -euo pipefail . "${LIB}/context.sh" - . "${LIB}/sandbox.sh" + . "${LIB}/assert/sandbox-alive.sh" e2e_context_init e2e_context_set E2E_SCENARIO test e2e_sandbox_assert_running @@ -101,6 +102,7 @@ describe("E2E shell helpers", () => { E2E_TRACE_FILE: trace, }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 
60_000), cwd: REPO_ROOT, }, ); diff --git a/test/e2e-scenario-additional-families.test.ts b/test/e2e-scenario-additional-families.test.ts index f35bfbd050..41fa08b0cd 100644 --- a/test/e2e-scenario-additional-families.test.ts +++ b/test/e2e-scenario-additional-families.test.ts @@ -28,6 +28,7 @@ function planOnly(scenarioId: string): { stdout: string; stderr: string; status: const r = spawnSync("bash", [RUN_SCENARIO, scenarioId, "--plan-only"], { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }); let plan = {}; diff --git a/test/e2e-scenario-first-migration.test.ts b/test/e2e-scenario-first-migration.test.ts index a295672bcf..86a721f461 100644 --- a/test/e2e-scenario-first-migration.test.ts +++ b/test/e2e-scenario-first-migration.test.ts @@ -35,7 +35,8 @@ describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => { const r = spawnSync( "bash", [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--plan-only"], - { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", cwd: REPO_ROOT }, + { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT }, ); expect(r.status, r.stderr).toBe(0); expect(r.stdout).toMatch(/install=repo-current/); @@ -56,6 +57,7 @@ describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => { { env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_TRACE_FILE: trace }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }, ); @@ -86,6 +88,7 @@ describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => { { env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 
60_000), cwd: REPO_ROOT, }, ); diff --git a/test/e2e-scenario-resolver.test.ts b/test/e2e-scenario-resolver.test.ts index a89bd29606..dac4575b62 100644 --- a/test/e2e-scenario-resolver.test.ts +++ b/test/e2e-scenario-resolver.test.ts @@ -187,6 +187,7 @@ describe("run-scenario.sh --plan-only", () => { { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }, ); @@ -220,6 +221,7 @@ describe("run-scenario.sh --plan-only", () => { { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }, ); diff --git a/test/e2e-suite-runner.test.ts b/test/e2e-suite-runner.test.ts index c4611893fd..2df4665a0e 100644 --- a/test/e2e-suite-runner.test.ts +++ b/test/e2e-suite-runner.test.ts @@ -14,6 +14,7 @@ function runSuites(args: string[], env: Record = {}): SpawnSyncR return spawnSync("bash", [RUN_SUITES, ...args], { env: { ...process.env, ...env }, encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT, }); } diff --git a/test/e2e/README.md b/test/e2e/README.md index ae3d4a6ef1..a098c4960d 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -3,8 +3,8 @@ # E2E Setup Scenario Matrix -This directory hosts NemoClaw's end-to-end tests, organized around **setup -scenarios** rather than per-workflow shell scripts. +This directory hosts NemoClaw's end-to-end tests, organized around +**setup scenarios** rather than per-workflow shell scripts. ## Core model @@ -24,27 +24,25 @@ setup scenario → expected state config → suite sequence [`suites.yaml`](suites.yaml). Suites consume `.e2e/context.env` and do not re-run install or onboarding. -The runner resolves a scenario, prints a plan, runs setup/install/ -onboarding once, validates the expected state, and then runs the scenario's -ordered suites against the resulting environment. 
- -## Sparse matrix - -The initial matrix is deliberately sparse — three scenarios covering three -common setup paths: +## Scenario catalog (current) | Scenario | Platform | Install | Runtime | Onboarding | Expected state | |---|---|---|---|---|---| | `ubuntu-repo-cloud-openclaw` | `ubuntu-local` | `repo-current` | `docker-running` | `cloud-openclaw` | `cloud-openclaw-ready` | | `ubuntu-repo-cloud-hermes` | `ubuntu-local` | `repo-current` | `docker-running` | `cloud-hermes` | `cloud-hermes-ready` | | `gpu-repo-local-ollama-openclaw` | `gpu-runner` | `repo-current` | `gpu-docker-cdi` | `local-ollama-openclaw` | `local-ollama-openclaw-ready` | +| `macos-repo-cloud-openclaw` | `macos-local` | `repo-current` | `docker-running` | `cloud-openclaw` | `cloud-openclaw-ready` | +| `wsl-repo-cloud-openclaw` | `wsl-local` | `repo-current` | `docker-running` | `cloud-openclaw` | `cloud-openclaw-ready` | +| `brev-launchable-cloud-openclaw` | `brev-launchable` | `launchable` | `docker-running` | `cloud-openclaw` | `cloud-openclaw-ready` | +| `ubuntu-no-docker-preflight-negative` | `ubuntu-local` | `repo-current` | `docker-missing` | `cloud-openclaw` | `preflight-failure-no-sandbox` | -Additional scenarios (macOS, WSL, Brev/launchable, DGX Spark, negative -preflight) are migrated incrementally in later phases. The matrix is not -meant to be Cartesian — each scenario should exist because a real current -coverage path needs it. +The matrix is deliberately not Cartesian — each scenario exists because a +real current coverage path needs it. Additional scenarios (e.g. onboard +resume, rebuild-preserves-presets) land incrementally; see +[`suites/*/README.md`](suites) for the roadmap informed by the UAT / NV QA +bug hotspot analysis. 
-## Files +## File layout ```text test/e2e/ @@ -52,28 +50,70 @@ test/e2e/ expected-states.yaml # reusable expected state contracts suites.yaml # ordered suite definitions README.md # this file + + run-scenario.sh # main entry; resolve → plan → setup → validate + run-suites.sh # suite step runner + coverage-report.sh # Markdown coverage matrix + + resolver/ # TypeScript plan + validator + coverage + index.ts load.ts plan.ts schema.ts validator.ts coverage.ts + js-yaml.d.ts + + lib/ # shared shell scaffolding, organized by role + artifacts.sh # best-effort artifact collection + cleanup.sh # trap helpers (wraps sandbox-teardown.sh) + context.sh # .e2e/context.env key/value store + emit-context-from-plan.sh + env.sh # non-interactive env + trace + dry-run + install-path-refresh.sh # (existing helper; preserved) + sandbox-teardown.sh # (existing helper; preserved) + + setup/ # dimension dispatchers + install.sh # e2e_install: repo-checkout | curl-install-script | ... + onboard.sh # e2e_onboard: cloud-openclaw | cloud-hermes | ... + + assert/ # outcome assertions + gateway-alive.sh + sandbox-alive.sh + # (helpers for inference-works, no-credentials-leaked, policy-preset-applied + # land with their first consuming suite.) 
+ + fixtures/ # reusable scenario fixtures (see README for roadmap) + + suites/ # functional suites, grouped by scenario area + smoke/ # baseline: cli, gateway, sandbox, shell + onboarding/ # onboarding lifecycle (Hermes today; more on the way) + inference/ # cloud, ollama-gpu, ollama-auth-proxy + security/ # credentials today; shields / rebuild-preserves-presets planned + platform/ # macos, wsl (spark planned) + # lifecycle/ sandbox/ messaging/ — dir + README committed; suites to land ``` -Runner scripts live alongside the metadata: - -- `run-scenario.sh [--plan-only|--dry-run]` resolves a scenario, - prints the plan, writes `${E2E_CONTEXT_DIR:-.e2e}/plan.json`, and (in - non-plan-only mode) drives setup → install → onboard → gateway check - → sandbox check → expected-state validation. In `--dry-run` mode each - helper short-circuits and emits a trace line to `E2E_TRACE_FILE` if - set — useful for integration tests and for reviewing scenario wiring. -- `run-suites.sh ...` reads `.e2e/context.env` and runs one - or more suites' ordered step scripts, failing fast on the first - non-zero step and printing a PASS/FAIL summary. -- `coverage-report.sh` prints a Markdown coverage report. The +## Runner contracts + +- `run-scenario.sh [--plan-only|--dry-run]` + - `--plan-only`: resolve and print plan, write + `${E2E_CONTEXT_DIR:-.e2e}/plan.json`. No install/onboard/suites. + - `--dry-run` (`E2E_DRY_RUN=1`): helpers short-circuit; each one writes a + trace line to `$E2E_TRACE_FILE` if set. The expected-state validator + runs with `--probes-from-state` so the declared state acts as a fake + probe source; targeted probe failures are simulated with + `E2E_PROBE_OVERRIDE_=value`. + - Live mode (no flags): runs the full setup path. The validator requires + real probe values; it fails closed rather than self-validating against + the declared state. 
+- `run-suites.sh ...`: reads `.e2e/context.env`, runs one or + more suites' ordered step scripts, fails fast on the first non-zero + step, prints a PASS/FAIL summary. +- `coverage-report.sh`: prints a Markdown coverage report. The `e2e-scenarios` workflow appends the same report to `GITHUB_STEP_SUMMARY`. -The TypeScript resolver lives under `resolver/` and is invoked via +The TypeScript resolver is invoked via `tsx resolver/index.ts {plan|validate-state|coverage}`. Shell wrappers -call it so runners and CI need only `bash`. +call it so runners and CI need only `bash` + a lockfile-pinned `tsx`. -Overriding the artifact directory: set `E2E_CONTEXT_DIR=` so local +Override the artifact directory with `E2E_CONTEXT_DIR=` so local runs and tests do not clobber the repo-root `.e2e/`. The directory is gitignored. @@ -82,16 +122,18 @@ gitignored. 1. Pick (or add) profiles for platform, install, runtime, and onboarding in `scenarios.yaml`. Reuse existing profiles when possible. 2. Add a scenario entry under `setup_scenarios:` with a kebab-case ID that - encodes the distinguishing dimensions. + encodes the distinguishing dimensions. **The first segment must be the + platform prefix** (e.g. `ubuntu-`, `macos-`, `wsl-`, `gpu-`, `brev-`) + so the `e2e-scenarios.yaml` workflow can route the run to the correct + runner. 3. Reference exactly one `expected_state` (singular; string key). 4. List the `suites` to run, in execution order. 5. If an appropriate expected state does not exist, add one to `expected-states.yaml`. Keep keys structural, not behavioral. 6. If an appropriate suite does not exist, add one to `suites.yaml` and - land its scripts under `suites//`. Suites must consume - `.e2e/context.env`, not rediscover scenario state. -7. Validate references with `bash test/e2e/run-scenario.sh --plan-only` - (once the resolver lands). + land its scripts under `suites///`. Suites must + consume `.e2e/context.env`, not rediscover scenario state. +7. 
Validate references with `bash test/e2e/run-scenario.sh --plan-only`. ## Adding a new expected state @@ -111,3 +153,12 @@ Add a new key under `suites:` in `suites.yaml`: Keep suites narrowly scoped and idempotent. Suites must not install, onboard, or otherwise mutate setup state. + +## Roadmap (from UAT / NV QA bug hotspot analysis) + +Placeholder READMEs under `lib/{setup,assert,fixtures}/` and +`suites/{onboarding,sandbox,lifecycle,security,messaging}/` track the +scenarios that migrate in next, informed by the 446 UAT / NV QA issues +traced during planning. Each README names the originating bug class and +the legacy script (where one exists) so rewiring and coverage gaps remain +visible in the repo. diff --git a/test/e2e/coverage-report.sh b/test/e2e/coverage-report.sh index f4ef473302..8649569157 100755 --- a/test/e2e/coverage-report.sh +++ b/test/e2e/coverage-report.sh @@ -16,5 +16,11 @@ TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx" if [[ -x "${TSX_BIN}" ]]; then "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" coverage else - (cd "${REPO_ROOT}" && npx --yes tsx "${SCRIPT_DIR}/resolver/index.ts" coverage) + # CodeRabbit review items #3, #10: use the lockfile-pinned tsx (no network + # fetch); probe first so a failing report is not misreported as missing tsx. + if ! (cd "${REPO_ROOT}" && npx --no-install tsx --version >/dev/null 2>&1); then + echo "coverage-report: tsx not available. Run 'npm ci' at the repo root to install devDependencies." >&2 + exit 1 + fi + (cd "${REPO_ROOT}" && npx --no-install tsx "${SCRIPT_DIR}/resolver/index.ts" coverage) fi diff --git a/test/e2e/lib/assert/README.md b/test/e2e/lib/assert/README.md new file mode 100644 index 0000000000..e1f15458cd --- /dev/null +++ b/test/e2e/lib/assert/README.md @@ -0,0 +1,22 @@ + + + +# Assertion helpers + +Outcome checks that multiple suites share. Each helper prints a one-line +PASS/FAIL status and returns 0 on success, non-zero on failure. 
+ +## Current + +| Helper | What it asserts | +|---|---| +| `gateway-alive.sh` | Gateway container is present and HTTP-healthy at `E2E_GATEWAY_URL`. | +| `sandbox-alive.sh` | Named sandbox is registered and in `Running` phase. | + +## Planned (from UAT/NV QA hotspot analysis) + +| Helper | First consumer | Purpose | +|---|---|---| +| `inference-works.sh` | `inference/cloud/`, `inference/ollama-gpu/` | Single round-trip chat-completion assertion against whichever gateway route is active. | +| `no-credentials-leaked.sh` | `security/credentials/`, `security/rebuild-preserves-presets/` | Scan migration bundle + blueprint digest + sandbox filesystem for credential patterns. Covers the UAT #1912 / credential-sanitization class. | +| `policy-preset-applied.sh` | `security/shields/`, `security/rebuild-preserves-presets/` | Verify the declared policy presets are actually in the gateway's active policy (UAT #1952, #2010 class). | diff --git a/test/e2e/lib/gateway.sh b/test/e2e/lib/assert/gateway-alive.sh similarity index 95% rename from test/e2e/lib/gateway.sh rename to test/e2e/lib/assert/gateway-alive.sh index a101e3ffff..42e98b362b 100755 --- a/test/e2e/lib/gateway.sh +++ b/test/e2e/lib/assert/gateway-alive.sh @@ -4,7 +4,7 @@ # # Gateway helpers. -_E2E_GW_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +_E2E_GW_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # shellcheck source=env.sh . "${_E2E_GW_LIB_DIR}/env.sh" # shellcheck source=context.sh diff --git a/test/e2e/lib/sandbox.sh b/test/e2e/lib/assert/sandbox-alive.sh similarity index 72% rename from test/e2e/lib/sandbox.sh rename to test/e2e/lib/assert/sandbox-alive.sh index 52ffbb934c..e8528d09e1 100755 --- a/test/e2e/lib/sandbox.sh +++ b/test/e2e/lib/assert/sandbox-alive.sh @@ -4,7 +4,7 @@ # # Sandbox helpers. -_E2E_SB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +_E2E_SB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # shellcheck source=env.sh . 
"${_E2E_SB_LIB_DIR}/env.sh" # shellcheck source=context.sh @@ -28,7 +28,10 @@ e2e_sandbox_assert_running() { echo "e2e_sandbox_assert_running: nemoclaw CLI not on PATH" >&2 return 1 fi - if ! nemoclaw list 2>/dev/null | grep -q -E "^|[[:space:]]${name}[[:space:]]|${name}\$"; then + # Match ${name} as a whole token at start of line or surrounded by + # whitespace/line boundary (the earlier "^|..." regex had an empty + # first alternative that always matched — CodeRabbit review item #7). + if ! nemoclaw list 2>/dev/null | grep -qE "(^|[[:space:]])${name}([[:space:]]|$)"; then echo "e2e_sandbox_assert_running: sandbox '${name}' not found in 'nemoclaw list'" >&2 return 1 fi diff --git a/test/e2e/lib/context.sh b/test/e2e/lib/context.sh index 5160226e27..7061f16fb7 100755 --- a/test/e2e/lib/context.sh +++ b/test/e2e/lib/context.sh @@ -52,16 +52,43 @@ e2e_context_path() { printf '%s\n' "${E2E_CONTEXT_DIR}/context.env" } +# CodeRabbit review item #4: validate that KEY is a plain POSIX identifier +# (so we never interpolate metacharacters into grep regexes) and that VALUE +# has no newlines or control characters that could break the line-oriented +# context.env format. +_e2e_context_validate_key() { + local key="${1:-}" + if [[ -z "${key}" ]]; then + echo "e2e_context: missing key" >&2 + return 2 + fi + if [[ ! "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then + echo "e2e_context: invalid key (POSIX identifier required): ${key}" >&2 + return 2 + fi +} + +_e2e_context_validate_value() { + local value="${1-}" + # Reject newlines that would corrupt the line-oriented context.env + # format. We deliberately do not reject all control characters since + # tabs and escape sequences can appear in legitimate values (e.g. test + # fixtures that seed tracing markers). Newlines are the only format + # break. (CodeRabbit review item #4.) 
+ if [[ "${value}" == *$'\n'* ]] || [[ "${value}" == *$'\r'* ]]; then + echo "e2e_context: value contains newline characters; reject" >&2 + return 2 + fi +} + # e2e_context_set KEY VALUE # Appends or updates a single key in context.env. Value is written literally; # callers are responsible for not embedding newlines. e2e_context_set() { local key="${1:-}" local value="${2:-}" - if [[ -z "${key}" ]]; then - echo "e2e_context_set: missing key" >&2 - return 2 - fi + _e2e_context_validate_key "${key}" || return 2 + _e2e_context_validate_value "${value}" || return 2 _e2e_context_resolve_dir local ctx="${E2E_CONTEXT_DIR}/context.env" if [[ ! -f "${ctx}" ]]; then @@ -80,6 +107,7 @@ e2e_context_set() { # Prints the value of KEY (empty if missing). Does not fail. e2e_context_get() { local key="${1:-}" + _e2e_context_validate_key "${key}" || return 2 _e2e_context_resolve_dir local ctx="${E2E_CONTEXT_DIR}/context.env" [[ -f "${ctx}" ]] || return 0 @@ -96,6 +124,7 @@ e2e_context_require() { local missing=() local key value for key in "$@"; do + _e2e_context_validate_key "${key}" || return 2 if [[ -f "${ctx}" ]]; then value="$(grep "^${key}=" "${ctx}" | tail -n1 || true)" value="${value#"${key}"=}" diff --git a/test/e2e/lib/emit-context-from-plan.sh b/test/e2e/lib/emit-context-from-plan.sh index 268fa382f5..407b7d767f 100755 --- a/test/e2e/lib/emit-context-from-plan.sh +++ b/test/e2e/lib/emit-context-from-plan.sh @@ -39,6 +39,13 @@ read_plan_value() { } SCENARIO_ID="$(read_plan_value scenario_id)" +if [[ -z "${SCENARIO_ID}" ]]; then + # Fail fast when the plan is missing its scenario id (CodeRabbit review + # item #5). Downstream helpers all index context by scenario and will + # silently misbehave if this is empty. 
+ echo "emit-context-from-plan: plan.json is missing 'scenario_id': ${PLAN_JSON}" >&2 + exit 2 +fi PLATFORM_OS="$(read_plan_value dimensions.platform.profile.os)" EXECUTION_TARGET="$(read_plan_value dimensions.platform.profile.execution_target)" INSTALL_METHOD="$(read_plan_value dimensions.install.profile.method)" diff --git a/test/e2e/lib/fixtures/README.md b/test/e2e/lib/fixtures/README.md new file mode 100644 index 0000000000..5232f39e32 --- /dev/null +++ b/test/e2e/lib/fixtures/README.md @@ -0,0 +1,24 @@ + + + +# Fixtures + +Reusable scenario fixtures that start/stop test doubles or prepare +preconditions shared across multiple suites. + +## Planned fixtures (from UAT/NV QA hotspot analysis) + +| Fixture | First consumer | Purpose | +|---|---|---| +| `fake-openai.sh` | `inference/cloud/` fast-mode variant | Start/stop a local OpenAI-compatible endpoint so inference assertions can run on PR CI without hitting real NVIDIA endpoints. Targets the 12 real-cloud tests that today flake on `integrate.api.nvidia.com` latency (UAT #2600). | +| `fake-telegram.sh` | `messaging/providers/` | Local Telegram API stub. Removes dependency on real `api.telegram.org` in CI. | +| `older-base-image.sh` | `sandbox/rebuild-openclaw/`, `sandbox/rebuild-hermes/`, `sandbox/upgrade-stale/` | Pull an older base image tag from ghcr + build a temporary Dockerfile that pins the prior OpenClaw version. Dedupes the three hand-rolled implementations the original E2E tests share. | + +## Contract + +Each fixture must expose: + +- `fixture__up` — start; block until ready; export required env vars. +- `fixture__down` — stop; idempotent; safe from trap. + +Failure in `_up` must be fatal; failure in `_down` must log and continue. diff --git a/test/e2e/lib/setup/README.md b/test/e2e/lib/setup/README.md new file mode 100644 index 0000000000..9878726c7e --- /dev/null +++ b/test/e2e/lib/setup/README.md @@ -0,0 +1,22 @@ + + + +# Setup helpers + +Scenario-setup dispatchers. 
Each file owns one setup dimension. The runner +(`run-scenario.sh`) sources the dispatcher and calls the dimension-level +entry point; the dispatcher routes by the profile id from `scenarios.yaml`. + +| File | Dimension | Entry point | Routes by | +|---|---|---|---| +| `install.sh` | install method | `e2e_install` | `install.method` (e.g. `repo-checkout`, `curl-install-script`, `brev-launchable`) | +| `onboard.sh` | onboarding path | `e2e_onboard` | `onboarding.agent` + `onboarding.provider` (e.g. `cloud-openclaw`, `cloud-hermes`, `local-ollama-openclaw`) | + +All setup helpers honour `E2E_DRY_RUN=1` (short-circuit with a trace line) +and write canonical context keys to `$E2E_CONTEXT_DIR/context.env` via +`lib/context.sh`. + +Reuses the existing shell helpers rather than duplicating them: + +- `install.sh` sources `lib/install-path-refresh.sh` +- `cleanup.sh` (sibling at `lib/`) sources `lib/sandbox-teardown.sh` diff --git a/test/e2e/lib/install.sh b/test/e2e/lib/setup/install.sh similarity index 58% rename from test/e2e/lib/install.sh rename to test/e2e/lib/setup/install.sh index 8adbc70596..b947543df2 100755 --- a/test/e2e/lib/install.sh +++ b/test/e2e/lib/setup/install.sh @@ -5,7 +5,7 @@ # Install helper: exposes a single `e2e_install` entrypoint that dispatches # by install method and honours E2E_DRY_RUN. -_E2E_INSTALL_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +_E2E_INSTALL_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # shellcheck source=env.sh . "${_E2E_INSTALL_LIB_DIR}/env.sh" @@ -51,5 +51,25 @@ e2e_install_from_repo_checkout() { } e2e_install_from_public_curl() { - curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh | bash + # Pin the installer source so CI runs do not implicitly follow main's + # head (CodeRabbit review item #6). Callers override E2E_INSTALLER_URL + # or E2E_INSTALLER_SHA256 to pin to a specific revision / digest. 
+ local url="${E2E_INSTALLER_URL:-https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh}" + local sha256="${E2E_INSTALLER_SHA256:-}" + local tmp + tmp="$(mktemp -t nemoclaw-installer.XXXXXX.sh)" + trap 'rm -f "${tmp}"; trap - RETURN' RETURN # self-clearing: a RETURN trap set here would otherwise fire again in the caller, where tmp is unset + if ! curl -fsSL --retry 3 --retry-delay 2 -o "${tmp}" "${url}"; then + echo "e2e_install_from_public_curl: failed to download ${url}" >&2 + return 1 + fi + if [[ -n "${sha256}" ]]; then + local got + got="$( { sha256sum "${tmp}" 2>/dev/null || shasum -a 256 "${tmp}" 2>/dev/null; } | awk '{print $1}')" # coreutils sha256sum first, shasum as fallback + if [[ "${got}" != "${sha256}" ]]; then + echo "e2e_install_from_public_curl: sha256 mismatch (expected ${sha256}, got ${got})" >&2 + return 1 + fi + fi + bash "${tmp}" } diff --git a/test/e2e/lib/onboard.sh b/test/e2e/lib/setup/onboard.sh similarity index 95% rename from test/e2e/lib/onboard.sh rename to test/e2e/lib/setup/onboard.sh index 0b3bd63e2c..efaa48946f 100755 --- a/test/e2e/lib/onboard.sh +++ b/test/e2e/lib/setup/onboard.sh @@ -4,7 +4,7 @@ # # Onboard helper. Dispatches by onboarding profile id and honors dry-run. -_E2E_ONBOARD_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +_E2E_ONBOARD_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" # shellcheck source=env.sh . "${_E2E_ONBOARD_LIB_DIR}/env.sh" # shellcheck source=context.sh diff --git a/test/e2e/resolver/index.ts b/test/e2e/resolver/index.ts index e79d2932bb..63c35ad29c 100644 --- a/test/e2e/resolver/index.ts +++ b/test/e2e/resolver/index.ts @@ -30,11 +30,13 @@ function parseArgs(argv: string[]): { scenarioId?: string; contextDir: string; metadataDir: string; + probesFromState: boolean; } { const args = argv.slice(2); const command = args.shift() ?? ""; let scenarioId: string | undefined; let contextDir = process.env.E2E_CONTEXT_DIR ?? ".e2e"; + let probesFromState = false; const scriptDir = path.dirname(fileURLToPath(import.meta.url)); // resolver/ lives under test/e2e/, so metadata dir is one level up. 
let metadataDir = path.resolve(scriptDir, ".."); @@ -48,6 +50,13 @@ function parseArgs(argv: string[]): { const v = args.shift(); if (!v) throw new Error("--metadata-dir requires a value"); metadataDir = v; + } else if (a === "--probes-from-state") { + // Dry-run affordance: seed probes from the expected state itself so + // the validator can exercise its logic without real probe values. + // Non-dry-run callers MUST NOT pass this flag (CodeRabbit review + // item #9); the resolver will fail closed when required probe keys + // are missing without this flag. + probesFromState = true; } else if (a && !a.startsWith("--") && !scenarioId) { scenarioId = a; } else if (a === "--help" || a === "-h") { @@ -56,7 +65,7 @@ function parseArgs(argv: string[]): { throw new Error(`unexpected argument: ${a}`); } } - return { command, scenarioId, contextDir, metadataDir }; + return { command, scenarioId, contextDir, metadataDir, probesFromState }; } function main(): number { @@ -95,7 +104,12 @@ function main(): number { return 0; } if (command === "validate-state") { - const probes = probesFromEnvAndState(plan.expected_state.config); + // CodeRabbit review item #9: only self-seed probes when the caller + // explicitly opts in (dry-run / test contexts). Non-dry-run callers + // without real probes wired should fail, not quietly self-validate. + const probes = parsed.probesFromState + ? probesFromEnvAndState(plan.expected_state.config) + : probesFromEnvOnly(); const report = validateExpectedState({ stateId: plan.expected_state.id, state: plan.expected_state.config, @@ -138,14 +152,31 @@ function flattenState( } } +/** + * Read probe overrides from the environment without seeding from state. + * + * Used in non-dry-run mode: the validator then reports a concrete failure + * for any expected-state key that has no corresponding probe value. 
+ */ +function probesFromEnvOnly(): ProbeResults { + const probes: ProbeResults = {}; + const prefix = "E2E_PROBE_OVERRIDE_"; + for (const [envKey, value] of Object.entries(process.env)) { + if (!envKey.startsWith(prefix) || value === undefined) continue; + const key = envKey.slice(prefix.length).toLowerCase().replace(/_/g, "."); + probes[key] = coerceProbeValue(value); + } + return probes; +} + /** * Build a probe results map. * - * In dry-run mode we do not probe real services; instead we default every - * expected-state leaf to its declared value so the validator passes, and - * then allow targeted overrides via E2E_PROBE_OVERRIDE_=value. This - * lets tests simulate specific failure modes without spinning up a real - * gateway or sandbox. + * In dry-run / test mode we do not probe real services; instead we default + * every expected-state leaf to its declared value so the validator passes, + * and then allow targeted overrides via E2E_PROBE_OVERRIDE_=value. + * This lets tests simulate specific failure modes without spinning up a + * real gateway or sandbox. */ function probesFromEnvAndState(state: unknown): ProbeResults { const probes: ProbeResults = {}; diff --git a/test/e2e/run-scenario.sh b/test/e2e/run-scenario.sh index cf4113086c..6046165014 100755 --- a/test/e2e/run-scenario.sh +++ b/test/e2e/run-scenario.sh @@ -87,8 +87,13 @@ fi run_resolver() { if [[ -n "${TSX_BIN}" ]]; then "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" "$@" - else - (cd "${REPO_ROOT}" && npx --yes tsx "${SCRIPT_DIR}/resolver/index.ts" "$@") + return + fi + # CodeRabbit review item #10: no network fetch; probe for tsx first so resolver failures keep their own exit status. + if ! (cd "${REPO_ROOT}" && npx --no-install tsx --version >/dev/null 2>&1); then + echo "run-scenario: tsx is required but not installed. Run 'npm ci' at the repo root and retry." >&2 + return 1 fi + (cd "${REPO_ROOT}" && npx --no-install tsx "${SCRIPT_DIR}/resolver/index.ts" "$@") } @@ -105,14 +110,14 @@ fi . "${SCRIPT_DIR}/lib/env.sh" # shellcheck source=lib/context.sh . 
"${SCRIPT_DIR}/lib/context.sh" -# shellcheck source=lib/install.sh -. "${SCRIPT_DIR}/lib/install.sh" -# shellcheck source=lib/onboard.sh -. "${SCRIPT_DIR}/lib/onboard.sh" -# shellcheck source=lib/gateway.sh -. "${SCRIPT_DIR}/lib/gateway.sh" -# shellcheck source=lib/sandbox.sh -. "${SCRIPT_DIR}/lib/sandbox.sh" +# shellcheck source=lib/setup/install.sh +. "${SCRIPT_DIR}/lib/setup/install.sh" +# shellcheck source=lib/setup/onboard.sh +. "${SCRIPT_DIR}/lib/setup/onboard.sh" +# shellcheck source=lib/assert/gateway-alive.sh +. "${SCRIPT_DIR}/lib/assert/gateway-alive.sh" +# shellcheck source=lib/assert/sandbox-alive.sh +. "${SCRIPT_DIR}/lib/assert/sandbox-alive.sh" # Apply standard non-interactive env (and trace it). e2e_env_apply_noninteractive @@ -154,7 +159,14 @@ e2e_sandbox_assert_running # overrides; wiring real probes through the validator happens as # scenarios migrate. if [[ "${E2E_VALIDATE_EXPECTED_STATE:-0}" == "1" || "${DRY_RUN}" -ne 1 ]]; then - if ! run_resolver validate-state "${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}"; then + validate_args=("${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}") + if [[ "${DRY_RUN}" -eq 1 ]]; then + # CodeRabbit review item #9: explicitly opt in to seeding probes from + # the expected state in dry-run/test mode. Live runs go through real + # probes and must fail closed if any are missing. + validate_args+=(--probes-from-state) + fi + if ! run_resolver validate-state "${validate_args[@]}"; then echo "run-scenario: expected-state validation failed; suites will NOT run" >&2 exit 3 fi @@ -165,5 +177,10 @@ if [[ "${DRY_RUN}" -eq 1 ]]; then exit 0 fi -echo "run-scenario: full suite execution is not implemented yet (Phase 9 migrates additional scenarios)" >&2 -exit 0 +# CodeRabbit review item #11: do not exit 0 when no suites were executed. 
+# Full suite execution against a live environment lands in subsequent +# scenarios; calling run-scenario.sh in non-dry-run mode must not masquerade +# as success until that wiring exists for the requested scenario. +echo "run-scenario: full suite execution is not implemented yet for this scenario." >&2 +echo "run-scenario: pass --dry-run to exercise the plan+context path, or run the suite runner directly with a live environment." >&2 +exit 4 diff --git a/test/e2e/suites.yaml b/test/e2e/suites.yaml index 716e00f9ec..e6bee35864 100644 --- a/test/e2e/suites.yaml +++ b/test/e2e/suites.yaml @@ -38,18 +38,18 @@ suites: inference.expected: available steps: - id: models-health - script: suites/inference/00-models-health.sh + script: suites/inference/cloud/00-models-health.sh - id: chat-completion - script: suites/inference/01-chat-completion.sh + script: suites/inference/cloud/01-chat-completion.sh - id: sandbox-inference-local - script: suites/inference/02-inference-local-from-sandbox.sh + script: suites/inference/cloud/02-inference-local-from-sandbox.sh credentials: requires_state: credentials.expected: present steps: - id: credentials-present - script: suites/credentials/00-credentials-present.sh + script: suites/security/credentials/00-credentials-present.sh local-ollama-inference: requires_state: @@ -58,9 +58,9 @@ suites: inference.expected: available steps: - id: ollama-models-health - script: suites/local-ollama-inference/00-ollama-models-health.sh + script: suites/inference/ollama-gpu/00-ollama-models-health.sh - id: ollama-chat-completion - script: suites/local-ollama-inference/01-ollama-chat-completion.sh + script: suites/inference/ollama-gpu/01-ollama-chat-completion.sh ollama-proxy: requires_state: @@ -68,7 +68,7 @@ suites: sandbox.status: running steps: - id: proxy-reachable - script: suites/ollama-proxy/00-proxy-reachable.sh + script: suites/inference/ollama-auth-proxy/00-proxy-reachable.sh platform-macos: requires_state: @@ -76,7 +76,7 @@ suites: 
sandbox.status: running steps: - id: macos-smoke - script: suites/platform-macos/00-macos-smoke.sh + script: suites/platform/macos/00-macos-smoke.sh platform-wsl: requires_state: @@ -84,7 +84,7 @@ suites: sandbox.status: running steps: - id: wsl-smoke - script: suites/platform-wsl/00-wsl-smoke.sh + script: suites/platform/wsl/00-wsl-smoke.sh hermes-specific: requires_state: @@ -93,4 +93,4 @@ suites: sandbox.agent: hermes steps: - id: hermes-health - script: suites/hermes-specific/00-hermes-health.sh + script: suites/onboarding/hermes/00-hermes-health.sh diff --git a/test/e2e/suites/inference/00-models-health.sh b/test/e2e/suites/inference/cloud/00-models-health.sh similarity index 94% rename from test/e2e/suites/inference/00-models-health.sh rename to test/e2e/suites/inference/cloud/00-models-health.sh index 31b998b161..05aa133b48 100755 --- a/test/e2e/suites/inference/00-models-health.sh +++ b/test/e2e/suites/inference/cloud/00-models-health.sh @@ -8,7 +8,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/inference/01-chat-completion.sh b/test/e2e/suites/inference/cloud/01-chat-completion.sh similarity index 81% rename from test/e2e/suites/inference/01-chat-completion.sh rename to test/e2e/suites/inference/cloud/01-chat-completion.sh index 316539a588..1d2a05888b 100755 --- a/test/e2e/suites/inference/01-chat-completion.sh +++ b/test/e2e/suites/inference/cloud/01-chat-completion.sh @@ -7,7 +7,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . 
"${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh @@ -25,8 +25,9 @@ url="$(e2e_context_get E2E_GATEWAY_URL)" payload='{"model":"default","messages":[{"role":"user","content":"say ok"}],"max_tokens":8}' response="$(curl -fsS --max-time 30 -H 'Content-Type: application/json' \ -d "${payload}" "${url%/}/v1/chat/completions")" -echo "${response}" | head -c 1024 -echo +# CodeRabbit review item #12: substring expansion instead of `| head` +# avoids SIGPIPE-driven false failures under `set -o pipefail`. +printf '%s\n' "${response:0:1024}" if [[ -z "${response}" ]]; then echo "inference:chat-completion: empty response" >&2 exit 1 diff --git a/test/e2e/suites/inference/02-inference-local-from-sandbox.sh b/test/e2e/suites/inference/cloud/02-inference-local-from-sandbox.sh similarity index 70% rename from test/e2e/suites/inference/02-inference-local-from-sandbox.sh rename to test/e2e/suites/inference/cloud/02-inference-local-from-sandbox.sh index 2a60a68325..4cf35e08d7 100755 --- a/test/e2e/suites/inference/02-inference-local-from-sandbox.sh +++ b/test/e2e/suites/inference/cloud/02-inference-local-from-sandbox.sh @@ -8,7 +8,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh @@ -24,6 +24,7 @@ fi name="$(e2e_context_get E2E_SANDBOX_NAME)" route="$(e2e_context_get E2E_INFERENCE_ROUTE)" -nemoclaw shell "${name}" -- curl -fsS --max-time 10 "http://${route}/v1/models" \ - | head -c 512 -echo +# CodeRabbit review item #13: capture then truncate to avoid `| head` racing +# curl under `pipefail` and flagging a successful request as failed. 
+body="$(nemoclaw shell "${name}" -- curl -fsS --max-time 10 "http://${route}/v1/models")" +printf '%s\n' "${body:0:512}" diff --git a/test/e2e/suites/ollama-proxy/00-proxy-reachable.sh b/test/e2e/suites/inference/ollama-auth-proxy/00-proxy-reachable.sh similarity index 93% rename from test/e2e/suites/ollama-proxy/00-proxy-reachable.sh rename to test/e2e/suites/inference/ollama-auth-proxy/00-proxy-reachable.sh index 107d8d87fa..876afef017 100755 --- a/test/e2e/suites/ollama-proxy/00-proxy-reachable.sh +++ b/test/e2e/suites/inference/ollama-auth-proxy/00-proxy-reachable.sh @@ -7,7 +7,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh b/test/e2e/suites/inference/ollama-gpu/00-ollama-models-health.sh similarity index 70% rename from test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh rename to test/e2e/suites/inference/ollama-gpu/00-ollama-models-health.sh index 2ee434a332..4d35243597 100755 --- a/test/e2e/suites/local-ollama-inference/00-ollama-models-health.sh +++ b/test/e2e/suites/inference/ollama-gpu/00-ollama-models-health.sh @@ -7,7 +7,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh @@ -20,5 +20,7 @@ if e2e_env_is_dry_run; then exit 0 fi url="$(e2e_context_get E2E_GATEWAY_URL)" -curl -fsS --max-time 10 "${url%/}/api/tags" | head -c 512 -echo +# CodeRabbit review item #14: capture then truncate; avoids `| head` causing +# curl to receive SIGPIPE mid-response under `pipefail`. 
+body="$(curl -fsS --max-time 10 "${url%/}/api/tags")" +printf '%s\n' "${body:0:512}" diff --git a/test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh b/test/e2e/suites/inference/ollama-gpu/01-ollama-chat-completion.sh similarity index 69% rename from test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh rename to test/e2e/suites/inference/ollama-gpu/01-ollama-chat-completion.sh index 9707a9b00d..34c54516df 100755 --- a/test/e2e/suites/local-ollama-inference/01-ollama-chat-completion.sh +++ b/test/e2e/suites/inference/ollama-gpu/01-ollama-chat-completion.sh @@ -7,7 +7,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh @@ -21,6 +21,8 @@ if e2e_env_is_dry_run; then fi url="$(e2e_context_get E2E_GATEWAY_URL)" payload='{"model":"default","messages":[{"role":"user","content":"say ok"}],"max_tokens":8}' -curl -fsS --max-time 30 -H 'Content-Type: application/json' \ - -d "${payload}" "${url%/}/v1/chat/completions" | head -c 1024 -echo +# CodeRabbit review item #15: capture then truncate; `curl | head` is brittle +# under `pipefail` and can fail successful requests. +body="$(curl -fsS --max-time 30 -H 'Content-Type: application/json' \ + -d "${payload}" "${url%/}/v1/chat/completions")" +printf '%s\n' "${body:0:1024}" diff --git a/test/e2e/suites/lifecycle/README.md b/test/e2e/suites/lifecycle/README.md new file mode 100644 index 0000000000..ec325898dc --- /dev/null +++ b/test/e2e/suites/lifecycle/README.md @@ -0,0 +1,24 @@ + + + +# Lifecycle suites + +Post-onboard CLI lifecycle: `list`, `status`, `destroy`, `stop`, `connect`, +and their reconciliation between registry / OpenShell / gateway state. + +This bucket is new. 
The CLI Entry + Gateway/Runtime hotspots (17 + 11 fix +PRs) concentrate bugs where registry state, live OpenShell state, and +gateway state drift out of sync during abnormal shutdown paths. Existing +`test-sandbox-operations.sh` covers the happy path only. + +## Planned (from UAT/NV QA hotspot analysis) + +| Suite | Originating bug class | +|---|---| +| `multi-sandbox-destroy/` | `nemoclaw destroy` kills shared dashboard port forward even when another sandbox is running (UAT #1690). | +| `stop-command-parity/` | `nemoclaw stop` only manages host cloudflared, leaves messaging bridges running inside sandbox (UAT #1825, #2103). | +| `ghost-reconciliation/` | `list` shows ghost sandboxes after gateway restart / reboot (UAT #1316). | +| `abnormal-shutdown-recovery/` | Kill gateway mid-operation; verify next command reconciles (UAT #1160, #2103 class). | + +All lifecycle suites require `gateway.health: healthy` and a reachable +registry. Most can reuse the `ubuntu-repo-cloud-openclaw` expected state. diff --git a/test/e2e/suites/messaging/README.md b/test/e2e/suites/messaging/README.md new file mode 100644 index 0000000000..91be38381b --- /dev/null +++ b/test/e2e/suites/messaging/README.md @@ -0,0 +1,24 @@ + + + +# Messaging suites + +Telegram, Discord, and Slack bridge behavior. + +Messaging always touches a policy preset OR `onboard.ts` — it is never +purely in the messaging module (§5.5 of the hotspot analysis). That +architectural entanglement means messaging suites benefit from running +against both fresh-onboard **and** post-rebuild scenario variants. + +## Planned (from UAT/NV QA hotspot analysis) + +| Suite | Originating bug class | Migrating from | +|---|---|---| +| `providers/` | Telegram + Discord provider / placeholder / L7-proxy chain with fake tokens. | `test-messaging-providers.sh` | +| `token-rotation/` | Rotating a messaging token triggers sandbox rebuild (UAT #1903). 
| `test-token-rotation.sh` | +| `telegram-injection/` | Shell command injection via Telegram bridge (PR #119 regression). | `test-telegram-injection.sh` (currently unwired) | +| `discord-facade/` | Local Discord facade emulates Discord Gateway+REST (PR #3293). | **NEW** — landed upstream during scenario-matrix development; not yet reflected in the matrix | + +Coverage gap explicitly called out by the hotspot analysis: no +messaging × rebuild × policy fixture today. The UAT #1952 (Telegram policy +lost on rebuild) bug literally proves this is a live hole. diff --git a/test/e2e/suites/onboarding/README.md b/test/e2e/suites/onboarding/README.md new file mode 100644 index 0000000000..d30625f3da --- /dev/null +++ b/test/e2e/suites/onboarding/README.md @@ -0,0 +1,31 @@ + + + +# Onboarding suites + +Suites that validate the onboarding lifecycle. Onboarding is the #1 UAT/NV QA +bug hotspot (62 traced fix PRs; `src/lib/onboard.ts` touched by 53 PRs), so +this bucket is deliberately the widest. + +## Current + +| Suite | Scenario | Covers | +|---|---|---| +| `hermes/` | `ubuntu-repo-cloud-hermes` | Hermes agent onboarding health check. 
| + +## Planned (from UAT/NV QA hotspot analysis) + +| Suite | Originating bug class | Migrating from | +|---|---|---| +| `smoke/` | Happy-path onboarding baseline | today's `test-full-e2e.sh` | +| `resume/` | Interrupted onboard → `--resume` completes (regression #446) | `test-onboard-resume.sh` (currently unwired) | +| `repair/` | Resume-repair + invalidation of missing sandboxes (regression #446) | `test-onboard-repair.sh` (currently unwired) | +| `double-onboard/` | Gateway reuse, stale-registry reconciliation, rebuild guidance (UAT #2174) | `test-double-onboard.sh` (currently unwired) | +| `provider-reconfig/` | Re-entering onboard with bad credentials (UAT #1568, #1912, #1960) | **NEW** | +| `gateway-restart-mid-onboard/` | Gateway healthy but provider setup fails (UAT #2020) | **NEW** | +| `skip-permissions/` | `--dangerously-skip-permissions` activates permissive policy (not Pending) | `test-skip-permissions-policy.sh` | + +Coverage gap explicitly called out by the hotspot analysis: the 7 scripts +prefixed with `test-onboard-` / `test-double-onboard` are written but **not +wired to any workflow today** (§1, E2E categorization). Rewiring them into +this directory is one of the highest-leverage moves in the migration. diff --git a/test/e2e/suites/hermes-specific/00-hermes-health.sh b/test/e2e/suites/onboarding/hermes/00-hermes-health.sh similarity index 93% rename from test/e2e/suites/hermes-specific/00-hermes-health.sh rename to test/e2e/suites/onboarding/hermes/00-hermes-health.sh index c6306ca1da..938f7a9cc1 100755 --- a/test/e2e/suites/hermes-specific/00-hermes-health.sh +++ b/test/e2e/suites/onboarding/hermes/00-hermes-health.sh @@ -8,7 +8,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . 
"${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/platform-macos/00-macos-smoke.sh b/test/e2e/suites/platform/macos/00-macos-smoke.sh similarity index 94% rename from test/e2e/suites/platform-macos/00-macos-smoke.sh rename to test/e2e/suites/platform/macos/00-macos-smoke.sh index eb9f2806a7..2239566f40 100755 --- a/test/e2e/suites/platform-macos/00-macos-smoke.sh +++ b/test/e2e/suites/platform/macos/00-macos-smoke.sh @@ -10,7 +10,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/platform-wsl/00-wsl-smoke.sh b/test/e2e/suites/platform/wsl/00-wsl-smoke.sh similarity index 93% rename from test/e2e/suites/platform-wsl/00-wsl-smoke.sh rename to test/e2e/suites/platform/wsl/00-wsl-smoke.sh index 538afb12cc..507d901724 100755 --- a/test/e2e/suites/platform-wsl/00-wsl-smoke.sh +++ b/test/e2e/suites/platform/wsl/00-wsl-smoke.sh @@ -8,7 +8,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/sandbox/README.md b/test/e2e/suites/sandbox/README.md new file mode 100644 index 0000000000..2cdfc0ed10 --- /dev/null +++ b/test/e2e/suites/sandbox/README.md @@ -0,0 +1,31 @@ + + + +# Sandbox suites + +Sandbox creation, rebuild, snapshot, and survival behavior. + +This bucket is new to the scenario-based runner. Three existing rebuild +tests share a hand-rolled "older-base-image" setup that lives in +`lib/fixtures/older-base-image.sh` in the new layout. 
+ +## Planned (from UAT/NV QA hotspot analysis) + +| Suite | Originating bug class | Migrating from | +|---|---|---| +| `operations/` | TC-SBX-01..11: sandbox ops (status, connect, destroy, multi-sandbox). | `test-sandbox-operations.sh` | +| `survival/` | Sandbox survives gateway restart (UAT #486, #888, #859, #1086). | `test-sandbox-survival.sh` | +| `snapshot/` | Snapshot create/list/restore lifecycle. | `test-snapshot-commands.sh` | +| `rebuild-openclaw/` | OpenClaw upgrade (NVBug 6076156): old image → rebuild → markers survive. | `test-rebuild-openclaw.sh` | +| `rebuild-hermes/` | Hermes upgrade path (older base → rebuild → verify state survived). | `test-rebuild-hermes.sh` | +| `upgrade-stale/` | `upgrade-sandboxes --check` detects stale sandbox (UAT #1904). | `test-upgrade-stale-sandbox.sh` | +| `runtime-overrides/` | Runtime config overrides (model, CORS) via short-lived containers. | `test-runtime-overrides.sh` | +| `rebuild-baseline/` | Rebuild lifecycle proofs (NVBug 6076156): version detection, state preservation. | `test-sandbox-rebuild.sh` | + +Coverage gaps explicitly called out by the hotspot analysis: + +- **A2 (Ollama) has zero sandbox-lifecycle coverage.** Ollama users hitting + rebuild/survival/token-rotation have no regression net today. +- **Policy preservation during rebuild is untested.** UAT #1952 (Telegram + policy lost on rebuild) + UAT #2010 (telegram policy apparently applied + but gateway blocks it) remain live blind spots. diff --git a/test/e2e/suites/security/README.md b/test/e2e/suites/security/README.md new file mode 100644 index 0000000000..9ee6ba73e5 --- /dev/null +++ b/test/e2e/suites/security/README.md @@ -0,0 +1,31 @@ + + + +# Security suites + +Shields, policy presets, credential handling, and secret-sanitization. + +Shields/Policy/Security is the #6 UAT/NV QA hotspot (15 fix PRs). 
The +surface has three layers (sandbox base policy, presets, user overrides) and +two enforcement points (gateway L7 proxy, OpenShell landlock); mismatches +surface as 403/denied/undefined-behavior and are hard to attribute. + +## Current + +| Suite | Scenario | Covers | +|---|---|---| +| `credentials/` | `ubuntu-repo-cloud-openclaw` | Asserts `$NVIDIA_API_KEY` is present and not leaked into the sandbox. | + +## Planned (from UAT/NV QA hotspot analysis) + +| Suite | Originating bug class | Migrating from | +|---|---|---| +| `credential-sanitization/` | Credentials stripped from migration bundles + blueprint digest checks. | `test-credential-sanitization.sh` (currently unwired — 805 LOC, prime re-wire candidate) | +| `shields/` | Shields down/up lifecycle + config get/set/rotate-token (UAT #3114). | `test-shields-config.sh` | +| `rebuild-preserves-presets/` | Rebuild drops policy presets (UAT #1952, #2010). | **NEW** — explicit coverage for the §5.1 cross-cutting blind spot | +| `shields-hermes/` | Hermes shields down fails (UAT #3168). | **NEW** — Hermes × shields crossover currently untested | +| `skip-permissions/` | `--dangerously-skip-permissions` activates permissive policy (not Pending). | `test-skip-permissions-policy.sh` | + +Coverage gap explicitly called out by the hotspot analysis (§5.1): the +Onboarding × Sandbox × Policy triple has no E2E test today. Adding +`rebuild-preserves-presets/` is the single highest-value net here. 
diff --git a/test/e2e/suites/credentials/00-credentials-present.sh b/test/e2e/suites/security/credentials/00-credentials-present.sh similarity index 93% rename from test/e2e/suites/credentials/00-credentials-present.sh rename to test/e2e/suites/security/credentials/00-credentials-present.sh index 5df36195b7..5594f853a9 100755 --- a/test/e2e/suites/credentials/00-credentials-present.sh +++ b/test/e2e/suites/security/credentials/00-credentials-present.sh @@ -7,7 +7,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" +LIB_DIR="$(cd "${SCRIPT_DIR}/../../../lib" && pwd)" # shellcheck source=../../lib/env.sh . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh diff --git a/test/e2e/suites/smoke/01-gateway-health.sh b/test/e2e/suites/smoke/01-gateway-health.sh index d29bb98847..cd569044be 100755 --- a/test/e2e/suites/smoke/01-gateway-health.sh +++ b/test/e2e/suites/smoke/01-gateway-health.sh @@ -12,8 +12,8 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh . "${LIB_DIR}/context.sh" -# shellcheck source=../../lib/gateway.sh -. "${LIB_DIR}/gateway.sh" +# shellcheck source=../../lib/assert/gateway-alive.sh +. "${LIB_DIR}/assert/gateway-alive.sh" echo "smoke:gateway-health" e2e_context_require E2E_GATEWAY_URL diff --git a/test/e2e/suites/smoke/02-sandbox-listed.sh b/test/e2e/suites/smoke/02-sandbox-listed.sh index 9ad45d081c..78bdabdf96 100755 --- a/test/e2e/suites/smoke/02-sandbox-listed.sh +++ b/test/e2e/suites/smoke/02-sandbox-listed.sh @@ -12,8 +12,8 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../lib" && pwd)" . "${LIB_DIR}/env.sh" # shellcheck source=../../lib/context.sh . "${LIB_DIR}/context.sh" -# shellcheck source=../../lib/sandbox.sh -. "${LIB_DIR}/sandbox.sh" +# shellcheck source=../../lib/assert/sandbox-alive.sh +. 
"${LIB_DIR}/assert/sandbox-alive.sh" echo "smoke:sandbox-listed" e2e_context_require E2E_SANDBOX_NAME From a3215e7a3ced7f391b0145cdbb7be61b7a01084a Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Mon, 11 May 2026 17:05:24 -0400 Subject: [PATCH 04/60] docs(e2e): add MIGRATION.md tracking legacy-to-scenario mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces test/e2e/MIGRATION.md — an in-tree tracker of legacy test-*.sh scripts being migrated to the scenario matrix. Per-wave completion is recorded there as the migration progresses. --- test/e2e/MIGRATION.md | 121 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 test/e2e/MIGRATION.md diff --git a/test/e2e/MIGRATION.md b/test/e2e/MIGRATION.md new file mode 100644 index 0000000000..6492808545 --- /dev/null +++ b/test/e2e/MIGRATION.md @@ -0,0 +1,121 @@ + + + +# E2E Migration Tracker + +This PR migrates all existing `test/e2e/test-*.sh` scripts into the +scenario-based runner introduced by PR #3290. Full deep migration +(Strategy B). Legacy scripts remain in the repo during this PR and run +in parallel for 1–2 nightly cycles after merge; a follow-up PR retires +them once parity is verified. + +**Merge gate:** All 40 legacy entry points must have a scenario-based +equivalent that produces the same PASS/FAIL outcomes as the legacy +script in a side-by-side CI run. 
+ +## Status summary + +| Bucket | Legacy LOC | Status | +|---|---:|---| +| Wave 0 — shared fixtures, asserts, setup split | — | ⬜ not started | +| Wave 1 — onboarding baseline | 1,101 | ⬜ | +| Wave 2 — onboarding lifecycle | 2,013 | ⬜ | +| Wave 3 — sandbox lifecycle | 2,891 | ⬜ | +| Wave 4 — rebuild / upgrade | 1,292 | ⬜ | +| Wave 5 — inference variants | 2,593 | ⬜ | +| Wave 6 — Hermes | 1,646 | ⬜ | +| Wave 7 — messaging | 3,397 | ⬜ | +| Wave 8 — security / policy | 2,241 | ⬜ | +| Wave 9 — runtime / platform services | 1,696 | ⬜ | +| Wave 10 — platform + remote | 1,589 | ⬜ | +| Wave 11 — misc | 405 | ⬜ | +| **Total** | **20,864** | **0 / 40 scripts migrated** | + +## Per-script tracker + +Legend: ⬜ not started · 🟨 in progress · ✅ migrated · 🔵 parity verified + +### Wave 1 — onboarding baseline + +- ⬜ `test-full-e2e.sh` (473) → `onboarding/happy-path/` + scenario `ubuntu-curl-cloud-openclaw` +- ⬜ `test-cloud-onboard-e2e.sh` (337) → `onboarding/public-installer/` +- ⬜ `test-cloud-inference-e2e.sh` (291) → extends `inference/cloud/` + +### Wave 2 — onboarding lifecycle + +- ⬜ `test-double-onboard.sh` (717) → `onboarding/double-onboard/` +- ⬜ `test-gpu-double-onboard.sh` (571) → `onboarding/double-onboard/` on GPU scenario +- ⬜ `test-onboard-repair.sh` (372) → `onboarding/repair/` +- ⬜ `test-onboard-resume.sh` (353) → `onboarding/resume/` + +### Wave 3 — sandbox lifecycle + +- ⬜ `test-sandbox-operations.sh` (828) → `sandbox/operations/` +- ⬜ `test-sandbox-survival.sh` (721) → `sandbox/survival/` +- ⬜ `test-snapshot-commands.sh` (281) → `sandbox/snapshot/` +- ⬜ `test-diagnostics.sh` (452) → `sandbox/diagnostics/` +- ⬜ `test-issue-2478-crash-loop-recovery.sh` (609) → `sandbox/crash-loop-recovery/` + +### Wave 4 — rebuild / upgrade + +- ⬜ `test-rebuild-openclaw.sh` (453) → `sandbox/rebuild-openclaw/` (uses `lib/fixtures/older-base-image.sh`) +- ⬜ `test-rebuild-hermes.sh` (401) → `sandbox/rebuild-hermes/` +- ⬜ `test-upgrade-stale-sandbox.sh` (241) → 
`sandbox/upgrade-stale/` +- ⬜ `test-sandbox-rebuild.sh` (197) → folded into `sandbox/rebuild-openclaw/` + +### Wave 5 — inference variants + +- ⬜ `test-gpu-e2e.sh` (565) → `inference/ollama-gpu/` (deep port) +- ⬜ `test-ollama-auth-proxy-e2e.sh` (548) → `inference/ollama-auth-proxy/` (deep port) +- ⬜ `test-inference-routing.sh` (715) → `inference/routing-errors/` +- ⬜ `test-kimi-inference-compat.sh` (765) → `inference/kimi-compat/` + +### Wave 6 — Hermes + +- ⬜ `test-hermes-e2e.sh` (591) → `onboarding/hermes/` (deep port; currently 1-step health) +- ⬜ `test-hermes-slack-e2e.sh` (537) → `messaging/slack/hermes/` +- ⬜ `test-hermes-discord-e2e.sh` (518) → `messaging/discord/hermes/` + +### Wave 7 — messaging + +- ⬜ `test-messaging-providers.sh` (1,677) → `messaging/providers/{telegram,discord,slack}/` +- ⬜ `test-token-rotation.sh` (575) → `messaging/token-rotation/` +- ⬜ `test-telegram-injection.sh` (475) → `security/telegram-injection/` +- ⬜ `test-messaging-compatible-endpoint.sh` (670) → `messaging/compatible-endpoint/` + +### Wave 8 — security / policy + +- ⬜ `test-shields-config.sh` (550) → `security/shields/` +- ⬜ `test-network-policy.sh` (579) → `security/network-policy/` +- ⬜ `test-credential-sanitization.sh` (810) → `security/credentials/sanitization/` +- ⬜ `test-credential-migration.sh` (302) → `security/credentials/migration/` + +### Wave 9 — runtime / platform services + +- ⬜ `test-runtime-overrides.sh` (272) → `sandbox/runtime-overrides/` +- ⬜ `test-overlayfs-autofix.sh` (537) → `sandbox/overlayfs-autofix/` +- ⬜ `test-device-auth-health.sh` (373) → `lifecycle/device-auth-health/` +- ⬜ `test-deployment-services.sh` (514) → `lifecycle/deployment-services/` + +### Wave 10 — platform + remote + +- ⬜ `test-spark-install.sh` (157) → `platform/spark/` +- ⬜ `test-launchable-smoke.sh` (589) → `platform/launchable/` +- ⬜ `brev-e2e.test.ts` (843) → `platform/brev-remote/` + +### Wave 11 — misc + +- ⬜ `test-skill-agent-e2e.sh` (244) → `onboarding/skill-agent/` +- ⬜ 
`test-docs-validation.sh` (161) → `lifecycle/docs-validation/` + +## Parallel verification + +Before merge, `.github/workflows/e2e-parity-compare.yaml` (Wave 0.F.1) +will run each migrated scenario next to its legacy counterpart and diff +PASS/FAIL per assertion via `test/e2e/parity-map.yaml` + +`scripts/e2e/compare-parity.sh`. + +Merge gate: **zero divergence**. Documented flaky assertions are +compared as "both-pass-or-both-fail" rather than strict equality. + +Internal plan document (not committed): `specs/2026-05-08_e2e-setup-scenario-matrix/migration-plan.md`. From 7ee5675310c3da86126ab71fcf1b48eb15004e14 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Mon, 11 May 2026 12:30:01 -0400 Subject: [PATCH 05/60] test(e2e): add failing tests for Phase 1 infrastructure Adds 30 tests covering Phase 1 deliverables from specs/2026-05-11_e2e-test-migration/ (tests.md 1.A-1.H): - 1.A logging helpers (lib/logging.sh) - 1.B sandbox-exec helper (lib/sandbox-exec.sh) - 1.C fixtures (fake-openai, fake-{telegram,discord,slack}, older-base-image) - 1.D assertion helpers (inference-works, no-credentials-leaked, policy-preset-applied, messaging-bridge-reachable) - 1.E install dispatcher splits (install-{repo,curl,ollama,launchable}.sh) - 1.F run-scenario.sh --validate-only flag - 1.G convention lint (scripts/e2e/lint-conventions.ts) - 1.H parity harness (scripts/e2e/compare-parity.sh + parity-map.yaml) All 30 tests fail as expected (red phase) — implementation follows.
--- test/e2e-convention-lint.test.ts | 211 +++++++++++++++ test/e2e-expected-state-validator.test.ts | 67 +++++ test/e2e-lib-helpers.test.ts | 301 ++++++++++++++++++++++ 3 files changed, 579 insertions(+) create mode 100644 test/e2e-convention-lint.test.ts diff --git a/test/e2e-convention-lint.test.ts b/test/e2e-convention-lint.test.ts new file mode 100644 index 0000000000..2be420aaee --- /dev/null +++ b/test/e2e-convention-lint.test.ts @@ -0,0 +1,211 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { spawnSync, type SpawnSyncReturns } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const REPO_ROOT = path.resolve(import.meta.dirname, ".."); +const LINT_BIN = path.join(REPO_ROOT, "scripts/e2e/lint-conventions.ts"); +const COMPARE_PARITY = path.join(REPO_ROOT, "scripts/e2e/compare-parity.sh"); +const PARITY_MAP_REAL = path.join(REPO_ROOT, "test/e2e/parity-map.yaml"); + +function runTsx(scriptPath: string, args: string[] = [], env: Record<string, string> = {}): SpawnSyncReturns<string> { + const tsx = path.join(REPO_ROOT, "node_modules/.bin/tsx"); + return spawnSync(tsx, [scriptPath, ...args], { + env: { ...process.env, ...env }, + encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), + cwd: REPO_ROOT, + }); +} + +function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> { + return spawnSync("bash", ["-c", script], { + env: { ...process.env, ...env }, + encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 
60_000), + cwd: REPO_ROOT, + }); +} + +/** + * Create a synthetic repo layout mirroring the paths the lint walks: + * <root>/test/e2e/suites/<suite>/<step>.sh (suite step scripts) + * <root>/test/e2e/test-*.sh (legacy scripts) + * <root>/test/e2e/parity-map.yaml (mapping file) + */ +function makeSyntheticRepo(): string { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-lint-")); + fs.mkdirSync(path.join(tmp, "test/e2e/suites/example"), { recursive: true }); + fs.writeFileSync(path.join(tmp, "test/e2e/parity-map.yaml"), "scripts: {}\n"); + return tmp; +} + +function writeStep(tmp: string, name: string, body: string) { + const p = path.join(tmp, "test/e2e/suites/example", name); + fs.writeFileSync(p, `#!/usr/bin/env bash\n${body}\n`); +} + +function writeLegacy(tmp: string, name: string, body: string) { + const p = path.join(tmp, "test/e2e", name); + fs.writeFileSync(p, `#!/usr/bin/env bash\n${body}\n`); +} + +describe("Phase 1.G convention lint", () => { + let tmp: string; + beforeEach(() => { + tmp = makeSyntheticRepo(); + }); + afterEach(() => { + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + it("lint_should_flag_step_that_reexports_noninteractive_env", () => { + writeStep(tmp, "00-bad.sh", 'export DEBIAN_FRONTEND=noninteractive\necho hi'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/00-bad\.sh/); + expect(r.stdout + r.stderr).toMatch(/DEBIAN_FRONTEND|non.?interactive/i); + }); + + it("lint_should_flag_step_that_registers_own_trap", () => { + writeStep(tmp, "00-trap.sh", 'trap cleanup EXIT'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/00-trap\.sh/); + expect(r.stdout + r.stderr).toMatch(/trap/i); + }); + + it("lint_should_flag_step_that_calls_section", () => { + writeStep(tmp, "00-section.sh", 'section "Phase 3: X"'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + 
r.stderr).toMatch(/00-section\.sh/); + expect(r.stdout + r.stderr).toMatch(/section/i); + }); + + it("lint_should_flag_step_writing_to_tmp_log_path", () => { + writeStep(tmp, "00-tmplog.sh", 'echo hi > /tmp/foo.log'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/00-tmplog\.sh/); + expect(r.stdout + r.stderr).toMatch(/\/tmp.*\.log|E2E_CONTEXT_DIR/); + }); + + it("lint_should_flag_nonstandard_repo_root_discovery_pattern", () => { + writeStep(tmp, "00-reporoot.sh", 'REPO_ROOT="$(git rev-parse --show-toplevel)"'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/repo.?root|git rev-parse/i); + }); + + it("lint_should_flag_new_legacy_test_script_with_no_parity_map_entry", () => { + writeLegacy(tmp, "test-new-thing.sh", '# legacy script\npass "something"'); + const r = runTsx(LINT_BIN, ["--root", tmp]); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/test-new-thing\.sh/); + expect(r.stdout + r.stderr).toMatch(/parity.?map/i); + }); + + it("lint_should_pass_on_current_repo_state", () => { + const r = runTsx(LINT_BIN); + expect(r.status, r.stdout + r.stderr).toBe(0); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.H — Parity harness (compare-parity.sh) +// ───────────────────────────────────────────────────────────────────────────── + +function writeMap(tmp: string, content: string): string { + const p = path.join(tmp, "parity-map.yaml"); + fs.writeFileSync(p, content); + return p; +} + +describe("Phase 1.H parity harness", () => { + let tmp: string; + beforeEach(() => { + tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-parity-")); + }); + afterEach(() => { + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + it("compare_parity_should_produce_empty_diff_when_map_is_empty", () => { + const mapPath = writeMap(tmp, "scripts: {}\n"); + const 
legacyLog = path.join(tmp, "legacy.log"); + const scenarioLog = path.join(tmp, "scenario.log"); + fs.writeFileSync(legacyLog, ""); + fs.writeFileSync(scenarioLog, ""); + const r = runBash( + `bash "${COMPARE_PARITY}" --script none.sh --legacy "${legacyLog}" --scenario "${scenarioLog}" --map "${mapPath}"`, + ); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/no.?divergence|no.?mappings/i); + }); + + it("compare_parity_should_exit_nonzero_when_any_assertion_diverges", () => { + const mapPath = writeMap( + tmp, + ` +scripts: + sample.sh: + scenario: dummy + assertions: + - legacy: "thing works" + id: thing.works +`.trimStart(), + ); + const legacyLog = path.join(tmp, "legacy.log"); + const scenarioLog = path.join(tmp, "scenario.log"); + // Legacy passed, scenario failed → divergence. + fs.writeFileSync(legacyLog, 'PASS: thing works\n'); + fs.writeFileSync(scenarioLog, 'FAIL: thing.works\n'); + const r = runBash( + `bash "${COMPARE_PARITY}" --script sample.sh --legacy "${legacyLog}" --scenario "${scenarioLog}" --map "${mapPath}"`, + ); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/thing\.works|thing works/); + expect(r.stdout + r.stderr).toMatch(/diverg/i); + }); + + it("compare_parity_should_treat_flaky_marked_assertion_as_both_pass_or_both_fail", () => { + const mapPath = writeMap( + tmp, + ` +scripts: + sample.sh: + scenario: dummy + assertions: + - legacy: "sometimes breaks" + id: sometimes.breaks + flaky: true +`.trimStart(), + ); + const legacyLog = path.join(tmp, "legacy.log"); + const scenarioLog = path.join(tmp, "scenario.log"); + // Both FAIL → flaky should accept this as non-divergent. 
+ fs.writeFileSync(legacyLog, 'FAIL: sometimes breaks\n'); + fs.writeFileSync(scenarioLog, 'FAIL: sometimes.breaks\n'); + const r = runBash( + `bash "${COMPARE_PARITY}" --script sample.sh --legacy "${legacyLog}" --scenario "${scenarioLog}" --map "${mapPath}"`, + ); + expect(r.status, r.stdout + r.stderr).toBe(0); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Static: parity-map.yaml must exist (empty but parseable). +// ───────────────────────────────────────────────────────────────────────────── + +describe("parity-map.yaml seed", () => { + it("should_exist_under_test_e2e_and_be_valid_yaml_even_when_empty", () => { + expect(fs.existsSync(PARITY_MAP_REAL)).toBe(true); + const content = fs.readFileSync(PARITY_MAP_REAL, "utf8"); + expect(content).toMatch(/scripts:/); + }); +}); diff --git a/test/e2e-expected-state-validator.test.ts b/test/e2e-expected-state-validator.test.ts index 6c93109e92..9453c9b15a 100644 --- a/test/e2e-expected-state-validator.test.ts +++ b/test/e2e-expected-state-validator.test.ts @@ -161,3 +161,70 @@ describe("runner_should_not_run_suites_when_expected_state_fails", () => { } }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.F — --validate-only flag on run-scenario.sh +// ───────────────────────────────────────────────────────────────────────────── + +describe("run-scenario --validate-only flag", () => { + it("runs only validator and emits probe results json on stdout without running install/onboard/suites", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-validate-only-")); + try { + const trace = path.join(tmp, "trace.log"); + // Pre-populate a context.env: --validate-only assumes setup has already run. 
+ fs.writeFileSync( + path.join(tmp, "context.env"), + "E2E_SCENARIO=ubuntu-repo-cloud-openclaw\n", + ); + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only"], + { + env: { + ...process.env, + E2E_CONTEXT_DIR: tmp, + E2E_TRACE_FILE: trace, + // Supply probe overrides for every key the expected state needs. + E2E_PROBE_OVERRIDE_CLI_INSTALLED: "true", + E2E_PROBE_OVERRIDE_GATEWAY_EXPECTED: "present", + E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "healthy", + E2E_PROBE_OVERRIDE_SANDBOX_EXPECTED: "present", + E2E_PROBE_OVERRIDE_SANDBOX_STATUS: "running", + E2E_PROBE_OVERRIDE_SANDBOX_AGENT: "openclaw", + E2E_PROBE_OVERRIDE_INFERENCE_EXPECTED: "available", + E2E_PROBE_OVERRIDE_INFERENCE_PROVIDER: "nvidia", + E2E_PROBE_OVERRIDE_INFERENCE_ROUTE: "inference-local", + E2E_PROBE_OVERRIDE_INFERENCE_MODE: "gateway-routed", + E2E_PROBE_OVERRIDE_CREDENTIALS_EXPECTED: "present", + E2E_PROBE_OVERRIDE_CREDENTIALS_STORAGE: "gateway-managed", + }, + encoding: "utf8", + timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), + cwd: REPO_ROOT, + }, + ); + expect(r.status, r.stderr).toBe(0); + // Must NOT have traced install or onboard. + const contents = fs.existsSync(trace) ? fs.readFileSync(trace, "utf8") : ""; + expect(contents).not.toMatch(/install:/); + expect(contents).not.toMatch(/onboard:/); + // Must have emitted an expected-state-report.json (probe results). 
+ const reportPath = path.join(tmp, "expected-state-report.json"); + expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true); + const report = JSON.parse(fs.readFileSync(reportPath, "utf8")); + expect(report.ok).toBe(true); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("is_mutually_exclusive_with_plan_only", () => { + const r = spawnSync( + "bash", + [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only", "--plan-only"], + { encoding: "utf8", timeout: 15_000, cwd: REPO_ROOT }, + ); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/mutually.exclusive|cannot.*both|--plan-only.*--validate-only|--validate-only.*--plan-only/i); + }); +}); diff --git a/test/e2e-lib-helpers.test.ts b/test/e2e-lib-helpers.test.ts index ee131a9d73..7626948179 100644 --- a/test/e2e-lib-helpers.test.ts +++ b/test/e2e-lib-helpers.test.ts @@ -20,6 +20,11 @@ function runBash(script: string, env: Record = {}): SpawnSyncRet }); } +// ────────────────────────────────────────────────────────────────────────── +// Phase 1 helpers (logging, sandbox-exec, fixtures, assertions, install +// splits) — extends the pre-existing e2e shell helper coverage. +// ────────────────────────────────────────────────────────────────────────── + describe("E2E shell helpers", () => { it("env_helper_should_set_standard_noninteractive_env", () => { const r = runBash(` @@ -121,3 +126,299 @@ describe("E2E shell helpers", () => { } }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.A — Logging helpers (lib/logging.sh) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Phase 1.A logging helpers", () => { + it("logging_should_emit_stable_pass_marker_when_e2e_pass_called", () => { + const r = runBash(` + set -euo pipefail + . 
"${LIB}/logging.sh" + e2e_pass "assertion X" + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/^PASS:.*assertion X/m); + }); + + it("logging_should_emit_stable_fail_marker_and_nonzero_exit_when_e2e_fail_called", () => { + const r = runBash(` + . "${LIB}/logging.sh" + ( e2e_fail "assertion Y" ) + `); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/FAIL:.*assertion Y/); + }); + + it("logging_should_include_phase_prefix_when_e2e_section_called", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/logging.sh" + e2e_section "Phase 2: onboarding" + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/^=== Phase 2:.*onboarding/m); + }); + + it("logging_should_autosource_logging_when_env_sh_sourced", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/env.sh" + # e2e_pass must be defined after sourcing env.sh alone. + e2e_pass "from env.sh" + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/^PASS:.*from env.sh/m); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.B — Sandbox exec helper (lib/sandbox-exec.sh) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Phase 1.B sandbox-exec helper", () => { + it("sandbox_exec_should_propagate_exit_code_when_command_fails", () => { + // Use a fake nemoclaw on PATH that exits 1. + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-fail-")); + try { + const bin = path.join(tmp, "bin"); + fs.mkdirSync(bin); + fs.writeFileSync( + path.join(bin, "nemoclaw"), + "#!/usr/bin/env bash\nexit 1\n", + { mode: 0o755 }, + ); + const r = runBash( + ` + . "${LIB}/sandbox-exec.sh" + e2e_sandbox_exec sb1 -- false + echo "rc=$?" 
+ `, + { PATH: `${bin}:${process.env.PATH}` }, + ); + expect(r.stdout).toMatch(/rc=1/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("sandbox_exec_should_dry_run_short_circuit_when_e2e_dry_run_set", () => { + const r = runBash( + ` + set -euo pipefail + . "${LIB}/sandbox-exec.sh" + e2e_sandbox_exec sb1 -- rm -rf / + `, + { E2E_DRY_RUN: "1", PATH: "/does-not-exist" }, + ); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout + r.stderr).toMatch(/dry[- ]run/i); + }); + + it("sandbox_exec_stdin_should_quote_args_safely_when_piped", () => { + // Verify that $TOKEN is NOT expanded on the host side before being + // delivered to the sandbox. We stub nemoclaw to echo back stdin. + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-stdin-")); + try { + const bin = path.join(tmp, "bin"); + fs.mkdirSync(bin); + // Fake nemoclaw: when called as `nemoclaw shell sb1 -- cat` read + // stdin and print it verbatim so the test can see what the sandbox + // would have received. + fs.writeFileSync( + path.join(bin, "nemoclaw"), + '#!/usr/bin/env bash\ncat\n', + { mode: 0o755 }, + ); + const r = runBash( + ` + set -euo pipefail + . "${LIB}/sandbox-exec.sh" + printf 'hello $TOKEN' | e2e_sandbox_exec_stdin sb1 -- cat + `, + { PATH: `${bin}:${process.env.PATH}`, TOKEN: "SHOULD_NOT_EXPAND" }, + ); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toContain("hello $TOKEN"); + expect(r.stdout).not.toContain("SHOULD_NOT_EXPAND"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.C — Fixtures (lib/fixtures/) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Phase 1.C fixtures", () => { + it("fake_openai_should_start_and_stop_cleanly_and_serve_chat_completions", () => { + const r = runBash(` + set -euo pipefail + . 
"${LIB}/fixtures/fake-openai.sh" + fake_openai_start + : "\${FAKE_OPENAI_PORT:?not exported}" + URL="http://127.0.0.1:\${FAKE_OPENAI_PORT}/v1/chat/completions" + body='{"model":"x","messages":[{"role":"user","content":"hi"}]}' + out=$(curl -fsS -H 'Content-Type: application/json' -d "$body" "$URL") + echo "$out" + fake_openai_stop + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/choices/); + expect(r.stdout).toMatch(/content/); + }); + + it("older_base_image_should_emit_dockerfile_pointing_at_tagged_base", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/fixtures/older-base-image.sh" + df="$(older_base_image_prepare v0.0.1-test)" + echo "DF=$df" + head -n1 "$df" + `); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout).toMatch(/^FROM .*:v0\.0\.1-test/m); + }); + + it("fake_messaging_fixtures_should_bind_a_port_and_accept_stub_requests", () => { + for (const provider of ["telegram", "discord", "slack"]) { + const r = runBash(` + set -euo pipefail + . "${LIB}/fixtures/fake-${provider}.sh" + fake_${provider}_start + : "\${FAKE_${provider.toUpperCase()}_PORT:?port not exported}" + URL="http://127.0.0.1:\${FAKE_${provider.toUpperCase()}_PORT}/ping" + code=$(curl -fsS -o /dev/null -w '%{http_code}' "$URL" || echo failed) + echo "code=$code" + fake_${provider}_stop + `); + expect(r.status, `${provider}: ${r.stderr}`).toBe(0); + expect(r.stdout).toMatch(/code=200/); + } + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.D — Assertion helpers (lib/assert/) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Phase 1.D assertion helpers", () => { + it("inference_works_should_pass_when_round_trip_returns_ok", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/fixtures/fake-openai.sh" + . 
"${LIB}/assert/inference-works.sh" + fake_openai_start + URL="http://127.0.0.1:\${FAKE_OPENAI_PORT}" + e2e_assert_inference_works "$URL" + rc=$? + fake_openai_stop + exit $rc + `); + expect(r.status, r.stderr).toBe(0); + }); + + it("no_credentials_leaked_should_fail_when_pattern_leaks_in_bundle", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-creds-")); + try { + const bundle = path.join(tmp, "bundle"); + fs.mkdirSync(bundle); + fs.writeFileSync(path.join(bundle, "leak.txt"), "token=sk-abc123DEADBEEFCAFE0000111122223333"); + const r = runBash(` + . "${LIB}/assert/no-credentials-leaked.sh" + e2e_assert_no_credentials_leaked "${bundle}" + `); + expect(r.status).not.toBe(0); + expect(r.stdout + r.stderr).toMatch(/FAIL:/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("policy_preset_applied_should_pass_when_active_presets_match_declared_set", () => { + // Stub `nemoclaw policies list` to emit a known set. + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-pol-")); + try { + const bin = path.join(tmp, "bin"); + fs.mkdirSync(bin); + fs.writeFileSync( + path.join(bin, "nemoclaw"), + '#!/usr/bin/env bash\nif [[ "$1" == "policies" && "$2" == "list" ]]; then\n printf "slack\\ndiscord\\n"\nfi\n', + { mode: 0o755 }, + ); + const r = runBash( + ` + set -euo pipefail + . "${LIB}/assert/policy-preset-applied.sh" + e2e_assert_policy_preset_applied slack discord + `, + { PATH: `${bin}:${process.env.PATH}` }, + ); + expect(r.status, r.stderr).toBe(0); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("messaging_bridge_reachable_should_pass_when_provider_endpoint_alive", () => { + const r = runBash(` + set -euo pipefail + . "${LIB}/fixtures/fake-telegram.sh" + . "${LIB}/assert/messaging-bridge-reachable.sh" + fake_telegram_start + export MESSAGING_BRIDGE_URL="http://127.0.0.1:\${FAKE_TELEGRAM_PORT}" + e2e_assert_messaging_bridge_reachable telegram + rc=$? 
+ fake_telegram_stop + exit $rc + `); + expect(r.status, r.stderr).toBe(0); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Phase 1.E — Install-method dispatcher splits +// ───────────────────────────────────────────────────────────────────────────── + +describe("Phase 1.E install dispatcher splits", () => { + function dispatchDryRun(profile: string): SpawnSyncReturns { + return runBash( + ` + set -euo pipefail + . "${LIB}/setup/install.sh" + e2e_install "${profile}" + `, + { E2E_DRY_RUN: "1" }, + ); + } + + it("install_should_dispatch_to_install_repo_helper_for_repo_current_profile", () => { + const r = dispatchDryRun("repo-current"); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout + r.stderr).toMatch(/install-repo/); + expect(r.stdout + r.stderr).not.toMatch(/install-curl|install-ollama|install-launchable/); + }); + + it("install_should_dispatch_to_install_curl_helper_for_public_installer_profile", () => { + const r = dispatchDryRun("public-installer"); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout + r.stderr).toMatch(/install-curl/); + expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-ollama|install-launchable/); + }); + + it("install_should_dispatch_to_install_ollama_helper_for_ollama_profile", () => { + const r = dispatchDryRun("ollama"); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout + r.stderr).toMatch(/install-ollama/); + expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-launchable/); + }); + + it("install_should_dispatch_to_install_launchable_helper_for_launchable_profile", () => { + const r = dispatchDryRun("launchable"); + expect(r.status, r.stderr).toBe(0); + expect(r.stdout + r.stderr).toMatch(/install-launchable/); + expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-ollama/); + }); +}); From 711aaef081a73c4f09cc5eba22d3784d1186434d Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Mon, 11 May 2026 13:08:33 -0400 
Subject: [PATCH 06/60] feat(e2e): Phase 1 — pre-flight infrastructure for migration Lands shared fixtures, helpers, assertion helpers, install-method splits, conventions + lint, and the parity-compare CI harness that unblock the per-wave migration phases (2–12). Deliverables (per specs/2026-05-11_e2e-test-migration/spec.md Phase 1): Fixtures (test/e2e/lib/fixtures/): - fake-openai.sh: local OpenAI-compatible endpoint (Risk #2 mitigation) - fake-{telegram,discord,slack}.sh: messaging stubs via shared _fake-http-stub.sh harness - older-base-image.sh: tagged ghcr base-image Dockerfile generator Helpers (test/e2e/lib/): - logging.sh: canonical e2e_{section,info,pass,fail} with stable PASS:/FAIL:/=== Phase markers (absorbs reuse category #1) - sandbox-exec.sh: canonical nemoclaw-shell wrapper with safe quoting, exit-code propagation, and dry-run short-circuit (category #10) - env.sh: auto-sources logging.sh so every consumer gets it for free Assertion helpers (test/e2e/lib/assert/): - inference-works.sh: chat-completion round-trip - no-credentials-leaked.sh: credential-pattern scan - policy-preset-applied.sh: gateway policy preset verification - messaging-bridge-reachable.sh: L7 proxy / bridge reachability Install dispatcher splits (test/e2e/lib/setup/): - install-{repo,curl,ollama,launchable}.sh (four profiles) - install.sh: dispatcher routes by profile/method name (category #5) Runtime probe wiring: - run-scenario.sh: adds --validate-only flag (probe-only, no setup) - resolver/index.ts: E2E_PROBE_OVERRIDES_JSON escape hatch for keys with embedded underscores (e.g. 
security.policy_engine) Convention lint + parity harness: - scripts/e2e/lint-conventions.ts: enforces 6 conventions on suite step scripts + requires parity-map.yaml entries for legacy scripts - scripts/e2e/compare-parity.sh: diffs legacy vs scenario PASS/FAIL via parity-map.yaml; flaky: true marker supported (Risk #4) - test/e2e/parity-map.yaml: seeded with one entry per existing legacy script; migration phases 2–12 append assertion mappings - .github/workflows/e2e-parity-compare.yaml: dispatches legacy script + migrated scenario on same runner and diffs outcomes Tests (all passing, 41 total): - test/e2e-lib-helpers.test.ts: +18 tests (1.A–1.E) - test/e2e-convention-lint.test.ts: +11 tests (1.G–1.H) - test/e2e-expected-state-validator.test.ts: +2 tests (1.F) No regressions: full cli Vitest project (3258 tests) still green. --- .github/workflows/e2e-parity-compare.yaml | 122 ++++++++++ scripts/e2e/compare-parity.sh | 185 ++++++++++++++ scripts/e2e/lint-conventions.ts | 230 ++++++++++++++++++ test/e2e-expected-state-validator.test.ts | 5 + test/e2e-lib-helpers.test.ts | 4 +- test/e2e/lib/assert/inference-works.sh | 62 +++++ .../lib/assert/messaging-bridge-reachable.sh | 57 +++++ test/e2e/lib/assert/no-credentials-leaked.sh | 66 +++++ test/e2e/lib/assert/policy-preset-applied.sh | 53 ++++ test/e2e/lib/env.sh | 10 + test/e2e/lib/fixtures/_fake-http-stub.sh | 90 +++++++ test/e2e/lib/fixtures/fake-discord.sh | 21 ++ test/e2e/lib/fixtures/fake-openai.sh | 109 +++++++++ test/e2e/lib/fixtures/fake-slack.sh | 21 ++ test/e2e/lib/fixtures/fake-telegram.sh | 21 ++ test/e2e/lib/fixtures/older-base-image.sh | 74 ++++++ test/e2e/lib/logging.sh | 68 ++++++ test/e2e/lib/sandbox-exec.sh | 84 +++++++ test/e2e/lib/setup/install-curl.sh | 42 ++++ test/e2e/lib/setup/install-launchable.sh | 29 +++ test/e2e/lib/setup/install-ollama.sh | 32 +++ test/e2e/lib/setup/install-repo.sh | 30 +++ test/e2e/lib/setup/install.sh | 75 +++--- test/e2e/parity-map.yaml | 138 +++++++++++ 
test/e2e/resolver/index.ts | 21 ++ test/e2e/run-scenario.sh | 45 +++- 26 files changed, 1639 insertions(+), 55 deletions(-) create mode 100644 .github/workflows/e2e-parity-compare.yaml create mode 100755 scripts/e2e/compare-parity.sh create mode 100644 scripts/e2e/lint-conventions.ts create mode 100644 test/e2e/lib/assert/inference-works.sh create mode 100644 test/e2e/lib/assert/messaging-bridge-reachable.sh create mode 100644 test/e2e/lib/assert/no-credentials-leaked.sh create mode 100644 test/e2e/lib/assert/policy-preset-applied.sh create mode 100644 test/e2e/lib/fixtures/_fake-http-stub.sh create mode 100644 test/e2e/lib/fixtures/fake-discord.sh create mode 100644 test/e2e/lib/fixtures/fake-openai.sh create mode 100644 test/e2e/lib/fixtures/fake-slack.sh create mode 100644 test/e2e/lib/fixtures/fake-telegram.sh create mode 100644 test/e2e/lib/fixtures/older-base-image.sh create mode 100644 test/e2e/lib/logging.sh create mode 100644 test/e2e/lib/sandbox-exec.sh create mode 100644 test/e2e/lib/setup/install-curl.sh create mode 100644 test/e2e/lib/setup/install-launchable.sh create mode 100644 test/e2e/lib/setup/install-ollama.sh create mode 100644 test/e2e/lib/setup/install-repo.sh create mode 100644 test/e2e/parity-map.yaml diff --git a/.github/workflows/e2e-parity-compare.yaml b/.github/workflows/e2e-parity-compare.yaml new file mode 100644 index 0000000000..dec09b63ca --- /dev/null +++ b/.github/workflows/e2e-parity-compare.yaml @@ -0,0 +1,122 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# E2E parity compare. +# +# Runs a legacy `test/e2e/test-*.sh` script AND its migrated scenario on +# the same runner, collects PASS/FAIL per assertion from both, and fails +# the job if any mapped assertion in test/e2e/parity-map.yaml diverges. +# +# Manual-only (workflow_dispatch). 
Each migration phase dispatches this +# workflow for every scenario it introduces and records zero-divergence +# before marking the phase complete. + +name: e2e-parity-compare + +on: + workflow_dispatch: + inputs: + legacy_script: + description: "Legacy script filename under test/e2e/ (e.g. test-full-e2e.sh). Empty = no legacy run, empty-diff only." + required: false + default: "" + type: string + scenario: + description: "Migrated scenario id (e.g. ubuntu-repo-cloud-openclaw). Empty = no scenario run, empty-diff only." + required: false + default: "" + type: string + +permissions: + contents: read + +concurrency: + group: e2e-parity-compare-${{ github.event.inputs.legacy_script }}-${{ github.event.inputs.scenario }} + cancel-in-progress: false + +jobs: + resolve-runner: + runs-on: ubuntu-latest + outputs: + runner: ${{ steps.pick.outputs.runner }} + steps: + - id: pick + env: + SCENARIO: ${{ github.event.inputs.scenario }} + run: | + case "${SCENARIO}" in + macos-*) echo "runner=macos-latest" >> "$GITHUB_OUTPUT" ;; + wsl-*) echo "runner=windows-latest" >> "$GITHUB_OUTPUT" ;; + gpu-*) echo "runner=self-hosted" >> "$GITHUB_OUTPUT" ;; + ubuntu-*|brev-*|"") echo "runner=ubuntu-latest" >> "$GITHUB_OUTPUT" ;; + *) + echo "::error::Unknown scenario prefix for runner selection: ${SCENARIO}" >&2 + exit 1 + ;; + esac + + compare: + needs: resolve-runner + runs-on: ${{ needs.resolve-runner.outputs.runner }} + timeout-minutes: 60 + steps: + - uses: actions/checkout@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: npm + + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Run legacy script + id: legacy + if: ${{ github.event.inputs.legacy_script != '' }} + env: + NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + run: | + mkdir -p .e2e/parity + LOG=".e2e/parity/legacy.log" + if [ ! 
-x "test/e2e/${{ github.event.inputs.legacy_script }}" ]; then + echo "::error::legacy script not found: test/e2e/${{ github.event.inputs.legacy_script }}" + exit 1 + fi + bash "test/e2e/${{ github.event.inputs.legacy_script }}" 2>&1 | tee "$LOG" || true + + - name: Run migrated scenario + id: scenario + if: ${{ github.event.inputs.scenario != '' }} + env: + NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} + run: | + mkdir -p .e2e/parity + LOG=".e2e/parity/scenario.log" + bash test/e2e/run-scenario.sh "${{ github.event.inputs.scenario }}" 2>&1 | tee "$LOG" || true + + - name: Compare parity + env: + LEGACY_SCRIPT: ${{ github.event.inputs.legacy_script }} + run: | + mkdir -p .e2e/parity + LEGACY_LOG=".e2e/parity/legacy.log" + SCENARIO_LOG=".e2e/parity/scenario.log" + [ -f "$LEGACY_LOG" ] || : > "$LEGACY_LOG" + [ -f "$SCENARIO_LOG" ] || : > "$SCENARIO_LOG" + SCRIPT_ARG="${LEGACY_SCRIPT:-none.sh}" + bash scripts/e2e/compare-parity.sh \ + --script "$SCRIPT_ARG" \ + --legacy "$LEGACY_LOG" \ + --scenario "$SCENARIO_LOG" \ + --map test/e2e/parity-map.yaml + + - name: Upload parity artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: e2e-parity-${{ github.event.inputs.scenario }}-${{ github.event.inputs.legacy_script }} + path: | + .e2e/ + if-no-files-found: warn + retention-days: 14 diff --git a/scripts/e2e/compare-parity.sh b/scripts/e2e/compare-parity.sh new file mode 100755 index 0000000000..56cdb0b16a --- /dev/null +++ b/scripts/e2e/compare-parity.sh @@ -0,0 +1,185 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Compare PASS/FAIL outcomes between a legacy e2e log and a migrated +# scenario log using the mapping in test/e2e/parity-map.yaml. 
+# +# Usage: +# scripts/e2e/compare-parity.sh \ +# --script .sh \ +# --legacy \ +# --scenario \ +# [--map ] +# +# Emits a JSON divergence report on stdout when divergence is found, plus +# a human summary line. Exits 0 on no divergence, non-zero on divergence +# or misuse. +# +# The "normalize both logs into {assertion_id, status}" logic is kept in +# one place so CI and local repro stay in lock-step. + +set -euo pipefail + +SCRIPT_NAME="" +LEGACY_LOG="" +SCENARIO_LOG="" +MAP_FILE="" + +usage() { + cat >&2 <<'USAGE' +Usage: compare-parity.sh --script --legacy --scenario [--map ] +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --script) SCRIPT_NAME="${2:?}"; shift 2 ;; + --legacy) LEGACY_LOG="${2:?}"; shift 2 ;; + --scenario) SCENARIO_LOG="${2:?}"; shift 2 ;; + --map) MAP_FILE="${2:?}"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "compare-parity: unknown arg: $1" >&2; usage; exit 2 ;; + esac +done + +if [[ -z "${SCRIPT_NAME}" || -z "${LEGACY_LOG}" || -z "${SCENARIO_LOG}" ]]; then + usage + exit 2 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +if [[ -z "${MAP_FILE}" ]]; then + MAP_FILE="${REPO_ROOT}/test/e2e/parity-map.yaml" +fi +if [[ ! -f "${MAP_FILE}" ]]; then + echo "compare-parity: map file not found: ${MAP_FILE}" >&2 + exit 2 +fi + +# The comparison logic is implemented in Node (available on all CI runners +# without extra setup) so we can parse YAML cleanly. +node --no-warnings - "${SCRIPT_NAME}" "${LEGACY_LOG}" "${SCENARIO_LOG}" "${MAP_FILE}" <<'JS' +const fs = require("node:fs"); +const path = require("node:path"); + +const [scriptName, legacyLog, scenarioLog, mapFile] = process.argv.slice(2); + +function loadYaml(file) { + // Use the repo's vendored js-yaml (a root dependency) when available; + // otherwise fall back to a tiny parser sufficient for the narrow schema. + try { + const yaml = require("js-yaml"); + return yaml.load(fs.readFileSync(file, "utf8")) ?? 
{}; + } catch (_) { + // Ultra-minimal YAML fallback: only handles the parity-map shape. + const text = fs.readFileSync(file, "utf8"); + const out = { scripts: {} }; + let currentScript = null; + let currentEntry = null; + const lines = text.split("\n"); + for (const raw of lines) { + if (raw.trimStart().startsWith("#")) continue; + if (/^scripts:\s*(\{\})?\s*$/.test(raw)) continue; + // scripts: + // name.sh: + let m = raw.match(/^\s{2}([\w.\-]+):\s*$/); + if (m) { currentScript = m[1]; out.scripts[currentScript] = { assertions: [] }; currentEntry = null; continue; } + m = raw.match(/^\s{4}scenario:\s*(.+?)\s*$/); + if (m && currentScript) { out.scripts[currentScript].scenario = m[1]; continue; } + m = raw.match(/^\s{4}assertions:\s*$/); + if (m && currentScript) { out.scripts[currentScript].assertions = []; continue; } + m = raw.match(/^\s{6}-\s*legacy:\s*"(.*)"\s*$/); + if (m && currentScript) { currentEntry = { legacy: m[1] }; out.scripts[currentScript].assertions.push(currentEntry); continue; } + m = raw.match(/^\s{8}id:\s*(.+?)\s*$/); + if (m && currentEntry) { currentEntry.id = m[1]; continue; } + m = raw.match(/^\s{8}flaky:\s*(true|false)\s*$/); + if (m && currentEntry) { currentEntry.flaky = m[1] === "true"; continue; } + } + return out; + } +} + +function readLog(file) { + try { return fs.readFileSync(file, "utf8"); } catch { return ""; } +} + +function normalize(logText, legacyString, scenarioId) { + // Returns { legacy: "PASS"|"FAIL"|"MISSING", scenario: ... 
} + const has = (needle) => { + if (!needle) return null; + const lines = logText.split(/\r?\n/); + let pass = false, fail = false; + for (const line of lines) { + if (line.startsWith("PASS:") && line.includes(needle)) pass = true; + if (line.startsWith("FAIL:") && line.includes(needle)) fail = true; + } + if (fail) return "FAIL"; + if (pass) return "PASS"; + return "MISSING"; + }; + return { legacy: has(legacyString), scenario: has(scenarioId) }; +} + +const map = loadYaml(mapFile); +const entry = (map.scripts ?? {})[scriptName]; +if (!entry || !Array.isArray(entry.assertions) || entry.assertions.length === 0) { + console.log(JSON.stringify({ script: scriptName, divergence: [], note: "no mappings" })); + console.log(`compare-parity: no mappings for ${scriptName}; no-divergence`); + process.exit(0); +} + +const legacyText = readLog(legacyLog); +const scenarioText = readLog(scenarioLog); +const divergence = []; +for (const a of entry.assertions) { + const n = normalize("", a.legacy, a.id); // placeholder + // Run legacy lookup against the legacy log, scenario against the scenario log. + const legacyStatus = (() => { + const lines = legacyText.split(/\r?\n/); + let pass = false, fail = false; + for (const line of lines) { + if (line.startsWith("PASS:") && line.includes(a.legacy)) pass = true; + if (line.startsWith("FAIL:") && line.includes(a.legacy)) fail = true; + } + if (fail) return "FAIL"; + if (pass) return "PASS"; + return "MISSING"; + })(); + const scenarioStatus = (() => { + const lines = scenarioText.split(/\r?\n/); + let pass = false, fail = false; + const needle = a.id; + for (const line of lines) { + if (line.startsWith("PASS:") && line.includes(needle)) pass = true; + if (line.startsWith("FAIL:") && line.includes(needle)) fail = true; + } + if (fail) return "FAIL"; + if (pass) return "PASS"; + return "MISSING"; + })(); + + if (a.flaky) { + // Flaky: both-pass-or-both-fail counts as aligned. 
+ if (legacyStatus !== scenarioStatus) { + divergence.push({ id: a.id, legacy: legacyStatus, scenario: scenarioStatus, flaky: true }); + } + continue; + } + if (legacyStatus !== scenarioStatus) { + divergence.push({ id: a.id, legacy: legacyStatus, scenario: scenarioStatus }); + } +} + +const report = { script: scriptName, divergence }; +console.log(JSON.stringify(report)); +if (divergence.length > 0) { + console.error(`compare-parity: ${divergence.length} diverging assertion(s) for ${scriptName}`); + for (const d of divergence) { + console.error(` ${d.id}: legacy=${d.legacy} scenario=${d.scenario}`); + } + process.exit(1); +} +console.log(`compare-parity: no divergence for ${scriptName}`); +JS diff --git a/scripts/e2e/lint-conventions.ts b/scripts/e2e/lint-conventions.ts new file mode 100644 index 0000000000..b4e7bd6973 --- /dev/null +++ b/scripts/e2e/lint-conventions.ts @@ -0,0 +1,230 @@ +#!/usr/bin/env tsx +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * E2E convention lint. + * + * Enforces the migration-spec conventions on `test/e2e/suites/**` step + * scripts and the `test/e2e/test-*.sh` legacy frontier: + * + * - Suite step scripts MUST NOT re-export non-interactive env vars + * (use lib/env.sh::e2e_env_apply_noninteractive instead). + * - Suite step scripts MUST NOT register their own traps + * (lib/cleanup.sh owns teardown). + * - Suite step scripts MUST NOT call `section "..."` — filenames carry + * the phase label, and e2e_section is emitted by the runner. + * - Suite step scripts MUST NOT write to `/tmp/*.log` — use + * `$E2E_CONTEXT_DIR/logs///.log`. + * - Non-standard repo-root discovery (`git rev-parse --show-toplevel`) + * is rejected in suite step scripts; use + * `SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"` and + * walk up. 
+ * - Every `test/e2e/test-*.sh` script MUST have an entry in + * `test/e2e/parity-map.yaml` (Risk #1: guards against new legacy + * scripts landing unmapped). + * + * Invocation: + * tsx scripts/e2e/lint-conventions.ts [--root ] + * Exits 0 on success, 1 on violations, 2 on misuse. + */ + +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +interface Rule { + id: string; + describe: string; + test: (body: string) => string | null; +} + +const STEP_RULES: Rule[] = [ + { + id: "no-noninteractive-reexport", + describe: "suite step re-exports non-interactive env vars", + test: (body) => { + const patterns = [ + /export\s+DEBIAN_FRONTEND\s*=\s*noninteractive/, + /export\s+NEMOCLAW_NON_INTERACTIVE\s*=\s*1/, + ]; + for (const p of patterns) { + if (p.test(body)) + return `matched ${p.source}; use lib/env.sh::e2e_env_apply_noninteractive`; + } + return null; + }, + }, + { + id: "no-own-trap", + describe: "suite step registers its own trap", + test: (body) => { + // Ignore commented lines and ignore `trap` inside quoted strings by + // requiring a leading non-quote character. 
+ const lines = body.split("\n"); + for (const raw of lines) { + const line = raw.replace(/^\s+/, ""); + if (line.startsWith("#")) continue; + if (/^trap\s+[^#]/.test(line)) { + return "registered own trap; cleanup lives in lib/cleanup.sh"; + } + } + return null; + }, + }, + { + id: "no-section-call", + describe: "suite step calls section/e2e_section", + test: (body) => { + const lines = body.split("\n"); + for (const raw of lines) { + const line = raw.replace(/^\s+/, ""); + if (line.startsWith("#")) continue; + if (/^section\s+["']/.test(line)) { + return "calls section; filename carries the phase label"; + } + } + return null; + }, + }, + { + id: "no-tmp-log", + describe: "suite step writes to /tmp/*.log", + test: (body) => { + if (/>\s*\/tmp\/[^\s]*\.log/.test(body)) { + return "writes to /tmp/*.log; use $E2E_CONTEXT_DIR/logs///.log"; + } + return null; + }, + }, + { + id: "no-git-rev-parse-repo-root", + describe: "suite step uses `git rev-parse --show-toplevel` for repo root", + test: (body) => { + if (/git\s+rev-parse\s+--show-toplevel/.test(body)) { + return 'use SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" instead'; + } + return null; + }, + }, +]; + +interface LintFinding { + file: string; + rule: string; + message: string; +} + +function walkShellScripts(root: string): string[] { + const out: string[] = []; + const walk = (dir: string) => { + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(dir, { withFileTypes: true }); + } catch { + return; + } + for (const ent of entries) { + const full = path.join(dir, ent.name); + if (ent.isDirectory()) { + walk(full); + } else if (ent.isFile() && ent.name.endsWith(".sh")) { + out.push(full); + } + } + }; + walk(root); + return out; +} + +function parseArgs(argv: string[]): { root: string } { + let root: string | undefined; + const args = argv.slice(2); + while (args.length > 0) { + const a = args.shift()!; + if (a === "--root") root = args.shift(); + else if (a === "-h" || a === "--help") { 
+ process.stdout.write("tsx scripts/e2e/lint-conventions.ts [--root ]\n"); + process.exit(0); + } else { + process.stderr.write(`lint-conventions: unexpected arg: ${a}\n`); + process.exit(2); + } + } + if (!root) { + const scriptDir = path.dirname(fileURLToPath(import.meta.url)); + root = path.resolve(scriptDir, "..", ".."); + } + return { root }; +} + +function lintSuiteSteps(root: string): LintFinding[] { + const findings: LintFinding[] = []; + const suitesRoot = path.join(root, "test/e2e/suites"); + if (!fs.existsSync(suitesRoot)) return findings; + for (const file of walkShellScripts(suitesRoot)) { + const body = fs.readFileSync(file, "utf8"); + for (const rule of STEP_RULES) { + const msg = rule.test(body); + if (msg) { + findings.push({ file: path.relative(root, file), rule: rule.id, message: msg }); + } + } + } + return findings; +} + +/** + * Read `test/e2e/parity-map.yaml` and return the set of legacy-script + * names that have an entry. Uses a narrow parser to avoid a runtime + * dependency when js-yaml is not available. 
+ */ +function readParityMapScripts(mapFile: string): Set { + const set = new Set(); + if (!fs.existsSync(mapFile)) return set; + const text = fs.readFileSync(mapFile, "utf8"); + for (const raw of text.split("\n")) { + const m = raw.match(/^\s{2}([\w.\-]+):\s*$/); + if (m) set.add(m[1]); + } + return set; +} + +function lintLegacyFrontier(root: string): LintFinding[] { + const findings: LintFinding[] = []; + const e2eDir = path.join(root, "test/e2e"); + const mapFile = path.join(e2eDir, "parity-map.yaml"); + const mapped = readParityMapScripts(mapFile); + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(e2eDir, { withFileTypes: true }); + } catch { + return findings; + } + for (const ent of entries) { + if (!ent.isFile()) continue; + if (!/^test-.*\.sh$/.test(ent.name)) continue; + if (mapped.has(ent.name)) continue; + findings.push({ + file: `test/e2e/${ent.name}`, + rule: "legacy-script-needs-parity-map-entry", + message: `new legacy test/e2e/${ent.name} has no entry in test/e2e/parity-map.yaml (Risk #1)`, + }); + } + return findings; +} + +function main(): number { + const { root } = parseArgs(process.argv); + const findings = [...lintSuiteSteps(root), ...lintLegacyFrontier(root)]; + if (findings.length === 0) { + return 0; + } + for (const f of findings) { + process.stderr.write(`${f.file}: [${f.rule}] ${f.message}\n`); + } + process.stderr.write(`\ne2e-convention-lint: ${findings.length} violation(s)\n`); + return 1; +} + +process.exit(main()); diff --git a/test/e2e-expected-state-validator.test.ts b/test/e2e-expected-state-validator.test.ts index 9453c9b15a..46aa4c1959 100644 --- a/test/e2e-expected-state-validator.test.ts +++ b/test/e2e-expected-state-validator.test.ts @@ -197,6 +197,11 @@ describe("run-scenario --validate-only flag", () => { E2E_PROBE_OVERRIDE_INFERENCE_MODE: "gateway-routed", E2E_PROBE_OVERRIDE_CREDENTIALS_EXPECTED: "present", E2E_PROBE_OVERRIDE_CREDENTIALS_STORAGE: "gateway-managed", + E2E_PROBE_OVERRIDE_SECURITY_SHIELDS: 
"supported", + // `security.policy_engine` has an embedded underscore, which the + // E2E_PROBE_OVERRIDE_* convention cannot express. Use the + // JSON escape hatch for this one. + E2E_PROBE_OVERRIDES_JSON: JSON.stringify({ "security.policy_engine": "supported" }), }, encoding: "utf8", timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), diff --git a/test/e2e-lib-helpers.test.ts b/test/e2e-lib-helpers.test.ts index 7626948179..d6adc65eb7 100644 --- a/test/e2e-lib-helpers.test.ts +++ b/test/e2e-lib-helpers.test.ts @@ -204,13 +204,15 @@ describe("Phase 1.B sandbox-exec helper", () => { }); it("sandbox_exec_should_dry_run_short_circuit_when_e2e_dry_run_set", () => { + // Use a PATH that has bash itself but no nemoclaw — dry-run must + // short-circuit before the CLI lookup. const r = runBash( ` set -euo pipefail . "${LIB}/sandbox-exec.sh" e2e_sandbox_exec sb1 -- rm -rf / `, - { E2E_DRY_RUN: "1", PATH: "/does-not-exist" }, + { E2E_DRY_RUN: "1", PATH: "/usr/bin:/bin" }, ); expect(r.status, r.stderr).toBe(0); expect(r.stdout + r.stderr).toMatch(/dry[- ]run/i); diff --git a/test/e2e/lib/assert/inference-works.sh b/test/e2e/lib/assert/inference-works.sh new file mode 100644 index 0000000000..617f4f5d63 --- /dev/null +++ b/test/e2e/lib/assert/inference-works.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Inference round-trip assertion. +# +# Verifies that an OpenAI-compatible endpoint answers a `chat/completions` +# request with a well-shaped response. Used both against the real gateway +# and against `fake-openai.sh` for deterministic fast-mode parity runs. +# +# Usage: +# e2e_assert_inference_works [--model ] [--api-key ] +# +# Exits 0 on success. On failure, prints a FAIL: line and returns non-zero +# (does NOT call e2e_fail so callers can decide whether to abort the step). 
+
+_E2E_INF_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+# shellcheck source=../env.sh
+. "${_E2E_INF_LIB_DIR}/env.sh"
+
+# e2e_assert_inference_works <base-url> [--model <id>] [--api-key <key>]
+#
+# POSTs a single-message chat/completions request to <base-url> and checks
+# that the reply mentions both "choices" and "content".
+#
+# Returns:
+#   0  response received and well-shaped
+#   1  request failed or response is missing a required field
+#   2  usage error (missing base URL, unknown flag, flag without a value)
+# The function only ever returns; it never exits the calling shell.
+e2e_assert_inference_works() {
+  local base_url="${1:-}"
+  if [[ -z "${base_url}" ]]; then
+    echo "FAIL: e2e_assert_inference_works: missing base URL" >&2
+    return 2
+  fi
+  shift
+  local model="fake-model"
+  local api_key=""
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --model | --api-key)
+        # Check for the value explicitly: `${2:?...}` would terminate a
+        # non-interactive shell instead of handing control back to the
+        # caller, which this helper promises to do.
+        if [[ $# -lt 2 || -z "$2" ]]; then
+          echo "e2e_assert_inference_works: $1 requires a value" >&2
+          return 2
+        fi
+        if [[ "$1" == "--model" ]]; then
+          model="$2"
+        else
+          api_key="$2"
+        fi
+        shift 2
+        ;;
+      *)
+        echo "e2e_assert_inference_works: unknown arg: $1" >&2
+        return 2
+        ;;
+    esac
+  done
+
+  e2e_env_trace "assert:inference-works" "${base_url}" "model=${model}"
+
+  # Strip one trailing slash so the joined URL never contains `//v1`.
+  local url="${base_url%/}/v1/chat/completions"
+  local body
+  body='{"model":"'"${model}"'","messages":[{"role":"user","content":"ping"}]}'
+  local curl_args=(-fsS --max-time 15 -H "Content-Type: application/json")
+  if [[ -n "${api_key}" ]]; then
+    curl_args+=(-H "Authorization: Bearer ${api_key}")
+  fi
+  local out
+  if ! out="$(curl "${curl_args[@]}" -d "${body}" "${url}" 2>/dev/null)"; then
+    echo "FAIL: inference round-trip to ${url} failed" >&2
+    return 1
+  fi
+  # Minimal shape check: must contain a `choices` array with some content.
+  # Matching in-process avoids a `printf | grep -q` pipeline, which can
+  # report a spurious failure under `set -o pipefail` when grep exits
+  # before printf has finished writing a large response.
+  if [[ "${out}" != *'"choices"'* ]]; then
+    echo "FAIL: inference response missing 'choices' field: ${out}" >&2
+    return 1
+  fi
+  if [[ "${out}" != *'"content"'* ]]; then
+    echo "FAIL: inference response missing 'content' field: ${out}" >&2
+    return 1
+  fi
+  return 0
+}
diff --git a/test/e2e/lib/assert/messaging-bridge-reachable.sh b/test/e2e/lib/assert/messaging-bridge-reachable.sh
new file mode 100644
index 0000000000..edebc951f0
--- /dev/null
+++ b/test/e2e/lib/assert/messaging-bridge-reachable.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Messaging-bridge reachability assertion.
+#
+# For a given provider (telegram | discord | slack), verify that the L7
+# proxy + bridge is reachable from outside the sandbox. Compatible with
+# both the real provider URLs and the local `fake-{provider}.sh` fixture
+# (which exports `MESSAGING_BRIDGE_URL` or the provider-specific
+# `FAKE_<PROVIDER>_URL`).
+#
+# Usage:
+#   e2e_assert_messaging_bridge_reachable <provider>
+
+_E2E_MB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+# shellcheck source=../env.sh
+. "${_E2E_MB_LIB_DIR}/env.sh"
+
+# Probe `<url>/ping` and then `<url>` itself; the bridge counts as reachable
+# as soon as either answers 200 or 204.
+#
+# Returns:
+#   0  one of the probes answered 200/204
+#   1  no bridge URL is configured, or neither probe succeeded
+#   2  missing or unknown provider
+e2e_assert_messaging_bridge_reachable() {
+  local provider="${1:-}"
+  if [[ -z "${provider}" ]]; then
+    echo "FAIL: e2e_assert_messaging_bridge_reachable: missing provider" >&2
+    return 2
+  fi
+
+  case "${provider}" in
+    telegram | discord | slack) ;;
+    *)
+      echo "FAIL: unknown messaging provider: ${provider}" >&2
+      return 2
+      ;;
+  esac
+
+  local upper
+  upper="$(printf '%s' "${provider}" | tr '[:lower:]' '[:upper:]')"
+  # Resolve URL: explicit override > provider-specific fake URL.
+  local url="${MESSAGING_BRIDGE_URL:-}"
+  if [[ -z "${url}" ]]; then
+    local var="FAKE_${upper}_URL"
+    url="${!var:-}"
+  fi
+  if [[ -z "${url}" ]]; then
+    echo "FAIL: no bridge URL (set MESSAGING_BRIDGE_URL or start fake-${provider} fixture)" >&2
+    return 1
+  fi
+
+  e2e_env_trace "assert:messaging-bridge-reachable" "${provider}" "${url}"
+
+  # Drop one trailing slash before appending `/ping` so the probe path is
+  # never `//ping`.
+  local base="${url%/}"
+  local code="000"
+  local candidate
+  for candidate in "${base}/ping" "${url}"; do
+    # No `-f` here: `-w '%{http_code}'` already prints the status (or 000
+    # when no response arrived), so a fallback `echo` inside the command
+    # substitution would be appended to it and garble the reported code.
+    code="$(curl -sS -o /dev/null -w '%{http_code}' --max-time 5 "${candidate}" 2>/dev/null)" || code="000"
+    if [[ "${code}" == "200" || "${code}" == "204" ]]; then
+      return 0
+    fi
+  done
+  echo "FAIL: messaging bridge for ${provider} unreachable at ${url} (http=${code})" >&2
+  return 1
+}
diff --git a/test/e2e/lib/assert/no-credentials-leaked.sh b/test/e2e/lib/assert/no-credentials-leaked.sh
new file mode 100644
index 0000000000..cfcbf8768e
--- /dev/null
+++ b/test/e2e/lib/assert/no-credentials-leaked.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Credential-leak scan.
+#
+# Scans a directory (e.g. a migration bundle, a blueprint digest, or a
+# sandbox filesystem mount) for common credential patterns. Any match is
+# a failure.
+#
+# Usage:
+#   e2e_assert_no_credentials_leaked <path> [--pattern <regex>]...
+#
+# Default patterns cover OpenAI / NVIDIA / GitHub / Slack / AWS tokens.
+# Callers can supply additional --pattern flags to extend the set.
+
+# Returns:
+#   0  no pattern matched
+#   1  at least one pattern matched (one FAIL: line per pattern on stderr)
+#   2  usage error (missing or nonexistent path, unknown flag, flag
+#      without a value)
+e2e_assert_no_credentials_leaked() {
+  local target="${1:-}"
+  if [[ -z "${target}" ]]; then
+    echo "FAIL: e2e_assert_no_credentials_leaked: missing target path" >&2
+    return 2
+  fi
+  if [[ ! -e "${target}" ]]; then
+    echo "FAIL: e2e_assert_no_credentials_leaked: target not found: ${target}" >&2
+    return 2
+  fi
+  shift
+  # Default credential patterns. grep -E syntax.
+  local patterns=(
+    'sk-[A-Za-z0-9]{16,}'        # OpenAI-style
+    'nvapi-[A-Za-z0-9_-]{16,}'   # NVIDIA API keys
+    'ghp_[A-Za-z0-9]{20,}'       # GitHub PAT
+    'xox[abp]-[A-Za-z0-9-]{10,}' # Slack tokens
+    'AKIA[0-9A-Z]{16}'           # AWS access key
+  )
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --pattern)
+        # Return a usage error rather than using `${2:?...}`, which would
+        # terminate a non-interactive shell.
+        if [[ $# -lt 2 || -z "$2" ]]; then
+          echo "e2e_assert_no_credentials_leaked: --pattern requires a value" >&2
+          return 2
+        fi
+        patterns+=("$2")
+        shift 2
+        ;;
+      *)
+        echo "e2e_assert_no_credentials_leaked: unknown arg: $1" >&2
+        return 2
+        ;;
+    esac
+  done
+
+  local found=0
+  local p hits hit shown
+  for p in "${patterns[@]}"; do
+    if [[ -d "${target}" ]]; then
+      # `-e` keeps a pattern that begins with `-` (for example a PEM header
+      # supplied via --pattern) from being parsed as a grep option, and
+      # `--` does the same for the path. The hit list is captured once and
+      # judged by its content, so an unreadable file elsewhere in the tree
+      # (non-zero grep status) cannot hide real matches.
+      hits="$(grep -r -E -l -e "${p}" -- "${target}" 2>/dev/null || true)"
+      if [[ -n "${hits}" ]]; then
+        echo "FAIL: credential pattern matched in ${target}: ${p}" >&2
+        # Print up to 5 matching file paths, one per line.
+        shown=0
+        while IFS= read -r hit; do
+          printf '  hit: %s\n' "${hit}" >&2
+          shown=$((shown + 1))
+          if ((shown >= 5)); then
+            break
+          fi
+        done <<<"${hits}"
+        found=1
+      fi
+    else
+      if grep -E -q -e "${p}" -- "${target}" 2>/dev/null; then
+        echo "FAIL: credential pattern matched in ${target}: ${p}" >&2
+        found=1
+      fi
+    fi
+  done
+  if ((found == 1)); then
+    return 1
+  fi
+  return 0
+}
diff --git a/test/e2e/lib/assert/policy-preset-applied.sh b/test/e2e/lib/assert/policy-preset-applied.sh
new file mode 100644
index 0000000000..cdb815cbfc
--- /dev/null
+++ b/test/e2e/lib/assert/policy-preset-applied.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Policy-preset assertion.
+#
+# Verifies that the active gateway policy set includes every preset the
+# caller declares. Shells out to `nemoclaw policies list` and looks each
+# expected preset id up in the listing (order-independent).
+#
+# Usage:
+#   e2e_assert_policy_preset_applied <preset-id>...
+
+_E2E_POL_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+# shellcheck source=../env.sh
+. "${_E2E_POL_LIB_DIR}/env.sh"
+
+# Returns:
+#   0  every expected preset id appears in the listing
+#   1  the CLI is missing, the listing failed, or a preset is absent
+#   2  no preset ids were given
+e2e_assert_policy_preset_applied() {
+  if [[ $# -eq 0 ]]; then
+    echo "FAIL: e2e_assert_policy_preset_applied: no preset ids given" >&2
+    return 2
+  fi
+  local expected=("$@")
+  e2e_env_trace "assert:policy-preset-applied" "${expected[*]}"
+
+  if ! command -v nemoclaw >/dev/null 2>&1; then
+    echo "FAIL: nemoclaw CLI not on PATH" >&2
+    return 1
+  fi
+  local active
+  if ! active="$(nemoclaw policies list 2>/dev/null)"; then
+    echo "FAIL: 'nemoclaw policies list' failed" >&2
+    return 1
+  fi
+  local missing=()
+  local p line matched
+  for p in "${expected[@]}"; do
+    # A preset counts as active when some line of the listing is exactly
+    # the id, or starts with the id followed by whitespace (a description
+    # or marker column). The id is compared literally, so `slack` does not
+    # match `slack-app` and regex metacharacters in an id (such as `.`)
+    # cannot widen the match. Reading from a here-string also avoids a
+    # `printf | grep -q` pipeline, which is fragile under `pipefail`.
+    matched=0
+    while IFS= read -r line; do
+      if [[ "${line}" == "${p}" || "${line}" == "${p}"[[:space:]]* ]]; then
+        matched=1
+        break
+      fi
+    done <<<"${active}"
+    if ((matched == 0)); then
+      missing+=("${p}")
+    fi
+  done
+  if ((${#missing[@]} > 0)); then
+    echo "FAIL: policy presets not applied: ${missing[*]}" >&2
+    echo "  active:" >&2
+    printf '%s\n' "${active}" | sed 's/^/    /' >&2
+    return 1
+  fi
+  return 0
+}
diff --git a/test/e2e/lib/env.sh b/test/e2e/lib/env.sh
index 1318221b1e..ba770163aa 100755
--- a/test/e2e/lib/env.sh
+++ b/test/e2e/lib/env.sh
@@ -7,6 +7,16 @@
 # Applies the same defaults historically set ad-hoc at the top of each
 # `test/e2e/test-*.sh` script. Safe to source from any scenario runner.
 
+# Auto-source the logging helpers so every consumer of env.sh gets
+# e2e_section / e2e_info / e2e_pass / e2e_fail for free. Scenario runner
+# and every suite step script sources env.sh — this keeps the logging
+# contract DRY (reuse category #1).
+_E2E_ENV_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [[ -f "${_E2E_ENV_LIB_DIR}/logging.sh" ]]; then
+  # shellcheck source=logging.sh
+  . "${_E2E_ENV_LIB_DIR}/logging.sh"
+fi
+
 e2e_env_apply_noninteractive() {
   export NEMOCLAW_NON_INTERACTIVE=1
   export DEBIAN_FRONTEND=noninteractive
diff --git a/test/e2e/lib/fixtures/_fake-http-stub.sh b/test/e2e/lib/fixtures/_fake-http-stub.sh
new file mode 100644
index 0000000000..80b42618c6
--- /dev/null
+++ b/test/e2e/lib/fixtures/_fake-http-stub.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Shared primitive for fake HTTP stub fixtures.
+#
+# Spawns a small Node.js HTTP server that answers any path with 200/JSON
+# and echoes the request shape. Used by `fake-telegram.sh`, `fake-discord.sh`,
+# and `fake-slack.sh` to avoid duplicating the listener harness.
+#
+# Function:
+#   _fake_http_stub_start <provider-label> <pid-var> <port-var>
+#     Writes the spawned server's PID into $pid-var and port into $port-var
+#     (via `printf -v`). Exports FAKE_<LABEL>_PORT, _PID and _URL.
+#   _fake_http_stub_stop <pid-var>
+#     Kills the stored PID. Idempotent.
+
+# Launch the stub server in the background, wait (50 polls, 0.1 s apart)
+# for it to write its listening port into a temp file, then publish the
+# PID and port through the caller-named variables and the FAKE_<LABEL>_*
+# exports. Returns 1 when no port was reported in time.
+_fake_http_stub_start() {
+  local label="${1:?provider label required}"
+  local pid_var="${2:?pid var name required}"
+  local port_var="${3:?port var name required}"
+
+  # The server reports its OS-assigned port by writing it to this file.
+  local port_file
+  port_file="$(mktemp)"
+
+  node -e '
+    const http = require("http");
+    const fs = require("fs");
+    const portFile = process.argv[1];
+    const label = process.argv[2];
+    const server = http.createServer((req, res) => {
+      let body = "";
+      req.setEncoding("utf8");
+      req.on("data", (d) => { body += d; });
+      req.on("end", () => {
+        res.writeHead(200, { "Content-Type": "application/json" });
+        res.end(JSON.stringify({
+          ok: true,
+          provider: label,
+          method: req.method,
+          url: req.url,
+          body,
+        }));
+      });
+    });
+    server.listen(0, "127.0.0.1", () => {
+      fs.writeFileSync(portFile, String(server.address().port));
+    });
+    process.on("SIGTERM", () => server.close(() => process.exit(0)));
+    process.on("SIGINT", () => server.close(() => process.exit(0)));
+  ' "${port_file}" "${label}" &
+  local pid=$!
+
+  # Poll until the port file is non-empty or the attempts run out.
+  local attempts_left=50
+  while ((attempts_left > 0)) && [[ ! -s "${port_file}" ]]; do
+    sleep 0.1
+    attempts_left=$((attempts_left - 1))
+  done
+
+  if [[ -s "${port_file}" ]]; then
+    local port
+    port="$(cat "${port_file}")"
+    rm -f "${port_file}"
+  else
+    echo "_fake_http_stub_start: ${label} server failed to report port" >&2
+    kill "${pid}" 2>/dev/null || true
+    rm -f "${port_file}"
+    return 1
+  fi
+
+  # Hand the results back through the variable names the caller supplied.
+  # shellcheck disable=SC2229 # dynamic name is the point
+  printf -v "${pid_var}" '%s' "${pid}"
+  printf -v "${port_var}" '%s' "${port}"
+
+  local label_uc
+  label_uc="$(printf '%s' "${label}" | tr '[:lower:]' '[:upper:]')"
+  export "FAKE_${label_uc}_PORT=${port}"
+  export "FAKE_${label_uc}_PID=${pid}"
+  export "FAKE_${label_uc}_URL=http://127.0.0.1:${port}"
+}
+
+# Terminate and reap the process whose PID is held in the named variable
+# (a no-op when that variable is empty), then clear the variable.
+_fake_http_stub_stop() {
+  local pid_var="${1:?pid var name required}"
+  local pid="${!pid_var:-}"
+  [[ -z "${pid}" ]] || {
+    kill "${pid}" 2>/dev/null || true
+    wait "${pid}" 2>/dev/null || true
+  }
+  # shellcheck disable=SC2229
+  printf -v "${pid_var}" '%s' ""
+}
diff --git a/test/e2e/lib/fixtures/fake-discord.sh b/test/e2e/lib/fixtures/fake-discord.sh
new file mode 100644
index 0000000000..dee5f1cca5
--- /dev/null
+++ b/test/e2e/lib/fixtures/fake-discord.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Local Discord API stub. Removes dependency on discord.com in CI.
+# See _fake-http-stub.sh for the shared harness contract.
+
+_E2E_FAKE_DC_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=_fake-http-stub.sh
+.
"${_E2E_FAKE_DC_DIR}/_fake-http-stub.sh"
+
+_E2E_FAKE_DISCORD_PID=""
+
+fake_discord_start() {
+  _fake_http_stub_start discord _E2E_FAKE_DISCORD_PID FAKE_DISCORD_PORT
+}
+
+fake_discord_stop() {
+  _fake_http_stub_stop _E2E_FAKE_DISCORD_PID
+  unset FAKE_DISCORD_PORT FAKE_DISCORD_PID FAKE_DISCORD_URL
+}
diff --git a/test/e2e/lib/fixtures/fake-openai.sh b/test/e2e/lib/fixtures/fake-openai.sh
new file mode 100644
index 0000000000..f133d2f08f
--- /dev/null
+++ b/test/e2e/lib/fixtures/fake-openai.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Fake OpenAI-compatible endpoint fixture.
+#
+# Spawns a tiny Node.js HTTP server that responds to `/v1/chat/completions`
+# and `/v1/models` with deterministic stub payloads. Removes dependency on
+# real NVIDIA / OpenAI endpoints for parity comparisons and fast-mode
+# inference probes (Risk #2 mitigation in the migration spec).
+#
+# Follows the same inline-Node pattern as test-messaging-providers.sh:
+# a `bash` wrapper that spawns `node -e 'http.createServer(...)'` and
+# exposes the chosen port on an `<NAME>_PORT` env var.
+#
+# Contract:
+#   fake_openai_start — start server, block until ready, export
+#                       FAKE_OPENAI_PORT, FAKE_OPENAI_PID and
+#                       FAKE_OPENAI_URL. If E2E_CONTEXT_DIR is set, also
+#                       records port and PID in context.env so later
+#                       teardown can find them.
+#   fake_openai_stop  — stop the server. Idempotent.
+
+_E2E_FAKE_OPENAI_PID=""
+_E2E_FAKE_OPENAI_PORT=""
+
+# Start the stub server and wait up to ~5s for it to report its port.
+# Returns 1 (leaving no PID recorded) when the port never shows up.
+fake_openai_start() {
+  # The server binds an OS-assigned ephemeral port and reports it through
+  # this temp file. The file is removed explicitly on both exit paths; a
+  # `trap ... RETURN` is avoided because it stays installed after this
+  # function returns and can replace a RETURN trap set by the caller.
+  local tmp_port
+  tmp_port="$(mktemp)"
+
+  node -e '
+    const http = require("http");
+    const fs = require("fs");
+    const portFile = process.argv[1];
+    const server = http.createServer((req, res) => {
+      let body = "";
+      req.setEncoding("utf8");
+      req.on("data", (d) => { body += d; });
+      req.on("end", () => {
+        if (req.url === "/v1/models") {
+          res.writeHead(200, { "Content-Type": "application/json" });
+          res.end(JSON.stringify({
+            object: "list",
+            data: [{ id: "fake-model", object: "model" }],
+          }));
+          return;
+        }
+        if (req.url === "/v1/chat/completions") {
+          res.writeHead(200, { "Content-Type": "application/json" });
+          res.end(JSON.stringify({
+            id: "chatcmpl-fake",
+            object: "chat.completion",
+            choices: [{
+              index: 0,
+              message: { role: "assistant", content: "pong" },
+              finish_reason: "stop",
+            }],
+            usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+          }));
+          return;
+        }
+        res.writeHead(404);
+        res.end();
+      });
+    });
+    server.listen(0, "127.0.0.1", () => {
+      fs.writeFileSync(portFile, String(server.address().port));
+    });
+    process.on("SIGTERM", () => server.close(() => process.exit(0)));
+    process.on("SIGINT", () => server.close(() => process.exit(0)));
+  ' "${tmp_port}" &
+  _E2E_FAKE_OPENAI_PID=$!
+
+  # Wait up to ~5s for the server to write its port.
+  local i
+  for i in $(seq 1 50); do
+    if [[ -s "${tmp_port}" ]]; then
+      break
+    fi
+    : "${i}" # quiet unused-var check
+    sleep 0.1
+  done
+  if [[ ! -s "${tmp_port}" ]]; then
+    echo "fake_openai_start: server failed to report port" >&2
+    kill "${_E2E_FAKE_OPENAI_PID}" 2>/dev/null || true
+    # Do not leave a dead PID behind for fake_openai_stop to signal.
+    _E2E_FAKE_OPENAI_PID=""
+    rm -f "${tmp_port}"
+    return 1
+  fi
+  _E2E_FAKE_OPENAI_PORT="$(cat "${tmp_port}")"
+  rm -f "${tmp_port}"
+  export FAKE_OPENAI_PORT="${_E2E_FAKE_OPENAI_PORT}"
+  export FAKE_OPENAI_PID="${_E2E_FAKE_OPENAI_PID}"
+  export FAKE_OPENAI_URL="http://127.0.0.1:${_E2E_FAKE_OPENAI_PORT}"
+  # Recording into context.env is best-effort: write errors are ignored.
+  if [[ -n "${E2E_CONTEXT_DIR:-}" && -d "${E2E_CONTEXT_DIR}" ]]; then
+    printf 'FAKE_OPENAI_PORT=%s\n' "${_E2E_FAKE_OPENAI_PORT}" >>"${E2E_CONTEXT_DIR}/context.env" 2>/dev/null || true
+    printf 'FAKE_OPENAI_PID=%s\n' "${_E2E_FAKE_OPENAI_PID}" >>"${E2E_CONTEXT_DIR}/context.env" 2>/dev/null || true
+  fi
+}
+
+# Stop the server recorded in FAKE_OPENAI_PID (falling back to the private
+# PID variable), then clear every exported and private handle.
+fake_openai_stop() {
+  local pid="${FAKE_OPENAI_PID:-${_E2E_FAKE_OPENAI_PID:-}}"
+  if [[ -n "${pid}" ]]; then
+    kill "${pid}" 2>/dev/null || true
+    wait "${pid}" 2>/dev/null || true
+  fi
+  unset FAKE_OPENAI_PORT FAKE_OPENAI_PID FAKE_OPENAI_URL
+  _E2E_FAKE_OPENAI_PID=""
+  _E2E_FAKE_OPENAI_PORT=""
+}
diff --git a/test/e2e/lib/fixtures/fake-slack.sh b/test/e2e/lib/fixtures/fake-slack.sh
new file mode 100644
index 0000000000..34eac39f32
--- /dev/null
+++ b/test/e2e/lib/fixtures/fake-slack.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Local Slack API stub. Removes dependency on slack.com in CI.
+# See _fake-http-stub.sh for the shared harness contract.
+
+_E2E_FAKE_SL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=_fake-http-stub.sh
+.
"${_E2E_FAKE_SL_DIR}/_fake-http-stub.sh"
+
+_E2E_FAKE_SLACK_PID=""
+
+# Start the shared HTTP stub with the label `slack`; the stub stores the
+# server PID in _E2E_FAKE_SLACK_PID and the port in FAKE_SLACK_PORT.
+fake_slack_start() {
+  _fake_http_stub_start slack _E2E_FAKE_SLACK_PID FAKE_SLACK_PORT
+}
+
+# Stop the Slack stub and drop the FAKE_SLACK_* variables.
+fake_slack_stop() {
+  _fake_http_stub_stop _E2E_FAKE_SLACK_PID
+  unset FAKE_SLACK_PORT FAKE_SLACK_PID FAKE_SLACK_URL
+}
diff --git a/test/e2e/lib/fixtures/fake-telegram.sh b/test/e2e/lib/fixtures/fake-telegram.sh
new file mode 100644
index 0000000000..ca453d6685
--- /dev/null
+++ b/test/e2e/lib/fixtures/fake-telegram.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Local Telegram API stub. Removes dependency on api.telegram.org in CI.
+# See _fake-http-stub.sh for the shared harness contract.
+
+_E2E_FAKE_TG_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=_fake-http-stub.sh
+. "${_E2E_FAKE_TG_DIR}/_fake-http-stub.sh"
+
+_E2E_FAKE_TELEGRAM_PID=""
+
+# Start the shared HTTP stub with the label `telegram`; the stub stores the
+# server PID in _E2E_FAKE_TELEGRAM_PID and the port in FAKE_TELEGRAM_PORT.
+fake_telegram_start() {
+  _fake_http_stub_start telegram _E2E_FAKE_TELEGRAM_PID FAKE_TELEGRAM_PORT
+}
+
+# Stop the Telegram stub and drop the FAKE_TELEGRAM_* variables.
+fake_telegram_stop() {
+  _fake_http_stub_stop _E2E_FAKE_TELEGRAM_PID
+  unset FAKE_TELEGRAM_PORT FAKE_TELEGRAM_PID FAKE_TELEGRAM_URL
+}
diff --git a/test/e2e/lib/fixtures/older-base-image.sh b/test/e2e/lib/fixtures/older-base-image.sh
new file mode 100644
index 0000000000..3619528684
--- /dev/null
+++ b/test/e2e/lib/fixtures/older-base-image.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Older-base-image fixture.
+#
+# Absorbs reuse category #7 from the migration spec: three hand-rolled
+# Docker-older-base-image patterns in `test-rebuild-openclaw.sh`,
+# `test-rebuild-hermes.sh`, and `test-sandbox-rebuild.sh`.
+#
+# Contract:
+#   older_base_image_prepare <tag> [--registry ghcr.io/nvidia/nemoclaw]
+#     Writes a minimal Dockerfile to a temp location whose first line is
+#     `FROM <registry>:<tag>`, and prints the Dockerfile path on stdout.
+#     Honors E2E_DRY_RUN: skips the `docker pull` step (but still writes
+#     the Dockerfile, which is what callers inspect).
+#   older_base_image_cleanup <dockerfile-path>
+#     Removes the generated Dockerfile and (if present) its build context.
+
+_E2E_OBI_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=../env.sh
+. "${_E2E_OBI_LIB_DIR}/../env.sh"
+
+# Write `FROM <registry>:<tag>` into a fresh temp directory and print the
+# Dockerfile path on stdout. All diagnostics go to stderr so callers can
+# capture the path with `$(...)`.
+# Returns 2 on an unknown argument.
+older_base_image_prepare() {
+  local tag="${1:?tag required}"
+  shift || true
+  local registry="ghcr.io/nvidia/nemoclaw"
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --registry)
+        registry="${2:?value required}"
+        shift 2
+        ;;
+      *)
+        echo "older_base_image_prepare: unknown arg: $1" >&2
+        return 2
+        ;;
+    esac
+  done
+
+  local dir
+  dir="$(mktemp -d)"
+  local dockerfile="${dir}/Dockerfile.older-base"
+  # NOTE(review): the heredoc body and the dry-run guard below were lost
+  # from this copy of the patch and are rebuilt from the contract in the
+  # file header — confirm against the original commit.
+  cat >"${dockerfile}" <<EOF
+FROM ${registry}:${tag}
+EOF
+
+  # Pre-pulling is best-effort and skipped entirely in dry-run mode or
+  # when docker is not installed.
+  if [[ "${E2E_DRY_RUN:-0}" != "1" ]]; then
+    if command -v docker >/dev/null 2>&1; then
+      docker pull "${registry}:${tag}" >&2 || \
+        echo "older_base_image_prepare: docker pull failed (continuing; build may still succeed on cached layers)" >&2
+    fi
+  fi
+  printf '%s\n' "${dockerfile}"
+}
+
+# Remove a Dockerfile produced by older_base_image_prepare. A missing or
+# empty argument is a no-op.
+older_base_image_cleanup() {
+  local dockerfile="${1:-}"
+  if [[ -z "${dockerfile}" || ! -f "${dockerfile}" ]]; then
+    return 0
+  fi
+  local dir
+  dir="$(dirname "${dockerfile}")"
+  rm -f "${dockerfile}"
+  # Only remove the temp dir if it looks like one we created.
+  case "${dir}" in
+    /tmp/* | /var/folders/*) rm -rf "${dir}" ;;
+  esac
+}
diff --git a/test/e2e/lib/logging.sh b/test/e2e/lib/logging.sh
new file mode 100644
index 0000000000..e0c32c2072
--- /dev/null
+++ b/test/e2e/lib/logging.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Canonical logging helpers for E2E scenarios.
+# +# Collapses the ad-hoc `section` / `info` / `pass` / `fail` functions that +# the 40 legacy `test/e2e/test-*.sh` scripts each re-declare with subtle +# drift. Emits stable markers that `scripts/e2e/compare-parity.sh` parses +# when diffing legacy vs. migrated runs. +# +# Contract: +# PASS: — asserting success +# FAIL: — asserting failure; `e2e_fail` exits non-zero +# === Phase N: