NVIDIA · jyaunches · May 8, 2026 · May 8, 2026 · May 11, 2026 · coderabbitai
diff --git a/.github/workflows/e2e-scenarios.yaml b/.github/workflows/e2e-scenarios.yaml
@@ -0,0 +1,112 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Scenario-based E2E. Runs a single setup scenario by id against the
+# matching runner; can also validate resolution / coverage via --plan-only.
+#
+# Manual-only (workflow_dispatch) while scenario-based coverage migrates.
+# Existing nightly-e2e / macos-e2e / wsl-e2e workflows remain unchanged.
+
+name: e2e-scenarios
+
+on:
+  workflow_dispatch:
+    inputs:
+      scenario:
+        description: "Scenario id (e.g. ubuntu-repo-cloud-openclaw)"
+        required: true
+        type: string
+      # Offered as a "true"/"false" choice; the "Run scenario" step is skipped
+      # when this is "true".
+      plan_only:
+        description: "Resolve and print plan only (no install/onboard/suites)"
+        required: false
+        default: "false"
+        type: choice
+        options:
+          - "true"
+          - "false"
+      # Forwarded to the "Run scenario" step as E2E_SUITE_FILTER.
+      suite_filter:
+        description: "Comma-separated suite ids to run (optional; defaults to the scenario's full suite list)"
+        required: false
+        default: ""
+        type: string
+
+# Limit the workflow token to read-only access to repository contents.
+permissions:
+  contents: read
+
+# Runs are grouped per scenario id; a run already in progress is never
+# cancelled by a newer dispatch of the same scenario.
+concurrency:
+  group: e2e-scenarios-${{ github.event.inputs.scenario }}
+  cancel-in-progress: false
+
+jobs:
+  # Route the scenario to the correct runner.
+  #
+  # Scenario ids encode their target platform as the first segment
+  # (e.g. `macos-repo-cloud-openclaw`, `wsl-repo-cloud-openclaw`,
+  # `gpu-repo-local-ollama-openclaw`). The workflow previously pinned
+  # `runs-on: ubuntu-latest` for every scenario, which caused non-Ubuntu
+  # scenarios to fail on the wrong runner (CodeRabbit review item #1).
+  resolve-runner:
+    runs-on: ubuntu-latest
+    outputs:
+      runner: ${{ steps.pick.outputs.runner }}
+    steps:
+      - id: pick
+        env:
+          SCENARIO: ${{ github.event.inputs.scenario }}
+        run: |
+          # Map the scenario's platform prefix to a runner label, then publish
+          # it once as the `runner` step output.
+          case "${SCENARIO}" in
+            macos-*)         runner=macos-latest ;;
+            wsl-*)           runner=windows-latest ;;
+            gpu-*)           runner=self-hosted ;;
+            ubuntu-*|brev-*) runner=ubuntu-latest ;;
+            *)
+              echo "::error::Unknown scenario prefix for runner selection: ${SCENARIO}" >&2
+              exit 1
+              ;;
+          esac
+          echo "runner=${runner}" >> "$GITHUB_OUTPUT"
+
+  # Execute the requested scenario on the runner chosen by resolve-runner:
+  # render the coverage report, print the resolved plan, optionally run the
+  # scenario, and always upload whatever artifacts were produced.
+  run-scenario:
+    needs: resolve-runner
+    runs-on: ${{ needs.resolve-runner.outputs.runner }}
+    timeout-minutes: 45
+    # Every `run:` script below is bash. Without this default, a job routed to
+    # windows-latest would execute them with PowerShell, where "$GITHUB_STEP_SUMMARY"
+    # and "${SCENARIO}" are not environment lookups.
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install root dependencies
+        run: npm ci --ignore-scripts
+
+      - name: Render coverage report
+        run: |
+          mkdir -p .e2e
+          bash test/e2e/coverage-report.sh > .e2e/coverage.md
+          echo '## E2E scenario coverage' >> "$GITHUB_STEP_SUMMARY"
+          cat .e2e/coverage.md >> "$GITHUB_STEP_SUMMARY"
+
+      # The scenario id is free-form user input. It is handed to the script via
+      # an environment variable (as the resolve-runner step does) rather than
+      # being expanded into the script text, so it can never be parsed as shell.
+      - name: Show resolved plan
+        env:
+          SCENARIO: ${{ github.event.inputs.scenario }}
+        run: |
+          bash test/e2e/run-scenario.sh "${SCENARIO}" --plan-only
+
+      - name: Run scenario
+        if: github.event.inputs.plan_only != 'true'
+        env:
+          SCENARIO: ${{ github.event.inputs.scenario }}
+          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+          E2E_SUITE_FILTER: ${{ github.event.inputs.suite_filter }}
+        run: |
+          bash test/e2e/run-scenario.sh "${SCENARIO}"
+
+      # Runs even after a failed scenario so its output can be inspected.
+      - name: Upload scenario artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-scenario-${{ github.event.inputs.scenario }}
+          path: |
+            .e2e/
+            test/e2e/logs/
+          if-no-files-found: warn
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
@@ -46,3 +46,4 @@ secrets.json
 secrets.yaml
 service-account*.json
 token.json
+.e2e/
diff --git a/AGENTS.md b/AGENTS.md
@@ -27,7 +27,7 @@ This repo ships agent skills under `.agents/skills/`, organized into three audie
 | `nemoclaw-blueprint/model-specific-setup/` | JSON | Agent-scoped model/provider compatibility registry |
 | `scripts/` | Bash/JS/TS | Install helpers, setup, automation, E2E tooling |
 | `test/` | JavaScript (ESM) | Root-level integration tests (Vitest) |
-| `test/e2e/` | Bash/JS | End-to-end tests (Brev cloud instances) |
+| `test/e2e/` | Bash/JS/TS | End-to-end tests, scenario-based runner (see `test/e2e/README.md`) |
 | `docs/` | Markdown (MyST) | User-facing docs (Sphinx) |
 
 ## Quick Reference

diff --git a/test/e2e-context-helper.test.ts b/test/e2e-context-helper.test.ts
@@ -0,0 +1,123 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, it, expect } from "vitest";
+import { spawnSync, type SpawnSyncReturns } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "..");
+const CONTEXT_LIB = path.join(REPO_ROOT, "test/e2e/lib/context.sh");
+const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e/run-scenario.sh");
+
+/**
+ * Runs `script` through `bash -c` with the repository root as working directory.
+ *
+ * @param script - Bash source handed to `bash -c`.
+ * @param env - Extra variables layered on top of the current process environment.
+ * @returns The synchronous spawn result, with stdout/stderr decoded as UTF-8.
+ */
+function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> {
+  // E2E_SPAWN_TIMEOUT_MS, when set, replaces the 60-second default.
+  const timeoutMs = Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000);
+  const childEnv = { ...process.env, ...env };
+  const result = spawnSync("bash", ["-c", script], {
+    cwd: REPO_ROOT,
+    encoding: "utf8",
+    env: childEnv,
+    timeout: timeoutMs,
+  });
+  return result;
+}
+
+describe("E2E context helper (lib/context.sh)", () => {
+  /**
+   * Creates a throwaway context directory, passes it to `body`, and removes it
+   * afterwards even when an assertion inside `body` throws.
+   */
+  function withContextDir(body: (dir: string) => void): void {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-"));
+    try {
+      body(dir);
+    } finally {
+      fs.rmSync(dir, { recursive: true, force: true });
+    }
+  }
+
+  /**
+   * Builds the message shown when an exit-status assertion fails. A spawn
+   * failure or timeout leaves `status` null and stderr empty, so the spawn
+   * error is put first when there is one.
+   */
+  function failureDetail(r: SpawnSyncReturns<string>): string {
+    const stderr = r.stderr ?? "";
+    return r.error ? `${r.error.message}\n${stderr}` : stderr;
+  }
+
+  it("context_should_write_and_source_values", () => {
+    withContextDir((tmp) => {
+      const script = `
+        set -euo pipefail
+        . "${CONTEXT_LIB}"
+        export E2E_CONTEXT_DIR="${tmp}"
+        e2e_context_init
+        e2e_context_set E2E_SCENARIO ubuntu-repo-cloud-openclaw
+        e2e_context_set E2E_AGENT openclaw
+        # In a fresh shell, source the context and print the values.
+        bash -c 'set -euo pipefail; . "${tmp}/context.env"; echo "SCENARIO=$E2E_SCENARIO"; echo "AGENT=$E2E_AGENT"'
+      `;
+      const r = runBash(script);
+      expect(r.status, failureDetail(r)).toBe(0);
+      expect(r.stdout).toContain("SCENARIO=ubuntu-repo-cloud-openclaw");
+      expect(r.stdout).toContain("AGENT=openclaw");
+      expect(fs.existsSync(path.join(tmp, "context.env"))).toBe(true);
+    });
+  });
+
+  it("context_require_should_fail_for_missing_value", () => {
+    withContextDir((tmp) => {
+      const script = `
+        set -euo pipefail
+        . "${CONTEXT_LIB}"
+        export E2E_CONTEXT_DIR="${tmp}"
+        e2e_context_init
+        e2e_context_require E2E_SANDBOX_NAME
+      `;
+      const r = runBash(script);
+      expect(r.status).not.toBe(0);
+      // The failure must name the missing key.
+      expect(r.stderr).toMatch(/E2E_SANDBOX_NAME/);
+    });
+  });
+
+  it("context_dump_should_redact_sensitive_values", () => {
+    withContextDir((tmp) => {
+      const script = `
+        set -euo pipefail
+        . "${CONTEXT_LIB}"
+        export E2E_CONTEXT_DIR="${tmp}"
+        e2e_context_init
+        e2e_context_set E2E_SCENARIO ubuntu-repo-cloud-openclaw
+        e2e_context_set NVIDIA_API_KEY super-secret-api-key-value
+        e2e_context_set OPENAI_API_TOKEN nothing-to-see-here-token
+        e2e_context_dump
+      `;
+      const r = runBash(script);
+      expect(r.status, failureDetail(r)).toBe(0);
+      // Secret values must not appear; non-sensitive values must still be printed.
+      expect(r.stdout).not.toContain("super-secret-api-key-value");
+      expect(r.stdout).not.toContain("nothing-to-see-here-token");
+      expect(r.stdout).toMatch(/NVIDIA_API_KEY=.*REDACTED/);
+      expect(r.stdout).toContain("ubuntu-repo-cloud-openclaw");
+    });
+  });
+
+  it("scenario_plan_execution_should_emit_context_under_dry_run", () => {
+    withContextDir((tmp) => {
+      // Invokes the runner script directly (not via `bash -c`) with the same
+      // spawn options that runBash uses.
+      const r = spawnSync("bash", [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"], {
+        env: { ...process.env, E2E_CONTEXT_DIR: tmp },
+        encoding: "utf8",
+        timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
+        cwd: REPO_ROOT,
+      });
+      expect(r.status, failureDetail(r)).toBe(0);
+      const ctxPath = path.join(tmp, "context.env");
+      expect(fs.existsSync(ctxPath), `context.env missing in ${tmp}`).toBe(true);
+      const ctx = fs.readFileSync(ctxPath, "utf8");
+      // Each key must start a line of context.env as `KEY=`.
+      for (const key of [
+        "E2E_SCENARIO",
+        "E2E_PLATFORM_OS",
+        "E2E_INSTALL_METHOD",
+        "E2E_ONBOARDING_PATH",
+        "E2E_AGENT",
+        "E2E_PROVIDER",
+        "E2E_SANDBOX_NAME",
+        "E2E_GATEWAY_URL",
+        "E2E_INFERENCE_ROUTE",
+      ]) {
+        expect(ctx, `${key} missing from context.env`).toMatch(new RegExp(`^${key}=`, "m"));
+      }
+    });
+  });
+});
diff --git a/test/e2e-coverage-report.test.ts b/test/e2e-coverage-report.test.ts
@@ -0,0 +1,87 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, it, expect } from "vitest";
+import path from "node:path";
+
+import { loadMetadataFromDir, loadMetadataFromObjects } from "./e2e/resolver/load.ts";
+import { renderCoverageReport } from "./e2e/resolver/coverage.ts";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "..");
+const E2E_DIR = path.join(REPO_ROOT, "test/e2e");
+
+describe("coverage report", () => {
+  it("should_render_single_coverage_table", () => {
+    const metadata = loadMetadataFromDir(E2E_DIR);
+    const report = renderCoverageReport(metadata);
+    const scenarioIds = Object.keys(metadata.scenarios.setup_scenarios);
+
+    // The primary Scenario Coverage table header must occur exactly once.
+    const headerMatches = report.match(
+      /\|\s*Scenario\s*\|\s*Platform\s*\|\s*Install\s*\|\s*Runtime\s*\|\s*Onboarding\s*\|\s*Expected state\s*\|\s*Suites\s*\|/g,
+    );
+    expect(headerMatches).toBeTruthy();
+    expect(headerMatches?.length).toBe(1);
+
+    // Every scenario id is mentioned somewhere in the report.
+    scenarioIds.forEach((id) => {
+      expect(report).toContain(id);
+    });
+
+    // Walking the ids in default sort() order, each `| id |` cell must be
+    // found at or after the position of the previous one.
+    let cursor = 0;
+    for (const id of [...scenarioIds].sort()) {
+      const at = report.indexOf(`| ${id} |`, cursor);
+      expect(at, `row ${id} not found in order. report:\n${report}`).toBeGreaterThanOrEqual(0);
+      cursor = at;
+    }
+  });
+
+  it("should_flag_scenarios_without_suites", () => {
+    // Minimal in-memory metadata: one scenario whose suite list is empty.
+    const metadata = loadMetadataFromObjects({
+      scenarios: {
+        platforms: { p: {} },
+        installs: { i: {} },
+        runtimes: { r: {} },
+        onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
+        setup_scenarios: {
+          "empty-suite-scenario": {
+            dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" },
+            expected_state: "some-state",
+            suites: [],
+          },
+        },
+      },
+      expectedStates: { expected_states: { "some-state": { gateway: { health: "healthy" } } } },
+      suites: { suites: {} },
+    });
+    const report = renderCoverageReport(metadata);
+    expect(report).toMatch(/## Gaps/);
+    // The scenario id and the phrase "no suites" may appear in either order.
+    expect(report).toMatch(/empty-suite-scenario.*no suites|no suites.*empty-suite-scenario/s);
+  });
+
+  it("should_flag_expected_states_not_used_by_any_scenario", () => {
+    // Two expected states are declared but the only scenario references one.
+    const metadata = loadMetadataFromObjects({
+      scenarios: {
+        platforms: { p: {} },
+        installs: { i: {} },
+        runtimes: { r: {} },
+        onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
+        setup_scenarios: {
+          s1: {
+            dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" },
+            expected_state: "used-state",
+            suites: ["smoke"],
+          },
+        },
+      },
+      expectedStates: {
+        expected_states: {
+          "used-state": { gateway: { health: "healthy" } },
+          "unused-state": { gateway: { health: "healthy" } },
+        },
+      },
+      suites: {
+        suites: { smoke: { steps: [{ id: "a", script: "suites/smoke/a.sh" }] } },
+      },
+    });
+    const report = renderCoverageReport(metadata);
+    expect(report).toMatch(/## Gaps/);
+    expect(report).toMatch(/unused-state/);
+  });
+});