From 27fadfc91d5dbdbe0d7821986968ead5d1572374 Mon Sep 17 00:00:00 2001 From: Julie Yaunches Date: Mon, 8 Jun 2026 14:41:19 -0400 Subject: [PATCH] fix(e2e): gate scenario fan-out by onboarding+secret support contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two PR Review Advisor 🛠️ Needs-attention findings on HEAD 70df0519b: 1. Live fan-out includes scenarios without onboarding and secret plumbing (test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh:32): The typed registry declares 12 scenarios whose onboarding profile has no case in dispatch.sh and/or whose requiredSecrets are not declared in the scenario workflows. Live fan-out via e2e-scenarios-all.yaml would either fail at onboarding dispatch ('unsupported onboarding profile') or run without the credential boundary the registry declares. Path chosen (per advisor): filter the generated matrix to fully supported scenarios + add a static contract test. Implementation: - New module test/e2e-scenario/scenarios/runtime-support.ts exporting SUPPORTED_ONBOARDING_IDS (mirrors dispatch.sh case statement), WORKFLOW_AVAILABLE_SECRETS (mirrors workflow secrets:), and isScenarioFullyWired(scenario) which combines them with the compiler's docker-missing onboarding rewrite logic. - buildScenarioMatrix() in run.ts now filters via isScenarioFullyWired and writes a structured per-scenario explanation of skipped scenarios to stderr. The matrix went from 23 entries to 11; the 12 skipped scenarios stay in the registry as roadmap items. - New framework test e2e-scenario-fully-wired.test.ts locks the contract: parses dispatch.sh and the two scenario workflows, asserts SUPPORTED_ONBOARDING_IDS == dispatcher case set, WORKFLOW_AVAILABLE_SECRETS == workflow secrets, every emitted matrix entry is fully wired, every filtered scenario has at least one structured reason. - Updated e2e-scenario-matrix.test.ts to assert matrix == fully-wired subset (was: matrix == every registered scenario). 2. No-docker negative onboarding truncates the framework-seeded context (test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh:35): The TS runner seeds context.env before phase actions, and dispatch-action.sh explicitly forbids workers from calling e2e_context_init because it opens with ': > ctx' and wipes seeded keys. The no-docker negative worker still called e2e_context_init, wiping E2E_SCENARIO/E2E_SANDBOX_NAME/E2E_GATEWAY_URL before state-validation probes (gateway-absent, sandbox-absent) need them. Removed the call. Added a comment block at the same site explaining why workers must not call e2e_context_init, mirroring the contract already documented in dispatch-action.sh. Verified: vitest test/e2e-scenario/ 374/374 (8 new contract tests); typecheck:cli clean; npx tsx scenarios/run.ts --emit-matrix produces 11 fully-wired entries with the 12 filtered scenarios reported on stderr; bash -n + shfmt clean on edited shell file. --- .../e2e-scenario-fully-wired.test.ts | 164 ++++++++++++++++++ .../e2e-scenario-matrix.test.ts | 20 ++- .../onboard/cloud-openclaw-no-docker.sh | 9 +- test/e2e-scenario/scenarios/run.ts | 43 ++++- .../e2e-scenario/scenarios/runtime-support.ts | 119 +++++++++++++ 5 files changed, 341 insertions(+), 14 deletions(-) create mode 100644 test/e2e-scenario/framework-tests/e2e-scenario-fully-wired.test.ts create mode 100644 test/e2e-scenario/scenarios/runtime-support.ts diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-fully-wired.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-fully-wired.test.ts new file mode 100644 index 0000000000..d6ba45b40c --- /dev/null +++ b/test/e2e-scenario/framework-tests/e2e-scenario-fully-wired.test.ts @@ -0,0 +1,164 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Contract test for the scenario fan-out runtime-support gate. + * + * Three invariants: + * + * 1. SUPPORTED_ONBOARDING_IDS in scenarios/runtime-support.ts matches + * the bash dispatcher (nemoclaw_scenarios/onboard/dispatch.sh) + * case statement. The dispatcher is the source of truth for what + * profiles can actually be invoked at runtime; the typed set must + * mirror it. + * + * 2. WORKFLOW_AVAILABLE_SECRETS matches the secrets declared in + * .github/workflows/e2e-scenarios.yaml AND + * .github/workflows/e2e-scenarios-all.yaml. A secret listed in + * one but not the other would still leave fan-out runs without + * the secret at runtime. + * + * 3. Every scenario emitted by buildScenarioMatrix() satisfies + * isScenarioFullyWired (positive sanity) and every scenario + * filtered out has a real reason listed (no silent drops). + * + * Adding a new dispatcher case or workflow secret requires updating + * the typed sets in runtime-support.ts. Adding a new scenario whose + * onboarding profile or required secrets are not yet plumbed will + * NOT fail this test — the scenario stays in the registry as a + * roadmap item but the matrix emitter skips it. + */ + +import { describe, expect, it } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; + +import { buildScenarioMatrix } from "../scenarios/run.ts"; +import { listScenarios } from "../scenarios/registry.ts"; +import { + isScenarioFullyWired, + SUPPORTED_ONBOARDING_IDS, + WORKFLOW_AVAILABLE_SECRETS, +} from "../scenarios/runtime-support.ts"; + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const DISPATCH_SH = path.join( + REPO_ROOT, + "test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh", +); +const E2E_SCENARIOS_YAML = path.join(REPO_ROOT, ".github/workflows/e2e-scenarios.yaml"); +const E2E_SCENARIOS_ALL_YAML = path.join(REPO_ROOT, ".github/workflows/e2e-scenarios-all.yaml"); + +/** + * Parse the dispatcher case-statement labels. Matches indented lines of + * the form `)` or ` | )` inside the `case "${profile}" + * in ... esac` block. Tolerates leading whitespace and `*)` default. + */ +function dispatcherProfileIds(): Set { + const source = fs.readFileSync(DISPATCH_SH, "utf8"); + const ids = new Set(); + // Match e.g. ` cloud-openclaw)` or ` cloud-openclaw-invalid-nvidia-key | cloud-openclaw-gateway-port-conflict)` + const re = /^\s+([a-z][a-z0-9-]*(?:\s*\|\s*[a-z][a-z0-9-]*)*)\)\s*$/gm; + let match: RegExpExecArray | null; + // biome-ignore lint/suspicious/noAssignInExpressions: classic regex exec loop + while ((match = re.exec(source)) !== null) { + for (const id of match[1].split("|")) { + const trimmed = id.trim(); + if (trimmed && trimmed !== "*") ids.add(trimmed); + } + } + return ids; +} + +/** + * Parse the secret keys declared under any `secrets:` block in a workflow + * YAML. Light text-level parser to avoid pulling in js-yaml just for two + * files; the structure is stable. + */ +function workflowSecretKeys(yamlPath: string): Set { + const source = fs.readFileSync(yamlPath, "utf8"); + const keys = new Set(); + const lines = source.split("\n"); + let inSecrets = false; + let secretsIndent = -1; + for (const line of lines) { + if (/^\s*secrets:\s*$/.test(line)) { + inSecrets = true; + secretsIndent = line.search(/\S/); + continue; + } + if (inSecrets) { + if (line.trim() === "" || line.search(/\S/) <= secretsIndent) { + // De-dent or blank line ends the block. + if (line.search(/\S/) >= 0 && line.search(/\S/) <= secretsIndent) { + inSecrets = false; + } + continue; + } + const m = /^\s+([A-Z][A-Z0-9_]*):/.exec(line); + if (m) keys.add(m[1]); + } + } + return keys; +} + +describe("scenario runtime-support contract", () => { + it("SUPPORTED_ONBOARDING_IDS matches dispatcher case statement", () => { + const dispatcherIds = dispatcherProfileIds(); + // The dispatcher's negative variants (`-no-docker`) live in + // separate `*.sh` files sourced by dispatch.sh; ids that appear in + // the case statement but represent compiler-synthesized variants + // should also be in the typed set. Cross-check both directions. + const typedSet = new Set(SUPPORTED_ONBOARDING_IDS); + const missingFromTyped = [...dispatcherIds].filter((id) => !typedSet.has(id)); + const missingFromDispatcher = [...typedSet].filter((id) => !dispatcherIds.has(id)); + expect( + missingFromTyped, + `dispatcher routes profiles the typed set forgot: ${missingFromTyped.join(", ")}`, + ).toEqual([]); + expect( + missingFromDispatcher, + `typed set claims profiles the dispatcher does not route: ${missingFromDispatcher.join(", ")}`, + ).toEqual([]); + }); + + it("WORKFLOW_AVAILABLE_SECRETS matches both scenario workflows", () => { + const fromScenarios = workflowSecretKeys(E2E_SCENARIOS_YAML); + const fromAll = workflowSecretKeys(E2E_SCENARIOS_ALL_YAML); + const typed = new Set(WORKFLOW_AVAILABLE_SECRETS); + expect(fromScenarios, "e2e-scenarios.yaml secrets must match typed set").toEqual(typed); + expect(fromAll, "e2e-scenarios-all.yaml secrets must match typed set").toEqual(typed); + }); + + it("every scenario emitted by buildScenarioMatrix is fully wired", () => { + const matrix = buildScenarioMatrix(); + const idsInMatrix = new Set(matrix.map((entry) => entry.id)); + expect(idsInMatrix.size).toBeGreaterThan(0); + for (const scenario of listScenarios()) { + if (!idsInMatrix.has(scenario.id)) continue; + const wired = isScenarioFullyWired(scenario); + expect( + wired, + `scenario ${scenario.id} appears in matrix but is not fully wired`, + ).toEqual({ ok: true }); + } + }); + + it("every filtered-out scenario has a structured reason list", () => { + const matrix = buildScenarioMatrix(); + const idsInMatrix = new Set(matrix.map((entry) => entry.id)); + const filtered = listScenarios().filter((s) => !idsInMatrix.has(s.id)); + expect(filtered.length).toBeGreaterThan(0); // Confirms the gate is actually filtering something. + for (const scenario of filtered) { + const wired = isScenarioFullyWired(scenario); + if (wired.ok) { + throw new Error( + `scenario ${scenario.id} is fully wired but was filtered out by buildScenarioMatrix`, + ); + } + expect(wired.reasons.length, `${scenario.id} must have at least one reason`).toBeGreaterThan( + 0, + ); + } + }); +}); diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts index 95ffa6db49..4571c6a0b3 100644 --- a/test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts @@ -9,6 +9,7 @@ import { describe, expect, it } from "vitest"; import { buildScenarioMatrix } from "../scenarios/run.ts"; import { listScenarios } from "../scenarios/registry.ts"; import { resolveRunnerForScenario } from "../scenarios/runner-routing.ts"; +import { isScenarioFullyWired } from "../scenarios/runtime-support.ts"; import { scenario } from "../scenarios/builder.ts"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); @@ -24,10 +25,17 @@ function runEmitMatrix() { } describe("typed scenario matrix", () => { - it("emits one matrix entry per registered scenario", () => { + it("emits one matrix entry per fully-wired registered scenario", () => { + // The matrix is intentionally a subset: scenarios whose onboarding + // profile is not routable by the bash dispatcher, or whose required + // secrets are not declared in the workflow, are filtered by + // isScenarioFullyWired (see scenarios/runtime-support.ts and the + // dedicated contract test e2e-scenario-fully-wired.test.ts). const matrix = buildScenarioMatrix(); - const ids = listScenarios().map((s) => s.id); - expect(matrix.map((entry) => entry.id).sort()).toEqual([...ids].sort()); + const wiredIds = listScenarios() + .filter((s) => isScenarioFullyWired(s).ok) + .map((s) => s.id); + expect(matrix.map((entry) => entry.id).sort()).toEqual([...wiredIds].sort()); }); it("resolves a runner label for every scenario", () => { @@ -109,7 +117,11 @@ describe("typed scenario matrix", () => { expect(lines.length, "matrix output must be a single line").toBe(1); const parsed = JSON.parse(lines[0]); expect(Array.isArray(parsed)).toBe(true); - expect(parsed.length).toBe(listScenarios().length); + // Matrix length matches the fully-wired subset; runtime-support + // filtered the rest with structured reasons on stderr. + const wiredCount = listScenarios().filter((s) => isScenarioFullyWired(s).ok).length; + expect(parsed.length).toBe(wiredCount); + expect(parsed.length).toBeGreaterThan(0); for (const entry of parsed) { expect(entry).toMatchObject({ id: expect.any(String), diff --git a/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh index 9c7b9803f1..fb1a6a61aa 100755 --- a/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh +++ b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh @@ -32,7 +32,14 @@ e2e_onboard_cloud_openclaw_no_docker() { e2e_env_apply_noninteractive - e2e_context_init + # Do NOT call e2e_context_init: the TS framework + # (ScenarioRunner.seedContextEnv) is the single owner of context.env + # initialization for the run. e2e_context_init opens with `: > ctx` + # which would truncate the file and wipe seeded keys (E2E_SCENARIO, + # E2E_SANDBOX_NAME, E2E_GATEWAY_URL) before the state-validation + # phase's gateway-absent / sandbox-absent probes run. Use + # e2e_context_set for additional keys only. Mirrors the contract + # documented in nemoclaw_scenarios/dispatch-action.sh. local log shim_dir rc=0 log="${E2E_CONTEXT_DIR}/negative-preflight.log" diff --git a/test/e2e-scenario/scenarios/run.ts b/test/e2e-scenario/scenarios/run.ts index ff9fb056c4..927028ffa9 100644 --- a/test/e2e-scenario/scenarios/run.ts +++ b/test/e2e-scenario/scenarios/run.ts @@ -8,6 +8,7 @@ import { compileRunPlans, renderPlanText, writePlanArtifacts } from "./compiler. import { ScenarioRunner } from "./orchestrators/runner.ts"; import { listScenarios } from "./registry.ts"; import { resolveRunnerForScenario } from "./runner-routing.ts"; +import { isScenarioFullyWired } from "./runtime-support.ts"; import type { PhaseResult, ScenarioDefinition } from "./types.ts"; interface Args { @@ -85,20 +86,44 @@ function buildLabel(scenario: ScenarioDefinition): string { } /** - * Build the GitHub Actions matrix for every scenario in the typed registry. + * Build the GitHub Actions matrix for every scenario in the typed registry + * that is currently fully wired end-to-end. Scenarios whose onboarding + * profile has no case in `nemoclaw_scenarios/onboard/dispatch.sh`, or + * whose `requiredSecrets` are not declared in the workflow, are filtered + * out and reported on stderr so the matrix never fans out into + * Mode-A `unsupported onboarding profile` failures or runs without the + * declared credential boundary. + * * Sorted by id so workflow runs are deterministic and diffable. */ export function buildScenarioMatrix(): ScenarioMatrixEntry[] { - return listScenarios().map((scenario): ScenarioMatrixEntry => { + const skipped: Array<{ id: string; reasons: readonly string[] }> = []; + const matrix = listScenarios().flatMap((scenario): ScenarioMatrixEntry[] => { + const wired = isScenarioFullyWired(scenario); + if (!wired.ok) { + skipped.push({ id: scenario.id, reasons: wired.reasons }); + return []; + } const { runner } = resolveRunnerForScenario(scenario); - return { - id: scenario.id, - runner, - label: buildLabel(scenario), - platform: scenario.environment?.platform ?? "unknown", - suites: scenario.suiteIds ?? [], - }; + return [ + { + id: scenario.id, + runner, + label: buildLabel(scenario), + platform: scenario.environment?.platform ?? "unknown", + suites: scenario.suiteIds ?? [], + }, + ]; }); + if (skipped.length > 0) { + process.stderr.write( + `[buildScenarioMatrix] skipping ${skipped.length} not-yet-wired scenario(s):\n`, + ); + for (const entry of skipped) { + process.stderr.write(` - ${entry.id}: ${entry.reasons.join("; ")}\n`); + } + } + return matrix; } function emitMatrix() { diff --git a/test/e2e-scenario/scenarios/runtime-support.ts b/test/e2e-scenario/scenarios/runtime-support.ts new file mode 100644 index 0000000000..fa545f7b0d --- /dev/null +++ b/test/e2e-scenario/scenarios/runtime-support.ts @@ -0,0 +1,119 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Runtime-support contract for typed scenarios. + * + * The typed scenario registry (`scenarios/scenarios/baseline.ts`) declares + * scenarios for every product surface, including ones whose onboarding + * profiles or required secrets are not yet wired through the bash + * dispatcher (`nemoclaw_scenarios/onboard/dispatch.sh`), the compiler + * secret allowlist (`scenarios/compiler.ts:ONBOARD_PROFILE_SECRET_ENV`), + * or the workflow secret declarations + * (`.github/workflows/e2e-scenarios{,-all}.yaml`). + * + * Live fan-out via `e2e-scenarios-all.yaml` would otherwise dispatch + * every registered scenario, and the unsupported ones would either: + * 1. fail at onboarding with `e2e_onboard: unsupported onboarding + * profile: ` (Mode A failure mode), or + * 2. run without the credential boundary the registry declares. + * + * `isScenarioFullyWired` is the gate. It is consumed by: + * - `buildScenarioMatrix()` in `run.ts` to filter `--emit-matrix`. + * - `e2e-scenario-fully-wired.test.ts` to lock the contract: every + * scenario that survives the filter must also be routable by the + * bash dispatcher and have its requiredSecrets covered by the + * workflow secret allowlist. + * + * Adding a new scenario whose profile is not in `SUPPORTED_ONBOARDING_IDS` + * is fine — it stays in the registry for documentation and future + * implementation, but the matrix emitter will skip it until the + * dispatcher case + secret plumbing land. + */ + +import type { ScenarioDefinition } from "./types.ts"; + +/** + * Onboarding-profile ids the bash dispatcher + * (test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh) currently + * routes to a worker. Mirrors the dispatcher's case statement plus the + * `-no-docker` variants the compiler synthesizes for negative + * scenarios with `runtime: "docker-missing"`. + * + * Lock-checked by `e2e-scenario-fully-wired.test.ts` against the + * dispatcher source — the test fails if the two ever drift. + */ +export const SUPPORTED_ONBOARDING_IDS: ReadonlySet = new Set([ + "cloud-openclaw", + "cloud-openclaw-no-docker", + "cloud-openclaw-custom-policies", + "cloud-openclaw-invalid-nvidia-key", + "cloud-openclaw-gateway-port-conflict", + "cloud-hermes", + "local-ollama-openclaw", +]); + +/** + * Secrets the scenario workflows + * (.github/workflows/e2e-scenarios.yaml + e2e-scenarios-all.yaml) pass + * through to runs. Lock-checked by `e2e-scenario-fully-wired.test.ts` + * against the workflow YAML. + * + * Adding a secret here without also declaring it in the workflow + * widens the scenario fan-out surface but the new scenarios still + * won't actually have the secret at runtime. The test catches that. + */ +export const WORKFLOW_AVAILABLE_SECRETS: ReadonlySet = new Set([ + "NVIDIA_API_KEY", +]); + +/** + * Compute the onboarding-profile id the compiler will actually invoke + * for a scenario. Mirrors the runtime-rewrite logic in + * `compiler.ts:phaseActions` for the onboarding phase: scenarios with + * `runtime: "docker-missing"` swap their base profile for the + * `-no-docker` worker. + * + * Kept out of compiler.ts so runtime-support can import it without + * pulling in the full compiler graph. + */ +export function effectiveOnboardingId(scenario: ScenarioDefinition): string | null { + const env = scenario.environment; + if (!env) return null; + const base = env.onboarding; + if (!base) return null; + return env.runtime === "docker-missing" ? `${base}-no-docker` : base; +} + +export type WiredCheck = + | { readonly ok: true } + | { readonly ok: false; readonly reasons: readonly string[] }; + +/** + * Decide whether a scenario can actually run end-to-end given the + * current dispatcher and workflow plumbing. Returns the structured + * reason list when not wired, so the matrix emitter can log a + * scenario-by-scenario explanation of what was skipped and why. + */ +export function isScenarioFullyWired(scenario: ScenarioDefinition): WiredCheck { + const reasons: string[] = []; + + const onboarding = effectiveOnboardingId(scenario); + if (onboarding === null) { + reasons.push("scenario has no environment.onboarding"); + } else if (!SUPPORTED_ONBOARDING_IDS.has(onboarding)) { + reasons.push( + `onboarding profile '${onboarding}' has no case in nemoclaw_scenarios/onboard/dispatch.sh`, + ); + } + + const required = scenario.requiredSecrets ?? []; + const missing = required.filter((secret) => !WORKFLOW_AVAILABLE_SECRETS.has(secret)); + if (missing.length > 0) { + reasons.push( + `required secret(s) not declared in workflow: ${missing.join(", ")}`, + ); + } + + return reasons.length === 0 ? { ok: true } : { ok: false, reasons }; +}