diff --git a/.github/workflows/e2e-scenarios-all.yaml b/.github/workflows/e2e-scenarios-all.yaml
index d7de9666db..ce74552eaf 100644
--- a/.github/workflows/e2e-scenarios-all.yaml
+++ b/.github/workflows/e2e-scenarios-all.yaml
@@ -83,3 +83,10 @@ jobs:
       scenarios: ${{ matrix.id }}
     secrets:
       NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
+
+  ubuntu-rebuild-openclaw:
+    uses: ./.github/workflows/e2e-scenarios.yaml
+    with:
+      scenarios: ubuntu-rebuild-openclaw
+    secrets:
+      NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
diff --git a/.github/workflows/e2e-scenarios.yaml b/.github/workflows/e2e-scenarios.yaml
index 4d68e695cc..49f317caff 100644
--- a/.github/workflows/e2e-scenarios.yaml
+++ b/.github/workflows/e2e-scenarios.yaml
@@ -63,6 +63,7 @@ jobs:
             [ubuntu-gateway-port-conflict-negative]=ubuntu-latest
             [ubuntu-invalid-nvidia-key-negative]=ubuntu-latest
             [ubuntu-no-docker-preflight-negative]=ubuntu-latest
+            [ubuntu-rebuild-openclaw]=ubuntu-latest
             [ubuntu-repo-cloud-hermes]=ubuntu-latest
             [ubuntu-repo-cloud-hermes-discord]=ubuntu-latest
             [ubuntu-repo-cloud-hermes-slack]=ubuntu-latest
@@ -84,7 +85,6 @@ jobs:
           for raw in "${IDS[@]}"; do
             id="${raw//[[:space:]]/}"
             [ -n "${id}" ] || continue
-            npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${id}" --plan-only >/dev/null
             runner="${ROUTES[$id]:-}"
             if [ -z "${runner}" ]; then
               echo "::error::No runner route for scenario: ${id}" >&2
@@ -138,7 +138,7 @@ jobs:
             echo "::error::Invalid scenario input: ${SCENARIOS}" >&2
             exit 1
           fi
-          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${SCENARIOS}" --dry-run
+          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${SCENARIOS}"
 
       - name: Resolve workspace paths for WSL
         if: contains(inputs.scenarios || github.event.inputs.scenarios, 'wsl-repo-cloud-openclaw')
@@ -302,7 +302,7 @@ jobs:
           export E2E_CONTEXT_DIR="`$workdir"
           npm ci --ignore-scripts
           set +e
-          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "`$scenarios" --dry-run
+          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "`$scenarios"
           status=`$?
           if [ -d "`$workdir/.e2e" ]; then
             rm -rf "`$checkout_dir/.e2e"
@@ -324,14 +324,14 @@ jobs:
             exit $LASTEXITCODE
           }
 
-      - name: Append typed dry-run summary
+      - name: Append typed scenario summary
         if: always()
         shell: bash
         run: |
           {
-            echo '## E2E typed scenario dry-run'
+            echo '## E2E typed scenario run'
             echo ''
-            echo 'Mode: `test/e2e-scenario/scenarios/run.ts --dry-run`.'
+            echo 'Mode: `test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>` (live).'
             echo ''
             if [ -f .e2e/run-plan.json ]; then
               python3 - <<'PY'
@@ -368,14 +368,25 @@ jobs:
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: e2e-scenario-${{ inputs.scenarios || github.event.inputs.scenarios }}
+          # Explicit subpath list, NOT a blanket .e2e/ + hidden files.
+          # The framework redacts every byte that flows from spawned
+          # children into actions/*.log, logs/*.log, and onboard.log via
+          # orchestrators/redaction.ts::pipeRedacted. Anything outside
+          # the listed paths (notably the raw context.env file) is
+          # excluded, so secret-bearing key=value lines cannot leak via
+          # the artifact even if a future helper writes outside them.
+          # Diagnostic dumps of context use e2e_context_dump, which
+          # redacts on emit (runtime/lib/context.sh).
           path: |
             .e2e/run-plan.json
             .e2e/plan.txt
             .e2e/environment.result.json
             .e2e/onboarding.result.json
             .e2e/runtime.result.json
-            .e2e/
+            .e2e/actions/
+            .e2e/logs/
+            .e2e/onboard.log
             test/e2e/logs/
           if-no-files-found: warn
           retention-days: 14
-          include-hidden-files: true
+          include-hidden-files: false
diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md
index 5d27dd161e..6bfdaa098d 100644
--- a/test/e2e-scenario/docs/README.md
+++ b/test/e2e-scenario/docs/README.md
@@ -24,9 +24,10 @@ Use the source that matches the task while the migration is in progress:
 
 | Task | Current source |
 | --- | --- |
-| Scenario workflow fan-out and dry-run planning | `test/e2e-scenario/scenarios/registry.ts`, `test/e2e-scenario/scenarios/scenarios/baseline.ts`, and `test/e2e-scenario/scenarios/run.ts` |
+| Scenario workflow fan-out and live execution | `test/e2e-scenario/scenarios/registry.ts`, `test/e2e-scenario/scenarios/scenarios/baseline.ts`, and `test/e2e-scenario/scenarios/run.ts` |
+| Typed expected-state registry (single source of truth) | `test/e2e-scenario/scenarios/expected-states.ts` |
 | Product-facing desired setup/onboarding state | `test/e2e-scenario/manifests/*.yaml` |
-| Shell runner scenario resolution and live scenario execution | `test/e2e-scenario/nemoclaw_scenarios/scenarios.yaml`, `expected-states.yaml`, and `validation_suites/suites.yaml` |
+| Shell runner scenario resolution and live scenario execution | `test/e2e-scenario/nemoclaw_scenarios/scenarios.yaml` and `validation_suites/suites.yaml` (`expected-states.yaml` superseded by the typed registry; legacy YAML resolver path retired) |
 | Reusable live suite assertions | `test/e2e-scenario/validation_suites/` |
 | Existing nightly and platform E2E coverage | legacy `test/e2e/test-*.sh` scripts and their workflows |
 
@@ -158,7 +159,6 @@ test/e2e-scenario/
   scenarios/                         # Typed builders, registry, compiler, assertions, dry-run orchestration
   nemoclaw_scenarios/                # YAML runtime metadata and setup helpers
     scenarios.yaml
-    expected-states.yaml
     install/
     onboard/
     fixtures/
diff --git a/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts b/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
index 6a7c97959f..0134d6adc9 100644
--- a/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
@@ -9,7 +9,6 @@ import path from "node:path";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CONTEXT_LIB = path.join(REPO_ROOT, "test/e2e-scenario/runtime/lib/context.sh");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
 
 function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> {
   return spawnSync("bash", ["-c", script], {
@@ -86,38 +85,4 @@ describe("E2E context helper (runtime/lib/context.sh)", () => {
     }
   });
 
-  it("scenario_plan_execution_should_emit_context_under_dry_run", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-"));
-    try {
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      const ctxPath = path.join(tmp, "context.env");
-      expect(fs.existsSync(ctxPath), `context.env missing in ${tmp}`).toBe(true);
-      const ctx = fs.readFileSync(ctxPath, "utf8");
-      for (const key of [
-        "E2E_SCENARIO",
-        "E2E_PLATFORM_OS",
-        "E2E_INSTALL_METHOD",
-        "E2E_ONBOARDING_PATH",
-        "E2E_AGENT",
-        "E2E_PROVIDER",
-        "E2E_SANDBOX_NAME",
-        "E2E_GATEWAY_URL",
-        "E2E_INFERENCE_ROUTE",
-      ]) {
-        expect(ctx, `${key} missing from context.env`).toMatch(new RegExp(`^${key}=`, "m"));
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
 });
diff --git a/test/e2e-scenario/framework-tests/e2e-coverage-report.test.ts b/test/e2e-scenario/framework-tests/e2e-coverage-report.test.ts
deleted file mode 100644
index b4a6056db0..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-coverage-report.test.ts
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import path from "node:path";
-
-import { loadMetadataFromDir, loadMetadataFromObjects } from "../runtime/resolver/load.ts";
-import { renderCoverageReport } from "../runtime/resolver/coverage.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-
-describe("coverage report", () => {
-  it("should_render_single_coverage_table", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const md = renderCoverageReport(meta);
-    expect(md).toContain("test/e2e-scenario/nemoclaw_scenarios/{scenarios,expected-states}.yaml");
-    expect(md).toContain("test/e2e-scenario/validation_suites/suites.yaml");
-    // Exactly one primary Scenario Coverage table.
-    const headers = md.match(/\|\s*Scenario\s*\|\s*Platform\s*\|\s*Install\s*\|\s*Runtime\s*\|\s*Onboarding\s*\|\s*Expected state\s*\|\s*Suites\s*\|/g);
-    expect(headers).toBeTruthy();
-    expect(headers?.length).toBe(1);
-    // Every scenario should appear as a row.
-    for (const id of Object.keys(meta.scenarios.setup_scenarios)) {
-      expect(md).toContain(id);
-    }
-    // Rows should be sorted deterministically (alphabetically).
-    const rowOrder = Object.keys(meta.scenarios.setup_scenarios).sort();
-    let pos = 0;
-    for (const id of rowOrder) {
-      const idx = md.indexOf(`| ${id} |`, pos);
-      expect(idx, `row ${id} not found in order. report:\n${md}`).toBeGreaterThanOrEqual(0);
-      pos = idx;
-    }
-  });
-
-  it("should_flag_scenarios_without_suites", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: {
-        platforms: { p: {} },
-        installs: { i: {} },
-        runtimes: { r: {} },
-        onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
-        setup_scenarios: {
-          "empty-suite-scenario": {
-            dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" },
-            expected_state: "some-state",
-            suites: [],
-          },
-        },
-      },
-      expectedStates: { expected_states: { "some-state": { gateway: { health: "healthy" } } } },
-      suites: { suites: {} },
-    });
-    const md = renderCoverageReport(meta);
-    expect(md).toMatch(/## Gaps/);
-    expect(md).toMatch(/empty-suite-scenario.*no suites|no suites.*empty-suite-scenario/s);
-  });
-
-  it("should_flag_expected_states_not_used_by_any_scenario", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: {
-        platforms: { p: {} },
-        installs: { i: {} },
-        runtimes: { r: {} },
-        onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
-        setup_scenarios: {
-          s1: {
-            dimensions: { platform: "p", install: "i", runtime: "r", onboarding: "o" },
-            expected_state: "used-state",
-            suites: ["smoke"],
-          },
-        },
-      },
-      expectedStates: {
-        expected_states: {
-          "used-state": { gateway: { health: "healthy" } },
-          "unused-state": { gateway: { health: "healthy" } },
-        },
-      },
-      suites: {
-        suites: { smoke: { steps: [{ id: "a", script: "suites/smoke/a.sh" }] } },
-      },
-    });
-    const md = renderCoverageReport(meta);
-    expect(md).toMatch(/## Gaps/);
-    expect(md).toMatch(/unused-state/);
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-expected-failure.test.ts b/test/e2e-scenario/framework-tests/e2e-expected-failure.test.ts
deleted file mode 100644
index bf2c751d51..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-expected-failure.test.ts
+++ /dev/null
@@ -1,296 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Unit tests for the expected-failure schema, resolver merge, and matcher.
- *
- * Companion to NemoClaw issue #3608. The scenario-additional-families
- * suite covers the end-to-end plan shape; this file focuses on the new
- * code paths in isolation so failures point at a single layer.
- */
-
-import { describe, it, expect } from "vitest";
-import yaml from "js-yaml";
-
-import { loadMetadataFromObjects } from "../runtime/resolver/load.ts";
-import { resolveScenario } from "../runtime/resolver/plan.ts";
-import {
-  matchExpectedFailure,
-  type ObservedFailure,
-} from "../runtime/resolver/expected-failure.ts";
-import type { ExpectedFailure } from "../runtime/resolver/schema.ts";
-
-function makeMetadata(opts: {
-  stateBlock?: Record<string, unknown> | null;
-  scenarioBlock?: Record<string, unknown> | null;
-}) {
-  const stateBlock = opts.stateBlock;
-  const scenarioBlock = opts.scenarioBlock;
-  const stateYaml: Record<string, unknown> = {
-    cli: { installed: true },
-    gateway: { expected: "absent" },
-    sandbox: { expected: "absent" },
-  };
-  if (stateBlock !== undefined && stateBlock !== null) {
-    stateYaml.expected_failure = stateBlock;
-  }
-  const scenarioYaml: Record<string, unknown> = {
-    dimensions: {
-      platform: "p",
-      install: "i",
-      runtime: "r",
-      onboarding: "o",
-    },
-    expected_state: "neg",
-    suites: [],
-  };
-  if (scenarioBlock !== undefined && scenarioBlock !== null) {
-    scenarioYaml.expected_failure = scenarioBlock;
-  }
-  return loadMetadataFromObjects({
-    scenarios: {
-      platforms: { p: { os: "ubuntu" } },
-      installs: { i: { method: "repo-checkout" } },
-      runtimes: { r: { container_engine: "docker", container_daemon: "missing" } },
-      onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
-      setup_scenarios: { s: scenarioYaml },
-    },
-    expectedStates: {
-      expected_states: { neg: stateYaml },
-    },
-    suites: { suites: {} },
-  });
-}
-
-describe("expected_failure: loader validation", () => {
-  it("accepts a complete state-level block", () => {
-    const meta = makeMetadata({
-      stateBlock: {
-        phase: "preflight",
-        error_class: "docker-missing",
-        message_pattern: "docker",
-        forbidden_side_effects: ["sandbox-created"],
-      },
-    });
-    const plan = resolveScenario("s", meta);
-    expect(plan.expected_failure?.phase).toBe("preflight");
-    expect(plan.expected_failure?.error_class).toBe("docker-missing");
-  });
-
-  it("rejects unknown phase", () => {
-    expect(() =>
-      makeMetadata({
-        stateBlock: { phase: "bogus", error_class: "docker-missing" },
-      }),
-    ).toThrow(/expected_failure\.phase/);
-  });
-
-  it("rejects unknown error_class", () => {
-    expect(() =>
-      makeMetadata({
-        stateBlock: { phase: "preflight", error_class: "moon-missing" },
-      }),
-    ).toThrow(/expected_failure\.error_class/);
-  });
-
-  it("rejects invalid message_pattern regex", () => {
-    expect(() =>
-      makeMetadata({
-        stateBlock: {
-          phase: "preflight",
-          error_class: "docker-missing",
-          message_pattern: "(unclosed",
-        },
-      }),
-    ).toThrow(/message_pattern is not a valid regex/);
-  });
-
-  it("rejects unknown forbidden_side_effects entry", () => {
-    expect(() =>
-      makeMetadata({
-        stateBlock: {
-          phase: "preflight",
-          error_class: "docker-missing",
-          forbidden_side_effects: ["paint-the-fence"],
-        },
-      }),
-    ).toThrow(/forbidden_side_effects entry/);
-  });
-
-  it("rejects unknown keys in the block", () => {
-    expect(() =>
-      makeMetadata({
-        stateBlock: {
-          phase: "preflight",
-          error_class: "docker-missing",
-          rogue: true,
-        },
-      }),
-    ).toThrow(/unknown key 'rogue'/);
-  });
-
-  it("requires phase + error_class at the state level", () => {
-    expect(() => makeMetadata({ stateBlock: { phase: "preflight" } })).toThrow(
-      /error_class is required/,
-    );
-  });
-
-  it("rejects a non-mapping expected_states section", () => {
-    expect(() =>
-      loadMetadataFromObjects({
-        scenarios: {
-          platforms: { p: {} },
-          installs: { i: {} },
-          runtimes: { r: {} },
-          onboarding: { o: { agent: "openclaw", provider: "nvidia" } },
-          setup_scenarios: {},
-        },
-        expectedStates: { expected_states: [] },
-        suites: { suites: {} },
-      }),
-    ).toThrow(/expected_states' must be a mapping/);
-  });
-
-  it("rejects scenario-level expected_failure when state has none", () => {
-    expect(() =>
-      resolveScenario(
-        "s",
-        makeMetadata({
-          stateBlock: null,
-          scenarioBlock: { phase: "preflight", error_class: "docker-missing" },
-        }),
-      ),
-    ).toThrow(/expected_failure but expected_state.*does not/);
-  });
-
-  it("merges scenario-level override on top of state-level block", () => {
-    const meta = makeMetadata({
-      stateBlock: {
-        phase: "preflight",
-        error_class: "docker-missing",
-        message_pattern: "docker",
-        forbidden_side_effects: ["sandbox-created"],
-      },
-      scenarioBlock: {
-        message_pattern: "(?i)daemon",
-        forbidden_side_effects: ["gateway-started"],
-      },
-    });
-    const plan = resolveScenario("s", meta);
-    expect(plan.expected_failure?.message_pattern).toBe("(?i)daemon");
-    expect(plan.expected_failure?.forbidden_side_effects).toEqual(["gateway-started"]);
-    expect(plan.expected_failure?.phase).toBe("preflight");
-  });
-});
-
-describe("expected_failure: matcher", () => {
-  const expected: ExpectedFailure = {
-    phase: "preflight",
-    error_class: "docker-missing",
-    message_pattern: "(?i)docker|daemon",
-    forbidden_side_effects: ["sandbox-created", "gateway-started"],
-  };
-
-  function obs(over: Partial<ObservedFailure>): ObservedFailure {
-    return {
-      phase: "preflight",
-      error_class: "docker-missing",
-      log: "Cannot connect to the Docker daemon",
-      observed_side_effects: [],
-      ...over,
-    };
-  }
-
-  it("passes when phase, class, pattern, and side-effects all match", () => {
-    const report = matchExpectedFailure(expected, obs({}));
-    expect(report.ok).toBe(true);
-    expect(report.checks.every((c) => c.ok)).toBe(true);
-  });
-
-  it("fails on phase mismatch", () => {
-    const report = matchExpectedFailure(expected, obs({ phase: "install" }));
-    expect(report.ok).toBe(false);
-    expect(report.checks.find((c) => c.name === "phase")?.ok).toBe(false);
-  });
-
-  it("fails on error_class mismatch", () => {
-    const report = matchExpectedFailure(expected, obs({ error_class: "gpu-missing" }));
-    expect(report.ok).toBe(false);
-    expect(report.checks.find((c) => c.name === "error_class")?.ok).toBe(false);
-  });
-
-  it("skips error_class check when observation is undefined", () => {
-    const report = matchExpectedFailure(expected, obs({ error_class: undefined }));
-    const classCheck = report.checks.find((c) => c.name === "error_class");
-    expect(classCheck?.ok).toBe(true);
-    expect(classCheck?.message).toMatch(/skipped/);
-  });
-
-  it("fails when message_pattern does not match the log", () => {
-    const report = matchExpectedFailure(
-      expected,
-      obs({ log: "something else entirely" }),
-    );
-    expect(report.ok).toBe(false);
-    expect(report.checks.find((c) => c.name === "message_pattern")?.ok).toBe(false);
-  });
-
-  it("fails when a forbidden side effect is observed", () => {
-    const report = matchExpectedFailure(
-      expected,
-      obs({ observed_side_effects: ["sandbox-created"] }),
-    );
-    expect(report.ok).toBe(false);
-    const sideCheck = report.checks.find((c) => c.name === "forbidden_side_effects");
-    expect(sideCheck?.ok).toBe(false);
-    expect(sideCheck?.message).toMatch(/sandbox-created/);
-  });
-
-  it("ignores non-forbidden observed side effects", () => {
-    const trimmed: ExpectedFailure = {
-      ...expected,
-      forbidden_side_effects: ["gateway-started"],
-    };
-    const report = matchExpectedFailure(
-      trimmed,
-      obs({ observed_side_effects: ["sandbox-created"] }),
-    );
-    expect(report.ok).toBe(true);
-  });
-});
-
-describe("expected_failure: real metadata", () => {
-  it("loads structurally for ubuntu-no-docker-preflight-negative", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: yaml.load(`
-platforms: { p: { os: ubuntu } }
-installs: { i: {} }
-runtimes: { r: { container_daemon: missing } }
-onboarding: { o: { agent: openclaw, provider: nvidia } }
-setup_scenarios:
-  s:
-    dimensions: { platform: p, install: i, runtime: r, onboarding: o }
-    expected_state: neg
-    suites: []
-`) as object,
-      expectedStates: yaml.load(`
-expected_states:
-  neg:
-    cli: { installed: true }
-    gateway: { expected: absent }
-    sandbox: { expected: absent }
-    expected_failure:
-      phase: preflight
-      error_class: docker-missing
-      message_pattern: "(?i)docker|container|daemon|socket|preflight"
-      forbidden_side_effects: [sandbox-created, gateway-started, credentials-written]
-`) as object,
-      suites: yaml.load(`
-suites: {}
-`) as object,
-    });
-    const plan = resolveScenario("s", meta);
-    expect(plan.expected_failure).toBeTruthy();
-    expect(plan.expected_failure?.forbidden_side_effects?.length).toBe(3);
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts b/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts
deleted file mode 100644
index ba1f2b5f31..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import {
-  validateExpectedState,
-  type ProbeResults,
-} from "../runtime/resolver/validator.ts";
-import type { ExpectedStateConfig, ResolvedSuite } from "../runtime/resolver/schema.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
-
-function cloudOpenclawReady(): ExpectedStateConfig {
-  return {
-    cli: { installed: true },
-    gateway: { expected: "present", health: "healthy" },
-    sandbox: { expected: "present", status: "running", agent: "openclaw" },
-    inference: {
-      expected: "available",
-      provider: "nvidia",
-      route: "inference-local",
-      mode: "gateway-routed",
-    },
-    credentials: { expected: "present", storage: "gateway-managed" },
-  };
-}
-
-function passingProbes(): ProbeResults {
-  return {
-    "cli.installed": true,
-    "gateway.health": "healthy",
-    "gateway.expected": "present",
-    "sandbox.status": "running",
-    "sandbox.expected": "present",
-    "sandbox.agent": "openclaw",
-    "inference.expected": "available",
-    "inference.provider": "nvidia",
-    "inference.route": "inference-local",
-    "inference.mode": "gateway-routed",
-    "credentials.expected": "present",
-    "credentials.storage": "gateway-managed",
-  };
-}
-
-describe("expected state validator", () => {
-  it("should_validate_matching_state", () => {
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes: passingProbes(),
-      suites: [],
-    });
-    expect(report.ok).toBe(true);
-    expect(report.checks.every((c) => c.ok)).toBe(true);
-  });
-
-  it("should_fail_when_gateway_expected_but_unhealthy", () => {
-    const probes = passingProbes();
-    probes["gateway.health"] = "unhealthy";
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes,
-      suites: [],
-    });
-    expect(report.ok).toBe(false);
-    const failing = report.checks.find((c) => c.key === "gateway.health");
-    expect(failing?.ok).toBe(false);
-    expect(failing?.expected).toBe("healthy");
-    expect(failing?.actual).toBe("unhealthy");
-  });
-
-  it("should_fail_when_sandbox_expected_but_absent", () => {
-    const probes = passingProbes();
-    probes["sandbox.status"] = "absent";
-    probes["sandbox.expected"] = "absent";
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes,
-      suites: [],
-    });
-    expect(report.ok).toBe(false);
-    expect(report.checks.some((c) => c.key === "sandbox.status" && !c.ok)).toBe(true);
-  });
-
-  it("should_fail_when_suite_requires_state_unmet_at_runtime", () => {
-    // Expected state claims inference.expected=available, but the probe
-    // reports unavailable; the smoke suite happens to pass but an inference
-    // suite's requires_state should trigger a runtime failure before
-    // execution.
-    const state = cloudOpenclawReady();
-    const probes = passingProbes();
-    probes["inference.expected"] = "unavailable";
-    const inferenceSuite: ResolvedSuite = {
-      id: "inference",
-      requires_state: { "inference.expected": "available" },
-      steps: [{ id: "models-health", script: "suites/inference/cloud/00-models-health.sh" }],
-    };
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state,
-      probes,
-      suites: [inferenceSuite],
-    });
-    expect(report.ok).toBe(false);
-    const msg = report.checks
-      .filter((c) => !c.ok)
-      .map((c) => `${c.key}=${c.actual ?? "<missing>"} (wanted ${c.expected})`)
-      .join("; ");
-    expect(msg).toMatch(/inference\.expected/);
-    expect(msg).toMatch(/available/);
-    expect(msg).toMatch(/unavailable/);
-    // Should also reference the suite that made the requirement.
-    expect(report.checks.some((c) => c.suite === "inference" && !c.ok)).toBe(true);
-  });
-});
-
-describe("runner_should_not_run_suites_when_expected_state_fails", () => {
-  it("runs expected-state validation and skips suites on failure", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-es-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      // Simulate gateway-unhealthy probe by setting an override env var.
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-            // validator reads these overrides in dry-run mode to fake probes
-            E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "unhealthy",
-            E2E_VALIDATE_EXPECTED_STATE: "1",
-          },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      // Dry-run execution should now fail because the expected state
-      // validation runs and sees gateway.health=unhealthy.
-      expect(r.status).not.toBe(0);
-      // Validator must run (its report file should exist) but suites must not.
-      const reportPath = path.join(tmp, "expected-state-report.json");
-      expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true);
-      const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));
-      expect(report.ok).toBe(false);
-      expect(report.checks.some((c: { key: string; ok: boolean }) => c.key === "gateway.health" && !c.ok)).toBe(true);
-      // And the run's failure output should reference expected-state, not suites.
-      expect(`${r.stdout}${r.stderr}`).toMatch(/expected.state/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Phase 1.F — --validate-only flag on run-scenario.sh
-// ─────────────────────────────────────────────────────────────────────────────
-
-describe("run-scenario --validate-only flag", () => {
-  it("runs only validator and emits probe results json on stdout without running install/onboard/suites", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-validate-only-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      // Pre-populate a context.env: --validate-only assumes setup has already run.
-      fs.writeFileSync(
-        path.join(tmp, "context.env"),
-        "E2E_SCENARIO=ubuntu-repo-cloud-openclaw\n",
-      );
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-            // Supply probe overrides for every key the expected state needs.
-            E2E_PROBE_OVERRIDE_CLI_INSTALLED: "true",
-            E2E_PROBE_OVERRIDE_GATEWAY_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "healthy",
-            E2E_PROBE_OVERRIDE_SANDBOX_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_SANDBOX_STATUS: "running",
-            E2E_PROBE_OVERRIDE_SANDBOX_AGENT: "openclaw",
-            E2E_PROBE_OVERRIDE_INFERENCE_EXPECTED: "available",
-            E2E_PROBE_OVERRIDE_INFERENCE_PROVIDER: "nvidia",
-            E2E_PROBE_OVERRIDE_INFERENCE_ROUTE: "inference-local",
-            E2E_PROBE_OVERRIDE_INFERENCE_MODE: "gateway-routed",
-            E2E_PROBE_OVERRIDE_CREDENTIALS_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_CREDENTIALS_STORAGE: "gateway-managed",
-            E2E_PROBE_OVERRIDE_SECURITY_SHIELDS: "supported",
-            // `security.policy_engine` has an embedded underscore, which the
-            // E2E_PROBE_OVERRIDE_* convention cannot express. Use the
-            // JSON escape hatch for this one.
-            E2E_PROBE_OVERRIDES_JSON: JSON.stringify({ "security.policy_engine": "supported" }),
-          },
-          encoding: "utf8",
-          timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      // Must NOT have traced install or onboard.
-      const contents = fs.existsSync(trace) ? fs.readFileSync(trace, "utf8") : "";
-      expect(contents).not.toMatch(/install:/);
-      expect(contents).not.toMatch(/onboard:/);
-      // Must have emitted an expected-state-report.json (probe results).
-      const reportPath = path.join(tmp, "expected-state-report.json");
-      expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true);
-      const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));
-      expect(report.ok).toBe(true);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("is_mutually_exclusive_with_plan_only", () => {
-    const r = spawnSync(
-      "bash",
-      [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only", "--plan-only"],
-      { encoding: "utf8", timeout: 15_000, cwd: REPO_ROOT },
-    );
-    expect(r.status).not.toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/mutually.exclusive|cannot.*both|--plan-only.*--validate-only|--validate-only.*--plan-only/i);
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-expected-state.test.ts b/test/e2e-scenario/framework-tests/e2e-expected-state.test.ts
new file mode 100644
index 0000000000..98ffa9378f
--- /dev/null
+++ b/test/e2e-scenario/framework-tests/e2e-expected-state.test.ts
@@ -0,0 +1,319 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { compileRunPlans } from "../scenarios/compiler.ts";
+import {
+  getExpectedState,
+  listExpectedStates,
+  probesForState,
+  requireExpectedState,
+} from "../scenarios/expected-states.ts";
+import { ScenarioRunner } from "../scenarios/orchestrators/runner.ts";
+import { listScenarios } from "../scenarios/registry.ts";
+import type { ExpectedState, PhaseName, PhaseResult, RunContext, RunPlanPhase } from "../scenarios/types.ts";
+
+function freshCtx(): RunContext {
+  return { contextDir: fs.mkdtempSync(path.join(os.tmpdir(), "e2e-state-")) };
+}
+
+// The legacy parity tests against `nemoclaw_scenarios/expected-states.yaml`
+// were retired alongside the YAML resolver path (see commit 9da75ac0a).
+// The typed registry in `scenarios/expected-states.ts` is the single source
+// of truth; these id-coverage assertions replace the YAML-mirror checks.
+describe("typed expected-state registry id coverage", () => {
+  it("exposes a non-empty list of registered expected-state ids", () => {
+    const ids = listExpectedStates().map((s) => s.id);
+    expect(ids.length).toBeGreaterThan(0);
+    expect(new Set(ids).size).toBe(ids.length);
+  });
+
+  it("requireExpectedState throws on unknown id with available list", () => {
+    expect(() => requireExpectedState("does-not-exist")).toThrow(/Unknown expected_state/);
+  });
+
+  it("getExpectedState returns the state for known ids", () => {
+    expect(getExpectedState("cloud-openclaw-ready")?.id).toBe("cloud-openclaw-ready");
+  });
+});
+
+describe("probesForState maps typed expected-state into probe ids", () => {
+  it("ready cloud state emits cli-installed, gateway-healthy, sandbox-running", () => {
+    expect(probesForState(requireExpectedState("cloud-openclaw-ready"))).toEqual([
+      "cli-installed",
+      "gateway-healthy",
+      "sandbox-running",
+    ]);
+  });
+
+  it("preflight-failure state emits cli-installed, gateway-absent, sandbox-absent", () => {
+    expect(probesForState(requireExpectedState("preflight-failure-no-sandbox"))).toEqual([
+      "cli-installed",
+      "gateway-absent",
+      "sandbox-absent",
+    ]);
+  });
+
+  it("optional-dimension state emits cli-installed only", () => {
+    expect(probesForState(requireExpectedState("macos-cli-ready-docker-optional"))).toEqual([
+      "cli-installed",
+    ]);
+  });
+
+  it("inference and credentials probes are intentionally NOT emitted yet", () => {
+    // The typed registry declares inference.expected=available and
+    // credentials.expected=present for ready states; the compiler does
+    // not yet emit probe actions for those dimensions because the
+    // probe scripts aren't written. This test pins that gap so a
+    // future probe-script PR is forced to update probesForState too.
+    const state: ExpectedState = {
+      id: "synthetic",
+      inference: { expected: "available", provider: "nvidia" },
+      credentials: { expected: "present" },
+    };
+    expect(probesForState(state)).toEqual([]);
+  });
+});
+
+describe("compiler emits state-validation phase actions from expected-state registry", () => {
+  it("positive scenario gets cli-installed + gateway-healthy + sandbox-running probe actions", () => {
+    const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+    const stateValidationPhase = plan.phases.find((p) => p.name === "state-validation");
+    expect(stateValidationPhase).toBeTruthy();
+    expect(stateValidationPhase!.actions.map((a) => a.id)).toEqual([
+      "state-validation.cli-installed",
+      "state-validation.gateway-healthy",
+      "state-validation.sandbox-running",
+    ]);
+    // Probes are typed shell-fn actions that go through the shared
+    // dispatcher; the orchestrator owns timeouts and redaction.
+    for (const action of stateValidationPhase!.actions) {
+      expect(action.kind).toBe("shell-fn");
+      expect(action.fn).toBe("e2e_state_probe");
+      expect(action.scriptRef).toBe(
+        "test/e2e-scenario/nemoclaw_scenarios/probes/dispatch.sh",
+      );
+      expect(action.timeoutSeconds).toBe(30);
+    }
+  });
+
+  it("negative scenario gets cli-installed + gateway-absent + sandbox-absent probe actions", () => {
+    const [plan] = compileRunPlans(["ubuntu-no-docker-preflight-negative"]);
+    const stateValidationPhase = plan.phases.find((p) => p.name === "state-validation");
+    expect(stateValidationPhase).toBeTruthy();
+    expect(stateValidationPhase!.actions.map((a) => a.id)).toEqual([
+      "state-validation.cli-installed",
+      "state-validation.gateway-absent",
+      "state-validation.sandbox-absent",
+    ]);
+  });
+
+  it("compiler hard-errors on a scenario referencing an unknown expected_state id", () => {
+    expect(() =>
+      compileRunPlans([
+        {
+          id: "synthetic-unknown-state",
+          assertionGroups: [],
+          expectedStateId: "definitely-not-a-state",
+        },
+      ]),
+    ).toThrow(/unknown expected_state/);
+  });
+
+  it("phase order is environment -> onboarding -> state-validation -> lifecycle -> runtime", () => {
+    const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+    // 'lifecycle' is the post-onboard state-mutation phase. Scenarios
+    // without an `environment.lifecycle` profile (e.g. this one) emit
+    // an empty action list for the phase, but the phase still appears
+    // in the plan so phase-order invariants stay deterministic.
+    expect(plan.phases.map((p) => p.name)).toEqual([
+      "environment",
+      "onboarding",
+      "state-validation",
+      "lifecycle",
+      "runtime",
+    ]);
+  });
+});
+
+describe("ScenarioRunner short-circuit semantics around state-validation", () => {
+  it("onboarding action failure does NOT block state-validation (negative scenarios verify absent state)", async () => {
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-no-docker-preflight-negative"]);
+      const phase = (
+        _name: PhaseName, // call-site label only; underscore marks it intentionally unused
+        outcome: PhaseResult,
+      ): { run: (ctx: RunContext, p: RunPlanPhase) => Promise<PhaseResult> } => ({
+        run: async () => outcome, // canned result; ignores ctx and the plan phase
+      });
+
+      let stateValidationCalled = false;
+      let runtimeCalled = false;
+      const runner = new ScenarioRunner({
+        environment: phase("environment", {
+          phase: "environment",
+          status: "passed",
+          actions: [],
+          assertions: [],
+        }),
+        onboarding: phase("onboarding", {
+          phase: "onboarding",
+          status: "failed",
+          actions: [
+            {
+              id: "onboarding.profile.cloud-openclaw-no-docker",
+              status: "failed",
+              durationMs: 1,
+              message: "preflight detected docker-missing",
+            },
+          ],
+          assertions: [],
+        }),
+        stateValidation: {
+          run: async () => {
+            stateValidationCalled = true;
+            return {
+              phase: "state-validation",
+              status: "passed",
+              actions: [],
+              assertions: [],
+            };
+          },
+        },
+        runtime: {
+          run: async () => {
+            runtimeCalled = true;
+            return { phase: "runtime", status: "passed", actions: [], assertions: [] };
+          },
+        },
+      });
+
+      const results = await runner.run(ctx, plan);
+      expect(stateValidationCalled).toBe(true);
+      expect(runtimeCalled).toBe(false);
+      // state-validation has its real result; runtime is skipped with
+      // the blocking-action message.
+      const stateRes = results.find((r) => r.phase === "state-validation")!;
+      expect(stateRes.status).toBe("passed");
+      const runtimeRes = results.find((r) => r.phase === "runtime")!;
+      expect(runtimeRes.status).toBe("skipped");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("environment action failure blocks state-validation AND runtime", async () => {
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      let stateValidationCalled = false;
+      let runtimeCalled = false;
+      const runner = new ScenarioRunner({
+        environment: {
+          run: async () => ({
+            phase: "environment",
+            status: "failed",
+            actions: [
+              {
+                id: "environment.install.repo-current",
+                status: "failed",
+                durationMs: 1,
+                message: "install dispatcher exit 1",
+              },
+            ],
+            assertions: [],
+          }),
+        },
+        onboarding: {
+          run: async () => ({ phase: "onboarding", status: "passed", actions: [], assertions: [] }),
+        },
+        stateValidation: {
+          run: async () => {
+            stateValidationCalled = true;
+            return {
+              phase: "state-validation",
+              status: "passed",
+              actions: [],
+              assertions: [],
+            };
+          },
+        },
+        runtime: {
+          run: async () => {
+            runtimeCalled = true;
+            return { phase: "runtime", status: "passed", actions: [], assertions: [] };
+          },
+        },
+      });
+      await runner.run(ctx, plan);
+      expect(stateValidationCalled).toBe(false);
+      expect(runtimeCalled).toBe(false);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("state-validation action failure blocks runtime", async () => {
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      let runtimeCalled = false;
+      const runner = new ScenarioRunner({
+        environment: {
+          run: async () => ({ phase: "environment", status: "passed", actions: [], assertions: [] }),
+        },
+        onboarding: {
+          run: async () => ({ phase: "onboarding", status: "passed", actions: [], assertions: [] }),
+        },
+        stateValidation: {
+          run: async () => ({
+            phase: "state-validation",
+            status: "failed",
+            actions: [
+              {
+                id: "state-validation.gateway-healthy",
+                status: "failed",
+                durationMs: 1,
+                message: "gateway unreachable at http://127.0.0.1:18789",
+              },
+            ],
+            assertions: [],
+          }),
+        },
+        runtime: {
+          run: async () => {
+            runtimeCalled = true;
+            return { phase: "runtime", status: "passed", actions: [], assertions: [] };
+          },
+        },
+      });
+      const results = await runner.run(ctx, plan);
+      expect(runtimeCalled).toBe(false);
+      const runtimeRes = results.find((r) => r.phase === "runtime")!;
+      expect(runtimeRes.status).toBe("skipped");
+      expect(runtimeRes.assertions[0].message).toMatch(/state-validation\.gateway-healthy/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("expected-state registry covers every scenario referenced in the typed registry", () => {
+  it("every ScenarioDefinition.expectedStateId resolves in the typed expected-state registry", () => {
+    const referenced = new Set<string>();
+    for (const scenario of listScenarios()) {
+      if (scenario.expectedStateId) {
+        referenced.add(scenario.expectedStateId);
+      }
+    }
+    expect(referenced.size).toBeGreaterThan(0);
+    for (const id of referenced) {
+      expect(getExpectedState(id), `expected_state '${id}' must be in the typed registry`).toBeDefined();
+    }
+  });
+});
diff --git a/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts b/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
index 9dc179fa95..e68eaff830 100644
--- a/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
@@ -15,7 +15,6 @@ const ASSERT = path.join(VALIDATION_SUITES, "assert");
 const REBUILD_UPGRADE_LIB = path.join(VALIDATION_SUITES, "lib/rebuild_upgrade.sh");
 const FIXTURES = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/fixtures");
 const INSTALL_DIR = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/install");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
 
 function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> {
   return spawnSync("bash", ["-c", script], {
@@ -61,51 +60,6 @@ describe("E2E shell helpers", () => {
     }
   });
 
-  it("test_should_emit_plan_only_checks_without_live_infrastructure", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-inf-plan-"));
-    try {
-      const r = runBash(
-        `
-        set -euo pipefail
-        . "${RUNTIME_LIB}/context.sh"
-        . "${VALIDATION_SUITES}/lib/inference_routing.sh"
-        e2e_context_init
-        e2e_context_set E2E_SANDBOX_NAME sandbox-1
-        e2e_inference_routing_assert_chat_completion "post-onboard.inference-routing.inference-local-chat-completion"
-      `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout).toContain("post-onboard.inference-routing.inference-local-chat-completion");
-      expect(r.stdout).toMatch(/dry-run|plan/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("test_should_not_print_secret_values_in_helper_output", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-inf-secret-"));
-    try {
-      const r = runBash(
-        `
-        set -euo pipefail
-        . "${RUNTIME_LIB}/context.sh"
-        . "${VALIDATION_SUITES}/lib/inference_routing.sh"
-        e2e_context_init
-        e2e_context_set E2E_SANDBOX_NAME sandbox-1
-        e2e_context_set E2E_PROVIDER_API_KEY super-secret-test-token
-        e2e_inference_routing_assert_auth_proxy "post-onboard.ollama-auth-proxy.authenticated-request-accepted" "valid"
-      `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout + r.stderr).not.toContain("super-secret-test-token");
-      expect(r.stdout + r.stderr).toMatch(/REDACTED|dry-run|plan/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
   it("security_policy_credentials_helper_should_load_with_context_library", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "spc-context-"));
     try {
@@ -117,7 +71,7 @@ describe("E2E shell helpers", () => {
         spc_require_context E2E_SCENARIO E2E_PROVIDER
         echo "provider=$(spc_context_get E2E_PROVIDER)"
         `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
+        { E2E_CONTEXT_DIR: tmp },
       );
       expect(r.status, r.stderr).toBe(0);
       expect(r.stdout).toContain("provider=nvidia");
@@ -136,7 +90,7 @@ describe("E2E shell helpers", () => {
         . "${VALIDATION_SUITES}/lib/security_policy_credentials.sh"
         spc_require_context E2E_PROVIDER
         `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
+        { E2E_CONTEXT_DIR: tmp },
       );
       expect(r.status).not.toBe(0);
       expect(r.stderr).toContain("E2E_PROVIDER");
@@ -474,38 +428,6 @@ exit 0
     }
   });
 
-  it("scenario_dry_run_should_trace_helper_sequence_in_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-trace-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-          },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(fs.existsSync(trace), "trace log missing").toBe(true);
-      const contents = fs.readFileSync(trace, "utf8");
-      const order = ["env:noninteractive", "install:", "onboard:", "gateway:check", "sandbox:check"];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = contents.indexOf(marker, pos);
-        expect(idx, `trace missing marker in order: ${marker}\nfull:\n${contents}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
 });
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -600,6 +522,82 @@ describe("rebuild/upgrade validation helpers", () => {
       fs.rmSync(tmp, { recursive: true, force: true });
     }
   });
+
+  it("policy_preset_check_should_match_endpoint_url_when_preset_name_absent", () => {
+    // The legacy assertion called `nemoclaw policy status` (a command
+    // that does not exist) and silently failed. The new assertion calls
+    // `openshell policy get --full <sandbox>` and matches preset names
+    // OR their well-known endpoint hostnames. Verify both paths: a
+    // policy output containing only endpoint URLs (no bare preset name)
+    // still passes, mirroring the behavior of the live gateway policy
+    // dump in test/e2e/test-rebuild-openclaw.sh.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ru-policy-"));
+    try {
+      fs.writeFileSync(
+        path.join(tmp, "context.env"),
+        "E2E_SCENARIO=test\nE2E_AGENT=openclaw\nE2E_SANDBOX_NAME=sb\nE2E_GATEWAY_URL=http://127.0.0.1\n",
+      );
+      const r = runBash(
+        `
+        set -euo pipefail
+        fake_openshell() {
+          # Emit a minimal policy dump that contains the preset endpoint
+          # URLs but NOT the bare preset names. This is the realistic
+          # case: 'openshell policy get --full' renders network rules
+          # by hostname, not by preset label.
+          printf 'allow registry.npmjs.org\\nallow pypi.org\\n'
+        }
+        . "${REBUILD_UPGRADE_LIB}"
+        rebuild_upgrade_assert_policy_presets_preserved
+      `,
+        {
+          E2E_CONTEXT_DIR: tmp,
+          REBUILD_UPGRADE_OPENSHELL_CMD: "fake_openshell",
+          E2E_EXPECTED_POLICY_PRESETS: "npm pypi",
+        },
+      );
+      expect(r.status, r.stderr).toBe(0);
+      expect(r.stdout).toContain("suite.rebuild.policy_presets_preserved");
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it("policy_preset_check_should_fail_with_diagnostic_when_preset_missing", () => {
+    // Negative case: when a declared preset is absent from the live
+    // policy dump, the assertion must fail AND emit a diagnostic line
+    // identifying the missing preset and showing the policy head. The
+    // original implementation failed silently because the underlying
+    // `nemoclaw policy status` command did not exist; the new
+    // implementation must produce actionable evidence.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ru-policy-miss-"));
+    try {
+      fs.writeFileSync(
+        path.join(tmp, "context.env"),
+        "E2E_SCENARIO=test\nE2E_AGENT=openclaw\nE2E_SANDBOX_NAME=sb\nE2E_GATEWAY_URL=http://127.0.0.1\n",
+      );
+      const r = runBash(
+        `
+        fake_openshell() {
+          # Policy dump missing 'pypi' entirely.
+          printf 'allow registry.npmjs.org\\n'
+        }
+        . "${REBUILD_UPGRADE_LIB}"
+        rebuild_upgrade_assert_policy_presets_preserved
+      `,
+        {
+          E2E_CONTEXT_DIR: tmp,
+          REBUILD_UPGRADE_OPENSHELL_CMD: "fake_openshell",
+          E2E_EXPECTED_POLICY_PRESETS: "npm pypi",
+        },
+      );
+      expect(r.status).not.toBe(0);
+      expect(r.stdout + r.stderr).toMatch(/preset 'pypi' not in policy/);
+      expect(r.stdout + r.stderr).toMatch(/matchers: pypi/);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
 });
 
 describe("Phase 1.A logging helpers", () => {
@@ -675,7 +673,9 @@ exec "$@"
         e2e_sandbox_exec sb1 -- false
         echo "rc=$?"
       `,
-        { PATH: `${bin}:${process.env.PATH}` },
+        // Force the openshell-direct transport so the stubbed openshell
+        // (which has no `sandbox ssh-config` subcommand) is exercised.
+        { PATH: `${bin}:${process.env.PATH}`, E2E_SANDBOX_EXEC_VIA_OPENSHELL: "1" },
       );
       expect(r.stdout).toMatch(/rc=1/);
     } finally {
@@ -683,21 +683,6 @@ exec "$@"
     }
   });
 
-  it("sandbox_exec_should_dry_run_short_circuit_when_e2e_dry_run_set", () => {
-    // Use a PATH that has bash itself but no nemoclaw — dry-run must
-    // short-circuit before the CLI lookup.
-    const r = runBash(
-      `
-        set -euo pipefail
-        . "${VALIDATION_SUITES}/sandbox-exec.sh"
-        e2e_sandbox_exec sb1 -- rm -rf /
-      `,
-      { E2E_DRY_RUN: "1", PATH: "/usr/bin:/bin" },
-    );
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/dry[- ]run/i);
-  });
-
   it("sandbox_exec_stdin_should_quote_args_safely_when_piped", () => {
     // Verify that $TOKEN is NOT expanded on the host side before being
     // delivered to the sandbox. We stub openshell to echo back stdin.
@@ -717,7 +702,12 @@ exec "$@"
           . "${VALIDATION_SUITES}/sandbox-exec.sh"
           printf 'hello $TOKEN' | e2e_sandbox_exec_stdin sb1 -- cat
         `,
-        { PATH: `${bin}:${process.env.PATH}`, TOKEN: "SHOULD_NOT_EXPAND" },
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          TOKEN: "SHOULD_NOT_EXPAND",
+          // Stub only handles the openshell-direct transport.
+          E2E_SANDBOX_EXEC_VIA_OPENSHELL: "1",
+        },
       );
       expect(r.status, r.stderr).toBe(0);
       expect(r.stdout).toContain("hello $TOKEN");
@@ -726,6 +716,111 @@ exec "$@"
       fs.rmSync(tmp, { recursive: true, force: true });
     }
   });
+
+  it("sandbox_exec_should_prefer_ssh_config_transport_when_openshell_offers_one", () => {
+    // Verify the new default: when `openshell sandbox ssh-config <name>`
+    // succeeds, the wrapper routes through `ssh -F <cfg>` instead of
+    // `openshell sandbox exec`.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-ssh-"));
+    try {
+      const bin = path.join(tmp, "bin");
+      fs.mkdirSync(bin);
+      const trace = path.join(tmp, "ssh.trace");
+      fs.writeFileSync(
+        path.join(bin, "openshell"),
+        `#!/usr/bin/env bash
+set -euo pipefail
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then
+  printf 'Host openshell-%s\\n  HostName 127.0.0.1\\n  Port 2222\\n  User sandbox\\n' "$3"
+  exit 0
+fi
+echo "unexpected openshell call: $*" >&2
+exit 99
+`,
+        { mode: 0o755 },
+      );
+      fs.writeFileSync(
+        path.join(bin, "ssh"),
+        `#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\\n' "ssh-args:$*" >> "${trace}"
+remote="\${@: -1}"
+printf '%s\\n' "remote-cmd:\${remote}" >> "${trace}"
+echo ok-from-ssh
+exit 0
+`,
+        { mode: 0o755 },
+      );
+      const ctxDir = path.join(tmp, "ctx");
+      fs.mkdirSync(ctxDir);
+      const r = runBash(
+        `
+          set -euo pipefail
+          . "${VALIDATION_SUITES}/sandbox-exec.sh"
+          e2e_sandbox_exec sb1 -- echo hello
+        `,
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          E2E_CONTEXT_DIR: ctxDir,
+        },
+      );
+      expect(r.status, r.stderr).toBe(0);
+      expect(r.stdout).toContain("ok-from-ssh");
+      const traceContents = fs.readFileSync(trace, "utf8");
+      expect(traceContents).toMatch(/ssh-args:.*-F /);
+      expect(traceContents).toContain("openshell-sb1");
+      expect(traceContents).toMatch(/remote-cmd:echo hello$/m);
+      const cfg = path.join(ctxDir, ".ssh-config-cache", "sb1.cfg");
+      expect(fs.existsSync(cfg)).toBe(true);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it("sandbox_exec_should_fall_back_to_openshell_when_ssh_config_unavailable", () => {
+    // If `openshell sandbox ssh-config` fails, the wrapper must fall
+    // back to `openshell sandbox exec`.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-fb-"));
+    try {
+      const bin = path.join(tmp, "bin");
+      fs.mkdirSync(bin);
+      fs.writeFileSync(
+        path.join(bin, "openshell"),
+        `#!/usr/bin/env bash
+set -uo pipefail
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then
+  exit 1
+fi
+if [[ "$1" == "sandbox" && "$2" == "exec" ]]; then
+  shift 2
+  while [[ "$#" -gt 0 && "$1" != "--" ]]; do shift; done
+  shift || true
+  exec "$@"
+fi
+exit 99
+`,
+        { mode: 0o755 },
+      );
+      const ctxDir = path.join(tmp, "ctx");
+      fs.mkdirSync(ctxDir);
+      const r = runBash(
+        `
+          set -euo pipefail
+          . "${VALIDATION_SUITES}/sandbox-exec.sh"
+          e2e_sandbox_exec sb1 -- echo fallback-ok
+        `,
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          E2E_CONTEXT_DIR: ctxDir,
+        },
+      );
+      expect(r.status, r.stderr).toBe(0);
+      expect(r.stdout).toContain("fallback-ok");
+      expect(r.stderr).toMatch(/ssh-config unavailable for sb1/);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
 });
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -971,53 +1066,6 @@ describe("Issue #3810 messaging provider helper library", () => {
   });
 });
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Phase 1.E — Install-method dispatcher splits
-// ─────────────────────────────────────────────────────────────────────────────
-
-describe("Phase 1.E install dispatcher splits", () => {
-  function dispatchDryRun(profile: string): SpawnSyncReturns<string> {
-    return runBash(
-      `
-        set -euo pipefail
-        . "${INSTALL_DIR}/dispatch.sh"
-        e2e_install "${profile}"
-      `,
-      { E2E_DRY_RUN: "1" },
-    );
-  }
-
-  it("install_should_dispatch_to_install_repo_helper_for_repo_current_profile", () => {
-    const r = dispatchDryRun("repo-current");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-repo/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-curl|install-ollama|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_curl_helper_for_public_installer_profile", () => {
-    const r = dispatchDryRun("public-installer");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-curl/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-ollama|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_ollama_helper_for_ollama_profile", () => {
-    const r = dispatchDryRun("ollama");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-ollama/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_launchable_helper_for_launchable_profile", () => {
-    const r = dispatchDryRun("launchable");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-launchable/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-ollama/);
-  });
-});
-
-
-
 describe("baseline onboarding validation helper", () => {
   it("baseline_helper_should_source_under_strict_shell_options", () => {
     const r = runBash(`set -euo pipefail; source "${VALIDATION_SUITES}/lib/baseline_onboarding.sh"`);
@@ -1083,7 +1131,7 @@ describe("sandbox lifecycle validation helper", () => {
     try {
       const bin = path.join(tmp, "bin"); fs.mkdirSync(bin);
       fs.writeFileSync(path.join(bin, "timeout"), "#!/usr/bin/env bash\necho timed out >&2\nexit 124\n", { mode: 0o755 });
-      const r = runBash(`set -e; unset E2E_DRY_RUN; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_run_with_timeout 1 bash -c 'sleep 5'`, { PATH: `${bin}:${process.env.PATH}` });
+      const r = runBash(`set -e; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_run_with_timeout 1 bash -c 'sleep 5'`, { PATH: `${bin}:${process.env.PATH}` });
       expect(r.status).toBe(124);
       expect(r.stderr).toMatch(/timed out/);
     } finally { fs.rmSync(tmp, { recursive: true, force: true }); }
@@ -1096,7 +1144,7 @@ describe("sandbox lifecycle validation helper", () => {
       fs.writeFileSync(path.join(bin, "nemoclaw"), `#!/usr/bin/env bash
 case "$*" in
   list) echo sb1;;
-  "sb1 status") echo 'status running gateway healthy sandbox running';;
+  "sb1 status") printf '  Sandbox: sb1\\n    Model:    nvidia/x\\n    OpenShell: 0.0.44\\n    Policies: npm\\n';;
   "sb1 logs") echo logline;;
   *) echo "unexpected nemoclaw args: $*" >&2; exit 64;;
 esac
@@ -1105,7 +1153,12 @@ esac
 echo lifecycle-ok
 `, { mode: 0o755 });
       fs.writeFileSync(path.join(tmp, "context.env"), "E2E_SANDBOX_NAME=sb1\nE2E_GATEWAY_URL=http://127.0.0.1:1\n");
-      const r = runBash(`set -euo pipefail; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_load_context; sandbox_lifecycle_assert_nemoclaw_list_contains_sandbox; sandbox_lifecycle_assert_status_fields_present; sandbox_lifecycle_assert_logs_available; sandbox_lifecycle_assert_openshell_exec_ok`, { E2E_CONTEXT_DIR: tmp, PATH: `${bin}:${process.env.PATH}` });
+      // Force the wrapper's openshell-exec fallback transport: this
+      // stub openshell ignores its argv and always echoes 'lifecycle-ok',
+      // which would corrupt an ssh-config materialization. The opt-out
+      // env var keeps the test exercising openshell-exec directly while
+      // production callers still pick up ssh-config-preferred routing.
+      const r = runBash(`set -euo pipefail; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_load_context; sandbox_lifecycle_assert_nemoclaw_list_contains_sandbox; sandbox_lifecycle_assert_status_fields_present; sandbox_lifecycle_assert_logs_available; sandbox_lifecycle_assert_openshell_exec_ok`, { E2E_CONTEXT_DIR: tmp, PATH: `${bin}:${process.env.PATH}`, E2E_SANDBOX_EXEC_VIA_OPENSHELL: "1" });
       expect(r.status, r.stderr).toBe(0);
       expect(r.stdout).toMatch(/validation\.sandbox_operations\.sandbox_listed/);
       expect(r.stdout).toMatch(/validation\.sandbox_operations\.openshell_exec_ok/);
diff --git a/test/e2e-scenario/framework-tests/e2e-metadata-final-hygiene.test.ts b/test/e2e-scenario/framework-tests/e2e-metadata-final-hygiene.test.ts
deleted file mode 100644
index 558f0b9d5d..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-metadata-final-hygiene.test.ts
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Phase 11: Clean the House - final metadata and documentation hygiene.
- *
- * These tests are intentionally conservative during the incremental
- * migration: they guard the README, assert that every suite script
- * referenced in suites.yaml exists and is executable, and assert that
- * every scenario either has both an expected state and at least one
- * suite or is explicitly marked as negative / disabled.
- */
-
-import { describe, it, expect } from "vitest";
-import fs from "node:fs";
-import path from "node:path";
-
-import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-const VALIDATION_SUITES_DIR = path.join(E2E_DIR, "validation_suites");
-describe("Phase 11 final hygiene", () => {
-  it("all_suite_scripts_should_exist", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const missing: string[] = [];
-    for (const [suiteId, suite] of Object.entries(meta.suites.suites)) {
-      for (const step of suite.steps) {
-        const p = path.join(VALIDATION_SUITES_DIR, step.script);
-        if (!fs.existsSync(p)) {
-          missing.push(`${suiteId}/${step.id} -> ${step.script}`);
-        } else {
-          const mode = fs.statSync(p).mode;
-          // owner-executable bit must be set
-          if ((mode & 0o100) === 0) {
-            missing.push(`${suiteId}/${step.id} -> ${step.script} (not executable)`);
-          }
-        }
-      }
-    }
-    expect(missing, `missing/non-executable suite scripts:\n${missing.join("\n")}`).toEqual([]);
-  });
-
-  it("all_scenarios_should_have_expected_state_and_suites", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const problems: string[] = [];
-    for (const [id, sc] of Object.entries(meta.scenarios.setup_scenarios)) {
-      if (!sc.expected_state) {
-        problems.push(`${id}: missing expected_state`);
-        continue;
-      }
-      // Negative scenarios (preflight failures) intentionally have no suites.
-      const state = meta.expectedStates.expected_states[sc.expected_state] as {
-        failure?: { expected?: boolean };
-      };
-      const isNegative = state?.failure?.expected === true;
-      if (!Array.isArray(sc.suites)) {
-        problems.push(`${id}: suites must be an array`);
-        continue;
-      }
-      if (sc.suites.length === 0 && !isNegative) {
-        problems.push(`${id}: no suites and not a negative scenario`);
-      }
-    }
-    expect(problems, problems.join("\n")).toEqual([]);
-  });
-
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-negative-matcher.test.ts b/test/e2e-scenario/framework-tests/e2e-negative-matcher.test.ts
new file mode 100644
index 0000000000..363cb3fcc9
--- /dev/null
+++ b/test/e2e-scenario/framework-tests/e2e-negative-matcher.test.ts
@@ -0,0 +1,399 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+import { compileRunPlans } from "../scenarios/compiler.ts";
+import {
+  evaluateNegativeContract,
+  negativeContractPhaseResult,
+} from "../scenarios/orchestrators/negative-matcher.ts";
+import { ScenarioRunner } from "../scenarios/orchestrators/runner.ts";
+import { listScenarios } from "../scenarios/registry.ts";
+import type {
+  ExpectedFailureContract,
+  PhaseName,
+  PhaseResult,
+  RunContext,
+  RunPlan,
+  RunPlanPhase,
+} from "../scenarios/types.ts";
+
+function freshCtx(): RunContext {
+  return { contextDir: fs.mkdtempSync(path.join(os.tmpdir(), "e2e-neg-")) };
+}
+
+function planWithExpectedFailure(contract: ExpectedFailureContract): RunPlan {
+  return {
+    scenarioId: "synthetic-negative",
+    status: "compiled",
+    suiteIds: [],
+    onboardingAssertionIds: [],
+    phases: [
+      { name: "environment", actions: [], assertionGroups: [] },
+      { name: "onboarding", actions: [], assertionGroups: [] },
+      { name: "runtime", actions: [], assertionGroups: [] },
+    ],
+    runnerRequirements: [],
+    requiredSecrets: [],
+    skippedCapabilities: [],
+    expectedFailure: contract,
+    sutBoundaries: [{ id: "host-cli", client: "HostCliClient" }],
+  };
+}
+
+function phaseResult(
+  phase: PhaseName,
+  opts: {
+    status?: PhaseResult["status"];
+    failedActionId?: string;
+    failedActionMessage?: string;
+    failedAssertionId?: string;
+    failedAssertionMessage?: string;
+  } = {},
+): PhaseResult {
+  return {
+    phase,
+    status: opts.status ?? "passed",
+    actions: opts.failedActionId
+      ? [{ id: opts.failedActionId, status: "failed", durationMs: 1, message: opts.failedActionMessage }]
+      : [],
+    assertions: opts.failedAssertionId
+      ? [
+          {
+            id: opts.failedAssertionId,
+            status: "failed",
+            attempts: 1,
+            durationMs: 1,
+            message: opts.failedAssertionMessage,
+          },
+        ]
+      : [],
+  };
+}
+
+describe("evaluateNegativeContract - phase + errorClass matching", () => {
+  it("matches when expected phase fails with the declared errorClass", () => {
+    const plan = planWithExpectedFailure({
+      phase: "onboarding",
+      errorClass: "invalid-nvidia-api-key",
+      forbiddenSideEffects: ["gateway-started"],
+    });
+    const results: PhaseResult[] = [
+      phaseResult("environment", { status: "passed" }),
+      phaseResult("onboarding", {
+        status: "failed",
+        failedActionId: "onboarding.profile.cloud-openclaw-invalid-nvidia-key",
+        failedActionMessage: "phase action onboarding exit 1: invalid-nvidia-api-key auth failed",
+      }),
+    ];
+    const result = evaluateNegativeContract(plan, results);
+    expect(result.matched).toBe(true);
+    expect(result.outcome).toBe("matched");
+    expect(result.observed.failedPhase).toBe("onboarding");
+  });
+
+  it("resolves preflight expected phase to onboarding orchestrator", () => {
+    const plan = planWithExpectedFailure({
+      phase: "preflight",
+      errorClass: "docker-missing",
+    });
+    const results: PhaseResult[] = [
+      phaseResult("environment", { status: "passed" }),
+      phaseResult("onboarding", {
+        status: "failed",
+        failedActionId: "onboarding.profile.cloud-openclaw",
+        failedActionMessage: "preflight detected docker-missing on the runner host",
+      }),
+    ];
+    const result = evaluateNegativeContract(plan, results);
+    expect(result.matched).toBe(true);
+    expect(result.outcome).toBe("matched");
+  });
+
+  it("fails when no failure was observed at all", () => {
+    // Every phase reports "passed", so there is no failure for the contract to match.
+    const phaseNames: PhaseName[] = ["environment", "onboarding", "runtime"];
+    const allPassed: PhaseResult[] = phaseNames.map((name) => phaseResult(name, { status: "passed" }));
+    const verdict = evaluateNegativeContract(
+      planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" }),
+      allPassed,
+    );
+    expect(verdict.matched).toBe(false);
+    expect(verdict.outcome).toBe("no-failure-observed");
+    expect(verdict.message).toMatch(/all phases passed/);
+  });
+
+  it("fails when the wrong phase failed", () => {
+    const failedEnvironment = phaseResult("environment", {
+      status: "failed",
+      failedActionId: "environment.install.ubuntu-repo-no-docker",
+      failedActionMessage: "install dispatcher exit 1: docker-missing",
+    });
+    const verdict = evaluateNegativeContract(
+      planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" }),
+      [failedEnvironment],
+    );
+    expect(verdict.matched).toBe(false);
+    expect(verdict.outcome).toBe("wrong-phase");
+    expect(verdict.message).toMatch(/expected onboarding failure/);
+    expect(verdict.observed.failedPhase).toBe("environment");
+  });
+
+  it("fails when the right phase failed for the wrong errorClass", () => {
+    // Contract declares gateway-port-conflict, but the failure message only
+    // names invalid-nvidia-api-key.
+    const failedOnboarding = phaseResult("onboarding", {
+      status: "failed",
+      failedActionId: "onboarding.profile.cloud-openclaw-gateway-port-conflict",
+      failedActionMessage: "onboard exit 1: invalid-nvidia-api-key authentication failed",
+    });
+    const contract: ExpectedFailureContract = {
+      phase: "onboarding",
+      errorClass: "gateway-port-conflict",
+    };
+    const verdict = evaluateNegativeContract(planWithExpectedFailure(contract), [failedOnboarding]);
+    expect(verdict.matched).toBe(false);
+    expect(verdict.outcome).toBe("wrong-error-class");
+    expect(verdict.message).toMatch(/errorClass mismatch/);
+  });
+
+  it("ignores the runtime side-effect probe step when scanning for observed failure", () => {
+    const plan = planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" });
+    const results: PhaseResult[] = [
+      phaseResult("environment", { status: "passed" }),
+      phaseResult("onboarding", {
+        status: "failed",
+        failedActionId: "onboarding.profile.cloud-openclaw",
+        failedActionMessage: "onboard exit 1: docker-missing daemon unreachable",
+      }),
+      // runtime phase has only the required pending side-effect step
+      // that fails closed until the probe lands. The matcher must NOT
+      // treat that as the observed failure mode.
+      {
+        phase: "runtime",
+        status: "failed",
+        actions: [],
+        assertions: [
+          {
+            id: "runtime.expected-failure.no-side-effects",
+            status: "failed",
+            attempts: 1,
+            durationMs: 0,
+            message: "required pending step not implemented: expectedFailureNoSideEffectsProbe",
+          },
+        ],
+      },
+    ];
+    const result = evaluateNegativeContract(plan, results);
+    expect(result.matched).toBe(true);
+    expect(result.observed.failedActionId).toBe("onboarding.profile.cloud-openclaw");
+  });
+
+  it("matches errorClass case-insensitively and across separator variants", () => {
+    // The message spells the class "Docker_Missing"; the contract declares "docker-missing".
+    const failedOnboarding = phaseResult("onboarding", {
+      status: "failed",
+      failedActionId: "onboarding",
+      failedActionMessage: "Onboard exit 1: Docker_Missing daemon socket unreachable",
+    });
+    const plan = planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" });
+    const verdict = evaluateNegativeContract(plan, [failedOnboarding]);
+    expect(verdict.matched).toBe(true);
+  });
+
+  it("throws if invoked for a plan without expectedFailure", () => {
+    const base = planWithExpectedFailure({ phase: "onboarding", errorClass: "x" });
+    expect(() => evaluateNegativeContract({ ...base, expectedFailure: undefined }, [])).toThrow(/no expectedFailure declared/);
+  });
+
+  it("synthetic phase result reflects matched status", () => {
+    const failedOnboarding = phaseResult("onboarding", {
+      status: "failed",
+      failedActionId: "onboarding",
+      failedActionMessage: "docker-missing",
+    });
+    const verdict = evaluateNegativeContract(
+      planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" }),
+      [failedOnboarding],
+    );
+    // A matched contract must surface as a passing synthetic phase.
+    const { phase, status, assertions } = negativeContractPhaseResult(verdict);
+    expect(phase).toBe("negative-contract");
+    expect(status).toBe("passed");
+    expect(assertions[0]).toEqual(expect.objectContaining({ id: "negative-contract.match", status: "passed" }));
+  });
+});
+
+describe("ScenarioRunner appends negative-contract phase", () => {
+  it("invokes matcher and appends a passing synthetic phase when contract matched", async () => {
+    const ctx = freshCtx();
+    try {
+      const fakePhase = (
+        phase: PhaseName,
+        outcome: PhaseResult,
+      ) => ({
+        run: async (
+          _ctx: RunContext,
+          _runPhase: RunPlanPhase,
+          _prior?: PhaseResult[],
+        ): Promise<PhaseResult> => outcome,
+      });
+
+      const runner = new ScenarioRunner({
+        environment: fakePhase("environment", { phase: "environment", status: "passed", actions: [], assertions: [] }),
+        onboarding: fakePhase("onboarding", {
+          phase: "onboarding",
+          status: "failed",
+          actions: [
+            {
+              id: "onboarding.profile.cloud-openclaw",
+              status: "failed",
+              durationMs: 1,
+              message: "onboard exit 1: docker-missing daemon unreachable",
+            },
+          ],
+          assertions: [],
+        }),
+        runtime: fakePhase("runtime", { phase: "runtime", status: "passed", actions: [], assertions: [] }),
+      });
+
+      const plan = planWithExpectedFailure({ phase: "preflight", errorClass: "docker-missing" });
+      const results = await runner.run(ctx, plan);
+
+      const contractPhase = results[results.length - 1];
+      expect(contractPhase.phase).toBe("negative-contract");
+      expect(contractPhase.status).toBe("passed");
+
+      // Artifact emitted to ctx.contextDir/.e2e/negative-contract.json
+      const artifact = path.join(ctx.contextDir, ".e2e", "negative-contract.json");
+      expect(fs.existsSync(artifact)).toBe(true);
+      const parsed = JSON.parse(fs.readFileSync(artifact, "utf8"));
+      expect(parsed.matched).toBe(true);
+      expect(parsed.outcome).toBe("matched");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("emits a failed synthetic phase when the wrong phase failed", async () => {
+    const ctx = freshCtx();
+    try {
+      const fakePhase = (outcome: PhaseResult) => ({
+        run: async (): Promise<PhaseResult> => outcome,
+      });
+
+      const runner = new ScenarioRunner({
+        environment: fakePhase({
+          phase: "environment",
+          status: "failed",
+          actions: [
+            {
+              id: "environment.install.ubuntu-repo-no-docker",
+              status: "failed",
+              durationMs: 1,
+              message: "install dispatcher exit 1: dns-resolution-error",
+            },
+          ],
+          assertions: [],
+        }),
+        onboarding: fakePhase({ phase: "onboarding", status: "skipped", actions: [], assertions: [] }),
+        runtime: fakePhase({ phase: "runtime", status: "skipped", actions: [], assertions: [] }),
+      });
+
+      const plan = planWithExpectedFailure({ phase: "onboarding", errorClass: "docker-missing" });
+      const results = await runner.run(ctx, plan);
+
+      const contractPhase = results[results.length - 1];
+      expect(contractPhase.phase).toBe("negative-contract");
+      expect(contractPhase.status).toBe("failed");
+      expect(contractPhase.assertions[0].message).toMatch(/expected onboarding failure/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("does NOT append negative-contract phase for positive scenarios", async () => {
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      expect(plan.expectedFailure).toBeUndefined();
+
+      const fakePhase = (phase: PhaseName) => ({
+        run: async (): Promise<PhaseResult> => ({
+          phase,
+          status: "passed",
+          actions: [],
+          assertions: [],
+        }),
+      });
+      const runner = new ScenarioRunner({
+        environment: fakePhase("environment"),
+        onboarding: fakePhase("onboarding"),
+        stateValidation: fakePhase("state-validation"),
+        lifecycle: fakePhase("lifecycle"),
+        runtime: fakePhase("runtime"),
+      });
+
+      const results = await runner.run(ctx, plan);
+      expect(results.map((r) => r.phase)).toEqual([
+        "environment",
+        "onboarding",
+        "state-validation",
+        "lifecycle",
+        "runtime",
+      ]);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("registry contract: every negative scenario opts into the side-effect probe", () => {
+  it("scenario.expectedFailure implies the runtime no-side-effects required pending step", () => {
+    const negatives = listScenarios().filter((scenario) => scenario.expectedFailure);
+    expect(negatives.length).toBeGreaterThan(0);
+    for (const scenario of negatives) {
+      const runtimeGroups = scenario.assertionGroups.filter((group) => group.phase === "runtime");
+      const hasProbeStep = runtimeGroups.some((group) =>
+        group.steps.some(
+          (step) =>
+            step.id === "runtime.expected-failure.no-side-effects" &&
+            step.implementation?.kind === "pending" &&
+            step.required === true,
+        ),
+      );
+      expect(hasProbeStep, `scenario ${scenario.id} must include the required side-effect pending step`).toBe(true);
+    }
+  });
+});
+
+describe("compiler validates the typed expected-failure contract", () => {
+  it("rejects an invalid phase value", () => {
+    expect(() =>
+      compileRunPlans([
+        {
+          id: "synthetic-bad-phase",
+          assertionGroups: [],
+          // Force the bad shape the compiler must reject.
+          expectedFailure: { phase: "bogus" as never, errorClass: "x" },
+        },
+      ]),
+    ).toThrow(/expectedFailure\.phase invalid/);
+  });
+
+  it("rejects an empty errorClass", () => {
+    expect(() =>
+      compileRunPlans([
+        {
+          id: "synthetic-empty-class",
+          assertionGroups: [],
+          expectedFailure: { phase: "onboarding", errorClass: "" },
+        },
+      ]),
+    ).toThrow(/errorClass must be a non-empty string/);
+  });
+});
diff --git a/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts b/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
index 497dac3387..52ec95cddb 100644
--- a/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
@@ -3,19 +3,39 @@
 
 import { describe, expect, it } from "vitest";
 import fs from "node:fs";
+import os from "node:os";
 import path from "node:path";
 
 import { HostCliClient } from "../scenarios/clients/host-cli.ts";
 import { compileRunPlans } from "../scenarios/compiler.ts";
 import { PhaseOrchestrator } from "../scenarios/orchestrators/phase.ts";
 import { ScenarioRunner } from "../scenarios/orchestrators/runner.ts";
-import type { AssertionStep, PhaseName, PhaseResult, RunContext, RunPlanPhase } from "../scenarios/types.ts";
+import type {
+  AssertionStep,
+  PhaseAction,
+  PhaseName,
+  PhaseResult,
+  RunContext,
+  RunPlanPhase,
+} from "../scenarios/types.ts";
 
-function fakeCtx(): RunContext {
-  return { contextDir: fs.mkdtempSync(path.join(process.cwd(), ".tmp-e2e-phase-")), dryRun: true };
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+
+function freshCtx(): RunContext {
+  return { contextDir: fs.mkdtempSync(path.join(os.tmpdir(), "e2e-phase-")) };
+}
+
+function shellStep(id: string, phase: PhaseName, ref: string, reliability?: AssertionStep["reliability"]): AssertionStep {
+  return {
+    id,
+    phase,
+    implementation: { kind: "shell", ref },
+    evidencePath: `.e2e/assertions/${id}.log`,
+    reliability,
+  };
 }
 
-function fakeStep(id: string, phase: PhaseName, ref = "fake-pass"): AssertionStep {
+function probeStep(id: string, phase: PhaseName, ref = "no-such-probe"): AssertionStep {
   return {
     id,
     phase,
@@ -24,97 +44,916 @@ function fakeStep(id: string, phase: PhaseName, ref = "fake-pass"): AssertionSte
   };
 }
 
-function fakePhase(step: AssertionStep): RunPlanPhase {
+function pendingStep(id: string, phase: PhaseName): AssertionStep {
+  return {
+    id,
+    phase,
+    implementation: { kind: "pending", ref: "not-yet" },
+  };
+}
+
+function makePhase(steps: AssertionStep[]): RunPlanPhase {
   return {
-    name: step.phase,
+    name: steps[0].phase,
     actions: [],
-    assertionGroups: [{ id: `group.${step.id}`, phase: step.phase, migrationStatus: "complete", steps: [step] }],
+    assertionGroups: [{ id: `group.${steps[0].id}`, phase: steps[0].phase, migrationStatus: "complete", steps }],
+  };
+}
+
+function writeTempScript(dir: string, name: string, body: string): string {
+  const p = path.join(dir, name);
+  fs.writeFileSync(p, `#!/usr/bin/env bash\nset -euo pipefail\n${body}\n`, { mode: 0o755 });
+  return p;
+}
+
+function shellAction(
+  id: string,
+  phase: PhaseName,
+  scriptRef: string,
+  opts: { timeoutSeconds?: number; arg?: string } = {},
+): PhaseAction {
+  return {
+    id,
+    phase,
+    kind: "shell",
+    scriptRef,
+    arg: opts.arg,
+    timeoutSeconds: opts.timeoutSeconds,
+  };
+}
+
+function makePhaseWithActions(
+  phase: PhaseName,
+  actions: PhaseAction[],
+  steps: AssertionStep[],
+): RunPlanPhase {
+  return {
+    name: phase,
+    actions,
+    assertionGroups:
+      steps.length > 0
+        ? [{ id: `group.${steps[0].id}`, phase, migrationStatus: "complete", steps }]
+        : [],
   };
 }
 
-describe("phase orchestrators", () => {
+describe("phase orchestrators - top-level delegation", () => {
   it("test_should_execute_phase_assertions_from_phase_orchestrators_not_top_level_runner", async () => {
-    const ctx = fakeCtx();
+    const ctx = freshCtx();
     try {
       const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
       const calls: string[] = [];
       const fakeOrchestrator = (phase: PhaseName) => ({
         run: async (_ctx: RunContext, runPhase: RunPlanPhase, _prior?: PhaseResult[]): Promise<PhaseResult> => {
           calls.push(runPhase.name);
-          return { phase, status: "passed", assertions: [] };
+          return { phase, status: "passed", actions: [], assertions: [] };
         },
       });
       const runner = new ScenarioRunner({
         environment: fakeOrchestrator("environment"),
         onboarding: fakeOrchestrator("onboarding"),
+        stateValidation: fakeOrchestrator("state-validation"),
+        lifecycle: fakeOrchestrator("lifecycle"),
         runtime: fakeOrchestrator("runtime"),
       });
 
       const results = await runner.run(ctx, plan);
 
-      expect(calls).toEqual(["environment", "onboarding", "runtime"]);
-      expect(results.map((result) => result.phase)).toEqual(["environment", "onboarding", "runtime"]);
+      expect(calls).toEqual([
+        "environment",
+        "onboarding",
+        "state-validation",
+        "lifecycle",
+        "runtime",
+      ]);
+      expect(results.map((result) => result.phase)).toEqual([
+        "environment",
+        "onboarding",
+        "state-validation",
+        "lifecycle",
+        "runtime",
+      ]);
     } finally {
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
+});
 
-  it("test_should_record_step_status_attempts_duration_classifier_and_evidence", async () => {
-    const ctx = fakeCtx();
+describe("phase orchestrators - real shell execution", () => {
+  it("shell_step_passes_when_script_exits_zero", async () => {
+    const ctx = freshCtx();
     try {
-      const step = fakeStep("runtime.retry-pass", "runtime", "fake-retry-once-pass");
-      step.reliability = { retry: { attempts: 2, on: ["gateway-transient"] } };
+      const script = writeTempScript(ctx.contextDir, "ok.sh", "echo hello-from-real-shell");
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-pass", "runtime", ref);
       const orchestrator = new PhaseOrchestrator("runtime");
 
-      const result = await orchestrator.run(ctx, fakePhase(step));
+      const result = await orchestrator.run(ctx, makePhase([step]));
 
       expect(result.status).toBe("passed");
       expect(result.assertions[0]).toEqual(
-        expect.objectContaining({
-          id: "runtime.retry-pass",
-          status: "passed",
-          attempts: 2,
-          classifier: "gateway-transient",
-          evidence: ".e2e/assertions/runtime.retry-pass.json",
-        }),
+        expect.objectContaining({ id: "runtime.real-pass", status: "passed", attempts: 1 }),
       );
-      expect(result.assertions[0].durationMs).toBeGreaterThanOrEqual(0);
+      const log = fs.readFileSync(result.assertions[0].evidence!, "utf8");
+      expect(log).toContain("hello-from-real-shell");
     } finally {
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
 
-  it("test_should_enforce_timeout_and_retry_policy_in_orchestrator", async () => {
-    const ctx = fakeCtx();
+  it("shell_step_fails_when_script_exits_nonzero_and_records_stderr_tail", async () => {
+    const ctx = freshCtx();
     try {
-      const step = fakeStep("runtime.retry-fail", "runtime", "fake-always-transient");
-      step.reliability = { timeoutSeconds: 1, retry: { attempts: 2, on: ["provider-transient"] } };
+      const script = writeTempScript(ctx.contextDir, "fail.sh", 'echo "boom: real failure" >&2; exit 7');
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-fail", "runtime", ref);
       const orchestrator = new PhaseOrchestrator("runtime");
 
-      const result = await orchestrator.run(ctx, fakePhase(step));
+      const result = await orchestrator.run(ctx, makePhase([step]));
 
       expect(result.status).toBe("failed");
-      expect(result.assertions[0]).toEqual(
-        expect.objectContaining({
-          id: "runtime.retry-fail",
-          status: "failed",
-          attempts: 2,
-          classifier: "provider-transient",
+      expect(result.assertions[0].status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/exit 7/);
+      expect(result.assertions[0].message).toMatch(/boom: real failure/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("shell_step_times_out_via_orchestrator_policy_not_script", async () => {
+    const ctx = freshCtx();
+    try {
+      const script = writeTempScript(ctx.contextDir, "slow.sh", "sleep 30");
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-timeout", "runtime", ref, { timeoutSeconds: 1 });
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const started = Date.now();
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const elapsed = Date.now() - started;
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/exceeded 1s/);
+      expect(elapsed).toBeLessThan(15_000);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  }, 20_000);
+
+  it("shell_step_retries_on_classified_transient_then_passes", async () => {
+    const ctx = freshCtx();
+    try {
+      const counterFile = path.join(ctx.contextDir, "counter");
+      fs.writeFileSync(counterFile, "0");
+      const script = writeTempScript(
+        ctx.contextDir,
+        "gateway-flaky.sh",
+        `n=$(cat "${counterFile}"); n=$((n+1)); echo "$n" > "${counterFile}"; if [ "$n" -lt 2 ]; then echo "gateway-transient: try again" >&2; exit 1; fi; echo ok`,
+      );
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.gateway-retry", "runtime", ref, {
+        retry: { attempts: 2, on: ["gateway-transient"] },
+      });
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("passed");
+      expect(result.assertions[0].attempts).toBe(2);
+      expect(result.assertions[0].classifier).toBe("gateway-transient");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("shell_step_fails_with_clear_message_when_script_missing", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = shellStep("runtime.missing", "runtime", "test/e2e-scenario/does-not-exist.sh");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/script not found/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("probe_step_without_registered_probe_skips_visibly_never_passes_falsely", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = probeStep("runtime.probe-pending", "runtime");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.assertions[0].status).toBe("skipped");
+      expect(result.assertions[0].message).toMatch(/probe not registered/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("pending_step_skips_visibly_with_pending_marker", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = pendingStep("runtime.pending", "runtime");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.assertions[0].status).toBe("skipped");
+      expect(result.assertions[0].message).toMatch(/^pending:/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("phase orchestrators - actions execute before assertions", () => {
+  it("phase_action_runs_before_assertions_and_records_evidence", async () => {
+    const ctx = freshCtx();
+    try {
+      const actionScript = writeTempScript(ctx.contextDir, "setup.sh", "echo phase-action-evidence");
+      const action = shellAction("environment.setup-ok", "environment", path.relative(REPO_ROOT, actionScript));
+      const stepScript = writeTempScript(ctx.contextDir, "after.sh", "echo after-action");
+      const step = shellStep("environment.assert-ok", "environment", path.relative(REPO_ROOT, stepScript));
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const result = await orchestrator.run(ctx, makePhaseWithActions("environment", [action], [step]));
+
+      expect(result.status).toBe("passed");
+      expect(result.actions).toHaveLength(1);
+      expect(result.actions[0]).toEqual(
+        expect.objectContaining({ id: "environment.setup-ok", status: "passed" }),
+      );
+      expect(result.actions[0].evidence).toBeTruthy();
+      const actionLog = fs.readFileSync(result.actions[0].evidence!, "utf8");
+      expect(actionLog).toContain("phase-action-evidence");
+      expect(result.assertions).toHaveLength(1);
+      expect(result.assertions[0].status).toBe("passed");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_failure_short_circuits_assertions", async () => {
+    const ctx = freshCtx();
+    try {
+      const failingScript = writeTempScript(ctx.contextDir, "fail.sh", 'echo "setup boom" >&2; exit 5');
+      const setupAction = shellAction("environment.setup-fail", "environment", path.relative(REPO_ROOT, failingScript));
+      const followUpScript = writeTempScript(ctx.contextDir, "after.sh", "echo should-not-run");
+      const neverRunStep = shellStep("environment.never-runs", "environment", path.relative(REPO_ROOT, followUpScript));
+      const phase = makePhaseWithActions("environment", [setupAction], [neverRunStep]);
+
+      const { status, actions, assertions } = await new PhaseOrchestrator("environment").run(ctx, phase);
+
+      expect(status).toBe("failed");
+      expect(actions).toHaveLength(1);
+      expect(actions[0].status).toBe("failed");
+      expect(actions[0].message).toMatch(/exit 5/);
+      // The failed setup action must stop the phase before any assertion
+      // step runs; otherwise a step could pass against a broken environment.
+      expect(assertions).toEqual([]);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_times_out_via_orchestrator_policy", async () => {
+    const ctx = freshCtx();
+    try {
+      const sleeper = writeTempScript(ctx.contextDir, "slow.sh", "sleep 30");
+      const sleeperRef = path.relative(REPO_ROOT, sleeper);
+      const slowAction = shellAction("environment.setup-slow", "environment", sleeperRef, { timeoutSeconds: 1 });
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const startedAt = Date.now();
+      const { status, actions } = await orchestrator.run(ctx, makePhaseWithActions("environment", [slowAction], []));
+      const elapsedMs = Date.now() - startedAt;
+
+      expect(status).toBe("failed");
+      expect(actions[0].status).toBe("failed");
+      expect(actions[0].message).toMatch(/exceeded 1s/);
+      // The 1s limit has to be enforced by the orchestrator itself rather
+      // than by the script exiting on its own. The bound is generous, but
+      // it is still far below the script's 30s sleep.
+      expect(elapsedMs).toBeLessThan(15_000);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_publishes_alias_path_on_success", async () => {
+    const ctx = freshCtx();
+    try {
+      const aliasScript = writeTempScript(ctx.contextDir, "alias.sh", "echo aliased-output");
+      const aliasedAction: PhaseAction = {
+        id: "onboarding.profile.alias-demo",
+        phase: "onboarding",
+        kind: "shell",
+        scriptRef: path.relative(REPO_ROOT, aliasScript),
+        aliasPath: "onboard.log",
+      };
+      const phase = makePhaseWithActions("onboarding", [aliasedAction], []);
+
+      const { actions } = await new PhaseOrchestrator("onboarding").run(ctx, phase);
+
+      expect(actions[0].status).toBe("passed");
+      const aliasFile = path.join(ctx.contextDir, "onboard.log");
+      expect(fs.readFileSync(aliasFile, "utf8")).toContain("aliased-output");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_evidence_log_is_flushed_before_resolve", async () => {
+    const ctx = freshCtx();
+    try {
+      const flushScript = writeTempScript(ctx.contextDir, "flush.sh", "echo flushed-phase-action-output");
+      const flushAction = shellAction("environment.flush", "environment", path.relative(REPO_ROOT, flushScript));
+      const phase = makePhaseWithActions("environment", [flushAction], []);
+
+      const { actions } = await new PhaseOrchestrator("environment").run(ctx, phase);
+
+      // Reading the evidence file synchronously right after run() resolves
+      // only works if the log was fully written before the promise settled.
+      const evidenceBody = fs.readFileSync(actions[0].evidence!, "utf8");
+      expect(evidenceBody).toContain("flushed-phase-action-output");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("plan compiler emits phase actions for canonical scenarios", () => {
+  it("compiler_emits_install_and_onboard_actions_for_canonical_scenarios", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const scenarioIds = [
+      "ubuntu-repo-cloud-openclaw",
+      "ubuntu-repo-cloud-hermes",
+      "gpu-repo-local-ollama-openclaw",
+      "macos-repo-cloud-openclaw",
+      "wsl-repo-cloud-openclaw",
+      "brev-launchable-cloud-openclaw",
+      "ubuntu-no-docker-preflight-negative",
+    ];
+    const compiled = compileRunPlans(scenarioIds);
+    expect(compiled).toHaveLength(scenarioIds.length);
+    for (const { phases } of compiled) {
+      const environment = phases.find((phase) => phase.name === "environment")!;
+      const onboarding = phases.find((phase) => phase.name === "onboarding")!;
+      expect(environment.actions.some(({ id }) => id.startsWith("environment.install."))).toBe(true);
+      expect(onboarding.actions.some(({ id }) => id.startsWith("onboarding.profile."))).toBe(true);
+      // Writing context.env is the framework's job (ScenarioRunner), not a
+      // shell action's. A compiler-emitted shell context action would tie
+      // the plan back to the old resolver's plan.json shape, so none may
+      // appear here.
+      expect(environment.actions.map(({ id }) => id)).not.toContain("environment.context.emit");
+      // The onboarding action has to expose a stable alias path: legacy
+      // shell assertions read ${E2E_CONTEXT_DIR}/onboard.log and should
+      // not need to know action ids.
+      const profileAction = onboarding.actions.find(({ id }) => id.startsWith("onboarding.profile."));
+      expect(profileAction?.aliasPath).toBe("onboard.log");
+      // Install/onboard actions must be typed shell-fn entries pointing at
+      // the canonical dispatcher script, never free-form strings.
+      for (const action of [...environment.actions, ...onboarding.actions]) {
+        const isDispatched = action.id.startsWith("environment.install.") || action.id.startsWith("onboarding.profile.");
+        if (!isDispatched) continue;
+        expect(action.kind).toBe("shell-fn");
+        expect(action.scriptRef).toMatch(/dispatch\.sh$/);
+        expect(action.fn).toMatch(/^e2e_(install|onboard)$/);
+        expect(action.arg).toBeTruthy();
+      }
+    }
+  });
+
+  it("compiler_routes_docker_missing_runtime_to_no_docker_onboarding_profile", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const onboardingProfile = (scenarioId: string) =>
+      compileRunPlans([scenarioId])[0]
+        .phases.find((phase) => phase.name === "onboarding")!
+        .actions.find(({ id }) => id.startsWith("onboarding.profile."));
+
+    // scenarios.yaml declares runtime=docker-missing for the negative
+    // scenario. The compiler has to swap the base 'cloud-openclaw'
+    // onboarding profile for 'cloud-openclaw-no-docker' so the dispatcher
+    // reaches the worker that installs the docker shim and captures
+    // negative-preflight.log; otherwise the
+    // 'onboarding.preflight.expected-failed' assertion has nothing to grep.
+    const negative = onboardingProfile("ubuntu-no-docker-preflight-negative");
+    expect(negative?.id).toBe("onboarding.profile.cloud-openclaw-no-docker");
+    expect(negative?.arg).toBe("cloud-openclaw-no-docker");
+    expect(negative?.evidencePath).toBe(
+      ".e2e/actions/onboarding.profile.cloud-openclaw-no-docker.log",
+    );
+    // NVIDIA_API_KEY stays in the secret env so the run mirrors a real
+    // user invocation (the CLI loads creds even if preflight aborts).
+    expect(negative?.secretEnv).toContain("NVIDIA_API_KEY");
+    // A positive scenario keeps the plain profile, without -no-docker.
+    const positive = onboardingProfile("ubuntu-repo-cloud-openclaw");
+    expect(positive?.arg).toBe("cloud-openclaw");
+  });
+
+  it("compiler_emits_lifecycle_phase_action_when_scenario_declares_lifecycle_profile", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    // The rebuild scenario sets environment.lifecycle to
+    // 'rebuild-current-version'. Exactly one lifecycle phase action must
+    // come out of the compiler, routed through the canonical lifecycle
+    // dispatcher; otherwise the runtime-phase rebuild assertions would
+    // run against a sandbox that was never rebuilt.
+    const [rebuildPlan] = compileRunPlans(["ubuntu-rebuild-openclaw"]);
+    const lifecyclePhase = rebuildPlan.phases.find((phase) => phase.name === "lifecycle")!;
+    expect(lifecyclePhase).toBeTruthy();
+    expect(lifecyclePhase.actions).toHaveLength(1);
+    const [rebuild] = lifecyclePhase.actions;
+    expect(rebuild.id).toBe("lifecycle.profile.rebuild-current-version");
+    expect(rebuild.arg).toBe("rebuild-current-version");
+    expect(rebuild.scriptRef).toMatch(/lifecycle\/dispatch\.sh$/);
+    expect(rebuild.fn).toBe("e2e_lifecycle");
+    expect(rebuild.evidencePath).toBe(
+      ".e2e/actions/lifecycle.profile.rebuild-current-version.log",
+    );
+    // NVIDIA_API_KEY must be declared: nemoclaw rebuild re-reads it when
+    // the post-rebuild sandbox is brought back up.
+    expect(rebuild.secretEnv).toContain("NVIDIA_API_KEY");
+  });
+
+  it("compiler_emits_no_lifecycle_actions_when_scenario_does_not_declare_lifecycle", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    // Scenarios without environment.lifecycle still get a lifecycle phase
+    // (phase order stays deterministic), but that phase carries no
+    // actions and no assertion groups.
+    const [defaultPlan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+    const lifecyclePhase = defaultPlan.phases.find((phase) => phase.name === "lifecycle")!;
+    expect(lifecyclePhase).toBeTruthy();
+    expect(lifecyclePhase.actions).toHaveLength(0);
+    expect(lifecyclePhase.assertionGroups).toHaveLength(0);
+  });
+
+  it("compiler_drops_rebuild_and_upgrade_supplemental_suites_from_cloud_openclaw", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    // 'rebuild' and 'upgrade' were once attached as supplemental suites
+    // to ubuntu-repo-cloud-openclaw. Because no rebuild ever ran there,
+    // they produced fake failures (nothing could be preserved). That
+    // coverage now belongs to ubuntu-rebuild-openclaw, which really runs
+    // the lifecycle phase, so the cloud-openclaw plan must not carry
+    // either suite's assertion group.
+    const [cloudPlan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+    const runtimePhase = cloudPlan.phases.find((phase) => phase.name === "runtime")!;
+    const runtimeGroupIds = runtimePhase.assertionGroups.map(({ id }) => id);
+    expect(runtimeGroupIds).not.toContain("suite.rebuild");
+    expect(runtimeGroupIds).not.toContain("suite.upgrade");
+  });
+
+  it("compiler_includes_rebuild_and_upgrade_groups_on_ubuntu_rebuild_openclaw", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const [rebuildPlan] = compileRunPlans(["ubuntu-rebuild-openclaw"]);
+    const runtimePhase = rebuildPlan.phases.find((phase) => phase.name === "runtime")!;
+    const runtimeGroupIds = runtimePhase.assertionGroups.map(({ id }) => id);
+    expect(runtimeGroupIds).toContain("suite.rebuild");
+    expect(runtimeGroupIds).toContain("suite.upgrade");
+  });
+});
+
+describe("ScenarioRunner seeds context.env and short-circuits across phases", () => {
+  it("seedContextEnv_writes_normalized_keys_at_top_level_context_env_path", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const { seedContextEnv } = await import("../scenarios/orchestrators/context.ts");
+    const ctx = freshCtx();
+    try {
+      const [cloudPlan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      const { path: envPath } = seedContextEnv(ctx, cloudPlan);
+
+      // The file sits directly in the context dir (not under .e2e/), matching
+      // the shell helper's e2e_context_init; runtime steps source it from there.
+      expect(envPath).toBe(path.join(ctx.contextDir, "context.env"));
+      const envFile = fs.readFileSync(envPath, "utf8");
+      // Keys that downstream shell assertions read.
+      expect(envFile).toMatch(/^E2E_SCENARIO=ubuntu-repo-cloud-openclaw$/m);
+      expect(envFile).toMatch(/^E2E_PLATFORM_OS=ubuntu$/m);
+      expect(envFile).toMatch(/^E2E_AGENT=openclaw$/m);
+      expect(envFile).toMatch(/^E2E_PROVIDER=nvidia$/m);
+      expect(envFile).toMatch(/^E2E_GATEWAY_URL=http:\/\/127\.0\.0\.1:18789$/m);
+      expect(envFile).toMatch(/^E2E_SANDBOX_NAME=e2e-ubuntu-repo-cloud-openclaw$/m);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("hermes_scenario_seeds_hermes_gateway_url", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const { seedContextEnv } = await import("../scenarios/orchestrators/context.ts");
+    const ctx = freshCtx();
+    try {
+      const [hermesPlan] = compileRunPlans(["ubuntu-repo-cloud-hermes"]);
+      const { path: envPath } = seedContextEnv(ctx, hermesPlan);
+      const envFile = fs.readFileSync(envPath, "utf8");
+      expect(envFile).toMatch(/^E2E_AGENT=hermes$/m);
+      expect(envFile).toMatch(/^E2E_GATEWAY_URL=http:\/\/127\.0\.0\.1:8642$/m);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("runner_skips_downstream_phases_when_prior_phase_action_fails", async () => {
+    const { ScenarioRunner } = await import("../scenarios/orchestrators/runner.ts");
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      // Inject a failing environment phase to simulate an install action
+      // failure. Every downstream phase (onboarding, state-validation,
+      // lifecycle, runtime) must report skipped without running anything.
+      const failingEnv = {
+        run: async () => ({
+          phase: "environment" as const,
+          status: "failed" as const,
+          actions: [
+            {
+              id: "environment.install.repo-current",
+              status: "failed" as const,
+              durationMs: 5,
+              message: "simulated install failure",
+            },
+          ],
+          assertions: [],
+        }),
+      };
+      let onboardingCalled = false;
+      let runtimeCalled = false;
+      const onboarding = {
+        run: async () => {
+          onboardingCalled = true;
+          return { phase: "onboarding" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      const runtime = {
+        run: async () => {
+          runtimeCalled = true;
+          return { phase: "runtime" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      let stateValidationCalled = false;
+      const stateValidation = {
+        run: async () => {
+          stateValidationCalled = true;
+          return {
+            phase: "state-validation" as const,
+            status: "passed" as const,
+            actions: [],
+            assertions: [],
+          };
+        },
+      };
+      const runner = new ScenarioRunner({
+        environment: failingEnv,
+        onboarding,
+        stateValidation,
+        runtime,
+      });
+
+      const results = await runner.run(ctx, plan);
+
+      // Downstream orchestrators must NOT have been invoked. An
+      // environment failure means install never ran; there is nothing
+      // for state-validation to probe.
+      expect(onboardingCalled).toBe(false);
+      expect(stateValidationCalled).toBe(false);
+      expect(runtimeCalled).toBe(false);
+      // Each phase still has a result, and the downstream ones are
+      // skipped with a message that names the blocking action.
+      expect(results.map((r) => r.phase)).toEqual([
+        "environment",
+        "onboarding",
+        "state-validation",
+        "lifecycle",
+        "runtime",
+      ]);
+      expect(results[1].status).toBe("skipped");
+      expect(results[2].status).toBe("skipped");
+      expect(results[3].status).toBe("skipped");
+      expect(results[4].status).toBe("skipped");
+      expect(results[1].assertions[0].message).toMatch(/blocked by prior failure/);
+      expect(results[1].assertions[0].message).toMatch(/environment\.install\.repo-current/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("runner_does_not_short_circuit_on_assertion_failure_only", async () => {
+    // A failed assertion, unlike a failed action, must leave downstream
+    // phases running so reviewers get to see every failure layer.
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const { ScenarioRunner } = await import("../scenarios/orchestrators/runner.ts");
+    const ctx = freshCtx();
+    try {
+      const [cloudPlan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      const assertionOnlyFailure = {
+        run: async () => ({
+          phase: "environment" as const,
+          status: "failed" as const,
+          actions: [],
+          assertions: [
+            { id: "environment.something", status: "failed" as const, attempts: 1, durationMs: 1 },
+          ],
         }),
+      };
+      let onboardingRan = false;
+      const onboardingSpy = {
+        run: async () => {
+          onboardingRan = true;
+          return { phase: "onboarding" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      const scenarioRunner = new ScenarioRunner({
+        environment: assertionOnlyFailure,
+        onboarding: onboardingSpy,
+        runtime: {
+          run: async () => ({ phase: "runtime" as const, status: "passed" as const, actions: [], assertions: [] }),
+        },
+      });
+
+      await scenarioRunner.run(ctx, cloudPlan);
+      expect(onboardingRan).toBe(true);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("required probe and pending steps fail closed", () => {
+  it("test_required_probe_step_that_is_unregistered_fails_the_phase", async () => {
+    const ctx = freshCtx();
+    try {
+      const requiredProbeStep: AssertionStep = {
+        id: "runtime.security.required-probe",
+        phase: "runtime",
+        implementation: { kind: "probe", ref: "unregisteredSecurityProbe" },
+        evidencePath: ".e2e/assertions/runtime.security.required-probe.json",
+        required: true,
+      };
+
+      const { status, assertions } = await new PhaseOrchestrator("runtime").run(ctx, makePhase([requiredProbeStep]));
+      const [probeResult] = assertions;
+
+      expect(status).toBe("failed");
+      expect(probeResult.status).toBe("failed");
+      expect(probeResult.message).toMatch(/required probe not registered/);
+      expect(probeResult.message).toContain("unregisteredSecurityProbe");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_non_required_probe_step_continues_to_skip_visibly", async () => {
+    const ctx = freshCtx();
+    try {
+      const optionalProbeStep: AssertionStep = {
+        id: "runtime.diagnostics.non-required-probe",
+        phase: "runtime",
+        // The ref is deliberately one that is not registered, so the
+        // "missing probe" path is what runs. `diagnosticsProbe` is now a
+        // real built-in registered at orchestrator import time; using it
+        // here would invoke nemoclaw for real and make the outcome depend
+        // on CLI behavior, which has nothing to do with what this test
+        // verifies.
+        implementation: { kind: "probe", ref: "unregisteredFakeProbe" },
+        evidencePath: ".e2e/assertions/runtime.diagnostics.non-required-probe.json",
+        // `required` is left out on purpose (defaults to false)
+      };
+
+      const { status, assertions } = await new PhaseOrchestrator("runtime").run(ctx, makePhase([optionalProbeStep]));
+      const [probeResult] = assertions;
+
+      expect(probeResult.status).toBe("skipped");
+      expect(probeResult.message).toMatch(/probe not registered/);
+      // A skipped step that is not required leaves the phase un-failed.
+      expect(status).not.toBe("failed");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_required_pending_step_fails_closed", async () => {
+    const ctx = freshCtx();
+    try {
+      const pendingStep: AssertionStep = {
+        id: "runtime.expected-failure.no-side-effects",
+        phase: "runtime",
+        implementation: { kind: "pending", ref: "expectedFailureNoSideEffectsProbe" },
+        evidencePath: ".e2e/assertions/runtime.expected-failure.no-side-effects.json",
+        required: true,
+      };
+
+      const { status, assertions } = await new PhaseOrchestrator("runtime").run(ctx, makePhase([pendingStep]));
+      const [pendingResult] = assertions;
+
+      expect(status).toBe("failed");
+      expect(pendingResult.status).toBe("failed");
+      expect(pendingResult.message).toMatch(/required pending step not implemented/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_security_suite_groups_in_registry_mark_their_steps_as_required", async () => {
+    const { assertionGroupForSuite } = await import("../scenarios/assertions/registry.ts");
+    // Each listed security suite must resolve to a group whose steps are all required.
+    const securitySuites = ["security-shields", "security-policy", "security-injection"];
+    for (const suite of securitySuites) {
+      const group = assertionGroupForSuite(suite);
+      expect(group, `missing assertion group for suite ${suite}`).toBeDefined();
+      for (const { id, required } of group?.steps ?? []) {
+        const hint = `${suite} step ${id} must be required so it fails closed`;
+        expect(required, hint).toBe(true);
+      }
+    }
+  });
+
+  it("test_expected_failure_no_side_effects_step_in_registry_is_required", async () => {
+    const { assertionRegistry } = await import("../scenarios/assertions/registry.ts");
+    const targetId = "runtime.expected-failure.no-side-effects";
+    const group = assertionRegistry.groups.find(({ id }) => id === targetId);
+
+    expect(group).toBeDefined();
+    for (const { required } of group?.steps ?? []) {
+      expect(required).toBe(true);
+    }
+  });
+});
+
+describe("framework-owned secret hygiene at the spawn boundary", () => {
+  it("test_should_not_persist_secret_shaped_child_output_into_evidence", async () => {
+    const ctx = freshCtx();
+    try {
+      // Child writes secret-shaped tokens (NVIDIA, GitHub, OpenAI,
+      // Slack, Bearer-prefixed) on both stdout and stderr, then exits
+      // non-zero so stderrTail also flows into result.message. None of
+      // those literal tokens may persist anywhere in the evidence.
+      const body = [
+        'echo "step prints nvapi-1234567890abcdef0123456789"',
+        'echo "and ghp_abcdefghijklmnopqrstuvwxyz0123456789"',
+        'echo "and sk-abcdefghijklmnopqrstuvwxyz0123456789"',
+        'echo "and xoxb-9876543210-fake-bot-token-abc"',
+        'echo "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature" 1>&2',
+        'exit 7',
+      ].join("\n");
+      const script = writeTempScript(ctx.contextDir, "leak.sh", body);
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.leak", "runtime", ref);
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const assertion = result.assertions[0];
+      const logBody = fs.readFileSync(path.join(ctx.contextDir, ".e2e", "logs", `${step.id}.log`), "utf8");
+      const phaseResultJson = fs.readFileSync(
+        path.join(ctx.contextDir, ".e2e", "runtime.result.json"),
+        "utf8",
+      );
+      const surfaces = { log: logBody, message: assertion.message ?? "", phaseResult: phaseResultJson };
+
+      // Every secret-shaped token canonicalized in
+      // src/lib/security/secret-patterns.ts must be redacted on the way to
+      // disk on every surface; surfaces are named so failures say which one.
+      const forbiddenPatterns = [
+        /nvapi-[A-Za-z0-9_-]{10,}/,
+        /ghp_[A-Za-z0-9_-]{10,}/,
+        /sk-[A-Za-z0-9_-]{20,}/,
+        /(?:xox[bpas]|xapp)-[A-Za-z0-9-]{10,}/,
+        /Bearer\s+[A-Za-z0-9_.+\/=-]{10,}/i,
+      ];
+      for (const [name, surface] of Object.entries(surfaces)) {
+        for (const pat of forbiddenPatterns) {
+          expect(surface, `${name}: evidence surface must not contain ${pat}`).not.toMatch(pat);
+        }
+        expect(surface, `${name}: evidence surface must carry a <REDACTED> marker`).toMatch(/<REDACTED>/);
+      }
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_should_drop_non_allowlisted_parent_env_unless_declared_in_secretEnv", async () => {
+    const ctx = freshCtx();
+    const sentinelKey = "SECRET_LEAK_PROBE_TOKEN";
+    const sentinelValue = "sentinel-value-that-must-not-leak";
+    const previous = process.env[sentinelKey];
+    process.env[sentinelKey] = sentinelValue;
+    try {
+      const dumpScript = writeTempScript(ctx.contextDir, "env-leak.sh", `printenv | sort\n`);
+      const dumpRef = path.relative(REPO_ROOT, dumpScript);
+      // SECRET_LEAK_PROBE_TOKEN is not listed in the step's secretEnv, so
+      // the framework has to strip it from the child's environment.
+      const step = shellStep("runtime.env-drop", "runtime", dumpRef);
+      const logPath = path.join(ctx.contextDir, ".e2e", "logs", `${step.id}.log`);
+
+      const { assertions } = await new PhaseOrchestrator("runtime").run(ctx, makePhase([step]));
+      const childEnv = fs.readFileSync(logPath, "utf8");
+
+      expect(assertions[0].status).toBe("passed");
+      expect(childEnv, "non-allowlisted parent env must not reach the child").not.toContain(sentinelKey);
+      expect(childEnv).not.toContain(sentinelValue);
+      // PATH (allowlist) and E2E_PHASE (framework overlay) must still get through.
+      expect(childEnv).toMatch(/^PATH=/m);
+      expect(childEnv).toMatch(/^E2E_PHASE=runtime$/m);
+    } finally {
+      if (previous === undefined) {
+        delete process.env[sentinelKey];
+      } else {
+        process.env[sentinelKey] = previous;
+      }
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_should_pass_declared_secretEnv_through_to_child", async () => {
+    const ctx = freshCtx();
+    const declaredKey = "NEMOCLAW_TEST_API_KEY"; // matches SECRET_ENV_KEY_SHAPE
+    const previous = process.env[declaredKey];
+    process.env[declaredKey] = "declared-secret-value-passes-through";
+    try {
+      const printerScript = writeTempScript(
+        ctx.contextDir,
+        "declared.sh",
+        `printenv ${declaredKey} || echo MISSING\n`,
       );
+      const printerRef = path.relative(REPO_ROOT, printerScript);
+      const declaringStep: AssertionStep = {
+        ...shellStep("runtime.env-declared", "runtime", printerRef),
+        secretEnv: [declaredKey],
+      };
+      const logPath = path.join(ctx.contextDir, ".e2e", "logs", `${declaringStep.id}.log`);
+
+      const { assertions } = await new PhaseOrchestrator("runtime").run(ctx, makePhase([declaringStep]));
+      const childOutput = fs.readFileSync(logPath, "utf8");
+
+      expect(assertions[0].status).toBe("passed");
+      // The declared secret arrives in the child unchanged.
+      expect(childOutput).toContain("declared-secret-value-passes-through");
+      // The printenv output is not redacted here because the literal
+      // value matches no token-shape pattern. (Real secrets matching
+      // secret-patterns.ts WILL be redacted as a second line of
+      // defense; this synthetic value is deliberately shape-free so
+      // only the env-passthrough behavior is under test.)
     } finally {
+      if (previous === undefined) delete process.env[declaredKey];
+      else process.env[declaredKey] = previous;
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
 
+  it("test_should_reject_non_secret_shaped_keys_in_secretEnv_at_runtime", async () => {
+    const { buildChildEnv } = await import("../scenarios/orchestrators/redaction.ts");
+    // "FOO_VAR" is declared as a secret but does not have the secret-key shape.
+    const options = { secretEnv: ["FOO_VAR"], frameworkOverlay: {} };
+    expect(() => buildChildEnv(process.env, options)).toThrow(/secret-key shape/);
+  });
+
+  it("test_should_declare_NVIDIA_API_KEY_only_for_cloud_onboarding_actions", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const [cloudPlan, localPlan] = compileRunPlans([
+      "ubuntu-repo-cloud-openclaw",
+      "gpu-repo-local-ollama-openclaw",
+    ]);
+    const onboardingProfileOf = (plan: typeof cloudPlan) =>
+      plan.phases
+        .find((phase) => phase.name === "onboarding")
+        ?.actions.find(({ id }) => id.startsWith("onboarding.profile."));
+
+    // The cloud profile declares exactly the NVIDIA key; the local Ollama profile declares none.
+    expect(onboardingProfileOf(cloudPlan)?.secretEnv).toEqual(["NVIDIA_API_KEY"]);
+    expect(onboardingProfileOf(localPlan)?.secretEnv).toEqual([]);
+  });
+});
+
+describe("clients are pass/fail/policy free", () => {
   it("test_should_keep_clients_free_of_pass_fail_and_retry_semantics", () => {
-    const source = fs.readFileSync(
-      path.join(process.cwd(), "test/e2e-scenario/scenarios/clients/host-cli.ts"),
-      "utf8",
-    );
     const observation = new HostCliClient().observeVersion();
 
+    // observeVersion() must hand back only the raw act/observe shape: here,
+    // the command it would run. Deciding pass/fail, attaching retry policy or
+    // a classifier, and AssertionResult/PhaseResult-shaped fields are off limits.
     expect(observation).toEqual(expect.objectContaining({ command: ["nemoclaw", "--version"] }));
-    expect(source).not.toMatch(/AssertionResult|PhaseResult|retry|timeout|passed|failed/);
+    // Checked on the returned observation object: raw fields such as
+    // exitCode/stdout/stderr/timing are fine, the verdict and policy keys below are not.
+    const forbiddenKeys = [
+      "status",
+      "attempts",
+      "classifier",
+      "evidence",
+      "retry",
+      "timeout",
+      "timeoutSeconds",
+      "phase",
+      "assertions",
+      "passed",
+      "failed",
+    ];
+    for (const key of forbiddenKeys) {
+      expect(observation).not.toHaveProperty(key);
+    }
   });
 });
diff --git a/test/e2e-scenario/framework-tests/e2e-probes.test.ts b/test/e2e-scenario/framework-tests/e2e-probes.test.ts
new file mode 100644
index 0000000000..db90b47798
--- /dev/null
+++ b/test/e2e-scenario/framework-tests/e2e-probes.test.ts
@@ -0,0 +1,670 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+import {
+  listRegisteredProbes,
+  lookupProbe,
+  registerProbe,
+  resetProbeRegistry,
+} from "../scenarios/probes/registry.ts";
+import type { ProbeContext, ProbeOutcome } from "../scenarios/probes/types.ts";
+import { registerBuiltinProbes } from "../scenarios/probes/builtin.ts";
+
+const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../..");
+
+describe("probe registry", () => {
+  // builtin.ts is side-effect-imported by the orchestrator at module load,
+  // so the built-ins may already be registered. Every test starts from an
+  // empty registry and registers what it needs, keeping tests order-free.
+  beforeEach(() => {
+    resetProbeRegistry();
+  });
+
+  afterEach(() => {
+    // Put the production wiring back: vitest shares module state across
+    // files within a worker, so later test files would otherwise see an
+    // empty registry.
+    resetProbeRegistry();
+    registerBuiltinProbes();
+  });
+
+  it("registerProbe_lookupProbe_round_trip", () => {
+    const probe = async (): Promise<ProbeOutcome> => ({ status: "passed" });
+    registerProbe("myProbe", probe);
+    expect(lookupProbe("myProbe")).toBe(probe);
+  });
+
+  it("lookupProbe_returns_undefined_for_unknown_ref", () => {
+    expect(lookupProbe("nonexistent")).toBeUndefined();
+  });
+
+  it("registerProbe_rejects_duplicate_registration", () => {
+    const probe = async (): Promise<ProbeOutcome> => ({ status: "passed" });
+    registerProbe("dup", probe);
+    expect(() => registerProbe("dup", probe)).toThrow(/already registered/);
+  });
+
+  it("registerProbe_rejects_empty_name", () => {
+    const probe = async (): Promise<ProbeOutcome> => ({ status: "passed" });
+    expect(() => registerProbe("", probe)).toThrow(/name is required/);
+  });
+
+  it("listRegisteredProbes_returns_sorted_names", () => {
+    for (const name of ["zeta", "alpha", "mu"]) {
+      registerProbe(name, async () => ({ status: "passed" }));
+    }
+    expect(listRegisteredProbes()).toEqual(["alpha", "mu", "zeta"]);
+  });
+
+  it("registerBuiltinProbes_is_idempotent", () => {
+    registerBuiltinProbes();
+    const afterFirstCall = listRegisteredProbes();
+    expect(afterFirstCall).toContain("diagnosticsProbe");
+    expect(afterFirstCall).toContain("docsValidationProbe");
+    // A second call must neither throw on the duplicate names nor add any.
+    expect(() => registerBuiltinProbes()).not.toThrow();
+    expect(listRegisteredProbes()).toEqual(afterFirstCall);
+  });
+
+  it("registerBuiltinProbes_registers_security_probes", () => {
+    // shieldsConfig / networkPolicy / injectionBlocked are declared
+    // `required: true` in scenarios/assertions/registry.ts, and the
+    // orchestrator fails closed when a required probe is missing. With
+    // all three registered the security suites are actually verified
+    // rather than silently skipped.
+    registerBuiltinProbes();
+    const names = listRegisteredProbes();
+    for (const ref of ["shieldsConfigProbe", "networkPolicyProbe", "injectionBlockedProbe"]) {
+      expect(names).toContain(ref);
+    }
+  });
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// diagnosticsProbe — uses a fake `nemoclaw` on PATH so this test runs
+// reproducibly without depending on a real nemoclaw install.
+// ─────────────────────────────────────────────────────────────────────────────
+
+// Builds a bare ProbeContext rooted at `tmp`: empty env, no sandbox, no
+// gateway, real repo root. The evidence path is always explicit, tmp/<file>.
+function makeProbeCtx(tmp: string, evidenceFile = "diag-evidence.json"): ProbeContext {
+  const ctx: ProbeContext = {
+    contextDir: tmp,
+    evidencePath: path.join(tmp, evidenceFile),
+    contextEnv: {},
+    sandboxName: null,
+    gatewayUrl: null,
+    repoRoot: REPO_ROOT,
+  };
+  return ctx;
+}
+
+// Writes an executable stub `name` into binDir and prepends binDir to PATH.
+// restore() puts PATH back, deleting it when it was unset beforehand, since
+// assigning undefined to process.env stores the literal string "undefined".
+function installFakeOnPath(binDir: string, name: string, script: string) {
+  fs.mkdirSync(binDir, { recursive: true });
+  fs.writeFileSync(path.join(binDir, name), script, { mode: 0o755 });
+  const oldPath = process.env.PATH;
+  process.env.PATH = `${binDir}:${oldPath ?? ""}`;
+  return {
+    restore: (): void => {
+      if (oldPath === undefined) delete process.env.PATH;
+      else process.env.PATH = oldPath;
+    },
+  };
+}
+
+describe("diagnosticsProbe", () => {
+  it("passes_when_nemoclaw_debug_quick_writes_a_non_empty_archive", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "diag-probe-pass-"));
+    const stub = installFakeOnPath(
+      path.join(workDir, "bin"),
+      "nemoclaw",
+      `#!/usr/bin/env bash
+# Stub: locate the --output value and write a small non-empty archive there.
+out=""
+while [[ "$#" -gt 0 ]]; do
+  case "$1" in
+    --output) out="$2"; shift 2 ;;
+    *) shift ;;
+  esac
+done
+[[ -n "$out" ]] || { echo "no --output" >&2; exit 2; }
+printf 'fake-archive-bytes' > "$out"
+exit 0
+`,
+    );
+    try {
+      const { diagnosticsProbe } = await import("../scenarios/probes/diagnostics.ts");
+      const result = await diagnosticsProbe(makeProbeCtx(workDir));
+      expect(result.status).toBe("passed");
+      expect(result.message).toMatch(/bundle ok/);
+      // The probe must have written evidence JSON that parses.
+      const record = JSON.parse(fs.readFileSync(path.join(workDir, "diag-evidence.json"), "utf8"));
+      expect(record.exitCode).toBe(0);
+      expect(record.archiveSize).toBeGreaterThan(0);
+    } finally {
+      stub.restore();
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_nemoclaw_exits_nonzero", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "diag-probe-fail-"));
+    const stub = installFakeOnPath(
+      path.join(workDir, "bin"),
+      "nemoclaw",
+      `#!/usr/bin/env bash\necho "boom" >&2\nexit 7\n`,
+    );
+    try {
+      const { diagnosticsProbe } = await import("../scenarios/probes/diagnostics.ts");
+      const result = await diagnosticsProbe(makeProbeCtx(workDir));
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/exited 7/);
+      const record = JSON.parse(fs.readFileSync(path.join(workDir, "diag-evidence.json"), "utf8"));
+      expect(record.exitCode).toBe(7);
+      expect(record.stderrTail).toContain("boom");
+    } finally {
+      stub.restore();
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_archive_is_empty", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "diag-probe-empty-"));
+    const stub = installFakeOnPath(
+      path.join(workDir, "bin"),
+      "nemoclaw",
+      `#!/usr/bin/env bash
+out=""
+while [[ "$#" -gt 0 ]]; do
+  case "$1" in --output) out="$2"; shift 2 ;; *) shift ;; esac
+done
+: > "$out"  # zero-byte archive
+exit 0
+`,
+    );
+    try {
+      const { diagnosticsProbe } = await import("../scenarios/probes/diagnostics.ts");
+      const result = await diagnosticsProbe(makeProbeCtx(workDir));
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/empty/);
+    } finally {
+      stub.restore();
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+});
+
+// ─────────────────────────────────────────────────────────────────────────────
+// docsValidationProbe — substitutes a fake check-docs.sh by overriding
+// the repoRoot in the ProbeContext so the resolved path points at a
+// scratch dir we control.
+// ─────────────────────────────────────────────────────────────────────────────
+
+describe("docsValidationProbe", () => {
+  function setupFakeCheckDocs(
+    root: string,
+    cliExit: number,
+    linksExit: number,
+  ): { ctx: ProbeContext } {
+    const stubDir = path.join(root, "test/e2e/e2e-cloud-experimental");
+    fs.mkdirSync(stubDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(stubDir, "check-docs.sh"),
+      `#!/usr/bin/env bash
+case "$1" in
+  --only-cli)            exit ${cliExit} ;;
+  --only-links)          exit ${linksExit} ;;
+  *)                     echo "unknown: $*" >&2; exit 99 ;;
+esac
+`,
+      { mode: 0o755 },
+    );
+    return {
+      ctx: {
+        contextDir: root,
+        evidencePath: path.join(root, "docs-evidence.json"),
+        contextEnv: {},
+        sandboxName: null,
+        gatewayUrl: null,
+        repoRoot: root, // check-docs.sh is resolved relative to this root
+      },
+    };
+  }
+
+  it("passes_when_both_cli_and_links_checks_exit_zero", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "docs-probe-pass-"));
+    try {
+      const { ctx } = setupFakeCheckDocs(workDir, 0, 0);
+      const { docsValidationProbe } = await import("../scenarios/probes/docs-validation.ts");
+      const result = await docsValidationProbe(ctx);
+      expect(result.status).toBe("passed");
+      const record = JSON.parse(fs.readFileSync(ctx.evidencePath, "utf8"));
+      expect(record.results).toHaveLength(2);
+      expect(record.results[0].phase).toBe("cli-parity");
+      expect(record.results[0].exitCode).toBe(0);
+      expect(record.results[1].phase).toBe("links-local");
+      expect(record.results[1].exitCode).toBe(0);
+    } finally {
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_cli_parity_check_exits_nonzero", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "docs-probe-cli-fail-"));
+    try {
+      const { ctx } = setupFakeCheckDocs(workDir, 3, 0);
+      const { docsValidationProbe } = await import("../scenarios/probes/docs-validation.ts");
+      const result = await docsValidationProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/CLI\/docs parity failed.*exit 3/);
+    } finally {
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_links_check_exits_nonzero", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "docs-probe-links-fail-"));
+    try {
+      const { ctx } = setupFakeCheckDocs(workDir, 0, 5);
+      const { docsValidationProbe } = await import("../scenarios/probes/docs-validation.ts");
+      const result = await docsValidationProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/markdown link check failed.*exit 5/);
+    } finally {
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_with_actionable_message_when_check_docs_script_missing", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "docs-probe-missing-"));
+    try {
+      const { docsValidationProbe } = await import("../scenarios/probes/docs-validation.ts");
+      const ctx: ProbeContext = {
+        contextDir: workDir,
+        evidencePath: path.join(workDir, "docs-evidence.json"),
+        contextEnv: {},
+        sandboxName: null,
+        gatewayUrl: null,
+        repoRoot: workDir, // nothing was created under test/e2e/... here
+      };
+      const result = await docsValidationProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/check-docs\.sh not found/);
+    } finally {
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────
+// Security probes — stub `nemoclaw` (host CLI) and `openshell` so the
+// canonical sandbox-exec wrapper resolves through the stub. The
+// wrapper's openshell-fallback path is exercised because the stub
+// does not implement `sandbox ssh-config`.
+// ──────────────────────────────────────────────────────────────────────────
+
+function makeProbeCtxFor(
+  tmp: string,
+  sandboxName: string,
+  contextEnv: Record<string, string> = {},
+): ProbeContext {
+  // Caller-supplied entries win over the default E2E_SANDBOX_NAME. The same
+  // merged map is persisted as KEY=value lines in tmp/context.env (so bash
+  // scripts that source the wrapper can read it) and returned as contextEnv.
+  const env = { E2E_SANDBOX_NAME: sandboxName, ...contextEnv };
+  const body = Object.entries(env).map(([key, value]) => `${key}=${value}\n`);
+  fs.writeFileSync(path.join(tmp, "context.env"), body.join(""));
+  return {
+    contextDir: tmp,
+    evidencePath: path.join(tmp, "probe-evidence.json"),
+    contextEnv: env,
+    sandboxName,
+    gatewayUrl: null,
+    repoRoot: REPO_ROOT,
+  };
+}
+
+describe("shieldsConfigProbe", () => {
+  it("passes_when_shields_status_matches_expected_and_perms_match_state", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "shields-probe-pass-"));
+    const stubBin = path.join(workDir, "bin");
+    fs.mkdirSync(stubBin);
+    fs.writeFileSync(
+      path.join(stubBin, "nemoclaw"),
+      `#!/usr/bin/env bash
+# nemoclaw <sandbox> shields status
+if [[ "$2" == "shields" && "$3" == "status" ]]; then
+  echo "Shields: DOWN"
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    fs.writeFileSync(
+      path.join(stubBin, "openshell"),
+      `#!/usr/bin/env bash
+# Stub openshell. Reject ssh-config so wrapper falls back to sandbox exec.
+# Then implement 'sandbox exec --name <sb> -- <cmd>' by stripping args
+# until '--' and running what's left.
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then
+  exit 1
+fi
+if [[ "$1" == "sandbox" && "$2" == "exec" ]]; then
+  shift 2
+  while [[ "$#" -gt 0 && "$1" != "--" ]]; do shift; done
+  shift || true
+  # The 'stat -c %a %U:%G <path>' invocation: emit a fake permissions
+  # line that matches a DOWN-state sandbox config (sandbox-owned).
+  if [[ "$1" == "stat" ]]; then
+    echo "644 sandbox:sandbox"
+    exit 0
+  fi
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${stubBin}:${priorPath ?? ""}`;
+    try {
+      const { shieldsConfigProbe } = await import("../scenarios/probes/shields-config.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1", {
+        E2E_AGENT: "openclaw",
+        E2E_SHIELDS_EXPECTED_STATE: "down",
+      });
+      const result = await shieldsConfigProbe(ctx);
+      expect(result.status).toBe("passed");
+      expect(result.message).toMatch(/shields=down/);
+      const record = JSON.parse(fs.readFileSync(ctx.evidencePath, "utf8"));
+      expect(record.observed).toBe("down");
+      expect(record.expected).toBe("down");
+      expect(record.permissionsLine).toBe("644 sandbox:sandbox");
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_observed_state_disagrees_with_expected", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "shields-probe-mismatch-"));
+    const stubBin = path.join(workDir, "bin");
+    fs.mkdirSync(stubBin);
+    fs.writeFileSync(
+      path.join(stubBin, "nemoclaw"),
+      `#!/usr/bin/env bash
+if [[ "$2" == "shields" && "$3" == "status" ]]; then
+  echo "Shields: UP"
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${stubBin}:${priorPath ?? ""}`;
+    try {
+      const { shieldsConfigProbe } = await import("../scenarios/probes/shields-config.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1", {
+        E2E_AGENT: "openclaw",
+        E2E_SHIELDS_EXPECTED_STATE: "down",
+      });
+      const result = await shieldsConfigProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/expected shields 'down', observed 'up'/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_perms_dont_match_observed_state", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "shields-probe-perms-"));
+    const stubBin = path.join(workDir, "bin");
+    fs.mkdirSync(stubBin);
+    fs.writeFileSync(
+      path.join(stubBin, "nemoclaw"),
+      `#!/usr/bin/env bash
+if [[ "$2" == "shields" && "$3" == "status" ]]; then
+  # Shields claim UP, but the stub openshell will report sandbox-owned
+  # perms below — a mismatch the probe must catch.
+  echo "Shields: UP"
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    fs.writeFileSync(
+      path.join(stubBin, "openshell"),
+      `#!/usr/bin/env bash
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then exit 1; fi
+if [[ "$1" == "sandbox" && "$2" == "exec" ]]; then
+  shift 2
+  while [[ "$#" -gt 0 && "$1" != "--" ]]; do shift; done
+  shift || true
+  # Sandbox-owned perms: would pass for DOWN, must FAIL for UP.
+  echo "644 sandbox:sandbox"
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${stubBin}:${priorPath ?? ""}`;
+    try {
+      const { shieldsConfigProbe } = await import("../scenarios/probes/shields-config.ts");
+      // No E2E_SHIELDS_EXPECTED_STATE here: the mismatch between the
+      // observed state and the reported perms must be enough to fail.
+      const ctx = makeProbeCtxFor(workDir, "sb1", { E2E_AGENT: "openclaw" });
+      const result = await shieldsConfigProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/shields are 'up' but .* permissions are/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("networkPolicyProbe", () => {
+  function fakeOpenshellEmittingHttpStatus(
+    binDir: string,
+    httpStatus: string,
+    curlExitCode: number = 0,
+  ): void {
+    fs.mkdirSync(binDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(binDir, "openshell"),
+      `#!/usr/bin/env bash
+# Opt out of ssh-config; force wrapper to use 'sandbox exec' fallback.
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then exit 1; fi
+if [[ "$1" == "sandbox" && "$2" == "exec" ]]; then
+  shift 2
+  while [[ "$#" -gt 0 && "$1" != "--" ]]; do shift; done
+  shift || true
+  # We're being asked to run curl inside the sandbox. Emit the test's
+  # chosen status to stdout (mirrors curl -w '%{http_code}') and exit
+  # with the test's chosen curl exit code.
+  printf '%s' "${httpStatus}"
+  exit ${curlExitCode}
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+  }
+
+  it("passes_when_blocked_url_returns_403", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "netpolicy-probe-403-"));
+    fakeOpenshellEmittingHttpStatus(path.join(workDir, "bin"), "403", 0);
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${path.join(workDir, "bin")}:${priorPath ?? ""}`;
+    try {
+      const { networkPolicyProbe } = await import("../scenarios/probes/network-policy.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await networkPolicyProbe(ctx);
+      expect(result.status).toBe("passed");
+      expect(result.message).toMatch(/blocked .*http_code=403/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("passes_when_curl_exits_nonzero_and_no_http_response", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "netpolicy-probe-conn-"));
+    // Exit code 7 is curl's "couldn't connect"; '000' means no HTTP reply.
+    fakeOpenshellEmittingHttpStatus(path.join(workDir, "bin"), "000", 7);
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${path.join(workDir, "bin")}:${priorPath ?? ""}`;
+    try {
+      const { networkPolicyProbe } = await import("../scenarios/probes/network-policy.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await networkPolicyProbe(ctx);
+      expect(result.status).toBe("passed");
+      expect(result.message).toMatch(/curl exit 7/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_blocked_url_returns_200", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "netpolicy-probe-200-"));
+    fakeOpenshellEmittingHttpStatus(path.join(workDir, "bin"), "200", 0);
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${path.join(workDir, "bin")}:${priorPath ?? ""}`;
+    try {
+      const { networkPolicyProbe } = await import("../scenarios/probes/network-policy.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await networkPolicyProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/reachable from sandbox.*http_code=200/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_blocked_url_returns_401_indicating_policy_bypass", async () => {
+    // A 401 comes from upstream auth, i.e. the request got through rather
+    // than being dropped by the gateway, so it counts as a policy bypass.
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "netpolicy-probe-401-"));
+    fakeOpenshellEmittingHttpStatus(path.join(workDir, "bin"), "401", 0);
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${path.join(workDir, "bin")}:${priorPath ?? ""}`;
+    try {
+      const { networkPolicyProbe } = await import("../scenarios/probes/network-policy.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await networkPolicyProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/reachable from sandbox.*http_code=401/);
+    } finally {
+      process.env.PATH = priorPath;
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("injectionBlockedProbe", () => {
+  // Each test gets its own tmp dir and its own stub openshell standing in
+  // for the sandbox shell: the cleanup command is a no-op, the echo command
+  // copies stdin back, and the marker check answers SAFE or EXPLOITED
+  // according to the `exploited` flag the test chose.
+  function setupInjectionStub(tmp: string, exploited: boolean): { restore: () => void } {
+    const binDir = path.join(tmp, "bin");
+    fs.mkdirSync(binDir, { recursive: true });
+    // The flag reaches the stub through a state file under tmp; the stub
+    // reads it on each call to decide whether the marker "exists".
+    const stateFile = path.join(tmp, "exploit.state");
+    fs.writeFileSync(stateFile, exploited ? "yes" : "no");
+    fs.writeFileSync(
+      path.join(binDir, "openshell"),
+      `#!/usr/bin/env bash
+state=\$(cat "${stateFile}" 2>/dev/null || echo no)
+if [[ "\$1" == "sandbox" && "\$2" == "ssh-config" ]]; then exit 1; fi
+if [[ "\$1" == "sandbox" && "\$2" == "exec" ]]; then
+  shift 2
+  while [[ "\$#" -gt 0 && "\$1" != "--" ]]; do shift; done
+  shift || true
+  # Recognize the three operations the probe issues:
+  #   1. sh -c 'rm -f <marker>'              — cleanup; always succeeds
+  #   2. sh -c 'MSG=\$(cat); printf %s\\n "\$MSG"'  — echo back stdin
+  #   3. sh -c 'test -f <marker> && echo EXPLOITED || echo SAFE'
+  cmd="\$*"
+  case "\$cmd" in
+    *"MSG="*"printf"*)
+      cat
+      ;;
+    *"test -f"*"EXPLOITED"*"SAFE"*)
+      if [[ "\$state" == "yes" ]]; then echo EXPLOITED; else echo SAFE; fi
+      ;;
+    *"rm -f"*)
+      :
+      ;;
+    *)
+      echo "unrecognized cmd: \$cmd" >&2
+      exit 64
+      ;;
+  esac
+  exit 0
+fi
+exit 99
+`,
+      { mode: 0o755 },
+    );
+    const priorPath = process.env.PATH;
+    process.env.PATH = `${binDir}:${priorPath ?? ""}`;
+    return {
+      restore: () => {
+        process.env.PATH = priorPath;
+      },
+    };
+  }
+
+  it("passes_when_payload_is_preserved_and_marker_absent", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "inj-probe-pass-"));
+    const stub = setupInjectionStub(workDir, false);
+    try {
+      const { injectionBlockedProbe } = await import("../scenarios/probes/injection-blocked.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await injectionBlockedProbe(ctx);
+      expect(result.status).toBe("passed");
+      const record = JSON.parse(fs.readFileSync(ctx.evidencePath, "utf8"));
+      expect(record.payloadPreservedLiterally).toBe(true);
+      expect(record.markerAbsent).toBe(true);
+    } finally {
+      stub.restore();
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+
+  it("fails_when_marker_file_was_created_indicating_command_substitution_executed", async () => {
+    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "inj-probe-fail-"));
+    const stub = setupInjectionStub(workDir, true);
+    try {
+      const { injectionBlockedProbe } = await import("../scenarios/probes/injection-blocked.ts");
+      const ctx = makeProbeCtxFor(workDir, "sb1");
+      const result = await injectionBlockedProbe(ctx);
+      expect(result.status).toBe("failed");
+      expect(result.message).toMatch(/marker file .* present/);
+      expect(result.message).toMatch(/command substitution executed/);
+      const record = JSON.parse(fs.readFileSync(ctx.evidencePath, "utf8"));
+      expect(record.markerAbsent).toBe(false);
+    } finally {
+      stub.restore();
+      fs.rmSync(workDir, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts b/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts
new file mode 100644
index 0000000000..aab3b00f98
--- /dev/null
+++ b/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts
@@ -0,0 +1,53 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Parity test: the framework's local secret-pattern set
+ * (test/e2e-scenario/scenarios/orchestrators/redaction.ts) must stay in
+ * lockstep with the canonical product source
+ * (src/lib/security/secret-patterns.ts).
+ *
+ * The framework deliberately mirrors rather than imports — see the
+ * "Framework-local mirror" comment in redaction.ts for why — but the
+ * mirror is only safe if it is actually a mirror. This test imports
+ * the RegExp arrays from both modules and compares them by behavior
+ * (`.source` + `.flags`) rather than by source-text shape, so the
+ * source-shape budget (ci/source-shape-test-budget.json) stays at 0.
+ *
+ * The framework-runtime decoupling is preserved: redaction.ts itself
+ * does not import from src/lib/security/. Only this test crosses the
+ * boundary, which is the entire point of a parity test.
+ */
+
+import { describe, expect, it } from "vitest";
+
+import {
+  CONTEXT_PATTERNS as FRAMEWORK_CONTEXT_PATTERNS,
+  TOKEN_PREFIX_PATTERNS as FRAMEWORK_TOKEN_PREFIX_PATTERNS,
+} from "../scenarios/orchestrators/redaction.ts";
+import {
+  CONTEXT_PATTERNS as PRODUCT_CONTEXT_PATTERNS,
+  TOKEN_PREFIX_PATTERNS as PRODUCT_TOKEN_PREFIX_PATTERNS,
+} from "../../../src/lib/security/secret-patterns.ts";
+
+function fingerprint(patterns: readonly RegExp[]): string[] {
+  return patterns.map(({ source, flags }) => [source, flags].join("::"));
+}
+
+describe("framework redaction parity with product source-of-truth", () => {
+  it("framework TOKEN_PREFIX_PATTERNS matches product TOKEN_PREFIX_PATTERNS", () => {
+    const mirrored = fingerprint(FRAMEWORK_TOKEN_PREFIX_PATTERNS);
+    const canonical = fingerprint(PRODUCT_TOKEN_PREFIX_PATTERNS);
+    expect(mirrored.length).toBeGreaterThan(0);
+    expect(canonical.length).toBeGreaterThan(0);
+    expect(mirrored).toEqual(canonical);
+  });
+
+  it("framework CONTEXT_PATTERNS matches product CONTEXT_PATTERNS", () => {
+    const mirrored = fingerprint(FRAMEWORK_CONTEXT_PATTERNS);
+    const canonical = fingerprint(PRODUCT_CONTEXT_PATTERNS);
+    expect(mirrored.length).toBeGreaterThan(0);
+    expect(canonical.length).toBeGreaterThan(0);
+    expect(mirrored).toEqual(canonical);
+  });
+});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts
deleted file mode 100644
index 8c2e70caae..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts
+++ /dev/null
@@ -1,174 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Phase 9: Migrate Additional Scenario Families.
- * Verifies metadata for new scenarios (macOS, WSL, GPU local Ollama, Brev
- * launchable, Ubuntu cloud Hermes, and the no-docker negative preflight)
- * plus the deferred schema concepts (scenario-level overrides, negative
- * expected state).
- */
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
-import { resolveScenario } from "../runtime/resolver/plan.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-const RUN_SCENARIO = path.join(E2E_DIR, "runtime", "run-scenario.sh");
-
-function planOnly(scenarioId: string): { stdout: string; stderr: string; status: number | null; plan: Record<string, unknown> } {
-  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-p9-"));
-  try {
-    const r = spawnSync("bash", [RUN_SCENARIO, scenarioId, "--plan-only"], {
-      env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-      encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-      cwd: REPO_ROOT,
-    });
-    let plan = {};
-    const pj = path.join(tmp, "plan.json");
-    if (fs.existsSync(pj)) {
-      plan = JSON.parse(fs.readFileSync(pj, "utf8"));
-    }
-    return { stdout: r.stdout, stderr: r.stderr, status: r.status, plan };
-  } finally {
-    fs.rmSync(tmp, { recursive: true, force: true });
-  }
-}
-
-describe("Issue 3812: inference/provider suite families", () => {
-  it("test_should_route_inference_suite_families_to_domain_specific_steps", () => {
-    const { suites } = loadMetadataFromDir(E2E_DIR);
-    for (const family of ["inference-routing", "inference-switch", "kimi-compatibility", "ollama-auth-proxy", "model-router"]) {
-      const scripts = suites.suites[family]?.steps?.map((step) => step.script ?? "") ?? [];
-      expect(scripts.length, family).toBeGreaterThan(0);
-      expect(scripts.every((script) => script.startsWith("inference/")), family).toBe(true);
-      expect(scripts.some((script) => !script.startsWith("inference/cloud/")), family).toBe(true);
-    }
-  });
-});
-
-describe("Phase 9: additional scenario families - metadata", () => {
-  it("resolver should resolve all new scenarios", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const ids = [
-      "macos-repo-cloud-openclaw",
-      "wsl-repo-cloud-openclaw",
-      "gpu-repo-local-ollama-openclaw",
-      "brev-launchable-cloud-openclaw",
-      "ubuntu-repo-cloud-hermes",
-      "ubuntu-no-docker-preflight-negative",
-    ];
-    for (const id of ids) {
-      const plan = resolveScenario(id, meta);
-      expect(plan.scenario_id).toBe(id);
-      expect(plan.expected_state.id).toBeTypeOf("string");
-      expect(Array.isArray(plan.suites)).toBe(true);
-    }
-  });
-});
-
-describe("Phase 9: macOS / WSL plan-only", () => {
-  it("macos scenario plan identifies macOS platform", () => {
-    const { status, plan } = planOnly("macos-repo-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions;
-    expect(dims.platform.profile.os).toBe("macos");
-  });
-
-  it("wsl scenario plan identifies WSL platform", () => {
-    const { status, plan } = planOnly("wsl-repo-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions;
-    expect(dims.platform.profile.os).toBe("wsl");
-  });
-});
-
-describe("Phase 9: GPU local Ollama plan-only", () => {
-  it("runtime indicates GPU/CDI and provider is ollama", () => {
-    const { status, plan } = planOnly("gpu-repo-local-ollama-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as {
-      dimensions: {
-        runtime: { profile: { gpu_runtime?: string } };
-        onboarding: { profile: { provider?: string } };
-      };
-    }).dimensions;
-    expect(dims.runtime.profile.gpu_runtime).toBe("cdi");
-    expect(dims.onboarding.profile.provider).toBe("ollama");
-  });
-});
-
-describe("Phase 9: Brev launchable scenario (overrides schema)", () => {
-  it("should_support_scenario_overrides_on_brev_launchable", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const plan = resolveScenario("brev-launchable-cloud-openclaw", meta);
-    expect(plan.overrides).toBeTruthy();
-    const overrides = plan.overrides as {
-      onboarding?: { gateway?: { bind_address?: string } };
-    };
-    expect(overrides?.onboarding?.gateway?.bind_address).toBeTypeOf("string");
-    expect(overrides?.onboarding?.gateway?.bind_address?.length).toBeGreaterThan(0);
-  });
-
-  it("plan shows remote target, launchable install, and gateway bind override", () => {
-    const { status, stdout, plan } = planOnly("brev-launchable-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as {
-      dimensions: {
-        platform: { profile: { execution_target?: string } };
-        install: { id: string };
-      };
-    }).dimensions;
-    expect(dims.platform.profile.execution_target).toBe("remote");
-    expect(dims.install.id).toBe("launchable");
-    expect(stdout).toMatch(/Overrides:/);
-    expect(stdout).toMatch(/bind_address/);
-  });
-});
-
-describe("Phase 9: negative preflight", () => {
-  it("should_define_preflight_failure_no_sandbox_state", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const es = meta.expectedStates.expected_states["preflight-failure-no-sandbox"] as
-      | {
-          gateway?: { expected?: string };
-          sandbox?: { expected?: string };
-          failure?: { expected?: boolean };
-        }
-      | undefined;
-    expect(es, "preflight-failure-no-sandbox should be defined").toBeTruthy();
-    expect(es?.gateway?.expected).toBe("absent");
-    expect(es?.sandbox?.expected).toBe("absent");
-    expect(es?.failure?.expected).toBe(true);
-  });
-
-  it("negative scenario plan identifies docker missing and negative state", () => {
-    const { status, plan } = planOnly("ubuntu-no-docker-preflight-negative");
-    expect(status).toBe(0);
-    const p = plan as {
-      dimensions: { runtime: { profile: { container_daemon?: string } } };
-      expected_state: { id: string };
-      expected_failure?: {
-        phase?: string;
-        error_class?: string;
-        message_pattern?: string;
-        forbidden_side_effects?: string[];
-      };
-    };
-    expect(p.dimensions.runtime.profile.container_daemon).toBe("missing");
-    expect(p.expected_state.id).toBe("preflight-failure-no-sandbox");
-    expect(p.expected_failure?.phase).toBe("preflight");
-    expect(p.expected_failure?.error_class).toBe("docker-missing");
-    expect(p.expected_failure?.message_pattern).toBeTypeOf("string");
-    expect(p.expected_failure?.forbidden_side_effects).toEqual(
-      expect.arrayContaining(["sandbox-created", "gateway-started", "credentials-written"]),
-    );
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts
deleted file mode 100644
index 0307ca9103..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Phase 6: Migrate First Scenario - ubuntu-repo-cloud-openclaw.
- * Verifies resolver output, plan printout, and dry-run phase ordering.
- */
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
-import { resolveScenario } from "../runtime/resolver/plan.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-const RUN_SCENARIO = path.join(E2E_DIR, "runtime", "run-scenario.sh");
-
-describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => {
-  it("ubuntu_repo_cloud_openclaw_should_resolve_to_cloud_openclaw_ready", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const plan = resolveScenario("ubuntu-repo-cloud-openclaw", meta);
-    expect(plan.expected_state.id).toBe("cloud-openclaw-ready");
-    const suiteIds = plan.suites.map((s) => s.id);
-    expect(suiteIds).toContain("smoke");
-    expect(suiteIds).toContain("inference");
-  });
-
-  it("ubuntu_repo_cloud_openclaw_plan_should_include_setup_install_onboard", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-"));
-    try {
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--plan-only"],
-        { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout).toMatch(/install=repo-current/);
-      expect(r.stdout).toMatch(/runtime=docker-running/);
-      expect(r.stdout).toMatch(/onboarding=cloud-openclaw/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("ubuntu_repo_cloud_openclaw_dry_run_should_execute_phases_in_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_TRACE_FILE: trace },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(fs.existsSync(trace)).toBe(true);
-      const contents = fs.readFileSync(trace, "utf8");
-      const order = [
-        "env:noninteractive",
-        "install:repo-current",
-        "onboard:cloud-openclaw",
-        "gateway:check",
-        "sandbox:check",
-      ];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = contents.indexOf(marker, pos);
-        expect(idx, `missing marker ${marker}. trace:\n${contents}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-      // The run should also seed the context and produce plan.json.
-      expect(fs.existsSync(path.join(tmp, "context.env"))).toBe(true);
-      expect(fs.existsSync(path.join(tmp, "plan.json"))).toBe(true);
-      // After dry-run, suite runner should be able to execute the full
-      // suite sequence against the emitted context.
-      const suites = spawnSync(
-        "bash",
-        [path.join(E2E_DIR, "runtime", "run-suites.sh"), "smoke", "inference"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(suites.status, `suite stderr:${suites.stderr}\nstdout:${suites.stdout}`).toBe(0);
-      expect(suites.stdout).toMatch(/PASS smoke\/cli-available/);
-      expect(suites.stdout).toMatch(/PASS inference\/models-health/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts
deleted file mode 100644
index 31965cffcb..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts
+++ /dev/null
@@ -1,275 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-import yaml from "js-yaml";
-
-import { resolveScenario, type ResolverInput } from "../runtime/resolver/plan.ts";
-import { loadMetadataFromDir, loadMetadataFromObjects } from "../runtime/resolver/load.ts";
-import { listScenarios } from "../scenarios/registry.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-
-function realMetadata(): ResolverInput {
-  return loadMetadataFromDir(E2E_DIR);
-}
-
-describe("E2E scenario resolver", () => {
-  it("should_resolve_valid_scenario", () => {
-    const meta = realMetadata();
-    const plan = resolveScenario("ubuntu-repo-cloud-openclaw", meta);
-    expect(plan.scenario_id).toBe("ubuntu-repo-cloud-openclaw");
-    expect(plan.dimensions.platform.id).toBe("ubuntu-local");
-    expect(plan.dimensions.install.id).toBe("repo-current");
-    expect(plan.dimensions.runtime.id).toBe("docker-running");
-    expect(plan.dimensions.onboarding.id).toBe("cloud-openclaw");
-    expect(plan.expected_state.id).toBe("cloud-openclaw-ready");
-    const suiteIds = plan.suites.map((s) => s.id);
-    expect(suiteIds).toEqual(["smoke", "inference", "credentials"]);
-    // each suite should carry its ordered steps with resolved scripts
-    expect(plan.suites[0].steps.length).toBeGreaterThan(0);
-    for (const s of plan.suites) {
-      for (const step of s.steps) {
-        expect(step.id).toBeTypeOf("string");
-        expect(step.script).toMatch(/\.sh$/);
-      }
-    }
-  });
-
-  it("should_resolve_onboard_negative_path_migration_scenarios", () => {
-    const meta = realMetadata();
-    const custom = resolveScenario("ubuntu-repo-cloud-openclaw-custom-policies", meta);
-    expect(custom.dimensions.onboarding.id).toBe("cloud-openclaw-custom-policies");
-    expect(custom.expected_state.id).toBe("cloud-openclaw-custom-policies-ready");
-    expect(custom.suites.map((s) => s.id)).toContain("onboarding-state");
-
-    const invalidKey = resolveScenario("ubuntu-invalid-nvidia-key-negative", meta);
-    expect(invalidKey.expected_state.config.failure).toMatchObject({
-      expected: true,
-      stage: "onboarding",
-      reason: "invalid-nvidia-api-key",
-      exit_code: 1,
-      no_stack_trace: true,
-    });
-
-    const portConflict = resolveScenario("ubuntu-gateway-port-conflict-negative", meta);
-    expect(portConflict.expected_state.config.failure).toMatchObject({
-      expected: true,
-      stage: "onboarding",
-      reason: "gateway-port-conflict",
-      exit_code: 1,
-      no_stack_trace: true,
-    });
-  });
-
-  it("should_resolve_every_typed_scenario_id_through_yaml_setup_scenarios", () => {
-    const meta = realMetadata();
-    const failures = listScenarios().flatMap((scenario) => {
-      try {
-        resolveScenario(scenario.id, meta);
-        return [];
-      } catch (error) {
-        return [`${scenario.id}: ${(error as Error).message}`];
-      }
-    });
-
-    expect(failures, failures.join("\n")).toEqual([]);
-  });
-
-  it("should_fail_for_unknown_scenario", () => {
-    const meta = realMetadata();
-    expect(() => resolveScenario("does-not-exist", meta)).toThrow(/does-not-exist/);
-  });
-
-  it("should_fail_for_missing_profile_reference", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: yaml.load(`
-platforms:
-  ubuntu-local: { os: ubuntu }
-installs:
-  repo-current: { method: repo-checkout }
-runtimes:
-  docker-running: { container_engine: docker }
-onboarding:
-  cloud-openclaw: { path: cloud, agent: openclaw, provider: nvidia }
-setup_scenarios:
-  broken:
-    dimensions:
-      platform: missing-platform
-      install: repo-current
-      runtime: docker-running
-      onboarding: cloud-openclaw
-    expected_state: some-state
-    suites: [smoke]
-`) as object,
-      expectedStates: yaml.load(`
-expected_states:
-  some-state:
-    gateway: { health: healthy }
-    sandbox: { status: running }
-`) as object,
-      suites: yaml.load(`
-suites:
-  smoke:
-    requires_state:
-      gateway.health: healthy
-      sandbox.status: running
-    steps:
-      - { id: step, script: suites/smoke/step.sh }
-`) as object,
-    });
-    expect(() => resolveScenario("broken", meta)).toThrow(/platform.*missing-platform/);
-  });
-
-  it("should_fail_for_missing_expected_state_reference", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: yaml.load(`
-platforms: { p: {} }
-installs: { i: {} }
-runtimes: { r: {} }
-onboarding: { o: { agent: openclaw, provider: nvidia } }
-setup_scenarios:
-  s:
-    dimensions: { platform: p, install: i, runtime: r, onboarding: o }
-    expected_state: ghost
-    suites: [smoke]
-`) as object,
-      expectedStates: yaml.load(`
-expected_states:
-  real: { gateway: { health: healthy } }
-`) as object,
-      suites: yaml.load(`
-suites:
-  smoke:
-    steps:
-      - { id: step, script: suites/smoke/step.sh }
-`) as object,
-    });
-    expect(() => resolveScenario("s", meta)).toThrow(/expected_state.*ghost/);
-  });
-
-  it("should_fail_for_missing_suite_reference", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: yaml.load(`
-platforms: { p: {} }
-installs: { i: {} }
-runtimes: { r: {} }
-onboarding: { o: { agent: openclaw, provider: nvidia } }
-setup_scenarios:
-  s:
-    dimensions: { platform: p, install: i, runtime: r, onboarding: o }
-    expected_state: real
-    suites: [smoke, phantom]
-`) as object,
-      expectedStates: yaml.load(`
-expected_states:
-  real: { gateway: { health: healthy } }
-`) as object,
-      suites: yaml.load(`
-suites:
-  smoke:
-    steps:
-      - { id: step, script: suites/smoke/step.sh }
-`) as object,
-    });
-    expect(() => resolveScenario("s", meta)).toThrow(/suite.*phantom/);
-  });
-
-  it("should_fail_when_suite_requires_state_incompatible_with_scenario_expected_state", () => {
-    const meta = loadMetadataFromObjects({
-      scenarios: yaml.load(`
-platforms: { p: {} }
-installs: { i: {} }
-runtimes: { r: {} }
-onboarding: { o: { agent: openclaw, provider: nvidia } }
-setup_scenarios:
-  s:
-    dimensions: { platform: p, install: i, runtime: r, onboarding: o }
-    expected_state: gw-unhealthy
-    suites: [smoke]
-`) as object,
-      expectedStates: yaml.load(`
-expected_states:
-  gw-unhealthy:
-    gateway: { health: unhealthy }
-    sandbox: { status: running }
-`) as object,
-      suites: yaml.load(`
-suites:
-  smoke:
-    requires_state:
-      gateway.health: healthy
-    steps:
-      - { id: step, script: suites/smoke/step.sh }
-`) as object,
-    });
-    expect(() => resolveScenario("s", meta)).toThrow(
-      /smoke.*gateway\.health.*healthy.*unhealthy/s,
-    );
-  });
-});
-
-describe("run-scenario.sh --plan-only", () => {
-  it("run_scenario_plan_only_should_print_plan", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-"));
-    try {
-      const result = spawnSync(
-        "bash",
-        [
-          path.join(E2E_DIR, "runtime", "run-scenario.sh"),
-          "ubuntu-repo-cloud-openclaw",
-          "--plan-only",
-        ],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(result.status, result.stderr).toBe(0);
-      expect(result.stdout).toContain("ubuntu-repo-cloud-openclaw");
-      expect(result.stdout).toContain("cloud-openclaw-ready");
-      expect(result.stdout).toContain("smoke");
-      expect(result.stdout).toContain("inference");
-      const planJsonPath = path.join(tmp, "plan.json");
-      expect(fs.existsSync(planJsonPath)).toBe(true);
-      const doc = JSON.parse(fs.readFileSync(planJsonPath, "utf8"));
-      expect(doc.scenario_id).toBe("ubuntu-repo-cloud-openclaw");
-      expect(doc.expected_state.id).toBe("cloud-openclaw-ready");
-      expect(Array.isArray(doc.suites)).toBe(true);
-      expect(doc.suites.map((s: { id: string }) => s.id)).toContain("smoke");
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_scenario_plan_only_should_fail_for_unknown_scenario", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-"));
-    try {
-      const result = spawnSync(
-        "bash",
-        [
-          path.join(E2E_DIR, "runtime", "run-scenario.sh"),
-          "does-not-exist",
-          "--plan-only",
-        ],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(result.status).not.toBe(0);
-      expect(`${result.stderr}${result.stdout}`).toMatch(/does-not-exist/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-schema.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-schema.test.ts
deleted file mode 100644
index b9768cf2dd..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-scenario-schema.test.ts
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-import yaml from "js-yaml";
-
-import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
-
-const E2E_DIR = path.resolve(import.meta.dirname, "..");
-const SCENARIOS_PATH = path.join(E2E_DIR, "nemoclaw_scenarios", "scenarios.yaml");
-const STATES_PATH = path.join(E2E_DIR, "nemoclaw_scenarios", "expected-states.yaml");
-const SUITES_PATH = path.join(E2E_DIR, "validation_suites", "suites.yaml");
-
-type AnyRecord = Record<string, unknown>;
-
-function loadYaml(p: string): AnyRecord {
-  const raw = fs.readFileSync(p, "utf8");
-  const doc = yaml.load(raw);
-  if (!doc || typeof doc !== "object") {
-    throw new Error(`YAML file ${p} did not parse to an object`);
-  }
-  return doc as AnyRecord;
-}
-
-describe("E2E scenario metadata schema", () => {
-  it("should_parse_all_metadata_files", () => {
-    expect(fs.existsSync(SCENARIOS_PATH)).toBe(true);
-    expect(fs.existsSync(STATES_PATH)).toBe(true);
-    expect(fs.existsSync(SUITES_PATH)).toBe(true);
-    expect(() => loadYaml(SCENARIOS_PATH)).not.toThrow();
-    expect(() => loadYaml(STATES_PATH)).not.toThrow();
-    expect(() => loadYaml(SUITES_PATH)).not.toThrow();
-  });
-
-  it("should_have_required_top_level_sections", () => {
-    const scenarios = loadYaml(SCENARIOS_PATH);
-    expect(scenarios).toHaveProperty("platforms");
-    expect(scenarios).toHaveProperty("installs");
-    expect(scenarios).toHaveProperty("runtimes");
-    expect(scenarios).toHaveProperty("onboarding");
-    expect(scenarios).toHaveProperty("setup_scenarios");
-
-    const states = loadYaml(STATES_PATH);
-    expect(states).toHaveProperty("expected_states");
-
-    const suites = loadYaml(SUITES_PATH);
-    expect(suites).toHaveProperty("suites");
-  });
-
-  it("should_define_initial_required_scenarios", () => {
-    const scenarios = loadYaml(SCENARIOS_PATH);
-    const setup = scenarios.setup_scenarios as AnyRecord;
-    expect(setup).toBeTypeOf("object");
-    expect(setup).toHaveProperty("ubuntu-repo-cloud-openclaw");
-    expect(setup).toHaveProperty("ubuntu-repo-cloud-hermes");
-    expect(setup).toHaveProperty("gpu-repo-local-ollama-openclaw");
-  });
-
-  it("should_use_singular_expected_state_field", () => {
-    const scenarios = loadYaml(SCENARIOS_PATH);
-    const setup = scenarios.setup_scenarios as AnyRecord;
-    for (const [id, entry] of Object.entries(setup)) {
-      const s = entry as AnyRecord;
-      expect(s, `scenario ${id} missing expected_state`).toHaveProperty("expected_state");
-      expect(typeof s.expected_state, `scenario ${id}.expected_state must be a string`).toBe(
-        "string",
-      );
-      expect(
-        (s as AnyRecord).expected_states,
-        `scenario ${id} must not have array-style expected_states`,
-      ).toBeUndefined();
-    }
-  });
-
-  it("should_define_initial_expected_states", () => {
-    const states = loadYaml(STATES_PATH);
-    const es = states.expected_states as AnyRecord;
-    // Initial three states must exist; Phase 9 adds additional states
-    // (e.g. preflight-failure-no-sandbox) alongside their first consumer.
-    for (const id of [
-      "cloud-openclaw-ready",
-      "cloud-hermes-ready",
-      "local-ollama-openclaw-ready",
-    ]) {
-      expect(es, `expected state ${id} should be defined`).toHaveProperty(id);
-    }
-  });
-
-  it("should_define_initial_suites", () => {
-    const suites = loadYaml(SUITES_PATH);
-    const s = suites.suites as AnyRecord;
-    for (const id of [
-      "smoke",
-      "inference",
-      "credentials",
-      "local-ollama-inference",
-      "ollama-proxy",
-    ]) {
-      expect(s, `suite ${id} should be defined`).toHaveProperty(id);
-    }
-  });
-
-  it("platform_specific_scenarios_should_declare_runner_requirements", () => {
-    const scenarios = loadYaml(SCENARIOS_PATH);
-    const setup = scenarios.setup_scenarios as Record<string, AnyRecord>;
-    for (const id of [
-      "macos-repo-cloud-openclaw",
-      "wsl-repo-cloud-openclaw",
-      "gpu-repo-local-ollama-openclaw",
-      "brev-launchable-cloud-openclaw",
-    ]) {
-      expect(setup[id]?.runner_requirements, `${id} missing runner requirements`).toEqual(
-        expect.arrayContaining([expect.any(String)]),
-      );
-    }
-  });
-
-  it("should_reject_platform_specific_fixture_without_runner_requirements", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-schema-runner-"));
-    try {
-      fs.writeFileSync(
-        path.join(tmp, "scenarios.yaml"),
-        `
-platforms:
-  brev-launchable:
-    os: ubuntu
-    execution_target: remote
-installs:
-  launchable: {}
-runtimes:
-  docker-running: {}
-onboarding:
-  cloud-openclaw:
-    agent: openclaw
-setup_scenarios:
-  bad-brev:
-    dimensions:
-      platform: brev-launchable
-      install: launchable
-      runtime: docker-running
-      onboarding: cloud-openclaw
-    expected_state: ready
-    suites: [smoke]
-`,
-      );
-      fs.writeFileSync(tmp + "/expected-states.yaml", "expected_states:\n  ready: {}\n");
-      fs.writeFileSync(tmp + "/suites.yaml", "suites:\n  smoke:\n    steps: []\n");
-      expect(() => loadMetadataFromDir(tmp)).toThrow(/runner_requirements|bad-brev/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
index 604ec1c033..106d46b339 100644
--- a/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
@@ -95,8 +95,9 @@ jobs:
           "run-scenario job must use the resolved runner output",
           "run-scenario job missing step: Run typed scenarios in WSL",
           "artifact upload name must include the scenarios input",
-          "artifact upload must include hidden .e2e files",
-          "artifact upload path must include .e2e/",
+          "artifact upload must set include-hidden-files: false (raw context.env must not leak)",
+          "artifact upload path must include .e2e/actions/ (redacted action evidence)",
+          "artifact upload path must include .e2e/logs/ (redacted shell-step evidence)",
         ]),
       );
     } finally {
diff --git a/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts b/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts
deleted file mode 100644
index ded16c1917..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import { spawnSync, type SpawnSyncReturns } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const RUN_SUITES = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-suites.sh");
-
-function runSuites(args: string[], env: Record<string, string> = {}): SpawnSyncReturns<string> {
-  return spawnSync("bash", [RUN_SUITES, ...args], {
-    env: { ...process.env, ...env },
-    encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-    cwd: REPO_ROOT,
-  });
-}
-
-function seedContext(tmp: string, values: Record<string, string>): void {
-  fs.mkdirSync(tmp, { recursive: true });
-  const ctx = Object.entries(values)
-    .map(([k, v]) => `${k}=${v}`)
-    .join("\n");
-  fs.writeFileSync(path.join(tmp, "context.env"), `${ctx}\n`);
-}
-
-function fullContext(): Record<string, string> {
-  return {
-    E2E_SCENARIO: "ubuntu-repo-cloud-openclaw",
-    E2E_PLATFORM_OS: "ubuntu",
-    E2E_EXECUTION_TARGET: "local",
-    E2E_INSTALL_METHOD: "repo-checkout",
-    E2E_CONTAINER_ENGINE: "docker",
-    E2E_CONTAINER_DAEMON: "running",
-    E2E_ONBOARDING_PATH: "cloud",
-    E2E_AGENT: "openclaw",
-    E2E_PROVIDER: "nvidia",
-    E2E_SANDBOX_NAME: "e2e-ubuntu-repo-cloud-openclaw",
-    E2E_GATEWAY_URL: "http://127.0.0.1:18789",
-    E2E_INFERENCE_ROUTE: "inference-local",
-  };
-}
-
-describe("Issue #3810 messaging suite wiring", () => {
-  it("should_define_real_steps_for_messaging_provider_suites", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-messaging-suites-"));
-    try {
-      const baseContext = {
-        ...fullContext(),
-        E2E_PROVIDER: "telegram",
-        E2E_MESSAGING_PROVIDER: "telegram",
-        E2E_MESSAGING_BRIDGE_URL: "http://127.0.0.1:18789",
-        E2E_MESSAGING_CONFIG_CONTENT: "TELEGRAM_BOT_TOKEN=PLACEHOLDER",
-      };
-      seedContext(tmp, baseContext);
-      const telegram = runSuites(["messaging-telegram"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(telegram.status, `stderr:${telegram.stderr}\nstdout:${telegram.stdout}`).toBe(0);
-      seedContext(tmp, {
-        ...baseContext,
-        E2E_MESSAGING_PROVIDER: "discord",
-        E2E_MESSAGING_CONFIG_CONTENT: "DISCORD_BOT_TOKEN=PLACEHOLDER",
-      });
-      const discord = runSuites(["messaging-discord"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(discord.status, `stderr:${discord.stderr}\nstdout:${discord.stdout}`).toBe(0);
-      seedContext(tmp, {
-        ...baseContext,
-        E2E_MESSAGING_PROVIDER: "slack",
-        E2E_MESSAGING_CHANNEL: "bot",
-        E2E_MESSAGING_CONFIG_CONTENT: "SLACK_BOT_TOKEN=PLACEHOLDER",
-      });
-      const slack = runSuites(["messaging-slack"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(slack.status, `stderr:${slack.stderr}\nstdout:${slack.stdout}`).toBe(0);
-      const output = `${telegram.stdout}\n${discord.stdout}\n${slack.stdout}`;
-      for (const id of [
-        "messaging-provider-attached",
-        "messaging-placeholder-configured",
-        "messaging-no-secret-leak",
-        "messaging-bridge-reachable",
-        "telegram-injection-safety",
-        "discord-gateway-path",
-        "slack-provider-state",
-        "slack.runtime-discovery",
-      ]) {
-        expect(output).toContain(id);
-      }
-      expect(output).not.toContain("cli-available");
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
-
-describe("run-suites.sh", () => {
-  it("security_credentials_suite_should_emit_stable_assertion_ids", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-security-credentials-"));
-    try {
-      seedContext(tmp, { ...fullContext(), E2E_CREDENTIALS_EXPECTED: "present" });
-      const r = runSuites(["security-credentials"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1", HOME: tmp });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      expect(r.stdout).toContain("post-onboard.credentials.gateway-list-redacts-values");
-      expect(r.stdout).toContain("post-onboard.credentials.no-plaintext-host-store");
-      expect(r.stdout).not.toMatch(/no-credentials-leaked|assert\//);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_run_steps_in_declared_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["smoke"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      // Smoke order is: cli-available, gateway-health, sandbox-listed, sandbox-shell
-      const order = ["cli-available", "gateway-health", "sandbox-listed", "sandbox-shell"];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = r.stdout.indexOf(marker, pos);
-        expect(idx, `missing marker ${marker} after ${pos} in:\n${r.stdout}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_fail_on_unknown_suite", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["does-not-exist"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status).not.toBe(0);
-      expect(`${r.stdout}${r.stderr}`).toMatch(/does-not-exist/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_stop_on_first_failed_step", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      // Use a fixture suites file with a failing middle step.
-      const fixtureSuites = path.join(tmp, "suites.yaml");
-      const fixtureDir = path.join(tmp, "suites", "fixture");
-      fs.mkdirSync(fixtureDir, { recursive: true });
-      fs.writeFileSync(path.join(fixtureDir, "00-a.sh"), "#!/usr/bin/env bash\necho A-RAN\nexit 0\n");
-      fs.writeFileSync(path.join(fixtureDir, "01-b.sh"), "#!/usr/bin/env bash\necho B-RAN\nexit 1\n");
-      fs.writeFileSync(path.join(fixtureDir, "02-c.sh"), "#!/usr/bin/env bash\necho C-RAN\nexit 0\n");
-      fs.chmodSync(path.join(fixtureDir, "00-a.sh"), 0o755);
-      fs.chmodSync(path.join(fixtureDir, "01-b.sh"), 0o755);
-      fs.chmodSync(path.join(fixtureDir, "02-c.sh"), 0o755);
-      fs.writeFileSync(
-        fixtureSuites,
-        `suites:
-  fixture:
-    steps:
-      - { id: a, script: suites/fixture/00-a.sh }
-      - { id: b, script: suites/fixture/01-b.sh }
-      - { id: c, script: suites/fixture/02-c.sh }
-`,
-      );
-      const r = runSuites(["fixture"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_SUITES_FILE: fixtureSuites,
-        E2E_SUITES_DIR: tmp,
-      });
-      expect(r.status).not.toBe(0);
-      expect(r.stdout).toContain("A-RAN");
-      expect(r.stdout).toContain("B-RAN");
-      expect(r.stdout).not.toContain("C-RAN");
-      expect(`${r.stdout}${r.stderr}`).toMatch(/FAIL.*(fixture\/b|step=b)/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("smoke_suite_should_require_context", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      // No context.env written to tmp.
-      const r = runSuites(["smoke"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status).not.toBe(0);
-      expect(`${r.stderr}${r.stdout}`).toMatch(/context\.env|E2E_SCENARIO|missing/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("rebuild_and_upgrade_suites_should_emit_stable_assertion_ids_in_dry_run", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["rebuild", "upgrade"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      for (const id of [
-        "suite.rebuild.workspace_state_preserved",
-        "suite.rebuild.agent_version_upgraded",
-        "suite.rebuild.inference_still_works",
-        "suite.rebuild.policy_presets_preserved",
-        "suite.rebuild.hermes_config_preserved",
-        "suite.upgrade.sandbox_registry_preserved",
-        "suite.upgrade.gateway_version_upgraded",
-        "suite.upgrade.survivor_agent_reachable",
-      ]) {
-        expect(r.stdout).toContain(id);
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("smoke_and_inference_run_with_stub_context", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["smoke", "inference"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      for (const id of [
-        "cli-available",
-        "gateway-health",
-        "sandbox-listed",
-        "sandbox-shell",
-        "models-health",
-        "chat-completion",
-        "sandbox-inference-local",
-      ]) {
-        expect(r.stdout).toContain(id);
-      }
-      // Summary should call out PASS for each step.
-      expect(r.stdout).toMatch(/PASS/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml b/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml
new file mode 100644
index 0000000000..b8a30d2589
--- /dev/null
+++ b/test/e2e-scenario/manifests/openclaw-nvidia-rebuild.yaml
@@ -0,0 +1,30 @@
+apiVersion: nemoclaw.io/v1
+kind: NemoClawInstance
+metadata:
+  name: openclaw-nvidia-rebuild
+spec:
+  setup:
+    install:
+      source: repo-current
+    runtime:
+      containerEngine: docker
+      containerDaemon: running
+    platform:
+      os: ubuntu
+      executionTarget: local
+  onboarding:
+    agent: openclaw
+    provider: nvidia
+    modelRoute: inference-local
+    policyTier: balanced
+    messaging: []
+    # Lifecycle phase opt-in. Routes through
+    # nemoclaw_scenarios/lifecycle/dispatch.sh to the
+    # rebuild-current-version worker, which seeds a workspace marker,
+    # invokes `nemoclaw <sandbox> rebuild --yes`, and publishes the
+    # marker contract to runtime-phase assertions.
+    lifecycle: rebuild-current-version
+  state:
+    workspaceRef: default
+    credentialRefs:
+      - NVIDIA_API_KEY
diff --git a/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh b/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh
new file mode 100755
index 0000000000..5aaca1b2c1
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Phase-action launcher for the hybrid scenario E2E framework.
+#
+# The phase orchestrators (EnvironmentOrchestrator, OnboardingOrchestrator)
+# call this launcher to invoke a function defined in a sourced shell
+# dispatcher (install/dispatch.sh or onboard/dispatch.sh). Those
+# dispatchers are intentionally library-style (function definitions
+# only); this script gives them a deterministic executable entrypoint
+# the typed runner can spawn.
+#
+# Usage:
+#   dispatch-action.sh <fn> <arg> <dispatcher-script>
+#
+# Examples:
+#   dispatch-action.sh e2e_install repo-current \
+#     test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
+#
+#   dispatch-action.sh e2e_onboard cloud-openclaw \
+#     test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
+#
+# Environment (set by the orchestrator):
+#   E2E_CONTEXT_DIR  artifact directory
+#   E2E_PHASE        environment | onboarding
+#   E2E_ACTION_ID    stable action id, used for trace/log correlation
+
+set -euo pipefail
+
+if [[ $# -lt 3 ]]; then
+  echo "dispatch-action.sh: usage: <fn> <arg> <dispatcher-script>" >&2
+  exit 2
+fi
+
+ACTION_FN="$1"
+ACTION_ARG="$2"
+DISPATCHER="$3"
+
+if [[ ! -f "${DISPATCHER}" ]]; then
+  echo "dispatch-action.sh: dispatcher script not found: ${DISPATCHER}" >&2
+  exit 2
+fi
+
+# Source the runtime/lib helpers the dispatchers (and their workers) rely on.
+RUNTIME_LIB="$(cd "$(dirname "${BASH_SOURCE[0]}")/../runtime/lib" && pwd)"
+# shellcheck source=runtime/lib/env.sh
+. "${RUNTIME_LIB}/env.sh"
+# shellcheck source=runtime/lib/context.sh
+. "${RUNTIME_LIB}/context.sh"
+
+# Apply the standard non-interactive env for this action. Exports made here
+# only reach this process and its children, so every launcher invocation
+# re-applies them; e2e_env_apply_noninteractive is idempotent.
+e2e_env_apply_noninteractive
+e2e_env_trace "phase:${E2E_PHASE:-unknown}/action:${E2E_ACTION_ID:-unknown}"
+
+# IMPORTANT: do NOT call e2e_context_init here. The TS framework
+# (ScenarioRunner.seedContextEnv) is the single owner of context.env
+# initialization for the run; e2e_context_init opens with `: > ctx`
+# which would truncate the file and wipe seeded keys (E2E_SCENARIO,
+# E2E_GATEWAY_URL, ...) that runtime assertions require.
+# Workers may still call e2e_context_set to extend context.env in place.
+
+# Source the dispatcher last so its function definitions are in scope
+# when we invoke the requested function.
+# shellcheck source=/dev/null
+. "${DISPATCHER}"
+
+if ! declare -F "${ACTION_FN}" >/dev/null 2>&1; then
+  echo "dispatch-action.sh: function not found in dispatcher: ${ACTION_FN}" >&2
+  exit 2
+fi
+
+"${ACTION_FN}" "${ACTION_ARG}"
diff --git a/test/e2e-scenario/nemoclaw_scenarios/expected-states.yaml b/test/e2e-scenario/nemoclaw_scenarios/expected-states.yaml
deleted file mode 100644
index 8b7d95c11b..0000000000
--- a/test/e2e-scenario/nemoclaw_scenarios/expected-states.yaml
+++ /dev/null
@@ -1,186 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Expected state configs.
-#
-# Each entry describes the observable contract that must be true after
-# setup/install/onboarding completes for a given scenario. Expected states
-# are reusable: multiple setup scenarios can resolve to the same expected
-# state when they produce the same completed environment.
-#
-# Schema keys are intentionally small and structural. Deeper behavior lives
-# in suites; expected states answer "is the environment in the shape we
-# expect?" not "does every feature still work?".
-#
-# Negative/preflight expected states (e.g. `preflight-failure-no-sandbox`)
-# are introduced in Phase 9 alongside their first consuming scenario.
-
-expected_states:
-  cloud-openclaw-ready:
-    cli:
-      installed: true
-    gateway:
-      expected: present
-      health: healthy
-    sandbox:
-      expected: present
-      status: running
-      agent: openclaw
-    inference:
-      expected: available
-      provider: nvidia
-      route: inference-local
-      mode: gateway-routed
-    credentials:
-      expected: present
-      storage: gateway-managed
-    security:
-      policy_engine: supported
-      shields: supported
-
-  macos-cli-ready-docker-optional:
-    cli:
-      installed: true
-    gateway:
-      expected: optional
-      health: optional
-    sandbox:
-      expected: optional
-      status: optional
-      agent: openclaw
-    inference:
-      expected: optional
-      provider: nvidia
-      route: inference-local
-      mode: gateway-routed
-    credentials:
-      expected: optional
-      storage: gateway-managed
-    security:
-      policy_engine: supported
-      shields: supported
-
-  cloud-openclaw-custom-policies-ready:
-    cli:
-      installed: true
-    gateway:
-      expected: present
-      health: healthy
-    sandbox:
-      expected: present
-      status: running
-      agent: openclaw
-    inference:
-      expected: available
-      provider: nvidia
-      route: inference-local
-      mode: gateway-routed
-    credentials:
-      expected: present
-      storage: gateway-managed
-    onboarding_state:
-      provider: nvidia-prod
-      model: nvidia/nemotron-3-super-120b-a12b
-      policy_presets: npm,pypi
-    security:
-      policy_engine: supported
-      shields: supported
-
-  cloud-hermes-ready:
-    cli:
-      installed: true
-    gateway:
-      expected: present
-      health: healthy
-    sandbox:
-      expected: present
-      status: running
-      agent: hermes
-    inference:
-      expected: available
-      provider: nvidia
-      route: inference-local
-      mode: gateway-routed
-    credentials:
-      expected: present
-      storage: gateway-managed
-    security:
-      policy_engine: supported
-      shields: supported
-
-  local-ollama-openclaw-ready:
-    cli:
-      installed: true
-    gateway:
-      expected: present
-      health: healthy
-    sandbox:
-      expected: present
-      status: running
-      agent: openclaw
-    inference:
-      expected: available
-      provider: ollama
-      route: inference-local
-      mode: gateway-routed
-    credentials:
-      expected: present
-      storage: gateway-managed
-    security:
-      policy_engine: supported
-      shields: supported
-
-  # Negative preflight state. Setup is expected to fail and the runner
-  # must confirm that no gateway or sandbox ghost state was left behind.
-  # The `expected_failure` block (added for #3608) is the structured
-  # contract the runner matches against; the legacy `failure` block is
-  # retained as a drift guard while scenarios migrate.
-  preflight-failure-no-sandbox:
-    cli:
-      installed: true
-    gateway:
-      expected: absent
-    sandbox:
-      expected: absent
-    failure:
-      expected: true
-      stage: preflight
-    expected_failure:
-      phase: preflight
-      error_class: docker-missing
-      # Docker, container, daemon, socket, or preflight - case insensitive.
-      message_pattern: "(?i)docker|container|daemon|socket|preflight"
-      forbidden_side_effects:
-        - sandbox-created
-        - gateway-started
-        - credentials-written
-
-  onboarding-failure-invalid-nvidia-key:
-    cli:
-      installed: true
-    gateway:
-      expected: absent
-    sandbox:
-      expected: absent
-    failure:
-      expected: true
-      stage: onboarding
-      reason: invalid-nvidia-api-key
-      exit_code: 1
-      message_contains: Invalid NVIDIA API key. Must start with nvapi-
-      no_stack_trace: true
-
-  onboarding-failure-gateway-port-conflict:
-    cli:
-      installed: true
-    gateway:
-      expected: absent
-    sandbox:
-      expected: absent
-    failure:
-      expected: true
-      stage: onboarding
-      reason: gateway-port-conflict
-      exit_code: 1
-      message_contains: Port 18080 is not available
-      no_stack_trace: true
diff --git a/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh b/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
index 3d49c03116..d10fbd2c9d 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
@@ -12,8 +12,6 @@
 #   older_base_image_prepare <tag> [--registry ghcr.io/nvidia/nemoclaw]
 #     Writes a minimal Dockerfile to a temp location whose first line is
 #     `FROM <registry>:<tag>`, and prints the Dockerfile path on stdout.
-#     Honors E2E_DRY_RUN: skips the `docker pull` step (but still writes
-#     the Dockerfile, which is what callers inspect).
 #   older_base_image_cleanup <dockerfile-path>
 #     Removes the generated Dockerfile and (if present) its build context.
 
@@ -50,11 +48,9 @@ LABEL nemoclaw.e2e.fixture=older-base-image
 EOF
 
   e2e_env_trace "fixture:older-base-image" "${registry}:${tag}"
-  if ! e2e_env_is_dry_run; then
-    if command -v docker >/dev/null 2>&1; then
-      docker pull "${registry}:${tag}" >&2 \
-        || echo "older_base_image_prepare: docker pull failed (continuing; build may still succeed on cached layers)" >&2
-    fi
+  if command -v docker >/dev/null 2>&1; then
+    docker pull "${registry}:${tag}" >&2 \
+      || echo "older_base_image_prepare: docker pull failed (continuing; build may still succeed on cached layers)" >&2
   fi
   printf '%s\n' "${dockerfile}"
 }
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
index 7ea798cfdf..1a2ec2b0aa 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
@@ -4,7 +4,7 @@
 #
 # Install dispatcher. Routes by install-method / profile id to one of four
 # split helpers (repo-current.sh, public-curl.sh, ollama.sh,
-# launchable.sh). Honors E2E_DRY_RUN.
+# launchable.sh).
 #
 # Accepts both legacy install-method names (repo-checkout,
 # curl-install-script) and the new profile-centric names used by
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh b/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
index 5ec638e90a..09d8aa3bbb 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
@@ -18,11 +18,6 @@ _E2E_INST_LNCH_RUNTIME_LIB="$(cd "${_E2E_INST_LNCH_DIR}/../../runtime/lib" && pw
 
 e2e_install_launchable() {
   e2e_env_trace "install-launchable"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-launchable (skipped)"
-    return 0
-  fi
-
   # Match nightly launchable-smoke-e2e: exercise the launchable bootstrap
   # script on the current runner instead of assuming a pre-provisioned Brev VM.
   # The script has no Brev API dependency; it installs Docker/OpenShell/NemoClaw
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh b/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
index a9d5f81c14..449eae519a 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
@@ -17,10 +17,6 @@ _E2E_INST_OL_RUNTIME_LIB="$(cd "${_E2E_INST_OL_DIR}/../../runtime/lib" && pwd)"
 
 e2e_install_ollama() {
   e2e_env_trace "install-ollama"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-ollama (skipped)"
-    return 0
-  fi
   local ollama_url="${E2E_OLLAMA_INSTALL_URL:-https://ollama.ai/install.sh}"
   if ! command -v ollama >/dev/null 2>&1; then
     if ! curl -fsSL --retry 3 --retry-delay 2 "${ollama_url}" | bash; then
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh b/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
index 143d097f0d..6628e332a2 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
@@ -16,10 +16,6 @@ _E2E_INST_CURL_RUNTIME_LIB="$(cd "${_E2E_INST_CURL_DIR}/../../runtime/lib" && pw
 
 e2e_install_curl() {
   e2e_env_trace "install-curl"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-curl (skipped)"
-    return 0
-  fi
   local url="${E2E_INSTALLER_URL:-https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh}"
   local sha256="${E2E_INSTALLER_SHA256:-}"
   local tmp
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh b/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
index 8c985dc3f7..000431a4b8 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
@@ -5,7 +5,7 @@
 # Install from a checked-out repo (repo-current / repo-checkout profile).
 #
 # Split from the install dispatcher to keep scenario setup logic flat and to
-# make the per-profile code discoverable by grep. Honors E2E_DRY_RUN.
+# make the per-profile code discoverable by grep.
 
 _E2E_INST_REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 _E2E_INST_REPO_RUNTIME_LIB="$(cd "${_E2E_INST_REPO_DIR}/../../runtime/lib" && pwd)"
@@ -16,10 +16,6 @@ _E2E_INST_REPO_RUNTIME_LIB="$(cd "${_E2E_INST_REPO_DIR}/../../runtime/lib" && pw
 
 e2e_install_repo() {
   e2e_env_trace "install-repo"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-repo (skipped)"
-    return 0
-  fi
   local repo_root
   repo_root="$(cd "${_E2E_INST_REPO_DIR}/../../../.." && pwd)"
   cd "${repo_root}" || return
diff --git a/test/e2e-scenario/nemoclaw_scenarios/lifecycle/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/lifecycle/dispatch.sh
new file mode 100755
index 0000000000..3cb82476cf
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/lifecycle/dispatch.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Lifecycle dispatcher. Mirrors install/dispatch.sh and onboard/dispatch.sh:
+# sources the runtime libs and per-profile worker files, then defines
+# `e2e_lifecycle()` which routes by profile id.
+#
+# Lifecycle workers run AFTER onboarding completes and BEFORE runtime
+# assertions execute. They mutate sandbox state (rebuild, upgrade,
+# snapshot, ...) and seed context.env keys that runtime assertions in
+# validation_suites/lib/rebuild_upgrade.sh consume:
+#
+#   E2E_REBUILD_MARKER_PATH        absolute path to the workspace marker
+#                                  the worker wrote before rebuild
+#   E2E_REBUILD_MARKER_EXPECTED    exact content of that marker
+#   E2E_OLD_AGENT_VERSION          (optional) version present pre-rebuild
+#   E2E_AGENT_VERSION_COMMAND      (optional) sandbox command to read the
+#                                  current agent version
+#
+# Adding a new profile:
+#   1. Drop a worker file here (e.g. snapshot-restore.sh) that defines
+#      `e2e_lifecycle_<profile_id>`.
+#   2. Source it below.
+#   3. Add the case branch in e2e_lifecycle().
+#   4. Register the profile id in LIFECYCLE_PROFILE_SECRET_ENV in
+#      scenarios/compiler.ts so secret env routing keeps working.
+
+_E2E_LIFECYCLE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+_E2E_LIFECYCLE_RUNTIME_LIB="$(cd "${_E2E_LIFECYCLE_DIR}/../../runtime/lib" && pwd)"
+# shellcheck source=../../runtime/lib/env.sh
+. "${_E2E_LIFECYCLE_RUNTIME_LIB}/env.sh"
+# shellcheck source=../../runtime/lib/context.sh
+. "${_E2E_LIFECYCLE_RUNTIME_LIB}/context.sh"
+# shellcheck source=rebuild-current-version.sh
+. "${_E2E_LIFECYCLE_DIR}/rebuild-current-version.sh"
+
+# Route a lifecycle profile id (first argument) to its worker function.
+# Returns 2 when the id is missing or unsupported; otherwise the
+# worker's own exit status.
+e2e_lifecycle() {
+  local profile_id="${1:-}"
+  [[ -n "${profile_id}" ]] || {
+    echo "e2e_lifecycle: missing lifecycle profile id" >&2
+    return 2
+  }
+  e2e_env_trace "lifecycle:${profile_id}"
+  case "${profile_id}" in
+    rebuild-current-version) e2e_lifecycle_rebuild_current_version ;;
+    *)
+      echo "e2e_lifecycle: unsupported lifecycle profile: ${profile_id}" >&2
+      return 2 ;;
+  esac
+}
diff --git a/test/e2e-scenario/nemoclaw_scenarios/lifecycle/rebuild-current-version.sh b/test/e2e-scenario/nemoclaw_scenarios/lifecycle/rebuild-current-version.sh
new file mode 100755
index 0000000000..359645754a
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/lifecycle/rebuild-current-version.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Lifecycle worker: rebuild-current-version.
+#
+# Drives the workspace-state-preservation invariant from
+# test/e2e/test-rebuild-openclaw.sh, scoped to the rebuild trigger and
+# the contract the runtime-phase rebuild_upgrade.sh assertions consume.
+# The legacy test additionally exercised the version-upgrade path
+# (build OLD-version base image, create sandbox from it, then rebuild
+# to current). That dimension belongs to a future
+# `rebuild-from-old-version` lifecycle profile and is intentionally
+# out of scope here: this profile validates that
+# `nemoclaw <sandbox> rebuild --yes` preserves workspace state across
+# a rebuild, which is the core invariant the rebuild_upgrade.sh
+# assertions assert.
+#
+# Sequence:
+#   1. Read E2E_SANDBOX_NAME from the context the onboarding phase
+#      already populated.
+#   2. Snapshot the current agent version (informational; runtime
+#      assertions accept an empty E2E_OLD_AGENT_VERSION as a vacuous
+#      pass on the version-upgraded check, which is the right default
+#      until the old-version profile lands).
+#   3. Write a unique marker into /sandbox/.openclaw/workspace via the
+#      canonical e2e_sandbox_exec wrapper. Path mirrors the legacy
+#      test's MARKER_FILE so the read-side assertion stays unchanged.
+#   4. Verify the marker is readable post-write (catch silent write
+#      failures before rebuild kicks off).
+#   5. Run `nemoclaw <sandbox> rebuild --yes --verbose`, streaming its output.
+#   6. Seed E2E_REBUILD_MARKER_PATH and E2E_REBUILD_MARKER_EXPECTED in
+#      context.env so the runtime-phase
+#      rebuild_upgrade_assert_marker_preserved assertion can read them.
+#   7. Seed E2E_AGENT_VERSION_COMMAND (`openclaw --version`) and, when
+#      step 2 captured a version, E2E_OLD_AGENT_VERSION.
+
+# Source the canonical sandbox-exec wrapper so this worker inherits the
+# ssh-config preferred / openshell-exec fallback transport without
+# re-implementing the routing logic.
+_E2E_LIFECYCLE_RC_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+_E2E_LIFECYCLE_RC_VALIDATION_SUITES="$(cd "${_E2E_LIFECYCLE_RC_DIR}/../../validation_suites" && pwd)"
+# shellcheck source=../../validation_suites/sandbox-exec.sh
+. "${_E2E_LIFECYCLE_RC_VALIDATION_SUITES}/sandbox-exec.sh"
+
+# Marker file path inside the sandbox. Mirrors the legacy
+# test-rebuild-openclaw.sh MARKER_FILE so cross-check against the
+# legacy contract stays apples-to-apples.
+LIFECYCLE_REBUILD_MARKER_PATH="/sandbox/.openclaw/workspace/rebuild-marker.txt"
+
+# Seeds a workspace marker in the sandbox, runs `nemoclaw <sandbox> rebuild`,
+# and publishes the marker contract to context.env. Returns 1 when the sandbox
+# name is missing or marker setup fails, and nemoclaw's status if rebuild fails.
+e2e_lifecycle_rebuild_current_version() {
+  e2e_env_apply_noninteractive
+
+  local sandbox_name marker_content rc=0
+  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
+  if [[ -z "${sandbox_name}" ]]; then
+    echo "lifecycle:rebuild-current-version: E2E_SANDBOX_NAME missing in context" >&2
+    return 1
+  fi
+  # The random suffix avoids marker collisions across re-runs that inherit a
+  # partially-rebuilt sandbox; the timestamp keeps the value greppable in logs.
+  marker_content="REBUILD_LIFECYCLE_$(date +%s)_${RANDOM}"
+
+  echo "lifecycle:rebuild-current-version: sandbox=${sandbox_name}"
+  echo "lifecycle:rebuild-current-version: marker_path=${LIFECYCLE_REBUILD_MARKER_PATH}"
+  echo "lifecycle:rebuild-current-version: marker_content=${marker_content}"
+
+  # Step 2: snapshot current version (best-effort; vacuous if it fails).
+  local pre_rebuild_version=""
+  if pre_rebuild_version="$(
+    E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=30 \
+      e2e_sandbox_exec "${sandbox_name}" -- bash -lc 'openclaw --version 2>/dev/null || true'
+  )"; then
+    echo "lifecycle:rebuild-current-version: pre_rebuild_version=${pre_rebuild_version}"
+  fi
+
+  # Step 3: write the marker file.
+  if ! E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=30 \
+    e2e_sandbox_exec "${sandbox_name}" -- sh -c \
+    "mkdir -p '$(dirname "${LIFECYCLE_REBUILD_MARKER_PATH}")' && printf '%s' '${marker_content}' > '${LIFECYCLE_REBUILD_MARKER_PATH}'"; then
+    echo "lifecycle:rebuild-current-version: failed to write marker into sandbox" >&2
+    return 1
+  fi
+
+  # Step 4: verify marker readable pre-rebuild. This catches sandbox
+  # filesystem oddities (read-only mounts, perms) before we waste the
+  # rebuild cycle.
+  local verify_content=""
+  verify_content="$(
+    E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=30 \
+      e2e_sandbox_exec "${sandbox_name}" -- cat "${LIFECYCLE_REBUILD_MARKER_PATH}"
+  )" || rc=$?
+  if [[ "${rc}" -ne 0 || "${verify_content}" != "${marker_content}" ]]; then
+    echo "lifecycle:rebuild-current-version: marker readback mismatch (got '${verify_content}', expected '${marker_content}')" >&2
+    return 1
+  fi
+  echo "lifecycle:rebuild-current-version: marker seeded and verified"
+
+  # Step 5: trigger the rebuild. `--yes` skips the confirmation prompt and
+  # `--verbose` surfaces progress in the action log. The exit status is
+  # captured with `|| rc=$?`: inside `if ! cmd; then`, `$?` is the status
+  # of the negated pipeline (always 0), which would mask the failure.
+  echo "lifecycle:rebuild-current-version: invoking nemoclaw ${sandbox_name} rebuild --yes --verbose"
+  nemoclaw "${sandbox_name}" rebuild --yes --verbose || rc=$?
+  if [[ "${rc}" -ne 0 ]]; then
+    echo "lifecycle:rebuild-current-version: nemoclaw rebuild exited ${rc}" >&2
+    return "${rc}"
+  fi
+  echo "lifecycle:rebuild-current-version: rebuild completed"
+
+  # Step 6: publish the marker contract to runtime-phase assertions.
+  e2e_context_set E2E_REBUILD_MARKER_PATH "${LIFECYCLE_REBUILD_MARKER_PATH}"
+  e2e_context_set E2E_REBUILD_MARKER_EXPECTED "${marker_content}"
+  # Step 7: tell the version-check assertion how to read the agent version
+  # inside the sandbox. rebuild_upgrade.sh already defaults to `openclaw
+  # --version`; seeding it explicitly makes the contract obvious in context.env.
+  e2e_context_set E2E_AGENT_VERSION_COMMAND "openclaw --version"
+  if [[ -n "${pre_rebuild_version}" ]]; then
+    # Only publish E2E_OLD_AGENT_VERSION when a non-empty pre-rebuild
+    # version was captured; an empty value makes the version-upgraded
+    # assertion pass vacuously, which suits this profile (no upgrade is
+    # expected; only workspace preservation is validated).
+    # NOTE(review): confirm that assertion tolerates old == current version.
+    e2e_context_set E2E_OLD_AGENT_VERSION "${pre_rebuild_version}"
+  fi
+
+  echo "lifecycle:rebuild-current-version: context.env updated"
+  return 0
+}
diff --git a/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh
new file mode 100755
index 0000000000..9c7b9803f1
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Onboard worker: cloud-openclaw-no-docker profile.
+#
+# Drives the negative `ubuntu-no-docker-preflight-negative` scenario by:
+#
+#   1. Installing a `docker` shim earlier on PATH that exits non-zero
+#      with a "Cannot connect to the Docker daemon" message. This makes
+#      `commandExists("docker")` succeed (the binary is present) while
+#      `docker info` fails — matching the production failure mode users
+#      see when Docker is installed but the daemon is not running.
+#
+#   2. Running `nemoclaw onboard --non-interactive` with stdout+stderr
+#      captured to `${E2E_CONTEXT_DIR}/negative-preflight.log`. The
+#      `onboarding.preflight.expected-failed` assertion greps that file.
+#
+#   3. Asserting that nemoclaw exits non-zero (preflight DID fail). If
+#      onboard unexpectedly succeeds, the action fails so the operator
+#      sees a clear "expected failure did not happen" signal instead of a
+#      green light masking a regression.
+#
+#   4. Returning 0 on the *expected* failure path so the orchestrator
+#      reports the action as passed and the assertion phase runs against
+#      the captured log. Without this, the action would be marked failed
+#      and the dependent assertions would be skipped.
+#
+# Pattern mirrors test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh,
+# which sets up a different failure condition (port 8080 occupied) but
+# follows the same capture-output / check-exit / grep-log shape.
+
+e2e_onboard_cloud_openclaw_no_docker() {
+  e2e_env_apply_noninteractive
+  e2e_context_init
+
+  local log shim_dir rc=0
+  log="${E2E_CONTEXT_DIR}/negative-preflight.log"
+  # An empty shim_dir would write the shim to /docker and put an empty (cwd) entry first on PATH.
+  if ! shim_dir="$(mktemp -d -t e2e-no-docker-XXXXXX)"; then
+    echo "negative-preflight: ERROR: mktemp failed; cannot stage docker shim" >&2
+    return 1
+  fi
+  cat >"${shim_dir}/docker" <<'SHIM'
+#!/usr/bin/env bash
+# Negative-preflight docker shim — preserves "docker is installed" while
+# breaking "docker info" / "docker version" so preflight fails with the
+# real "Cannot connect to the Docker daemon" message.
+printf 'Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?\n' >&2
+exit 1
+SHIM
+  chmod +x "${shim_dir}/docker"
+
+  echo "negative-preflight: shim docker installed at ${shim_dir}/docker"
+  echo "negative-preflight: log_file=${log}"
+  echo "negative-preflight: invoking nemoclaw onboard --non-interactive (expected to fail at preflight)"
+  # The shim directory leads PATH for this one command only; `|| rc=$?` records the exit status.
+  PATH="${shim_dir}:${PATH}" \
+    nemoclaw onboard --non-interactive --yes-i-accept-third-party-software \
+    >"${log}" 2>&1 || rc=$?
+  rm -rf "${shim_dir}"
+
+  echo "negative-preflight: nemoclaw onboard exited ${rc}"
+  if [[ -f "${log}" ]]; then
+    echo "--- captured log tail (${log}) ---"
+    tail -n 50 "${log}" 2>/dev/null || true
+    echo "--- end captured log ---"
+  fi
+  if [[ "${rc}" -eq 0 ]]; then
+    echo "negative-preflight: ERROR: nemoclaw onboard unexpectedly exited 0; preflight should have failed when docker is unreachable" >&2
+    return 1
+  fi
+  return 0
+}
diff --git a/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
index 2baf698986..fba1004559 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
@@ -14,6 +14,8 @@ _E2E_ONBOARD_RUNTIME_LIB="$(cd "${_E2E_ONBOARD_DIR}/../../runtime/lib" && pwd)"
 . "${_E2E_ONBOARD_RUNTIME_LIB}/context.sh"
 # shellcheck source=cloud-openclaw.sh
 . "${_E2E_ONBOARD_DIR}/cloud-openclaw.sh"
+# shellcheck source=cloud-openclaw-no-docker.sh
+. "${_E2E_ONBOARD_DIR}/cloud-openclaw-no-docker.sh"
 # shellcheck source=cloud-hermes.sh
 . "${_E2E_ONBOARD_DIR}/cloud-hermes.sh"
 # shellcheck source=local-ollama-openclaw.sh
@@ -26,14 +28,13 @@ e2e_onboard() {
     return 2
   fi
   e2e_env_trace "onboard:${profile}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] onboard profile=${profile} (skipped)"
-    return 0
-  fi
   case "${profile}" in
     cloud-openclaw)
       e2e_onboard_cloud_openclaw
       ;;
+    cloud-openclaw-no-docker)
+      e2e_onboard_cloud_openclaw_no_docker
+      ;;
     cloud-openclaw-custom-policies)
       E2E_ONBOARDING_MODEL="${E2E_ONBOARDING_MODEL:-nvidia/nemotron-3-super-120b-a12b}"
       E2E_ONBOARDING_POLICY_PRESETS="${E2E_ONBOARDING_POLICY_PRESETS:-npm,pypi}"
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/cli-installed.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/cli-installed.sh
new file mode 100755
index 0000000000..77b773e3e6
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/cli-installed.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Probe: cli-installed
+#
+# Asserts that the nemoclaw CLI is reachable on PATH after the
+# environment phase's install action completed.
+
+set -euo pipefail
+
+if ! command -v nemoclaw >/dev/null 2>&1; then
+  echo "probe cli-installed: nemoclaw not found on PATH (PATH=${PATH})" >&2
+  exit 1
+fi
+
+# Resolve to a real binary. `type -P` searches PATH only, so an alias or shell
+# function named nemoclaw (accepted by the `command -v` check above) is rejected.
+nemoclaw_bin="$(type -P nemoclaw || true)"
+if [[ -z "${nemoclaw_bin}" || ! -x "${nemoclaw_bin}" ]]; then
+  echo "probe cli-installed: nemoclaw resolved to non-executable: ${nemoclaw_bin:-<no file on PATH>}" >&2
+  exit 1
+fi
+printf 'probe cli-installed: ok (%s)\n' "${nemoclaw_bin}"
+exit 0
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/dispatch.sh
new file mode 100755
index 0000000000..84db7e7fa1
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/dispatch.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# State-validation probe dispatcher.
+#
+# Each probe is a small bash script in this directory invoked by the
+# typed StateValidationOrchestrator via the shared dispatch-action.sh
+# launcher. The orchestrator owns timeouts, redaction, evidence
+# logging, and pass/fail attribution; probes only return 0 (probe
+# satisfied) or non-zero with a human-readable message on stderr.
+#
+# Probes consult ${E2E_CONTEXT_DIR}/context.env for runtime values
+# (E2E_GATEWAY_URL, E2E_SANDBOX_NAME) seeded by the framework and
+# extended by onboarding.
+#
+# Library style: dispatch.sh defines a single dispatch function
+# (e2e_state_probe) that runs the named probe. The TS phase-action
+# uses fn=e2e_state_probe arg=<probe-id>.
+
+_E2E_PROBES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# e2e_state_probe <probe-id>: returns 2 on a missing/unknown id, else the probe's exit status.
+e2e_state_probe() {
+  local id="${1:-}" # default to empty so the guard below still fires under `set -u`
+  if [[ -z "${id}" ]]; then
+    echo "e2e_state_probe: missing probe id" >&2
+    return 2
+  fi
+  local probe_script="${_E2E_PROBES_DIR}/${id}.sh"
+  if [[ ! -f "${probe_script}" ]]; then
+    echo "e2e_state_probe: unknown probe id '${id}' (no script at ${probe_script})" >&2
+    return 2
+  fi
+  e2e_env_trace "probe:${id}"
+  # Probes run in a subshell so a `set -e` failure inside one probe
+  # does not affect another action in the same orchestrator process.
+  (
+    # shellcheck source=/dev/null
+    . "${probe_script}"
+  )
+}
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-absent.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-absent.sh
new file mode 100755
index 0000000000..67dc0d9d2f
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-absent.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Probe: gateway-absent
+#
+# Negative-state probe. Asserts that no gateway was started by a
+# scenario whose expected_state declares gateway.expected=absent
+# (preflight failure, invalid-key onboarding failure,
+# gateway-port-conflict onboarding failure). This is the typed
+# replacement for the runtime.expected-failure.no-side-effects
+# pending step on the gateway-started axis: a real probe that fails
+# closed if the gateway IS running.
+
+set -euo pipefail
+
+# Two independent signals, cheapest first: the CLI's own status report,
+# then an HTTP health check against the URL recorded in context.env.
+# Either one finding a gateway fails the probe, so a partially-started
+# gateway cannot slip through on a single missed signal.
+
+# Signal 1: nemoclaw's own view (skipped when the CLI is not on PATH).
+if command -v nemoclaw >/dev/null 2>&1 && nemoclaw gateway status >/dev/null 2>&1; then
+  echo "probe gateway-absent: nemoclaw reports gateway is running, expected absent" >&2
+  nemoclaw gateway status >&2 || true # re-run so the status output lands on stderr
+  exit 1
+fi
+
+# Signal 2: best-effort reachability. context.env may carry a gateway
+# URL even for negative scenarios (it is computed from the scenario
+# id, not from a successful onboard).
+ctx_file="${E2E_CONTEXT_DIR:-.e2e}/context.env"
+gateway_url=""
+if [[ -f "${ctx_file}" ]]; then
+  gateway_url="$(awk -F= '/^E2E_GATEWAY_URL=/{print substr($0, index($0, "=")+1); exit}' "${ctx_file}" | tr -d '"')"
+fi
+# A single trailing slash is trimmed before /health is appended.
+health_url="${gateway_url%/}/health"
+if [[ -n "${gateway_url}" ]] && curl -fsS -o /dev/null --max-time 3 "${health_url}" 2>/dev/null; then
+  echo "probe gateway-absent: ${health_url} responded healthy, expected absent" >&2
+  exit 1
+fi
+
+echo "probe gateway-absent: ok"
+exit 0
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-healthy.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-healthy.sh
new file mode 100755
index 0000000000..169ce4ce22
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/gateway-healthy.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Probe: gateway-healthy
+#
+# Asserts the gateway is reachable and reports a healthy HTTP status
+# at ${E2E_GATEWAY_URL}/health (with fallback to the base URL). Mirrors
+# the legacy validation_suites/assert/gateway-alive.sh::e2e_gateway_assert_healthy
+# contract, but is invoked as a typed phase action by the
+# StateValidationOrchestrator BEFORE runtime suites run, so suite
+# assertions never execute against a missing or wedged gateway.
+
+set -euo pipefail
+
+# The health contract itself lives in the legacy assert helper; this
+# probe only locates it, sources it, and turns its verdict into an exit
+# code, so there is a single implementation during the transition.
+_THIS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GATEWAY_HELPER="$(cd "${_THIS_DIR}/../../validation_suites/assert" && pwd)/gateway-alive.sh"
+
+# Report a missing helper explicitly before trying to source it.
+[[ -f "${GATEWAY_HELPER}" ]] || {
+  echo "probe gateway-healthy: legacy helper not found: ${GATEWAY_HELPER}" >&2
+  exit 1
+}
+
+# shellcheck source=/dev/null
+. "${GATEWAY_HELPER}"
+
+# Any non-zero status from the helper's assertion fails the probe with 1.
+e2e_gateway_assert_healthy || exit 1
+
+echo "probe gateway-healthy: ok"
+exit 0
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-absent.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-absent.sh
new file mode 100755
index 0000000000..5f08e39df0
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-absent.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Probe: sandbox-absent
+#
+# Negative-state probe. Asserts that no sandbox was created by a
+# scenario whose expected_state declares sandbox.expected=absent
+# (preflight failure, onboarding failures). Typed replacement for
+# the legacy run-scenario.sh inline check
+# `openshell sandbox list | grep -Fq "${sandbox_name}"`.
+
+set -euo pipefail
+
+# E2E_SANDBOX_NAME is seeded by the framework from the scenario id
+# even when onboarding never completed; missing context here is a
+# framework bug, not a probe pass.
+if [[ -z "${E2E_SANDBOX_NAME:-}" ]]; then
+  context_env="${E2E_CONTEXT_DIR:-.e2e}/context.env"
+  if [[ -f "${context_env}" ]]; then
+    E2E_SANDBOX_NAME="$(awk -F= '/^E2E_SANDBOX_NAME=/{print substr($0, index($0, "=")+1); exit}' "${context_env}" | tr -d '"')"
+  fi
+fi
+if [[ -z "${E2E_SANDBOX_NAME:-}" ]]; then
+  echo "probe sandbox-absent: E2E_SANDBOX_NAME unset; framework did not seed context" >&2
+  exit 2
+fi
+
+# Two independent checks — `nemoclaw list` is the user-facing surface
+# and openshell-side listing covers cases where nemoclaw is uninstalled
+# or wedged. Either reporting the sandbox fails the probe. Output is
+# captured before matching: under `pipefail`, `list | grep -q` is false
+# whenever the lister exits non-zero, letting a present sandbox pass.
+if command -v nemoclaw >/dev/null 2>&1; then
+  if grep -qE "(^|[[:space:]])${E2E_SANDBOX_NAME}([[:space:]]|$)" <<<"$(nemoclaw list 2>/dev/null || true)"; then
+    echo "probe sandbox-absent: nemoclaw list reports sandbox '${E2E_SANDBOX_NAME}', expected absent" >&2
+    exit 1
+  fi
+fi
+if command -v openshell >/dev/null 2>&1; then
+  if grep -Fq "${E2E_SANDBOX_NAME}" <<<"$(openshell sandbox list 2>/dev/null || true)"; then
+    echo "probe sandbox-absent: openshell reports sandbox '${E2E_SANDBOX_NAME}', expected absent" >&2
+    exit 1
+  fi
+fi
+echo "probe sandbox-absent: ok"
+exit 0
diff --git a/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-running.sh b/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-running.sh
new file mode 100755
index 0000000000..2ff4d5ded3
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/probes/sandbox-running.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Probe: sandbox-running
+#
+# Asserts the sandbox declared by E2E_SANDBOX_NAME (seeded by
+# onboarding) is present in `nemoclaw list`. Mirrors the legacy
+# validation_suites/assert/sandbox-alive.sh::e2e_sandbox_assert_running
+# contract; promoted to a typed phase action so runtime suites cannot
+# silently run against an absent sandbox.
+
+set -euo pipefail
+
+# Locate the legacy assert helper relative to this script's directory.
+_THIS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SANDBOX_HELPER="$(cd "${_THIS_DIR}/../../validation_suites/assert" && pwd)/sandbox-alive.sh"
+
+[[ -f "${SANDBOX_HELPER}" ]] || {
+  echo "probe sandbox-running: legacy helper not found: ${SANDBOX_HELPER}" >&2
+  exit 1
+}
+
+# shellcheck source=/dev/null
+. "${SANDBOX_HELPER}"
+
+# Any non-zero status from the helper's assertion fails the probe with 1.
+e2e_sandbox_assert_running || exit 1
+
+echo "probe sandbox-running: ok"
+exit 0
diff --git a/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh b/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
index 69bda6c47c..fb05606494 100755
--- a/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
+++ b/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
@@ -9,7 +9,14 @@ if [[ ! -f "${E2E_CONTEXT_DIR:-}/onboard.log" ]]; then
   exit 1
 fi
 
-if grep -Eiq "preflight.*(fail|error)|docker|container|daemon|socket" "${E2E_CONTEXT_DIR}/onboard.log"; then
+# The onboarding action already completed (exit 0) for this assertion to
+# run; we only need to confirm the captured onboard.log does not contain
+# explicit preflight FAILURE markers. The previous regex matched any
+# mention of 'docker' / 'container' / 'daemon' / 'socket', which a normal
+# successful onboarding always logs. Tighten to actual failure phrases.
+if grep -Eiq \
+  "preflight[[:space:]]+(failed|error)|cannot connect to[[:space:]]+(the[[:space:]]+)?docker daemon|permission denied[[:space:]]+while trying to connect to.*docker.*sock|onboarding aborted|FATAL: docker|ERROR: docker daemon" \
+  "${E2E_CONTEXT_DIR}/onboard.log"; then
   echo "FAIL: onboarding.preflight.passed - onboard log contains preflight failure evidence"
   exit 1
 fi
diff --git a/test/e2e-scenario/runtime/coverage-report.sh b/test/e2e-scenario/runtime/coverage-report.sh
deleted file mode 100755
index 8426d0ba30..0000000000
--- a/test/e2e-scenario/runtime/coverage-report.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Render the E2E scenario coverage report as Markdown to stdout.
-#
-# Usage:
-#   bash test/e2e-scenario/runtime/coverage-report.sh > coverage.md
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
-
-TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx"
-if [[ -x "${TSX_BIN}" ]]; then
-  "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" coverage
-else
-  # CodeRabbit review items #3, #10: fall back to --no-install so we rely on
-  # the lockfile-pinned tsx rather than a network fetch, and fail closed
-  # with a clear hint if tsx is not installed.
-  if ! (cd "${REPO_ROOT}" && npx --no-install tsx "${SCRIPT_DIR}/resolver/index.ts" coverage); then
-    echo "coverage-report: tsx not available. Run 'npm ci' at the repo root to install devDependencies." >&2
-    exit 1
-  fi
-fi
diff --git a/test/e2e-scenario/runtime/lib/env.sh b/test/e2e-scenario/runtime/lib/env.sh
index ed33fb8a6a..9c33af97cc 100755
--- a/test/e2e-scenario/runtime/lib/env.sh
+++ b/test/e2e-scenario/runtime/lib/env.sh
@@ -40,8 +40,3 @@ e2e_env_trace() {
     printf '%s %s\n' "${event}" "$*" >>"${E2E_TRACE_FILE}"
   fi
 }
-
-# e2e_env_is_dry_run: true if E2E_DRY_RUN=1
-e2e_env_is_dry_run() {
-  [[ "${E2E_DRY_RUN:-0}" == "1" ]]
-}
diff --git a/test/e2e-scenario/runtime/resolver/coverage.ts b/test/e2e-scenario/runtime/resolver/coverage.ts
deleted file mode 100644
index 2a3110f40c..0000000000
--- a/test/e2e-scenario/runtime/resolver/coverage.ts
+++ /dev/null
@@ -1,170 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Render a Markdown coverage report for E2E setup scenarios.
- *
- * Design (per the simplify pass): one primary table, one row per scenario.
- * A `## Gaps` section flags scenarios without suites and expected states
- * that no scenario references. Rows are sorted deterministically for
- * stable CI diffs.
- */
-
-import type { ResolverInput } from "./load.ts";
-
-export interface CoverageReportOptions {
-  /** Optional map of scenario id -> last known run status. */
-  lastRunStatus?: Record<string, string>;
-}
-
-export function renderCoverageReport(
-  meta: ResolverInput,
-  options: CoverageReportOptions = {},
-): string {
-  const { scenarios, expectedStates } = meta;
-  const scenarioIds = Object.keys(scenarios.setup_scenarios).sort();
-  const lines: string[] = [];
-  lines.push("# E2E Setup Scenario Coverage");
-  lines.push("");
-  lines.push(
-    "_Generated from `test/e2e-scenario/nemoclaw_scenarios/{scenarios,expected-states}.yaml` and `test/e2e-scenario/validation_suites/suites.yaml`._",
-  );
-  lines.push("");
-  lines.push("## Base Scenarios");
-  lines.push("");
-  lines.push("| Base | Platform | Install | Runtime | Requirements |");
-  lines.push("|---|---|---|---|---|");
-  for (const [id, base] of Object.entries(scenarios.base_scenarios ?? {}).sort(
-    ([a], [b]) => a.localeCompare(b),
-  )) {
-    lines.push(
-      `| ${id} | ${base.platform} | ${base.install} | ${base.runtime} | ${(base.runner_requirements ?? []).join(", ") || "_none_"} |`,
-    );
-  }
-  lines.push("");
-  lines.push("## Onboarding Profiles");
-  lines.push("");
-  lines.push("| Profile | Path | Provider | Agent | Route |");
-  lines.push("|---|---|---|---|---|");
-  for (const [id, profile] of Object.entries(
-    scenarios.onboarding_profiles ?? {},
-  ).sort(([a], [b]) => a.localeCompare(b))) {
-    lines.push(
-      `| ${id} | ${profile.path ?? ""} | ${profile.provider ?? ""} | ${profile.agent ?? ""} | ${profile.inference_route ?? ""} |`,
-    );
-  }
-  lines.push("");
-  lines.push("## Test Plans");
-  lines.push("");
-  lines.push("| Plan | Base | Onboarding | Expected state | Suites |");
-  lines.push("|---|---|---|---|---|");
-  for (const [id, plan] of Object.entries(scenarios.test_plans ?? {}).sort(
-    ([a], [b]) => a.localeCompare(b),
-  )) {
-    lines.push(
-      `| ${id} | ${plan.base} | ${plan.onboarding} | ${plan.expected_state} | ${(plan.suites ?? []).join(", ") || "_(none)_"} |`,
-    );
-  }
-  lines.push("");
-  lines.push("## Suites");
-  lines.push("");
-  lines.push(`Total suites: ${Object.keys(meta.suites.suites).length}`);
-  lines.push("");
-  lines.push("## Scenarios");
-  lines.push("");
-  const hasStatus =
-    options.lastRunStatus && Object.keys(options.lastRunStatus).length > 0;
-  const header = hasStatus
-    ? "| Scenario | Platform | Install | Runtime | Onboarding | Expected state | Suites | Last run |"
-    : "| Scenario | Platform | Install | Runtime | Onboarding | Expected state | Suites |";
-  const sep = hasStatus
-    ? "|---|---|---|---|---|---|---|---|"
-    : "|---|---|---|---|---|---|---|";
-  lines.push(header);
-  lines.push(sep);
-  for (const id of scenarioIds) {
-    const sc = scenarios.setup_scenarios[id];
-    if (!sc) continue;
-    const suites = sc.suites ?? [];
-    const dimensions = sc.dimensions;
-    const suiteCell = suites.length === 0 ? "_(none)_" : suites.join(", ");
-    const row = [
-      id,
-      dimensions?.platform ?? "",
-      dimensions?.install ?? "",
-      dimensions?.runtime ?? "",
-      dimensions?.onboarding ?? "",
-      sc.expected_state ?? "",
-      suiteCell,
-    ];
-    if (hasStatus) {
-      row.push(options.lastRunStatus?.[id] ?? "_unknown_");
-    }
-    lines.push(`| ${row.join(" | ")} |`);
-  }
-  lines.push("");
-  // Gaps section.
-  const scenarioEntries = scenarioIds.flatMap((id) => {
-    const scenario = scenarios.setup_scenarios[id];
-    return scenario ? [{ id, scenario }] : [];
-  });
-  const scenariosWithoutSuites = scenarioEntries
-    .filter(({ scenario }) => (scenario.suites ?? []).length === 0)
-    .map(({ id }) => id);
-  const skippedScenarios = scenarioEntries
-    .map(({ id, scenario }) => ({
-      id,
-      skips: scenario.skipped_capabilities ?? [],
-    }))
-    .filter(({ skips }) => skips.length > 0);
-  const referencedStates = new Set<string>(
-    scenarioEntries
-      .map(({ scenario }) => scenario.expected_state)
-      .filter((state): state is string => Boolean(state)),
-  );
-  const unusedStates = Object.keys(expectedStates.expected_states)
-    .filter((s) => !referencedStates.has(s))
-    .sort();
-
-  lines.push("## Gaps");
-  lines.push("");
-  if (
-    scenariosWithoutSuites.length === 0 &&
-    unusedStates.length === 0 &&
-    skippedScenarios.length === 0
-  ) {
-    lines.push("_No gaps detected._");
-  } else {
-    if (scenariosWithoutSuites.length > 0) {
-      lines.push("### Scenarios with no suites");
-      lines.push("");
-      for (const id of scenariosWithoutSuites.sort()) {
-        lines.push(`- \`${id}\`: no suites configured`);
-      }
-      lines.push("");
-    }
-    if (skippedScenarios.length > 0) {
-      lines.push("### Explicitly skipped capabilities");
-      lines.push("");
-      for (const { id, skips } of skippedScenarios) {
-        for (const skip of skips) {
-          const suites =
-            Array.isArray(skip.suites) && skip.suites.length > 0
-              ? ` Suites: ${skip.suites.map((suite) => `\`${suite}\``).join(", ")}.`
-              : "";
-          lines.push(`- \`${id}\` / \`${skip.id}\`: ${skip.reason}${suites}`);
-        }
-      }
-      lines.push("");
-    }
-    if (unusedStates.length > 0) {
-      lines.push("### Unused expected states");
-      lines.push("");
-      for (const id of unusedStates) {
-        lines.push(`- \`${id}\`: no scenario references this expected state`);
-      }
-      lines.push("");
-    }
-  }
-  return lines.join("\n");
-}
diff --git a/test/e2e-scenario/runtime/resolver/expected-failure.ts b/test/e2e-scenario/runtime/resolver/expected-failure.ts
deleted file mode 100644
index 07901e5e15..0000000000
--- a/test/e2e-scenario/runtime/resolver/expected-failure.ts
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Expected-failure matcher.
- *
- * Negative scenarios declare an `expected_failure` contract on their
- * expected state. The runner captures the failed setup's log plus a small
- * side-effect inventory (sandbox-created, gateway-started, credentials-written)
- * and asks this module whether the observation matches the contract.
- *
- * The contract has four parts:
- *   - phase: which setup stage produced the failure (informational; the
- *     runner is responsible for invoking the matcher only when that phase
- *     actually ran).
- *   - error_class: stable identifier for the failure mode.
- *   - message_pattern: regex applied to the captured log when present.
- *   - forbidden_side_effects: effects that MUST NOT be observed.
- *
- * Match result is structured (`ExpectedFailureReport`) so the runner can
- * write `expected-vs-actual.json` and surface a useful diff in CI.
- */
-
-import { compileMessagePattern } from "./load.ts";
-import type {
-  ExpectedFailure,
-  ExpectedFailurePhase,
-  ExpectedFailureErrorClass,
-  ExpectedFailureSideEffect,
-} from "./schema.ts";
-
-export interface ObservedFailure {
-  /** Phase the runner attempted; matched against `expected_failure.phase`. */
-  phase: ExpectedFailurePhase;
-  /**
-   * Structured reason if the runner could derive one (preferred). When
-   * absent, matching falls back to log-content heuristics in the runner.
-   */
-  error_class?: ExpectedFailureErrorClass;
-  /** Captured setup log; matched against `expected_failure.message_pattern`. */
-  log: string;
-  /**
-   * Side effects the runner positively observed after the failure. Each
-   * effect in `expected_failure.forbidden_side_effects` is checked against
-   * this set; presence is a failure.
-   */
-  observed_side_effects: ExpectedFailureSideEffect[];
-}
-
-export interface ExpectedFailureCheck {
-  name: "phase" | "error_class" | "message_pattern" | "forbidden_side_effects";
-  ok: boolean;
-  expected: string;
-  actual: string;
-  message?: string;
-}
-
-export interface ExpectedFailureReport {
-  ok: boolean;
-  expected: ExpectedFailure;
-  observed: ObservedFailure;
-  checks: ExpectedFailureCheck[];
-}
-
-export function matchExpectedFailure(
-  expected: ExpectedFailure,
-  observed: ObservedFailure,
-): ExpectedFailureReport {
-  const checks: ExpectedFailureCheck[] = [];
-
-  const phaseOk = expected.phase === observed.phase;
-  checks.push({
-    name: "phase",
-    ok: phaseOk,
-    expected: expected.phase,
-    actual: observed.phase,
-    message: phaseOk
-      ? undefined
-      : `phase mismatch: expected '${expected.phase}' but observed '${observed.phase}'`,
-  });
-
-  if (observed.error_class !== undefined) {
-    const classOk = expected.error_class === observed.error_class;
-    checks.push({
-      name: "error_class",
-      ok: classOk,
-      expected: expected.error_class,
-      actual: observed.error_class,
-      message: classOk
-        ? undefined
-        : `error_class mismatch: expected '${expected.error_class}' but observed '${observed.error_class}'`,
-    });
-  } else {
-    // No structured class from the runner; defer to message_pattern as
-    // the discriminator. Record a SKIPPED entry so the report makes it
-    // obvious that the class was not asserted structurally.
-    checks.push({
-      name: "error_class",
-      ok: true,
-      expected: expected.error_class,
-      actual: "<unobserved>",
-      message: "skipped: runner did not derive a structured error_class",
-    });
-  }
-
-  if (expected.message_pattern) {
-    let regex: RegExp;
-    try {
-      regex = compileMessagePattern(expected.message_pattern);
-    } catch (err) {
-      checks.push({
-        name: "message_pattern",
-        ok: false,
-        expected: expected.message_pattern,
-        actual: "<invalid regex>",
-        message: `message_pattern is not a valid regex: ${(err as Error).message}`,
-      });
-      return finalize(expected, observed, checks);
-    }
-    const ok = regex.test(observed.log);
-    checks.push({
-      name: "message_pattern",
-      ok,
-      expected: expected.message_pattern,
-      actual: ok ? "<match>" : "<no match>",
-      message: ok
-        ? undefined
-        : `message_pattern '${expected.message_pattern}' did not match captured log`,
-    });
-  }
-
-  if (expected.forbidden_side_effects?.length) {
-    const observedSet = new Set(observed.observed_side_effects);
-    const found = expected.forbidden_side_effects.filter((e) => observedSet.has(e));
-    const ok = found.length === 0;
-    checks.push({
-      name: "forbidden_side_effects",
-      ok,
-      expected: expected.forbidden_side_effects.join(","),
-      actual: observed.observed_side_effects.join(",") || "<none>",
-      message: ok
-        ? undefined
-        : `forbidden side effects observed after failure: ${found.join(", ")}`,
-    });
-  }
-
-  return finalize(expected, observed, checks);
-}
-
-function finalize(
-  expected: ExpectedFailure,
-  observed: ObservedFailure,
-  checks: ExpectedFailureCheck[],
-): ExpectedFailureReport {
-  return { ok: checks.every((c) => c.ok), expected, observed, checks };
-}
-
-export function formatExpectedFailureReport(report: ExpectedFailureReport): string {
-  const lines: string[] = [];
-  lines.push(`expected-failure: ${report.ok ? "OK" : "FAILED"}`);
-  for (const c of report.checks) {
-    const status = c.ok ? "PASS" : "FAIL";
-    lines.push(`  ${status} ${c.name} expected=${c.expected} actual=${c.actual}`);
-    if (c.message) lines.push(`       ${c.message}`);
-  }
-  return lines.join("\n");
-}
diff --git a/test/e2e-scenario/runtime/resolver/index.ts b/test/e2e-scenario/runtime/resolver/index.ts
deleted file mode 100644
index 972fd073db..0000000000
--- a/test/e2e-scenario/runtime/resolver/index.ts
+++ /dev/null
@@ -1,354 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * CLI entrypoint for the E2E scenario resolver.
- *
- * Usage:
- *   tsx test/e2e-scenario/runtime/resolver/index.ts plan <scenario-id> [--context-dir <path>]
- *   tsx test/e2e-scenario/runtime/resolver/index.ts validate-state <scenario-id> [--probes-from-state]
- *   tsx test/e2e-scenario/runtime/resolver/index.ts match-failure <scenario-id> \
- *        --log <path> --observed-phase <phase> \
- *        [--observed-error-class <class>] [--observed-side-effects <csv>]
- *
- * Writes `plan.json`, `expected-state-report.json`, or `expected-vs-actual.json`
- * under the context dir (default `.e2e/`). Exit codes:
- *   0 success, 2 usage error, 1 resolution error,
- *   3 expected-state mismatch, 4 expected-failure mismatch.
- */
-
-import fs from "node:fs";
-import path from "node:path";
-import { fileURLToPath } from "node:url";
-
-import { loadMetadataFromDir } from "./load.ts";
-import { resolveScenario, formatPlan } from "./plan.ts";
-import {
-  validateExpectedState,
-  formatReport,
-  type ProbeResults,
-  type ProbeValue,
-} from "./validator.ts";
-import { renderCoverageReport } from "./coverage.ts";
-import {
-  matchExpectedFailure,
-  formatExpectedFailureReport,
-  type ObservedFailure,
-} from "./expected-failure.ts";
-import {
-  EXPECTED_FAILURE_PHASES,
-  EXPECTED_FAILURE_ERROR_CLASSES,
-  EXPECTED_FAILURE_SIDE_EFFECTS,
-  type ExpectedFailurePhase,
-  type ExpectedFailureErrorClass,
-  type ExpectedFailureSideEffect,
-} from "./schema.ts";
-
-function parseArgs(argv: string[]): {
-  command: string;
-  scenarioId?: string;
-  contextDir: string;
-  metadataDir: string;
-  probesFromState: boolean;
-  logPath?: string;
-  observedPhase?: string;
-  observedErrorClass?: string;
-  observedSideEffects?: string;
-} {
-  const args = argv.slice(2);
-  const command = args.shift() ?? "";
-  let scenarioId: string | undefined;
-  let contextDir = process.env.E2E_CONTEXT_DIR ?? ".e2e";
-  let probesFromState = false;
-  let logPath: string | undefined;
-  let observedPhase: string | undefined;
-  let observedErrorClass: string | undefined;
-  let observedSideEffects: string | undefined;
-  const scriptDir = path.dirname(fileURLToPath(import.meta.url));
-  // resolver/ lives under test/e2e-scenario/runtime/, so the E2E metadata root
-  // (which loadMetadataFromDir resolves further into nemoclaw_scenarios/
-  // and validation_suites/) is two levels up.
-  let metadataDir = path.resolve(scriptDir, "..", "..");
-  while (args.length > 0) {
-    const a = args.shift();
-    if (a === "--context-dir") {
-      const v = args.shift();
-      if (!v) throw new Error("--context-dir requires a value");
-      contextDir = v;
-    } else if (a === "--metadata-dir") {
-      const v = args.shift();
-      if (!v) throw new Error("--metadata-dir requires a value");
-      metadataDir = v;
-    } else if (a === "--probes-from-state") {
-      // Dry-run affordance: seed probes from the expected state itself so
-      // the validator can exercise its logic without real probe values.
-      // Non-dry-run callers MUST NOT pass this flag (CodeRabbit review
-      // item #9); the resolver will fail closed when required probe keys
-      // are missing without this flag.
-      probesFromState = true;
-    } else if (a === "--log") {
-      const v = args.shift();
-      if (!v) throw new Error("--log requires a value");
-      logPath = v;
-    } else if (a === "--observed-phase") {
-      const v = args.shift();
-      if (!v) throw new Error("--observed-phase requires a value");
-      observedPhase = v;
-    } else if (a === "--observed-error-class") {
-      const v = args.shift();
-      if (!v) throw new Error("--observed-error-class requires a value");
-      observedErrorClass = v;
-    } else if (a === "--observed-side-effects") {
-      const v = args.shift();
-      if (v === undefined) throw new Error("--observed-side-effects requires a value");
-      observedSideEffects = v;
-    } else if (a && !a.startsWith("--") && !scenarioId) {
-      scenarioId = a;
-    } else if (a === "--help" || a === "-h") {
-      // ignore; help handled by caller
-    } else if (a) {
-      throw new Error(`unexpected argument: ${a}`);
-    }
-  }
-  return {
-    command,
-    scenarioId,
-    contextDir,
-    metadataDir,
-    probesFromState,
-    logPath,
-    observedPhase,
-    observedErrorClass,
-    observedSideEffects,
-  };
-}
-
-function main(): number {
-  let parsed: ReturnType<typeof parseArgs>;
-  try {
-    parsed = parseArgs(process.argv);
-  } catch (err) {
-    process.stderr.write(`resolver: ${(err as Error).message}\n`);
-    return 2;
-  }
-  const { command, scenarioId, contextDir, metadataDir } = parsed;
-  if (command === "coverage") {
-    try {
-      const meta = loadMetadataFromDir(metadataDir);
-      const md = renderCoverageReport(meta);
-      process.stdout.write(`${md}\n`);
-      return 0;
-    } catch (err) {
-      process.stderr.write(`resolver: ${(err as Error).message}\n`);
-      return 1;
-    }
-  }
-  if (!scenarioId) {
-    process.stderr.write("resolver: missing scenario id\n");
-    return 2;
-  }
-  try {
-    const meta = loadMetadataFromDir(metadataDir);
-    const plan = resolveScenario(scenarioId, meta);
-    if (command === "plan") {
-      fs.mkdirSync(contextDir, { recursive: true });
-      const planJsonPath = path.join(contextDir, "plan.json");
-      fs.writeFileSync(planJsonPath, `${JSON.stringify(plan, null, 2)}\n`);
-      process.stdout.write(`${formatPlan(plan)}\n`);
-      process.stdout.write(`plan.json: ${planJsonPath}\n`);
-      return 0;
-    }
-    if (command === "validate-state") {
-      // CodeRabbit review item #9: only self-seed probes when the caller
-      // explicitly opts in (dry-run / test contexts). Non-dry-run callers
-      // without real probes wired should fail, not quietly self-validate.
-      const probes = parsed.probesFromState
-        ? probesFromEnvAndState(plan.expected_state.config)
-        : probesFromEnvOnly();
-      const report = validateExpectedState({
-        stateId: plan.expected_state.id,
-        state: plan.expected_state.config,
-        probes,
-        suites: plan.suites,
-      });
-      fs.mkdirSync(contextDir, { recursive: true });
-      const reportPath = path.join(contextDir, "expected-state-report.json");
-      fs.writeFileSync(reportPath, `${JSON.stringify(report, null, 2)}\n`);
-      process.stdout.write(`${formatReport(report)}\n`);
-      process.stdout.write(`expected-state-report: ${reportPath}\n`);
-      return report.ok ? 0 : 3;
-    }
-    if (command === "match-failure") {
-      if (!plan.expected_failure) {
-        process.stderr.write(
-          `resolver: scenario '${scenarioId}' has no expected_failure block; nothing to match\n`,
-        );
-        return 2;
-      }
-      if (!parsed.observedPhase) {
-        process.stderr.write("resolver: match-failure requires --observed-phase\n");
-        return 2;
-      }
-      if (!EXPECTED_FAILURE_PHASES.includes(parsed.observedPhase as ExpectedFailurePhase)) {
-        process.stderr.write(
-          `resolver: --observed-phase must be one of: ${EXPECTED_FAILURE_PHASES.join(", ")}\n`,
-        );
-        return 2;
-      }
-      let observedErrorClass: ExpectedFailureErrorClass | undefined;
-      if (parsed.observedErrorClass !== undefined && parsed.observedErrorClass !== "") {
-        if (
-          !EXPECTED_FAILURE_ERROR_CLASSES.includes(
-            parsed.observedErrorClass as ExpectedFailureErrorClass,
-          )
-        ) {
-          process.stderr.write(
-            `resolver: --observed-error-class must be one of: ${EXPECTED_FAILURE_ERROR_CLASSES.join(", ")}\n`,
-          );
-          return 2;
-        }
-        observedErrorClass = parsed.observedErrorClass as ExpectedFailureErrorClass;
-      }
-      const observedSideEffects: ExpectedFailureSideEffect[] = (parsed.observedSideEffects ?? "")
-        .split(",")
-        .map((s) => s.trim())
-        .filter(Boolean)
-        .map((s) => {
-          if (!EXPECTED_FAILURE_SIDE_EFFECTS.includes(s as ExpectedFailureSideEffect)) {
-            throw new Error(
-              `--observed-side-effects entry '${s}' must be one of: ${EXPECTED_FAILURE_SIDE_EFFECTS.join(", ")}`,
-            );
-          }
-          return s as ExpectedFailureSideEffect;
-        });
-      if (!parsed.logPath) {
-        process.stderr.write("resolver: match-failure requires --log\n");
-        return 2;
-      }
-      const log = fs.readFileSync(parsed.logPath, "utf8");
-      const observed: ObservedFailure = {
-        phase: parsed.observedPhase as ExpectedFailurePhase,
-        error_class: observedErrorClass,
-        log,
-        observed_side_effects: observedSideEffects,
-      };
-      const report = matchExpectedFailure(plan.expected_failure, observed);
-      // Exclude the (potentially large) log from the JSON artifact so
-      // expected-vs-actual.json stays human-readable; the log is already
-      // captured separately under the context dir.
-      const artifact = {
-        ok: report.ok,
-        expected: report.expected,
-        observed: {
-          phase: report.observed.phase,
-          error_class: report.observed.error_class,
-          observed_side_effects: report.observed.observed_side_effects,
-        },
-        checks: report.checks,
-      };
-      fs.mkdirSync(contextDir, { recursive: true });
-      const reportPath = path.join(contextDir, "expected-vs-actual.json");
-      fs.writeFileSync(reportPath, `${JSON.stringify(artifact, null, 2)}\n`);
-      process.stdout.write(`${formatExpectedFailureReport(report)}\n`);
-      process.stdout.write(`expected-vs-actual: ${reportPath}\n`);
-      return report.ok ? 0 : 4;
-    }
-    process.stderr.write(
-      `resolver: unknown command '${command}' (expected: plan|validate-state|match-failure <scenario-id>)\n`,
-    );
-    return 2;
-  } catch (err) {
-    process.stderr.write(`resolver: ${(err as Error).message}\n`);
-    return 1;
-  }
-}
-
-function flattenState(
-  obj: unknown,
-  prefix: string,
-  out: Record<string, ProbeValue>,
-): void {
-  if (obj === null || typeof obj !== "object") {
-    out[prefix] = obj as ProbeValue;
-    return;
-  }
-  for (const [k, v] of Object.entries(obj as Record<string, unknown>)) {
-    const next = prefix ? `${prefix}.${k}` : k;
-    if (v !== null && typeof v === "object" && !Array.isArray(v)) {
-      flattenState(v, next, out);
-    } else {
-      out[next] = v as ProbeValue;
-    }
-  }
-}
-
-/**
- * Read probe overrides from the environment without seeding from state.
- *
- * Used in non-dry-run mode: the validator then reports a concrete failure
- * for any expected-state key that has no corresponding probe value.
- */
-function probesFromEnvOnly(): ProbeResults {
-  const probes: ProbeResults = {};
-  // 1. Prefix-based overrides: E2E_PROBE_OVERRIDE_<KEY>=<value> where <KEY>
-  //    maps underscores to dots (e.g. GATEWAY_HEALTH -> gateway.health).
-  //    This works for simple keys but cannot express underscores inside a
-  //    single segment.
-  const prefix = "E2E_PROBE_OVERRIDE_";
-  for (const [envKey, value] of Object.entries(process.env)) {
-    if (!envKey.startsWith(prefix) || value === undefined) continue;
-    const key = envKey.slice(prefix.length).toLowerCase().replace(/_/g, ".");
-    probes[key] = coerceProbeValue(value);
-  }
-  // 2. JSON escape hatch for keys with embedded underscores (e.g.
-  //    `security.policy_engine`). Later overrides win over (1).
-  const overridesJson = process.env.E2E_PROBE_OVERRIDES_JSON;
-  if (overridesJson) {
-    try {
-      const parsed = JSON.parse(overridesJson);
-      if (parsed && typeof parsed === "object") {
-        for (const [k, v] of Object.entries(parsed as Record<string, unknown>)) {
-          probes[k] = typeof v === "string" ? coerceProbeValue(v) : (v as ProbeValue);
-        }
-      }
-    } catch (err) {
-      process.stderr.write(
-        `resolver: E2E_PROBE_OVERRIDES_JSON parse error: ${(err as Error).message}\n`,
-      );
-    }
-  }
-  return probes;
-}
-
-/**
- * Build a probe results map.
- *
- * In dry-run / test mode we do not probe real services; instead we default
- * every expected-state leaf to its declared value so the validator passes,
- * and then allow targeted overrides via E2E_PROBE_OVERRIDE_<KEY>=value.
- * This lets tests simulate specific failure modes without spinning up a
- * real gateway or sandbox.
- */
-function probesFromEnvAndState(state: unknown): ProbeResults {
-  const probes: ProbeResults = {};
-  flattenState(state, "", probes);
-  const prefix = "E2E_PROBE_OVERRIDE_";
-  for (const [envKey, value] of Object.entries(process.env)) {
-    if (!envKey.startsWith(prefix) || value === undefined) continue;
-    const key = envKey
-      .slice(prefix.length)
-      .toLowerCase()
-      .replace(/_/g, ".");
-    probes[key] = coerceProbeValue(value);
-  }
-  return probes;
-}
-
-function coerceProbeValue(v: string): ProbeValue {
-  if (v === "true") return true;
-  if (v === "false") return false;
-  if (/^-?\d+$/.test(v)) return parseInt(v, 10);
-  return v;
-}
-
-process.exit(main());
diff --git a/test/e2e-scenario/runtime/resolver/js-yaml.d.ts b/test/e2e-scenario/runtime/resolver/js-yaml.d.ts
deleted file mode 100644
index 6ea52a82de..0000000000
--- a/test/e2e-scenario/runtime/resolver/js-yaml.d.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-// Local type shim for js-yaml. The runtime package ships without
-// TypeScript declarations; we only use `load` for YAML parsing.
-declare module "js-yaml" {
-  export function load(input: string): unknown;
-  export function dump(obj: unknown, opts?: Record<string, unknown>): string;
-  const _default: { load: typeof load; dump: typeof dump };
-  export default _default;
-}
diff --git a/test/e2e-scenario/runtime/resolver/load.ts b/test/e2e-scenario/runtime/resolver/load.ts
deleted file mode 100644
index 9c8dc3991b..0000000000
--- a/test/e2e-scenario/runtime/resolver/load.ts
+++ /dev/null
@@ -1,360 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Load and lightly-validate the E2E metadata files.
- *
- * The full reference check happens in `plan.ts` during scenario resolution.
- * This module only asserts that each file exists and has the required
- * top-level sections so callers get a clear error before touching scenarios.
- */
-
-import fs from "node:fs";
-import path from "node:path";
-import yaml from "js-yaml";
-
-import {
-  EXPECTED_FAILURE_ERROR_CLASSES,
-  EXPECTED_FAILURE_PHASES,
-  EXPECTED_FAILURE_SIDE_EFFECTS,
-} from "./schema.ts";
-import type {
-  ScenariosFile,
-  ExpectedStatesFile,
-  SuitesFile,
-  ExpectedFailurePhase,
-  ExpectedFailureErrorClass,
-  ExpectedFailureSideEffect,
-} from "./schema.ts";
-
-export interface ResolverInput {
-  scenarios: ScenariosFile;
-  expectedStates: ExpectedStatesFile;
-  suites: SuitesFile;
-  /** Optional source dir, used for resolving suite script paths. */
-  sourceDir?: string;
-}
-
-function readYaml(p: string): unknown {
-  const raw = fs.readFileSync(p, "utf8");
-  return yaml.load(raw);
-}
-
-function ensureObject(doc: unknown, file: string): Record<string, unknown> {
-  if (!doc || typeof doc !== "object" || Array.isArray(doc)) {
-    throw new Error(`metadata file ${file} must parse to a YAML mapping`);
-  }
-  return doc as Record<string, unknown>;
-}
-
-function requireSections(
-  doc: Record<string, unknown>,
-  file: string,
-  sections: string[],
-): void {
-  for (const s of sections) {
-    if (!(s in doc)) {
-      throw new Error(`metadata file ${file} is missing required section: ${s}`);
-    }
-  }
-}
-
-/**
- * Compile a YAML-authored `message_pattern` into a JS `RegExp`. RE2-style
- * inline flag prefixes (e.g. `(?i)`, `(?ims)`) are stripped and converted
- * to the corresponding `RegExp` flags so authors can write the same shape
- * the issue body shows without worrying about the underlying engine.
- *
- * Exported so the matcher uses identical compilation rules; throws on any
- * unsupported flag character or on an invalid pattern.
- */
-export function compileMessagePattern(pattern: string): RegExp {
-  let body = pattern;
-  let flags = "";
-  const inlineFlagMatch = /^\(\?([a-zA-Z]+)\)/.exec(pattern);
-  if (inlineFlagMatch) {
-    const allowed = new Set(["i", "m", "s"]);
-    for (const ch of inlineFlagMatch[1]) {
-      if (!allowed.has(ch)) {
-        throw new Error(`unsupported inline regex flag '(?${inlineFlagMatch[1]})'; allowed: i, m, s`);
-      }
-      if (!flags.includes(ch)) flags += ch;
-    }
-    body = pattern.slice(inlineFlagMatch[0].length);
-  }
-  return new RegExp(body, flags);
-}
-
-/**
- * Validate an `expected_failure` block. `partial` controls whether every
- * required field must be present (state-level blocks: yes; scenario-level
- * override: no, since absent fields fall back to the state).
- */
-function validateExpectedFailureBlock(
-  block: unknown,
-  origin: string,
-  opts: { partial: boolean },
-): void {
-  if (!block || typeof block !== "object" || Array.isArray(block)) {
-    throw new Error(`${origin}.expected_failure must be a mapping`);
-  }
-  const b = block as Record<string, unknown>;
-  if (b.phase !== undefined) {
-    if (typeof b.phase !== "string" || !EXPECTED_FAILURE_PHASES.includes(b.phase as ExpectedFailurePhase)) {
-      throw new Error(
-        `${origin}.expected_failure.phase must be one of: ${EXPECTED_FAILURE_PHASES.join(", ")}`,
-      );
-    }
-  } else if (!opts.partial) {
-    throw new Error(`${origin}.expected_failure.phase is required`);
-  }
-  if (b.error_class !== undefined) {
-    if (
-      typeof b.error_class !== "string" ||
-      !EXPECTED_FAILURE_ERROR_CLASSES.includes(b.error_class as ExpectedFailureErrorClass)
-    ) {
-      throw new Error(
-        `${origin}.expected_failure.error_class must be one of: ${EXPECTED_FAILURE_ERROR_CLASSES.join(", ")}`,
-      );
-    }
-  } else if (!opts.partial) {
-    throw new Error(`${origin}.expected_failure.error_class is required`);
-  }
-  if (b.message_pattern !== undefined && typeof b.message_pattern !== "string") {
-    throw new Error(`${origin}.expected_failure.message_pattern must be a string`);
-  }
-  if (typeof b.message_pattern === "string") {
-    try {
-      compileMessagePattern(b.message_pattern);
-    } catch (err) {
-      throw new Error(
-        `${origin}.expected_failure.message_pattern is not a valid regex: ${(err as Error).message}`,
-      );
-    }
-  }
-  if (b.forbidden_side_effects !== undefined) {
-    if (!Array.isArray(b.forbidden_side_effects)) {
-      throw new Error(`${origin}.expected_failure.forbidden_side_effects must be a list`);
-    }
-    for (const effect of b.forbidden_side_effects) {
-      if (
-        typeof effect !== "string" ||
-        !EXPECTED_FAILURE_SIDE_EFFECTS.includes(effect as ExpectedFailureSideEffect)
-      ) {
-        throw new Error(
-          `${origin}.expected_failure.forbidden_side_effects entry '${String(effect)}' must be one of: ${EXPECTED_FAILURE_SIDE_EFFECTS.join(", ")}`,
-        );
-      }
-    }
-  }
-  const known = new Set(["phase", "error_class", "message_pattern", "forbidden_side_effects"]);
-  for (const k of Object.keys(b)) {
-    if (!known.has(k)) {
-      throw new Error(`${origin}.expected_failure has unknown key '${k}'`);
-    }
-  }
-}
-
-function validateScenarios(doc: Record<string, unknown>, file: string): ScenariosFile {
-  requireSections(doc, file, [
-    "platforms",
-    "installs",
-    "runtimes",
-    "onboarding",
-    "setup_scenarios",
-  ]);
-  const setup = doc.setup_scenarios as Record<string, unknown>;
-  for (const [id, entry] of Object.entries(setup)) {
-    if (!entry || typeof entry !== "object") {
-      throw new Error(`scenario ${id} must be a mapping`);
-    }
-    const e = entry as Record<string, unknown>;
-    if ("expected_states" in e) {
-      throw new Error(
-        `scenario ${id} uses array-form 'expected_states'; use singular 'expected_state'`,
-      );
-    }
-    if (typeof e.alias_for_plan === "string") {
-      continue;
-    }
-    if (typeof e.expected_state !== "string") {
-      throw new Error(`scenario ${id} must declare a string 'expected_state'`);
-    }
-    if (!Array.isArray(e.suites)) {
-      throw new Error(`scenario ${id} must declare a list of 'suites'`);
-    }
-    if ("runner_requirements" in e) {
-      if (
-        !Array.isArray(e.runner_requirements) ||
-        e.runner_requirements.some((requirement) => typeof requirement !== "string")
-      ) {
-        throw new Error(`scenario ${id}.runner_requirements must be a list of strings`);
-      }
-    }
-    if ("expected_failure" in e) {
-      validateExpectedFailureBlock(e.expected_failure, `scenario ${id}`, { partial: true });
-    }
-    if ("skipped_capabilities" in e) {
-      if (
-        !Array.isArray(e.skipped_capabilities) ||
-        e.skipped_capabilities.some((skip) => {
-          if (!skip || typeof skip !== "object" || Array.isArray(skip)) return true;
-          const s = skip as Record<string, unknown>;
-          return (
-            typeof s.id !== "string" ||
-            typeof s.reason !== "string" ||
-            ("suites" in s && (!Array.isArray(s.suites) || s.suites.some((suite) => typeof suite !== "string")))
-          );
-        })
-      ) {
-        throw new Error(`scenario ${id}.skipped_capabilities must list {id, reason, suites?}`);
-      }
-    }
-    const dims = e.dimensions as Record<string, unknown> | undefined;
-    if (!dims) {
-      throw new Error(`scenario ${id} must declare 'dimensions'`);
-    }
-    for (const key of ["platform", "install", "runtime", "onboarding"]) {
-      if (typeof dims[key] !== "string") {
-        throw new Error(`scenario ${id}.dimensions.${key} must be a string`);
-      }
-    }
-    const platformId = dims.platform as string;
-    const platform = (doc.platforms as Record<string, Record<string, unknown> | undefined>)[
-      platformId
-    ];
-    const requiresExplicitRunner =
-      platform?.execution_target === "remote" ||
-      platform?.os === "macos" ||
-      platform?.os === "wsl" ||
-      platform?.gpu !== undefined ||
-      platform?.hardware !== undefined;
-    if (
-      requiresExplicitRunner &&
-      (!Array.isArray(e.runner_requirements) || e.runner_requirements.length === 0)
-    ) {
-      throw new Error(`scenario ${id} must declare runner_requirements for platform ${platformId}`);
-    }
-  }
-  return doc as unknown as ScenariosFile;
-}
-
-function validateExpectedStates(
-  doc: Record<string, unknown>,
-  file: string,
-): ExpectedStatesFile {
-  requireSections(doc, file, ["expected_states"]);
-  const rawStates = doc.expected_states;
-  if (!rawStates || typeof rawStates !== "object" || Array.isArray(rawStates)) {
-    throw new Error(`metadata file ${file} section 'expected_states' must be a mapping`);
-  }
-  const states = rawStates as Record<string, unknown>;
-  for (const [id, entry] of Object.entries(states)) {
-    if (!entry || typeof entry !== "object") {
-      throw new Error(`expected_state ${id} must be a mapping`);
-    }
-    const e = entry as Record<string, unknown>;
-    if ("expected_failure" in e) {
-      validateExpectedFailureBlock(e.expected_failure, `expected_state ${id}`, { partial: false });
-    }
-  }
-  return doc as unknown as ExpectedStatesFile;
-}
-
-function validateSuites(doc: Record<string, unknown>, file: string): SuitesFile {
-  requireSections(doc, file, ["suites"]);
-  const suites = doc.suites as Record<string, unknown>;
-  for (const [id, entry] of Object.entries(suites)) {
-    if (!entry || typeof entry !== "object") {
-      throw new Error(`suite ${id} must be a mapping`);
-    }
-    const e = entry as Record<string, unknown>;
-    if (!Array.isArray(e.steps)) {
-      throw new Error(`suite ${id} must declare a 'steps' array`);
-    }
-    for (const step of e.steps) {
-      if (!step || typeof step !== "object") {
-        throw new Error(`suite ${id} has a non-mapping step`);
-      }
-      const s = step as Record<string, unknown>;
-      if (typeof s.id !== "string" || typeof s.script !== "string") {
-        throw new Error(`suite ${id} has an invalid step (requires string id and script)`);
-      }
-    }
-  }
-  return doc as unknown as SuitesFile;
-}
-
-/**
- * Resolve the concrete on-disk locations of the three metadata files
- * given the E2E root directory (`test/e2e/`).
- *
- * Post-restructure layout:
- *   <e2e-root>/nemoclaw_scenarios/scenarios.yaml
- *   <e2e-root>/nemoclaw_scenarios/expected-states.yaml
- *   <e2e-root>/validation_suites/suites.yaml
- *
- * For backward compatibility (and for tests that synthesise a flat
- * fixture directory) we also accept a directory that already contains
- * all three YAML files side by side.
- */
-function resolveMetadataPaths(dir: string): {
-  scenarios: string;
-  states: string;
-  suites: string;
-} {
-  const flatScenarios = path.join(dir, "scenarios.yaml");
-  const flatStates = path.join(dir, "expected-states.yaml");
-  const flatSuites = path.join(dir, "suites.yaml");
-  if (
-    fs.existsSync(flatScenarios) &&
-    fs.existsSync(flatStates) &&
-    fs.existsSync(flatSuites)
-  ) {
-    return { scenarios: flatScenarios, states: flatStates, suites: flatSuites };
-  }
-  return {
-    scenarios: path.join(dir, "nemoclaw_scenarios", "scenarios.yaml"),
-    states: path.join(dir, "nemoclaw_scenarios", "expected-states.yaml"),
-    suites: path.join(dir, "validation_suites", "suites.yaml"),
-  };
-}
-
-export function loadMetadataFromDir(dir: string): ResolverInput {
-  const { scenarios: scenariosPath, states: statesPath, suites: suitesPath } =
-    resolveMetadataPaths(dir);
-  const scenarios = validateScenarios(
-    ensureObject(readYaml(scenariosPath), scenariosPath),
-    scenariosPath,
-  );
-  const expectedStates = validateExpectedStates(
-    ensureObject(readYaml(statesPath), statesPath),
-    statesPath,
-  );
-  const suites = validateSuites(
-    ensureObject(readYaml(suitesPath), suitesPath),
-    suitesPath,
-  );
-  return { scenarios, expectedStates, suites, sourceDir: dir };
-}
-
-export function loadMetadataFromObjects(input: {
-  scenarios: object;
-  expectedStates: object;
-  suites: object;
-  sourceDir?: string;
-}): ResolverInput {
-  const scenarios = validateScenarios(
-    ensureObject(input.scenarios, "<scenarios>"),
-    "<scenarios>",
-  );
-  const expectedStates = validateExpectedStates(
-    ensureObject(input.expectedStates, "<expected-states>"),
-    "<expected-states>",
-  );
-  const suites = validateSuites(
-    ensureObject(input.suites, "<suites>"),
-    "<suites>",
-  );
-  return { scenarios, expectedStates, suites, sourceDir: input.sourceDir };
-}
diff --git a/test/e2e-scenario/runtime/resolver/plan.ts b/test/e2e-scenario/runtime/resolver/plan.ts
deleted file mode 100644
index c20350eaed..0000000000
--- a/test/e2e-scenario/runtime/resolver/plan.ts
+++ /dev/null
@@ -1,256 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Resolve a setup scenario into a concrete, fully-referenced execution plan.
- *
- * The resolver:
- *   1. looks up the scenario by id,
- *   2. resolves each dimension profile,
- *   3. resolves the expected state,
- *   4. resolves each suite definition,
- *   5. validates each suite's `requires_state` against the scenario's expected
- *      state (fail-fast if any key is missing or has an incompatible value).
- *
- * The resulting `ResolvedPlan` is serializable to JSON and forms the basis of
- * the `.e2e/plan.json` artifact and the human-readable plan printout.
- */
-
-import type { ResolverInput } from "./load.ts";
-import type {
-  BaseScenario,
-  ResolvedPlan,
-  ResolvedSuite,
-  SuiteDefinition,
-  ExpectedFailure,
-  ExpectedStateConfig,
-  TestPlan,
-} from "./schema.ts";
-
-export type { ResolverInput } from "./load.ts";
-export type { ResolvedPlan } from "./schema.ts";
-
-function lookupProfile<T>(
-  collection: Record<string, T>,
-  kind: string,
-  name: string,
-  scenarioId: string,
-): T {
-  if (!(name in collection)) {
-    const available = Object.keys(collection).sort().join(", ");
-    throw new Error(
-      `scenario '${scenarioId}' references unknown ${kind} '${name}' (available: ${available || "<none>"})`,
-    );
-  }
-  return collection[name] as T;
-}
-
-function getByDottedPath(obj: unknown, dotted: string): unknown {
-  const parts = dotted.split(".");
-  let cur: unknown = obj;
-  for (const p of parts) {
-    if (cur === null || cur === undefined || typeof cur !== "object") {
-      return undefined;
-    }
-    cur = (cur as Record<string, unknown>)[p];
-  }
-  return cur;
-}
-
-/**
- * Merge a state-level `expected_failure` with an optional scenario-level
- * override and return a fully-formed `ExpectedFailure`, or `undefined` if
- * neither side declares one. Scenario-level fields win over state-level.
- *
- * After merge, every required field MUST be present. The loader already
- * enforces this for state-level blocks; an override-only declaration on a
- * positive expected state is rejected here.
- */
-function resolveExpectedFailure(
-  stateConfig: ExpectedStateConfig,
-  expectedStateId: string,
-  scenarioId: string,
-  overrides: Array<{
-    block?: Partial<ExpectedFailure>;
-    mode: "fill" | "override";
-    origin: string;
-  }>,
-): ExpectedFailure | undefined {
-  const stateBlock = (stateConfig as { expected_failure?: unknown }).expected_failure as
-    | Partial<ExpectedFailure>
-    | undefined;
-  const presentOverrides = overrides.filter((source) => source.block);
-  if (!stateBlock && presentOverrides.length === 0) return undefined;
-  if (!stateBlock) {
-    const origins = presentOverrides.map((source) => source.origin).join(", ");
-    throw new Error(
-      `scenario '${scenarioId}' declares expected_failure but expected_state '${expectedStateId}' does not - declare the base contract on the state first (source: ${origins})`,
-    );
-  }
-  const merged: Partial<ExpectedFailure> = { ...stateBlock };
-  for (const source of overrides) {
-    const block = source.block;
-    if (!block) continue;
-    for (const key of Object.keys(block) as Array<keyof ExpectedFailure>) {
-      const value = block[key];
-      if (value === undefined) continue;
-      if (source.mode === "fill" && merged[key] !== undefined) continue;
-      (merged as Record<keyof ExpectedFailure, unknown>)[key] = value;
-    }
-  }
-  if (!merged.phase || !merged.error_class) {
-    throw new Error(
-      `scenario '${scenarioId}' expected_failure resolves with missing required fields (phase, error_class) after merge`,
-    );
-  }
-  return merged as ExpectedFailure;
-}
-
-function validateSuiteAgainstState(
-  suiteId: string,
-  suite: SuiteDefinition,
-  state: ExpectedStateConfig,
-  scenarioId: string,
-): void {
-  const requires = suite.requires_state ?? {};
-  for (const [key, expected] of Object.entries(requires)) {
-    const actual = getByDottedPath(state, key);
-    if (actual === undefined) {
-      throw new Error(
-        `scenario '${scenarioId}' selects suite '${suiteId}' which requires state key '${key}=${String(expected)}', but the expected state has no value at '${key}'`,
-      );
-    }
-    if (actual !== expected) {
-      throw new Error(
-        `scenario '${scenarioId}' selects suite '${suiteId}' which requires '${key}=${String(expected)}', but the scenario's expected state has '${key}=${String(actual)}'`,
-      );
-    }
-  }
-}
-
-export function resolveScenario(scenarioId: string, meta: ResolverInput): ResolvedPlan {
-  const legacy = meta.scenarios.setup_scenarios[scenarioId];
-  const directPlan = meta.scenarios.test_plans?.[scenarioId];
-  if (!legacy && !directPlan) {
-    const available = [
-      ...Object.keys(meta.scenarios.setup_scenarios),
-      ...Object.keys(meta.scenarios.test_plans ?? {}),
-    ].sort().join(", ");
-    throw new Error(`unknown scenario '${scenarioId}' (available: ${available || "<none>"})`);
-  }
-  const planId = legacy?.alias_for_plan ?? scenarioId;
-  const layeredPlan = meta.scenarios.test_plans?.[planId];
-  const legacyDimensions = legacy?.dimensions;
-  const baseId = layeredPlan?.base;
-  const base = baseId ? lookupProfile(meta.scenarios.base_scenarios ?? {}, "base", baseId, scenarioId) : undefined;
-  const onboardingId = legacy?.alias_for_plan && legacyDimensions?.onboarding ? legacyDimensions.onboarding : (layeredPlan?.onboarding ?? legacyDimensions?.onboarding);
-  const onboardingCollection = onboardingId && onboardingId in meta.scenarios.onboarding ? meta.scenarios.onboarding : (meta.scenarios.onboarding_profiles ?? meta.scenarios.onboarding);
-  const onboarding = lookupProfile(onboardingCollection, "onboarding", onboardingId ?? "", scenarioId);
-  const platformId = base?.platform ?? legacyDimensions?.platform;
-  const installId = base?.install ?? legacyDimensions?.install;
-  const runtimeId = base?.runtime ?? legacyDimensions?.runtime;
-  if (!platformId || !installId || !runtimeId) throw new Error(`scenario '${scenarioId}' is missing layered base or legacy dimensions`);
-  const platform = lookupProfile(meta.scenarios.platforms, "platform", platformId, scenarioId);
-  const install = lookupProfile(meta.scenarios.installs, "install", installId, scenarioId);
-  const runtime = lookupProfile(meta.scenarios.runtimes, "runtime", runtimeId, scenarioId);
-  const expectedStateId = layeredPlan?.expected_state ?? legacy?.expected_state;
-  if (!expectedStateId || !(expectedStateId in meta.expectedStates.expected_states)) {
-    const available = Object.keys(meta.expectedStates.expected_states).sort().join(", ");
-    throw new Error(`scenario '${scenarioId}' references unknown expected_state '${expectedStateId}' (available: ${available || "<none>"})`);
-  }
-  const stateConfig = meta.expectedStates.expected_states[expectedStateId];
-  const suiteIds = layeredPlan?.suites ?? legacy?.suites ?? [];
-  const resolvedSuites: ResolvedSuite[] = [];
-  for (const suiteId of suiteIds) {
-    if (!(suiteId in meta.suites.suites)) {
-      const available = Object.keys(meta.suites.suites).sort().join(", ");
-      throw new Error(
-        `scenario '${scenarioId}' references unknown suite '${suiteId}' (available: ${available || "<none>"})`,
-      );
-    }
-    const def = meta.suites.suites[suiteId];
-    validateSuiteAgainstState(suiteId, def, stateConfig, scenarioId);
-    resolvedSuites.push({
-      id: suiteId,
-      requires_state: def.requires_state ?? {},
-      steps: def.steps.map((s) => ({ id: s.id, script: s.script })),
-    });
-  }
-  const runnerRequirements = [
-    ...(base?.runner_requirements ?? []),
-    ...((layeredPlan as TestPlan | undefined)?.runner_requirements ?? []),
-    ...(legacy?.runner_requirements ?? []),
-  ];
-  const expectedFailure = resolveExpectedFailure(stateConfig, expectedStateId, scenarioId, [
-    { origin: `base '${baseId}'`, block: base?.expected_failure, mode: "fill" },
-    { origin: `test_plan '${planId}'`, block: layeredPlan?.expected_failure, mode: "override" },
-    { origin: `setup_scenario '${scenarioId}'`, block: legacy?.expected_failure, mode: "override" },
-  ]);
-  return {
-    scenario_id: scenarioId,
-    plan_id: layeredPlan ? planId : undefined,
-    legacy_scenario_id: legacy?.alias_for_plan ? scenarioId : undefined,
-    base: base && baseId ? { id: baseId, profile: base as BaseScenario } : undefined,
-    onboarding: onboardingId ? { id: onboardingId, profile: onboarding } : undefined,
-    onboarding_assertions: layeredPlan?.onboarding_assertions ?? [],
-    dimensions: {
-      platform: { id: platformId, profile: platform },
-      install: { id: installId, profile: install },
-      runtime: { id: runtimeId, profile: runtime },
-      onboarding: { id: onboardingId ?? "", profile: onboarding },
-    },
-    expected_state: { id: expectedStateId, config: stateConfig },
-    suites: resolvedSuites,
-    overrides: layeredPlan?.overrides ?? legacy?.overrides,
-    runner_requirements: runnerRequirements.length > 0 ? runnerRequirements : undefined,
-    required_secrets: layeredPlan?.required_secrets,
-    ...(expectedFailure ? { expected_failure: expectedFailure } : {}),
-  };
-}
-
-export function formatPlan(plan: ResolvedPlan): string {
-  const lines: string[] = [];
-  lines.push(`Scenario: ${plan.scenario_id}`);
-  if (plan.plan_id) lines.push(`Test plan: ${plan.plan_id}`);
-  if (plan.base) lines.push(`Base: ${plan.base.id}`);
-  if (plan.onboarding) lines.push(`Onboarding: ${plan.onboarding.id}`);
-  lines.push("Dimensions:");
-  lines.push(`  platform=${plan.dimensions.platform.id}`);
-  lines.push(`  install=${plan.dimensions.install.id}`);
-  lines.push(`  runtime=${plan.dimensions.runtime.id}`);
-  lines.push(`  onboarding=${plan.dimensions.onboarding.id}`);
-  lines.push(`Expected state: ${plan.expected_state.id}`);
-  if (plan.onboarding_assertions && plan.onboarding_assertions.length > 0) {
-    lines.push("Onboarding assertions:");
-    for (const assertion of plan.onboarding_assertions) lines.push(`  - ${assertion}`);
-  }
-  lines.push("Suites:");
-  for (const s of plan.suites) {
-    lines.push(`  - ${s.id}`);
-    for (const step of s.steps) {
-      lines.push(`      * ${step.id} (${step.script})`);
-    }
-  }
-  if (plan.runner_requirements && plan.runner_requirements.length > 0) {
-    lines.push("Runner requirements:");
-    for (const requirement of plan.runner_requirements) {
-      lines.push(`  - ${requirement}`);
-    }
-  }
-  if (plan.overrides) {
-    lines.push("Overrides:");
-    lines.push(`  ${JSON.stringify(plan.overrides)}`);
-  }
-  if (plan.expected_failure) {
-    lines.push("Expected failure:");
-    lines.push(`  phase=${plan.expected_failure.phase}`);
-    lines.push(`  error_class=${plan.expected_failure.error_class}`);
-    if (plan.expected_failure.message_pattern) {
-      lines.push(`  message_pattern=${plan.expected_failure.message_pattern}`);
-    }
-    if (plan.expected_failure.forbidden_side_effects?.length) {
-      lines.push(`  forbidden_side_effects=${plan.expected_failure.forbidden_side_effects.join(",")}`);
-    }
-  }
-  return lines.join("\n");
-}
diff --git a/test/e2e-scenario/runtime/resolver/schema.ts b/test/e2e-scenario/runtime/resolver/schema.ts
deleted file mode 100644
index d8354981f6..0000000000
--- a/test/e2e-scenario/runtime/resolver/schema.ts
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Types for the E2E scenario metadata schema.
- *
- * These mirror the shape of `scenarios.yaml`, `expected-states.yaml`, and
- * `suites.yaml`. The resolver validates unknown references and returns a
- * normalized `ResolvedPlan` suitable for the shell runner and JSON artifact.
- */
-
-export type AnyRecord = Record<string, unknown>;
-
-export interface PlatformProfile extends AnyRecord {
-  os?: string;
-  execution_target?: string;
-}
-export type InstallProfile = AnyRecord;
-export type RuntimeProfile = AnyRecord;
-export interface OnboardingProfile extends AnyRecord {
-  path?: string;
-  agent?: string;
-  provider?: string;
-  inference_route?: string;
-}
-
-/**
- * Phases where setup is permitted to fail in negative scenarios.
- *
- * Aligned with `nemoclaw` setup stages and the wording in NemoClaw issue
- * #3608. `preflight` is the only phase whose side-effect probes are wired
- * in this initial cut; the rest are accepted by the schema so that future
- * negative scenarios can declare them without churning YAML again.
- */
-export const EXPECTED_FAILURE_PHASES = [
-  "preflight",
-  "install",
-  "onboard",
-  "readiness",
-  "suite",
-] as const;
-export type ExpectedFailurePhase = (typeof EXPECTED_FAILURE_PHASES)[number];
-
-/**
- * Structured failure reason. Open-ended on purpose - new negative scenarios
- * may need new classes, but every value here MUST be enumerated so reports
- * have a stable vocabulary.
- */
-export const EXPECTED_FAILURE_ERROR_CLASSES = [
-  "docker-missing",
-  "credentials-missing",
-  "gpu-missing",
-  "unsupported-platform",
-] as const;
-export type ExpectedFailureErrorClass = (typeof EXPECTED_FAILURE_ERROR_CLASSES)[number];
-
-/**
- * Side effects that a successful setup would normally leave behind. A
- * negative scenario asserts that NONE of the listed effects are observed
- * after the failure.
- */
-export const EXPECTED_FAILURE_SIDE_EFFECTS = [
-  "sandbox-created",
-  "gateway-started",
-  "credentials-written",
-] as const;
-export type ExpectedFailureSideEffect = (typeof EXPECTED_FAILURE_SIDE_EFFECTS)[number];
-
-export interface ExpectedFailure {
-  phase: ExpectedFailurePhase;
-  error_class: ExpectedFailureErrorClass;
-  /** RE2/POSIX-compatible regex matched against the captured setup log. */
-  message_pattern?: string;
-  /** Effects that must NOT be observed after the failure. */
-  forbidden_side_effects?: ExpectedFailureSideEffect[];
-}
-
-export interface SkippedCapability extends AnyRecord {
-  id: string;
-  reason: string;
-  suites?: string[];
-}
-
-export interface BaseScenario extends AnyRecord {
-  platform: string;
-  install: string;
-  runtime: string;
-  runner_requirements?: string[];
-  expected_failure?: Partial<ExpectedFailure>;
-  skipped_capabilities?: SkippedCapability[];
-}
-
-export interface TestPlan extends AnyRecord {
-  base: string;
-  onboarding: string;
-  expected_state: string;
-  onboarding_assertions?: string[];
-  suites: string[];
-  overrides?: AnyRecord;
-  runner_requirements?: string[];
-  required_secrets?: string[];
-  expected_failure?: Partial<ExpectedFailure>;
-  skipped_capabilities?: SkippedCapability[];
-}
-
-export interface SetupScenario {
-  alias_for_plan?: string;
-  dimensions?: {
-    platform: string;
-    install: string;
-    runtime: string;
-    onboarding: string;
-  };
-  expected_state?: string;
-  suites?: string[];
-  overrides?: AnyRecord;
-  /** Explicit CI/hardware requirements for non-default platforms. */
-  runner_requirements?: string[];
-  skipped_capabilities?: SkippedCapability[];
-  /**
-   * Per-scenario override of the expected-state failure contract. Fields
-   * present here win over the state-level `expected_failure`; absent
-   * fields fall back to the state. Negative scenarios MUST resolve to a
-   * complete `ExpectedFailure` (state + override merged).
-   */
-  expected_failure?: Partial<ExpectedFailure>;
-  /**
-   * Guard: the legacy array form `expected_states: [...]` must not reappear.
-   * If present, the loader fails.
-   */
-  expected_states?: never;
-}
-
-export interface ScenariosFile {
-  platforms: Record<string, PlatformProfile>;
-  installs: Record<string, InstallProfile>;
-  runtimes: Record<string, RuntimeProfile>;
-  onboarding: Record<string, OnboardingProfile>;
-  setup_scenarios: Record<string, SetupScenario>;
-  base_scenarios?: Record<string, BaseScenario>;
-  onboarding_profiles?: Record<string, OnboardingProfile>;
-  test_plans?: Record<string, TestPlan>;
-  onboarding_assertions?: Record<string, AnyRecord>;
-}
-
-export type ExpectedStateConfig = AnyRecord;
-
-export interface ExpectedStatesFile {
-  expected_states: Record<string, ExpectedStateConfig>;
-}
-
-export interface SuiteStep {
-  id: string;
-  script: string;
-}
-
-export interface SuiteDefinition {
-  requires_state?: Record<string, unknown>;
-  steps: SuiteStep[];
-}
-
-export interface SuitesFile {
-  suites: Record<string, SuiteDefinition>;
-}
-
-export interface ResolvedDimension<T = AnyRecord> {
-  id: string;
-  profile: T;
-}
-
-export interface ResolvedSuite {
-  id: string;
-  requires_state: Record<string, unknown>;
-  steps: SuiteStep[];
-}
-
-export interface ResolvedExpectedState {
-  id: string;
-  config: ExpectedStateConfig;
-}
-
-export interface ResolvedPlan {
-  scenario_id: string;
-  plan_id?: string;
-  legacy_scenario_id?: string;
-  base?: ResolvedDimension<BaseScenario>;
-  onboarding?: ResolvedDimension<OnboardingProfile>;
-  onboarding_assertions?: string[];
-  dimensions: {
-    platform: ResolvedDimension<PlatformProfile>;
-    install: ResolvedDimension<InstallProfile>;
-    runtime: ResolvedDimension<RuntimeProfile>;
-    onboarding: ResolvedDimension<OnboardingProfile>;
-  };
-  expected_state: ResolvedExpectedState;
-  suites: ResolvedSuite[];
-  overrides?: AnyRecord;
-  runner_requirements?: string[];
-  required_secrets?: string[];
-  /**
-   * Present only for negative scenarios that declare an `expected_failure`
-   * (either at scenario level or via their expected state). Absence means
-   * the runner expects setup to succeed.
-   */
-  expected_failure?: ExpectedFailure;
-}
diff --git a/test/e2e-scenario/runtime/resolver/validator.ts b/test/e2e-scenario/runtime/resolver/validator.ts
deleted file mode 100644
index 214190f6dc..0000000000
--- a/test/e2e-scenario/runtime/resolver/validator.ts
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Expected-state validator.
- *
- * Walks the expected state tree and compares each leaf to a probe result.
- * Also validates per-suite `requires_state` entries at runtime, producing a
- * single report whose `ok` field drives whether the runner proceeds to
- * execute suites.
- */
-
-import type { ExpectedStateConfig, ResolvedSuite } from "./schema.ts";
-
-export type ProbeValue = string | number | boolean | null;
-export type ProbeResults = Record<string, ProbeValue>;
-
-export interface ValidatorInput {
-  stateId: string;
-  state: ExpectedStateConfig;
-  probes: ProbeResults;
-  suites: ResolvedSuite[];
-}
-
-export interface ValidatorCheck {
-  key: string;
-  expected: ProbeValue;
-  actual: ProbeValue | undefined;
-  ok: boolean;
-  origin: "state" | "suite";
-  suite?: string;
-  message?: string;
-}
-
-export interface ValidatorReport {
-  state_id: string;
-  ok: boolean;
-  checks: ValidatorCheck[];
-}
-
-function flatten(
-  obj: unknown,
-  prefix: string,
-  out: Record<string, ProbeValue>,
-): void {
-  if (obj === null || typeof obj !== "object") {
-    out[prefix] = obj as ProbeValue;
-    return;
-  }
-  for (const [k, v] of Object.entries(obj as Record<string, unknown>)) {
-    const next = prefix ? `${prefix}.${k}` : k;
-    if (v !== null && typeof v === "object" && !Array.isArray(v)) {
-      flatten(v, next, out);
-    } else {
-      out[next] = v as ProbeValue;
-    }
-  }
-}
-
-function compare(
-  _key: string,
-  expected: ProbeValue,
-  actual: ProbeValue | undefined,
-): boolean {
-  if (actual === undefined) return false;
-  return expected === actual;
-}
-
-export function validateExpectedState(input: ValidatorInput): ValidatorReport {
-  const checks: ValidatorCheck[] = [];
-  const flat: Record<string, ProbeValue> = {};
-  flatten(input.state, "", flat);
-
-  for (const [key, expected] of Object.entries(flat)) {
-    const actual = input.probes[key];
-    const ok = compare(key, expected, actual);
-    checks.push({
-      key,
-      expected,
-      actual,
-      ok,
-      origin: "state",
-      message: ok
-        ? undefined
-        : `expected '${key}=${String(expected)}' but got '${String(actual ?? "<missing>")}'`,
-    });
-  }
-
-  for (const suite of input.suites) {
-    const req = suite.requires_state ?? {};
-    for (const [key, expected] of Object.entries(req)) {
-      const actual = input.probes[key];
-      const ok = compare(key, expected as ProbeValue, actual);
-      checks.push({
-        key,
-        expected: expected as ProbeValue,
-        actual,
-        ok,
-        origin: "suite",
-        suite: suite.id,
-        message: ok
-          ? undefined
-          : `suite '${suite.id}' requires '${key}=${String(expected)}' but got '${String(actual ?? "<missing>")}'`,
-      });
-    }
-  }
-
-  const ok = checks.every((c) => c.ok);
-  return { state_id: input.stateId, ok, checks };
-}
-
-export function formatReport(report: ValidatorReport): string {
-  const lines: string[] = [];
-  lines.push(`expected-state: ${report.state_id} ${report.ok ? "OK" : "FAILED"}`);
-  for (const c of report.checks) {
-    const status = c.ok ? "PASS" : "FAIL";
-    const origin = c.origin === "suite" ? `[suite:${c.suite}]` : "[state]";
-    lines.push(
-      `  ${status} ${origin} ${c.key} expected=${String(c.expected)} actual=${String(c.actual ?? "<missing>")}`,
-    );
-  }
-  return lines.join("\n");
-}
diff --git a/test/e2e-scenario/runtime/run-scenario.sh b/test/e2e-scenario/runtime/run-scenario.sh
index 58042c8523..2477ce79ec 100755
--- a/test/e2e-scenario/runtime/run-scenario.sh
+++ b/test/e2e-scenario/runtime/run-scenario.sh
@@ -2,482 +2,24 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# E2E scenario runner entrypoint.
-#
-# Usage:
-#   bash test/e2e-scenario/runtime/run-scenario.sh <scenario-id> [--plan-only|--validate-only|--dry-run]
-#
-# Flags:
-#   --plan-only      Resolve metadata and print the plan only. Writes
-#                    ${E2E_CONTEXT_DIR:-.e2e}/plan.json for artifact upload.
-#   --validate-only  Run the expected-state validator against the current
-#                    context.env without running install/onboard/suites.
-#                    Emits probe results JSON to stdout and writes
-#                    ${E2E_CONTEXT_DIR}/expected-state-report.json. Used by
-#                    the parity-compare workflow to collect per-assertion
-#                    probe results. Mutually exclusive with --plan-only.
-#   --dry-run        (reserved) Run orchestration with real side effects
-#                    replaced by trace-logged stubs. Sets E2E_DRY_RUN=1 for
-#                    helpers. Full dry-run orchestration lands in later phases.
-#
-# Environment:
-#   E2E_CONTEXT_DIR  Override the scenario artifact directory
-#                    (default: <repo-root>/.e2e/).
+# DEPRECATED. The hybrid scenario architecture has a single supported runtime
+# entrypoint: test/e2e-scenario/scenarios/run.ts. This bash runner duplicated
+# install/onboard/gateway-check/suite-execution that now belongs in TS phase
+# orchestrators (EnvironmentOrchestrator, OnboardingOrchestrator,
+# RuntimeOrchestrator) and shared clients (HostCliClient, GatewayClient,
+# SandboxClient). It is fail-fast so the deprecation is loud, not silent.
 
 set -euo pipefail
 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-E2E_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
-
-SCENARIO_ID=""
-PLAN_ONLY=0
-VALIDATE_ONLY=0
-DRY_RUN=0
-
-usage() {
-  cat >&2 <<'USAGE'
-Usage: bash test/e2e-scenario/runtime/run-scenario.sh <scenario-id> [--plan-only|--validate-only|--dry-run]
-USAGE
-}
-
-while [[ $# -gt 0 ]]; do
-  case "$1" in
-    --plan-only)
-      PLAN_ONLY=1
-      shift
-      ;;
-    --validate-only)
-      VALIDATE_ONLY=1
-      shift
-      ;;
-    --dry-run)
-      DRY_RUN=1
-      shift
-      ;;
-    -h | --help)
-      usage
-      exit 0
-      ;;
-    --*)
-      echo "run-scenario: unknown flag: $1" >&2
-      usage
-      exit 2
-      ;;
-    *)
-      if [[ -z "${SCENARIO_ID}" ]]; then
-        SCENARIO_ID="$1"
-      else
-        echo "run-scenario: unexpected positional argument: $1" >&2
-        usage
-        exit 2
-      fi
-      shift
-      ;;
-  esac
-done
-
-if [[ -z "${SCENARIO_ID}" ]]; then
-  echo "run-scenario: missing scenario id" >&2
-  usage
-  exit 2
-fi
-
-if [[ "${PLAN_ONLY}" -eq 1 && "${VALIDATE_ONLY}" -eq 1 ]]; then
-  echo "run-scenario: --plan-only and --validate-only are mutually exclusive" >&2
-  usage
-  exit 2
-fi
-
-export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}"
-mkdir -p "${E2E_CONTEXT_DIR}"
-
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  export E2E_DRY_RUN=1
-fi
-
-# Prefer the locally-installed tsx if present, otherwise fall back to npx.
-TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx"
-if [[ ! -x "${TSX_BIN}" ]]; then
-  TSX_BIN=""
-fi
-
-run_resolver() {
-  if [[ -n "${TSX_BIN}" ]]; then
-    "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" "$@"
-    return
-  fi
-  # CodeRabbit review item #10: fail closed with a clear hint instead of
-  # silently pulling tsx from the network via `npx --yes`.
-  if ! (cd "${REPO_ROOT}" && npx --no-install tsx "${SCRIPT_DIR}/resolver/index.ts" "$@"); then
-    echo "run-scenario: tsx is required but not installed. Run 'npm ci' at the repo root and retry." >&2
-    return 1
-  fi
-}
-
-run_resolver plan "${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}"
-
-if [[ "${PLAN_ONLY}" -eq 1 ]]; then
-  exit 0
-fi
-
-# --validate-only: assume setup has already completed. Skip install /
-# onboard / suite execution and dispatch the expected-state validator
-# using probes resolved from E2E_PROBE_OVERRIDE_* env vars. Emits the
-# probe results JSON report to stdout and writes it to
-# ${E2E_CONTEXT_DIR}/expected-state-report.json.
-if [[ "${VALIDATE_ONLY}" -eq 1 ]]; then
-  validate_args=("${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}")
-  if ! run_resolver validate-state "${validate_args[@]}"; then
-    echo "run-scenario: --validate-only: expected-state validation failed" >&2
-    exit 3
-  fi
-  exit 0
-fi
-
-# Source the shared helper library so we can exercise the full
-# setup → install → onboard → gateway/sandbox check sequence. In dry-run
-# mode each helper short-circuits (and writes to E2E_TRACE_FILE if set).
-# shellcheck source=lib/env.sh
-. "${SCRIPT_DIR}/lib/env.sh"
-# shellcheck source=lib/context.sh
-. "${SCRIPT_DIR}/lib/context.sh"
-# shellcheck source=lib/negative.sh
-. "${SCRIPT_DIR}/lib/negative.sh"
-# shellcheck source=lib/port-holder.sh
-. "${SCRIPT_DIR}/lib/port-holder.sh"
-# shellcheck source=../nemoclaw_scenarios/install/dispatch.sh
-. "${E2E_ROOT}/nemoclaw_scenarios/install/dispatch.sh"
-# shellcheck source=../nemoclaw_scenarios/onboard/dispatch.sh
-. "${E2E_ROOT}/nemoclaw_scenarios/onboard/dispatch.sh"
-# shellcheck source=../validation_suites/assert/gateway-alive.sh
-. "${E2E_ROOT}/validation_suites/assert/gateway-alive.sh"
-# shellcheck source=../validation_suites/assert/sandbox-alive.sh
-. "${E2E_ROOT}/validation_suites/assert/sandbox-alive.sh"
-
-# Apply standard non-interactive env (and trace it).
-e2e_env_apply_noninteractive
-e2e_env_trace "env:noninteractive"
-
-# Emit normalized context from the resolved plan.
-e2e_context_init
-"${E2E_ROOT}/nemoclaw_scenarios/helpers/emit-context-from-plan.sh" "${E2E_CONTEXT_DIR}/plan.json"
-
-# Extract the install method and onboarding profile from the plan so we can
-# dispatch to the right helpers.
-read_plan_string() {
-  local key="$1"
-  node -e "
-    const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
-    const parts = process.argv[2].split('.');
-    let cur = p;
-    for (const part of parts) { if (cur == null) { cur = ''; break; } cur = cur[part]; }
-    process.stdout.write(cur == null ? '' : String(cur));
-  " "${E2E_CONTEXT_DIR}/plan.json" "${key}"
-}
-
-INSTALL_ID="$(read_plan_string dimensions.install.id)"
-INSTALL_METHOD="$(read_plan_string dimensions.install.profile.method)"
-ONBOARDING_ID="$(read_plan_string dimensions.onboarding.id)"
-RUNTIME_ID="$(read_plan_string dimensions.runtime.id)"
-RUNTIME_CONTAINER_DAEMON="$(read_plan_string dimensions.runtime.profile.container_daemon)"
-EXPECTED_STATE_ID="$(read_plan_string expected_state.id)"
-FAILURE_STAGE="$(read_plan_string expected_state.config.failure.stage)"
-FAILURE_EXIT_CODE="$(read_plan_string expected_state.config.failure.exit_code)"
-FAILURE_MESSAGE_CONTAINS="$(read_plan_string expected_state.config.failure.message_contains)"
-FAILURE_NO_STACK_TRACE="$(read_plan_string expected_state.config.failure.no_stack_trace)"
-
-# Trace the dimension id so scenario-level assertions can identify the
-# configured install (e.g. repo-current); e2e_install internally traces
-# the resolved method.
-e2e_env_trace "install:${INSTALL_ID}"
-
-install_log="${E2E_CONTEXT_DIR}/install.log"
-set +e
-e2e_install "${INSTALL_METHOD}" >"${install_log}" 2>&1
-install_status=$?
-set -e
-if [[ "${install_status}" -ne 0 ]]; then
-  cat "${install_log}" >&2
-  echo "run-scenario: install ${INSTALL_METHOD} failed with status ${install_status}" >&2
-  exit "${install_status}"
-fi
-export PATH="${HOME}/.local/bin:${PATH}"
-{
-  printf 'PATH=%s\n' "${PATH}"
-  command -v nemoclaw || true
-} >"${E2E_CONTEXT_DIR}/post-install-path.log" 2>&1
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  printf 'run-scenario: dry-run skipping post-install nemoclaw PATH verification\n' >&2
-else
-  nemoclaw_bin="$(command -v nemoclaw || true)"
-  if [[ -z "${nemoclaw_bin}" ]]; then
-    cat "${E2E_CONTEXT_DIR}/post-install-path.log" >&2
-    echo "run-scenario: nemoclaw not found on PATH after install" >&2
-    exit 127
-  fi
-  printf 'run-scenario: using nemoclaw at %s\n' "${nemoclaw_bin}" >&2
-fi
-
-# Negative scenarios declare an `expected_failure` block on their expected
-# state (see NemoClaw issue #3608). The runner forces the failure mode for
-# the scenario, captures the setup log, gathers a side-effect inventory, and
-# delegates structured matching to `resolver/index.ts match-failure`. The
-# matcher writes `expected-vs-actual.json` for CI artifact upload.
-
-read_plan_failure_field() {
-  local key="$1"
-  node -e "
-    (() => {
-      const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
-      const ef = p.expected_failure;
-      if (!ef) { process.stdout.write(''); return; }
-      const v = ef[process.argv[2]];
-      process.stdout.write(v == null ? '' : Array.isArray(v) ? v.join(',') : String(v));
-    })();
-  " "${E2E_CONTEXT_DIR}/plan.json" "${key}"
-}
-
-EXPECTED_FAILURE_PHASE="$(read_plan_failure_field phase)"
-
-if [[ -n "${EXPECTED_FAILURE_PHASE}" ]]; then
-  expected_error_class="$(read_plan_failure_field error_class)"
-  negative_log="${E2E_CONTEXT_DIR}/negative-${EXPECTED_FAILURE_PHASE}.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-
-  # Snapshot the side-effect baseline BEFORE forcing the failure so we only
-  # report effects newly introduced by this scenario. A pre-existing gateway
-  # or credentials file from an earlier run would otherwise look like a fresh
-  # side effect and falsely fail negative scenarios in dirty environments.
-  baseline_sandbox=0
-  if [[ -n "${sandbox_name}" ]] && openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    baseline_sandbox=1
-  fi
-  baseline_gateway=0
-  if nemoclaw gateway status >/dev/null 2>&1; then
-    baseline_gateway=1
-  fi
-  baseline_credentials=0
-  if [[ -s "${HOME}/.nemoclaw/credentials.json" ]]; then
-    baseline_credentials=1
-  fi
-
-  # Force the failure mode declared by the scenario. Only `preflight` /
-  # `docker-missing` is implemented here; other phases are accepted by the
-  # schema but their forcing logic lands alongside the first consumer.
-  case "${EXPECTED_FAILURE_PHASE}:${expected_error_class}" in
-    preflight:docker-missing)
-      if [[ "${DRY_RUN}" -eq 1 ]]; then
-        printf 'Cannot connect to the Docker daemon during preflight\n' >"${negative_log}"
-      else
-        if DOCKER_HOST="unix:///tmp/nemoclaw-e2e-missing-docker.sock" \
-          e2e_onboard "${ONBOARDING_ID}" >"${negative_log}" 2>&1; then
-          echo "run-scenario: expected preflight failure, but onboarding succeeded" >&2
-          cat "${negative_log}" >&2
-          exit 4
-        fi
-      fi
-      ;;
-    *)
-      echo "run-scenario: expected_failure phase=${EXPECTED_FAILURE_PHASE} class=${expected_error_class} has no forcing implementation yet" >&2
-      exit 2
-      ;;
-  esac
-
-  # Compute the side-effect delta: only count effects that were absent in the
-  # baseline and present after the forced failure.
-  observed_side_effects=""
-  if [[ "${baseline_sandbox}" -eq 0 ]] && [[ -n "${sandbox_name}" ]] \
-    && openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}sandbox-created"
-  fi
-  if [[ "${baseline_gateway}" -eq 0 ]] && nemoclaw gateway status >/dev/null 2>&1; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}gateway-started"
-  fi
-  if [[ "${baseline_credentials}" -eq 0 ]] && [[ -s "${HOME}/.nemoclaw/credentials.json" ]]; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}credentials-written"
-  fi
-
-  # `--observed-error-class` is intentionally omitted: the runner does not yet
-  # derive a structured error class from the actual failure output, and
-  # reporting the planned class back to the matcher would make the check
-  # tautological. The matcher logs this as a skipped check.
-  match_args=(
-    match-failure "${SCENARIO_ID}"
-    --context-dir "${E2E_CONTEXT_DIR}"
-    --log "${negative_log}"
-    --observed-phase "${EXPECTED_FAILURE_PHASE}"
-  )
-  if [[ -n "${observed_side_effects}" ]]; then
-    match_args+=(--observed-side-effects "${observed_side_effects}")
-  fi
-  if ! run_resolver "${match_args[@]}"; then
-    echo "run-scenario: expected-failure match failed; see ${E2E_CONTEXT_DIR}/expected-vs-actual.json" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative scenario passed (phase=${EXPECTED_FAILURE_PHASE} class=${expected_error_class})"
-  exit 0
-fi
-
-if [[ "${EXPECTED_STATE_ID}" == "preflight-failure-no-sandbox" ]]; then
-  negative_log="${E2E_CONTEXT_DIR}/negative-preflight.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    printf 'Cannot connect to the Docker daemon during preflight\n' >"${negative_log}"
-  elif DOCKER_HOST="unix:///tmp/nemoclaw-e2e-missing-docker.sock" e2e_onboard "${ONBOARDING_ID}" >"${negative_log}" 2>&1; then
-    echo "run-scenario: expected preflight failure, but onboarding succeeded" >&2
-    exit 4
-  fi
-  if ! grep -Eiq "docker|container|daemon|socket|preflight" "${negative_log}"; then
-    echo "run-scenario: negative preflight failed without a clear Docker/preflight reason" >&2
-    cat "${negative_log}" >&2
-    exit 4
-  fi
-  if openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    echo "run-scenario: negative preflight left behind sandbox ${sandbox_name}" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative preflight passed; Docker daemon unavailable and no sandbox was created"
-  exit 0
-fi
-
-if [[ "${FAILURE_STAGE}" == "onboarding" ]]; then
-  negative_log="${E2E_CONTEXT_DIR}/negative-onboarding.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-  port_holder_started=0
-  onboard_env=(NEMOCLAW_SANDBOX_NAME="${sandbox_name}" NEMOCLAW_RECREATE_SANDBOX=1 NEMOCLAW_POLICY_MODE=skip)
-  case "${ONBOARDING_ID}" in
-    cloud-openclaw-invalid-nvidia-key)
-      onboard_env+=(NVIDIA_API_KEY=not-a-nvidia-key)
-      ;;
-    cloud-openclaw-gateway-port-conflict)
-      conflict_port="$(read_plan_string dimensions.onboarding.profile.gateway_port)"
-      : "${conflict_port:=18080}"
-      if e2e_port_holder_start "${conflict_port}"; then
-        port_holder_started=1
-      else
-        echo "run-scenario: could not start port holder on ${conflict_port}; continuing against any existing listener" >&2
-      fi
-      onboard_env+=(NEMOCLAW_GATEWAY_PORT="${conflict_port}")
-      ;;
-  esac
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    printf '%s
-' "${FAILURE_MESSAGE_CONTAINS}" >"${negative_log}"
-    negative_status="${FAILURE_EXIT_CODE:-1}"
-  else
-    set +e
-    (
-      export "${onboard_env[@]}"
-      e2e_onboard "${ONBOARDING_ID}"
-    ) >"${negative_log}" 2>&1
-    negative_status=$?
-    set -e
-  fi
-  if [[ "${port_holder_started}" -eq 1 ]]; then
-    e2e_port_holder_stop
-  fi
-  if ! e2e_negative_assert_failure "${negative_log}" "${negative_status}" "${FAILURE_EXIT_CODE:-1}" "${FAILURE_MESSAGE_CONTAINS}" "$([[ "${FAILURE_NO_STACK_TRACE}" == "true" ]] && echo 1 || echo 0)"; then
-    exit 4
-  fi
-  if openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    echo "run-scenario: negative onboarding left behind sandbox ${sandbox_name}" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative onboarding ${ONBOARDING_ID} passed"
-  exit 0
-fi
-
-DOCKER_OPTIONAL_UNAVAILABLE=0
-if [[ "${RUNTIME_CONTAINER_DAEMON}" == "optional" ]] && ! docker info >/dev/null 2>&1; then
-  DOCKER_OPTIONAL_UNAVAILABLE=1
-  echo "SKIP: scenario.${SCENARIO_ID}.docker-dependent-suites Docker unavailable for optional runtime ${RUNTIME_ID}; gateway/sandbox/inference coverage skipped"
-  echo "run-scenario: Docker unavailable for optional runtime ${RUNTIME_ID}; scaling back to platform-only suites"
-else
-  onboard_log="${E2E_CONTEXT_DIR}/onboard.log"
-  set +e
-  e2e_onboard "${ONBOARDING_ID}" >"${onboard_log}" 2>&1
-  onboard_status=$?
-  set -e
-  if [[ "${onboard_status}" -ne 0 ]]; then
-    cat "${onboard_log}" >&2
-    echo "run-scenario: onboarding ${ONBOARDING_ID} failed with status ${onboard_status}" >&2
-    exit "${onboard_status}"
-  fi
-  if [[ "${RUNTIME_ID}" == "gpu-docker-cdi" ]] && ! e2e_env_is_dry_run; then
-    echo "run-scenario: GPU Docker CDI uses host-network gateway; validating gateway from suites"
-  else
-    e2e_gateway_assert_healthy
-  fi
-  e2e_sandbox_assert_running
-fi
-
-# Expected state validation. The validator reads E2E_PROBE_OVERRIDE_* env
-# variables to simulate real probe outputs in dry-run/test contexts.
-# Live probe wiring lands scenario-by-scenario; by default, live runs move
-# straight from setup checks to suites so migrated suite assertions can be
-# debugged against the real environment.
-if [[ "${E2E_VALIDATE_EXPECTED_STATE:-0}" == "1" || "${DRY_RUN}" -eq 1 ]]; then
-  validate_args=("${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}")
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    # CodeRabbit review item #9: explicitly opt in to seeding probes from
-    # the expected state in dry-run/test mode. Live runs go through real
-    # probes and must fail closed if any are missing.
-    validate_args+=(--probes-from-state)
-  fi
-  if ! run_resolver validate-state "${validate_args[@]}"; then
-    echo "run-scenario: expected-state validation failed; suites will NOT run" >&2
-    exit 3
-  fi
-fi
-
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  echo "run-scenario: dry-run complete; context.env emitted under ${E2E_CONTEXT_DIR}"
-  exit 0
-fi
-
-SUITE_IDS=()
-while IFS= read -r suite_id; do
-  SUITE_IDS+=("${suite_id}")
-done < <(node -e "
-  try {
-    const planPath = process.argv[1];
-    const p = JSON.parse(require('fs').readFileSync(planPath, 'utf8'));
-    if (!Array.isArray(p.suites)) {
-      throw new Error('missing or invalid suites array');
-    }
-    const filter = process.env.E2E_SUITE_FILTER || '';
-    const selected = filter ? filter.split(',').map((s) => s.trim()).filter(Boolean) : p.suites.map((s) => s.id);
-    for (const id of selected) console.log(id);
-  } catch (err) {
-    console.error('run-scenario: failed to parse plan.json ' + process.argv[1] + ': ' + err.message);
-    process.exit(1);
-  }
-" "${E2E_CONTEXT_DIR}/plan.json")
-
-if [[ "${#SUITE_IDS[@]}" -eq 0 ]]; then
-  echo "run-scenario: no suites selected for ${SCENARIO_ID}" >&2
-  exit 4
-fi
-
-if [[ "${DOCKER_OPTIONAL_UNAVAILABLE}" -eq 1 ]]; then
-  FILTERED_SUITE_IDS=()
-  for suite_id in "${SUITE_IDS[@]}"; do
-    case "${suite_id}" in
-      smoke | inference | credentials | hermes-specific | local-ollama-inference | ollama-proxy | gateway-health | sandbox-shell | cloud-inference | ollama-auth-proxy | security-credentials | messaging-telegram | messaging-discord | messaging-slack | security-shields | inference-routing | sandbox-lifecycle | sandbox-operations | snapshot | rebuild | upgrade | diagnostics | docs-validation | openai-compatible-inference | inference-switch | kimi-compatibility | messaging-token-rotation | security-policy | security-injection | model-router)
-        echo "SKIP: suite.${suite_id} skipped because optional Docker runtime ${RUNTIME_ID} is unavailable"
-        ;;
-      *)
-        FILTERED_SUITE_IDS+=("${suite_id}")
-        ;;
-    esac
-  done
-  SUITE_IDS=("${FILTERED_SUITE_IDS[@]}")
-fi
+cat >&2 <<'MSG'
+run-scenario.sh is deprecated. Use the TS runner instead:
 
-if [[ "${#SUITE_IDS[@]}" -eq 0 ]]; then
-  echo "run-scenario: all suites skipped for ${SCENARIO_ID}" >&2
-  exit 0
-fi
+  npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>
 
-bash "${SCRIPT_DIR}/run-suites.sh" "${SUITE_IDS[@]}"
+Other run.ts modes (read-only):
+  --list                List canonical scenario ids
+  --emit-matrix         Emit GitHub Actions matrix payload from the registry
+  --plan-only           Local debug: print the compiled plan, do not execute
+                        (must NOT appear in any CI workflow)
+MSG
+exit 2
diff --git a/test/e2e-scenario/runtime/run-suites.sh b/test/e2e-scenario/runtime/run-suites.sh
index e99c069408..dac69cd422 100755
--- a/test/e2e-scenario/runtime/run-suites.sh
+++ b/test/e2e-scenario/runtime/run-suites.sh
@@ -2,136 +2,20 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# Run one or more functional suites against a completed E2E environment.
-#
-# Usage:
-#   bash test/e2e-scenario/runtime/run-suites.sh <suite-id> [<suite-id> ...]
-#
-# Reads suite metadata from test/e2e-scenario/validation_suites/suites.yaml
-# (or $E2E_SUITES_FILE). Each suite script receives .e2e/context.env
-# via E2E_CONTEXT_DIR and is expected to source runtime/lib/context.sh if
-# it needs specific keys.
-#
-# Environment:
-#   E2E_CONTEXT_DIR   Directory containing context.env (default: <repo>/.e2e)
-#   E2E_SUITES_FILE   Override suites metadata file (for tests)
-#   E2E_SUITES_DIR    Override the directory that suite scripts are resolved
-#                     against (default: test/e2e-scenario/validation_suites/)
-#   E2E_DRY_RUN       When 1, suite scripts run in dry-run mode themselves.
-#
-# Exit code: 0 if all steps pass; non-zero at the first failing step.
+# DEPRECATED. Suite execution is now driven directly by the TS phase
+# orchestrator (RuntimeOrchestrator -> PhaseOrchestrator.runShellStep) which
+# spawns each migrated assertion step's implementation.ref shell script.
+# There is no longer a YAML-walking bash suite runner.
 
 set -euo pipefail
 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-E2E_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
-VALIDATION_SUITES_DIR="${E2E_ROOT}/validation_suites"
-
-if (($# == 0)); then
-  echo "run-suites: at least one suite id required" >&2
-  echo "Usage: bash test/e2e-scenario/runtime/run-suites.sh <suite-id> [<suite-id> ...]" >&2
-  exit 2
-fi
-
-export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}"
-SUITES_FILE="${E2E_SUITES_FILE:-${VALIDATION_SUITES_DIR}/suites.yaml}"
-SUITES_DIR="${E2E_SUITES_DIR:-${VALIDATION_SUITES_DIR}}"
-
-CTX_FILE="${E2E_CONTEXT_DIR}/context.env"
-if [[ ! -f "${CTX_FILE}" ]]; then
-  echo "run-suites: missing ${CTX_FILE}; run-scenario.sh must emit context before running suites" >&2
-  exit 1
-fi
-
-# Sanity-check that the baseline scenario key is present.
-if ! grep -q '^E2E_SCENARIO=' "${CTX_FILE}"; then
-  echo "run-suites: ${CTX_FILE} is missing required key E2E_SCENARIO" >&2
-  exit 1
-fi
-
-# Resolve the suite step list by reading the YAML via node.
-resolve_suite() {
-  local suite_id="$1"
-  node -e "
-    const fs = require('fs');
-    const path = process.argv[1];
-    const wanted = process.argv[2];
-    const raw = fs.readFileSync(path, 'utf8');
-    // Minimal YAML reader: prefer js-yaml if available; else fall back.
-    let yaml;
-    try { yaml = require('js-yaml'); } catch (_) {
-      process.stderr.write('run-suites: js-yaml required to parse suite metadata\n');
-      process.exit(2);
-    }
-    const doc = yaml.load(raw);
-    if (!doc || !doc.suites || !doc.suites[wanted]) {
-      process.stderr.write('run-suites: unknown suite: ' + wanted + '\n');
-      process.exit(3);
-    }
-    const steps = doc.suites[wanted].steps || [];
-    for (const s of steps) {
-      if (!s || typeof s.id !== 'string' || typeof s.script !== 'string') {
-        process.stderr.write('run-suites: malformed step in ' + wanted + '\n');
-        process.exit(4);
-      }
-      process.stdout.write(s.id + '\t' + s.script + '\n');
-    }
-  " "${SUITES_FILE}" "${suite_id}"
-}
-
-declare -a FAILED_STEPS=()
-declare -a PASSED_STEPS=()
-OVERALL_STATUS=0
-
-run_one_suite() {
-  local suite_id="$1"
-  echo "== suite: ${suite_id} =="
-  local steps
-  if ! steps="$(resolve_suite "${suite_id}")"; then
-    OVERALL_STATUS=1
-    return 1
-  fi
-  if [[ -z "${steps}" ]]; then
-    echo "  (no steps)"
-    return 0
-  fi
-  while IFS=$'\t' read -r step_id script; do
-    [[ -z "${step_id}" ]] && continue
-    local full="${SUITES_DIR}/${script}"
-    echo "  -> step: ${step_id} (${script})"
-    if [[ ! -f "${full}" ]]; then
-      echo "    FAIL: script not found at ${full}" >&2
-      FAILED_STEPS+=("${suite_id}/${step_id}")
-      OVERALL_STATUS=1
-      return 1
-    fi
-    if ! bash "${full}"; then
-      echo "    FAIL: suite=${suite_id} step=${step_id}" >&2
-      FAILED_STEPS+=("${suite_id}/${step_id}")
-      OVERALL_STATUS=1
-      return 1
-    fi
-    echo "    PASS: ${step_id}"
-    PASSED_STEPS+=("${suite_id}/${step_id}")
-  done <<<"${steps}"
-}
-
-for suite_id in "$@"; do
-  if ! run_one_suite "${suite_id}"; then
-    break
-  fi
-done
+cat >&2 <<'MSG'
+run-suites.sh is deprecated. Suite assertions are now executed by
+test/e2e-scenario/scenarios/orchestrators/phase.ts via child_process.spawn,
+walking the typed assertionGroups defined in the scenario registry.
 
-echo
-echo "== suite summary =="
-# bash 3.2 (macOS) fails on "${arr[@]}" when the array is empty under `set -u`;
-# use the `${arr[@]+...}` guard to expand to nothing when empty.
-for p in ${PASSED_STEPS[@]+"${PASSED_STEPS[@]}"}; do
-  echo "  PASS ${p}"
-done
-for f in ${FAILED_STEPS[@]+"${FAILED_STEPS[@]}"}; do
-  echo "  FAIL ${f}"
-done
+Run scenarios via:
 
-exit "${OVERALL_STATUS}"
+  npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>
+MSG
+exit 2
diff --git a/test/e2e-scenario/scenarios/assertions/environment.ts b/test/e2e-scenario/scenarios/assertions/environment.ts
deleted file mode 100644
index be7a62e6fb..0000000000
--- a/test/e2e-scenario/scenarios/assertions/environment.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function environmentBaseline(): AssertionGroup {
-  return {
-    id: "environment.baseline",
-    phase: "environment",
-    description: "Skeleton environment baseline assertion group.",
-    migrationStatus: "complete",
-    steps: [
-      {
-        id: "environment.plan.skeleton",
-        phase: "environment",
-        description: "Placeholder step until live environment orchestration is migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/environment.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/assertions/onboarding.ts b/test/e2e-scenario/scenarios/assertions/onboarding.ts
deleted file mode 100644
index 9886a701fb..0000000000
--- a/test/e2e-scenario/scenarios/assertions/onboarding.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function onboardingBaseline(): AssertionGroup {
-  return {
-    id: "onboarding.baseline",
-    phase: "onboarding",
-    description: "Skeleton onboarding assertion group.",
-    steps: [
-      {
-        id: "onboarding.plan.skeleton",
-        phase: "onboarding",
-        description: "Placeholder step until onboarding assertions are migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/onboarding.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/assertions/registry.ts b/test/e2e-scenario/scenarios/assertions/registry.ts
index 5123c6c731..c4457cb9ed 100644
--- a/test/e2e-scenario/scenarios/assertions/registry.ts
+++ b/test/e2e-scenario/scenarios/assertions/registry.ts
@@ -3,7 +3,6 @@
 
 import fs from "node:fs";
 import path from "node:path";
-import { environmentBaseline } from "./environment.ts";
 import type { AssertionGroup, AssertionStep, PhaseName, ScenarioDefinition } from "../types.ts";
 
 type Reliability = AssertionStep["reliability"];
@@ -25,22 +24,42 @@ function shellStep(input: ShellStepInput): AssertionStep {
   };
 }
 
-function probeStep(id: string, phase: PhaseName, ref: string, reliability?: Reliability): AssertionStep {
+interface ProbeStepOptions {
+  reliability?: Reliability;
+  // When true, an unregistered probe fails the phase (and the run)
+  // instead of being skipped. Set this for security-sensitive probes
+  // whose absence would make the run unsafe to treat as passing.
+  required?: boolean;
+}
+
+function probeStep(
+  id: string,
+  phase: PhaseName,
+  ref: string,
+  options: ProbeStepOptions = {},
+): AssertionStep {
   return {
     id,
     phase,
     implementation: { kind: "probe", ref },
     evidencePath: `.e2e/assertions/${id}.json`,
-    reliability,
+    reliability: options.reliability,
+    required: options.required,
   };
 }
 
-function pendingStep(id: string, phase: PhaseName, ref: string): AssertionStep {
+function pendingStep(
+  id: string,
+  phase: PhaseName,
+  ref: string,
+  options: { required?: boolean } = {},
+): AssertionStep {
   return {
     id,
     phase,
     implementation: { kind: "pending", ref },
     evidencePath: `.e2e/assertions/${id}.json`,
+    required: options.required,
   };
 }
 
@@ -186,7 +205,21 @@ export const runtimeControlGroups: AssertionGroup[] = [
     phase: "runtime",
     description: "Negative scenario runtime check ensuring forbidden side effects did not occur.",
     migrationStatus: "complete",
-    steps: [pendingStep("runtime.expected-failure.no-side-effects", "runtime", "expectedFailureNoSideEffectsProbe")],
+    steps: [
+      pendingStep(
+        "runtime.expected-failure.no-side-effects",
+        "runtime",
+        "expectedFailureNoSideEffectsProbe",
+        // Negative scenarios assert that a declared failure mode
+        // produced no forbidden side effects. Until the side-effect
+        // validator is implemented, this step must fail closed for
+        // any scenario that opts into runtimeControlGroups[0]
+        // (i.e. scenario.expectedFailure is set). Skipping it would
+        // let negative scenarios silently "pass" without verifying
+        // their core contract.
+        { required: true },
+      ),
+    ],
   },
 ];
 
@@ -219,9 +252,19 @@ export const validationSuiteGroups: AssertionGroup[] = [
   ]),
   suiteGroup("credentials", credentialsSteps),
   suiteGroup("security-credentials", credentialsSteps),
-  suiteGroup("security-shields", [probeStep("security.shields.config", "runtime", "shieldsConfigProbe")]),
-  suiteGroup("security-policy", [probeStep("security.policy.enforced", "runtime", "networkPolicyProbe")]),
-  suiteGroup("security-injection", [probeStep("security.injection.blocked", "runtime", "injectionBlockedProbe")]),
+  // Security-sensitive probes MUST fail closed until the probe
+  // registry lands. A skipped shields/policy/injection check would
+  // produce fake-green for the exact suites these scenarios exist to
+  // protect.
+  suiteGroup("security-shields", [
+    probeStep("security.shields.config", "runtime", "shieldsConfigProbe", { required: true }),
+  ]),
+  suiteGroup("security-policy", [
+    probeStep("security.policy.enforced", "runtime", "networkPolicyProbe", { required: true }),
+  ]),
+  suiteGroup("security-injection", [
+    probeStep("security.injection.blocked", "runtime", "injectionBlockedProbe", { required: true }),
+  ]),
   suiteGroup("messaging-telegram", [
     shellStep({ id: "messaging.telegram.injection-safety", phase: "runtime", ref: "test/e2e-scenario/validation_suites/messaging/telegram/00-telegram-injection-safety.sh", reliability: { timeoutSeconds: 30, retry: { attempts: 2, on: ["external-tunnel"] } } }),
     shellStep({ id: "messaging.telegram.injection-payload-classes", phase: "runtime", ref: "test/e2e-scenario/validation_suites/messaging/telegram/01-telegram-injection-payload-classes.sh", reliability: { timeoutSeconds: 30, retry: { attempts: 2, on: ["external-tunnel"] } } }),
@@ -257,7 +300,7 @@ export const validationSuiteGroups: AssertionGroup[] = [
 ];
 
 export const assertionRegistry = {
-  groups: [environmentBaseline(), ...onboardingAssertionGroups, ...runtimeControlGroups, ...validationSuiteGroups],
+  groups: [...onboardingAssertionGroups, ...runtimeControlGroups, ...validationSuiteGroups],
 };
 
 export function assertionGroupForSuite(suiteId: string): AssertionGroup | undefined {
@@ -285,8 +328,15 @@ function supplementalSuiteIdsForScenario(scenario: ScenarioDefinition): string[]
       "sandbox-lifecycle",
       "sandbox-operations",
       "snapshot",
-      "rebuild",
-      "upgrade",
+      // 'rebuild' and 'upgrade' are intentionally NOT supplemental
+      // here. Those suites assert post-rebuild state (marker survival,
+      // version upgrade, post-rebuild inference) and require a real
+      // `nemoclaw rebuild` to have run first. The dedicated
+      // `ubuntu-rebuild-openclaw` scenario opts into them by declaring
+      // a `rebuild-current-version` lifecycle profile that performs
+      // the rebuild before the runtime phase. Including them here
+      // produced false failures (no rebuild ran, so nothing could be
+      // preserved) and obscured the real coverage gap.
       "diagnostics",
       "docs-validation",
     );
@@ -352,8 +402,11 @@ export function assertionGroupsForScenario(scenario: ScenarioDefinition): Assert
     return group;
   });
 
+  // Environment phase work is performed by typed PhaseAction entries
+  // (context.emit + install.<id>) emitted from compiler.phaseActions(),
+  // not by assertion groups. No environment-phase assertion group is
+  // included in scenario plans.
   const groups: (AssertionGroup | undefined)[] = [
-    environmentBaseline(),
     ...onboardingGroups,
     ...suiteGroups,
     ...supplementalGroups,
diff --git a/test/e2e-scenario/scenarios/assertions/runtime.ts b/test/e2e-scenario/scenarios/assertions/runtime.ts
deleted file mode 100644
index 5ed7031279..0000000000
--- a/test/e2e-scenario/scenarios/assertions/runtime.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function runtimeSmokeSkeleton(): AssertionGroup {
-  return {
-    id: "runtime.smoke.skeleton",
-    phase: "runtime",
-    description: "Skeleton runtime smoke assertion group.",
-    steps: [
-      {
-        id: "runtime.plan.skeleton",
-        phase: "runtime",
-        description: "Placeholder step until validation suites are migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/runtime.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/builder.ts b/test/e2e-scenario/scenarios/builder.ts
index b2b9243a51..d4c2327e84 100644
--- a/test/e2e-scenario/scenarios/builder.ts
+++ b/test/e2e-scenario/scenarios/builder.ts
@@ -60,7 +60,7 @@ export class ScenarioBuilder {
     return this;
   }
 
-  expectedFailure(expectedFailure: Record<string, unknown>): ScenarioBuilder {
+  expectedFailure(expectedFailure: import("./types.ts").ExpectedFailureContract): ScenarioBuilder {
     this.definition.expectedFailure = expectedFailure;
     return this;
   }
diff --git a/test/e2e-scenario/scenarios/compiler.ts b/test/e2e-scenario/scenarios/compiler.ts
index 5046c77dd2..8d46d419d1 100644
--- a/test/e2e-scenario/scenarios/compiler.ts
+++ b/test/e2e-scenario/scenarios/compiler.ts
@@ -4,11 +4,33 @@
 import fs from "node:fs";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
+import { getExpectedState, probesForState } from "./expected-states.ts";
 import { loadManifest } from "./manifests.ts";
 import { requireScenarios } from "./registry.ts";
-import type { AssertionGroup, NemoClawInstanceManifest, PhaseName, RunPlan, ScenarioDefinition, SutBoundary } from "./types.ts";
+import type {
+  AssertionGroup,
+  ExpectedFailureContract,
+  ExpectedFailurePhase,
+  NemoClawInstanceManifest,
+  PhaseAction,
+  PhaseName,
+  RunPlan,
+  ScenarioDefinition,
+  SutBoundary,
+} from "./types.ts";
 
-const PHASES: PhaseName[] = ["environment", "onboarding", "runtime"];
+// Phase order. state-validation runs after onboarding and before
+// lifecycle and runtime so gateway/sandbox/cli probes gate suite
+// execution: a failed probe is a failed phase action, and the existing
+// runner short-circuit reports runtime as skipped without re-running
+// suite assertions against a missing/wedged environment.
+const PHASES: PhaseName[] = [
+  "environment",
+  "onboarding",
+  "state-validation",
+  "lifecycle",
+  "runtime",
+];
 const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../..");
 
 function groupsForPhase(scenario: ScenarioDefinition, phase: PhaseName): AssertionGroup[] {
@@ -67,19 +89,198 @@ function validateManifestCompatibility(scenario: ScenarioDefinition, manifest?:
   }
 }
 
-function phaseActions(phase: PhaseName, scenario: ScenarioDefinition): string[] {
+// Centralized paths to the existing shell helpers. Spec rule: shell
+// scripts can remain as implementations, but invocation goes through
+// typed assertion/action definitions, not bare workflow YAML or a
+// resurrected bash runner.
+const INSTALL_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh";
+const ONBOARD_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh";
+const PROBES_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/probes/dispatch.sh";
+const LIFECYCLE_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/lifecycle/dispatch.sh";
+
+// Default action timeouts. Install and onboarding can take a while on
+// cold runners (Docker pulls, image builds, sandbox bootstrap).
+const INSTALL_TIMEOUT_SECONDS = 900;
+const ONBOARD_TIMEOUT_SECONDS = 900;
+// Lifecycle actions wrap state-mutation flows like `nemoclaw rebuild`,
+// which can take longer than onboarding when an image rebuild is
+// involved (workspace snapshot + recreate + verify).
+const LIFECYCLE_TIMEOUT_SECONDS = 900;
+// State-validation probes are cheap (`command -v`, single curl,
+// `nemoclaw list`); a tight timeout keeps a wedged probe from
+// consuming runner budget.
+const PROBE_TIMEOUT_SECONDS = 30;
+
+// Declared parent-env secrets each onboarding profile actually needs.
+// Anything not listed here (and not in the framework allowlist) is
+// dropped before spawn by buildChildEnv. Keep this list minimal —
+// every entry widens the secret blast radius if the child or one of
+// its descendants logs unredacted output.
+const ONBOARD_PROFILE_SECRET_ENV: Readonly<Record<string, readonly string[]>> = {
+  // Cloud profiles invoke `nemoclaw onboard` which authenticates to the
+  // NVIDIA cloud provider via NVIDIA_API_KEY.
+  "cloud-openclaw": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-custom-policies": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-invalid-nvidia-key": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-gateway-port-conflict": ["NVIDIA_API_KEY"],
+  // Negative scenario: nemoclaw onboard runs against a docker shim that
+  // exits non-zero. Onboard never reaches the cloud auth step, but the
+  // CLI still loads NVIDIA_API_KEY when present — keep it in the secret
+  // env so behavior matches a real user invocation.
+  "cloud-openclaw-no-docker": ["NVIDIA_API_KEY"],
+  "cloud-hermes": ["NVIDIA_API_KEY"],
+  "cloud-hermes-discord": ["NVIDIA_API_KEY"],
+  "cloud-hermes-slack": ["NVIDIA_API_KEY"],
+  // Local profiles do not need any cloud secret.
+  "local-ollama-openclaw": [],
+};
+
+function phaseActions(phase: PhaseName, scenario: ScenarioDefinition): PhaseAction[] {
   if (phase === "environment") {
+    if (!scenario.environment) {
+      // Scenarios without any environment dimension (skeleton scenarios)
+      // legitimately have no actions yet. Don't fail-fast here.
+      return [];
+    }
+    const installId = scenario.environment.install;
+    if (!installId) {
+      // Environment is declared but install is missing - that IS a
+      // malformed scenario; fail fast so the caller sees a clear error
+      // rather than a phase that silently no-ops setup work.
+      throw new Error(`Scenario ${scenario.id} is missing environment.install`);
+    }
     return [
-      `install:${scenario.environment?.install ?? "unknown"}`,
-      `runtime:${scenario.environment?.runtime ?? "unknown"}`,
+      {
+        id: `environment.install.${installId}`,
+        phase: "environment",
+        description: `Run e2e_install ${installId} to set up the host control plane.`,
+        kind: "shell-fn",
+        scriptRef: INSTALL_DISPATCH,
+        fn: "e2e_install",
+        arg: installId,
+        timeoutSeconds: INSTALL_TIMEOUT_SECONDS,
+        evidencePath: `.e2e/actions/environment.install.${installId}.log`,
+      },
     ];
   }
   if (phase === "onboarding") {
-    return [`onboard:${scenario.environment?.onboarding ?? "unknown"}`];
+    if (!scenario.environment) {
+      return [];
+    }
+    const baseOnboardingId = scenario.environment.onboarding;
+    if (!baseOnboardingId) {
+      throw new Error(`Scenario ${scenario.id} is missing environment.onboarding`);
+    }
+    // Negative-runtime scenarios route to a dedicated onboarding profile
+    // that sets up the failure condition (e.g. docker-missing) BEFORE
+    // invoking `nemoclaw onboard` and captures the resulting output to
+    // the log file the assertion phase reads. The profile id convention
+    // is `<base>-no-docker`. New negative profiles register a worker in
+    // nemoclaw_scenarios/onboard/dispatch.sh and a secret-env mapping
+    // above.
+    const onboardingId =
+      scenario.environment.runtime === "docker-missing"
+        ? `${baseOnboardingId}-no-docker`
+        : baseOnboardingId;
+    // secretEnv defaults to [] (no parent-env secrets pass through)
+    // unless the profile is explicitly listed above. Unknown profiles
+    // get the safest setting and surface the gap loudly the first
+    // time they actually need a secret to authenticate.
+    const secretEnv = ONBOARD_PROFILE_SECRET_ENV[onboardingId] ?? [];
+    return [
+      {
+        id: `onboarding.profile.${onboardingId}`,
+        phase: "onboarding",
+        description: `Run e2e_onboard ${onboardingId} to bring the gateway and sandbox online.`,
+        kind: "shell-fn",
+        scriptRef: ONBOARD_DISPATCH,
+        fn: "e2e_onboard",
+        arg: onboardingId,
+        timeoutSeconds: ONBOARD_TIMEOUT_SECONDS,
+        evidencePath: `.e2e/actions/onboarding.profile.${onboardingId}.log`,
+        // Legacy preflight assertions look for ${E2E_CONTEXT_DIR}/onboard.log;
+        // publish a stable alias so they keep working without rewiring.
+        aliasPath: "onboard.log",
+        secretEnv,
+      },
+    ];
+  }
+  if (phase === "state-validation") {
+    // State-validation actions are emitted from the typed expected-state
+    // registry, NOT from the legacy expected-states.yaml. The compiler
+    // stays a pure function over typed inputs; YAML-vs-typed parity is
+    // enforced by a framework test, not by re-reading the YAML at
+    // compile time.
+    if (!scenario.expectedStateId) {
+      // Scenarios without an expected state (older skeleton scenarios)
+      // legitimately have no probes; do not fail-fast.
+      return [];
+    }
+    const state = getExpectedState(scenario.expectedStateId);
+    if (!state) {
+      // The compiler treats an unknown expected_state id as a hard
+      // error: typed scenarios must reference a typed state. The
+      // legacy YAML resolver has its own validation path; this is a
+      // separate (and stricter) contract for the typed runner.
+      throw new Error(
+        `Scenario ${scenario.id} references unknown expected_state '${scenario.expectedStateId}'`,
+      );
+    }
+    return probesForState(state).map((probeId) => ({
+      id: `state-validation.${probeId}`,
+      phase: "state-validation",
+      description: `Probe ${probeId} from expected_state '${state.id}'.`,
+      kind: "shell-fn",
+      scriptRef: PROBES_DISPATCH,
+      fn: "e2e_state_probe",
+      arg: probeId,
+      timeoutSeconds: PROBE_TIMEOUT_SECONDS,
+      evidencePath: `.e2e/actions/state-validation.${probeId}.log`,
+    }));
   }
-  return (scenario.suiteIds ?? []).map((suiteId) => `suite:${suiteId}`);
+  if (phase === "lifecycle") {
+    // Lifecycle is the post-onboarding state-mutation phase: rebuild,
+    // upgrade, snapshot+restore, etc. Scenarios opt in by declaring
+    // `environment.lifecycle = <profile-id>`; everything else gets
+    // an empty action list and runs no lifecycle assertions. The
+    // profile id routes through nemoclaw_scenarios/lifecycle/dispatch.sh
+    // to a worker that mutates state and seeds context.env keys
+    // (E2E_REBUILD_MARKER_PATH, E2E_REBUILD_MARKER_EXPECTED, ...) the
+    // runtime-phase assertions in rebuild_upgrade.sh consume.
+    if (!scenario.environment?.lifecycle) {
+      return [];
+    }
+    const lifecycleId = scenario.environment.lifecycle;
+    const secretEnv = LIFECYCLE_PROFILE_SECRET_ENV[lifecycleId] ?? [];
+    return [
+      {
+        id: `lifecycle.profile.${lifecycleId}`,
+        phase: "lifecycle",
+        description: `Run e2e_lifecycle ${lifecycleId} to drive the post-onboard state mutation.`,
+        kind: "shell-fn",
+        scriptRef: LIFECYCLE_DISPATCH,
+        fn: "e2e_lifecycle",
+        arg: lifecycleId,
+        timeoutSeconds: LIFECYCLE_TIMEOUT_SECONDS,
+        evidencePath: `.e2e/actions/lifecycle.profile.${lifecycleId}.log`,
+        secretEnv,
+      },
+    ];
+  }
+  // Runtime phase has no actions; suites are assertion groups.
+  return [];
 }
 
+// Declared parent-env secrets each lifecycle profile needs. Mirrors
+// ONBOARD_PROFILE_SECRET_ENV: minimal allowlist; widen only when a
+// profile actually invokes a CLI that authenticates upstream.
+const LIFECYCLE_PROFILE_SECRET_ENV: Readonly<Record<string, readonly string[]>> = {
+  // `nemoclaw rebuild` re-reads NVIDIA_API_KEY when the post-rebuild
+  // sandbox is brought back up; keep it in the secret env so behavior
+  // matches a real user invocation.
+  "rebuild-current-version": ["NVIDIA_API_KEY"],
+};
+
 const SUT_BOUNDARIES: SutBoundary[] = [
   { id: "host-cli", client: "HostCliClient" },
   { id: "gateway", client: "GatewayClient" },
@@ -89,6 +290,41 @@ const SUT_BOUNDARIES: SutBoundary[] = [
   { id: "state", client: "StateClient" },
 ];
 
+// Negative scenarios advertise their failure mode against one of these
+// user-facing phases. "preflight" is intentionally distinct from the
+// internal PhaseName union: scenario manifests speak the user's vocab
+// ("preflight failed") and the matcher resolves preflight to the
+// onboarding phase orchestrator. See orchestrators/negative-matcher.ts.
+const EXPECTED_FAILURE_PHASES: readonly ExpectedFailurePhase[] = [
+  "environment",
+  "onboarding",
+  "runtime",
+  "preflight",
+];
+
+function validateExpectedFailure(scenarioId: string, contract: ExpectedFailureContract): void {
+  // Throws on the first violated rule: phase, then errorClass, then forbiddenSideEffects.
+  const prefix = `Scenario ${scenarioId} expectedFailure`;
+  const isBlank = (candidate: unknown): boolean =>
+    typeof candidate !== "string" || candidate.trim().length === 0;
+  const { phase, errorClass, forbiddenSideEffects: forbidden } = contract;
+  if (!EXPECTED_FAILURE_PHASES.includes(phase)) {
+    const allowed = EXPECTED_FAILURE_PHASES.join(", ");
+    throw new Error(`${prefix}.phase invalid: ${String(phase)} (allowed: ${allowed})`);
+  }
+  if (isBlank(errorClass)) {
+    throw new Error(`${prefix}.errorClass must be a non-empty string`);
+  }
+  // forbiddenSideEffects is optional; it is only checked when provided.
+  if (forbidden === undefined) return;
+  if (!Array.isArray(forbidden)) {
+    throw new Error(`${prefix}.forbiddenSideEffects must be an array`);
+  }
+  for (const entry of forbidden) {
+    if (isBlank(entry)) throw new Error(`${prefix}.forbiddenSideEffects entries must be non-empty strings`);
+  }
+}
+
 export function validateRunPlan(plan: RunPlan): void {
   if (!plan.scenarioId) {
     throw new Error("RunPlan missing scenarioId");
@@ -101,6 +337,9 @@ export function validateRunPlan(plan: RunPlan): void {
   if (plan.sutBoundaries.length === 0) {
     throw new Error(`RunPlan ${plan.scenarioId} missing SUT boundaries`);
   }
+  if (plan.expectedFailure) {
+    validateExpectedFailure(plan.scenarioId, plan.expectedFailure);
+  }
 }
 
 export function compileRunPlans(inputs: Array<string | ScenarioDefinition>): RunPlan[] {
@@ -112,7 +351,7 @@ export function compileRunPlans(inputs: Array<string | ScenarioDefinition>): Run
     const plan: RunPlan = {
       scenarioId: scenario.id,
       status: "compiled",
-      note: "compiled plan-only preview; live execution lands in later phases",
+      note: "compiled plan; phase orchestrators execute actions then assertions",
       manifestPath: scenario.manifestPath,
       manifest,
       environment: scenario.environment,
@@ -182,6 +421,18 @@ export function renderPlanText(plans: RunPlan[]): string {
     }
     for (const phase of plan.phases) {
       lines.push(`Phase: ${phase.name}`);
+      for (const action of phase.actions) {
+        const policy: string[] = [];
+        if (action.timeoutSeconds) {
+          policy.push(`timeout=${action.timeoutSeconds}s`);
+        }
+        const target = action.kind === "shell-fn"
+          ? `${action.fn ?? ""}${action.arg ? ` ${action.arg}` : ""}`.trim()
+          : action.scriptRef;
+        const policySuffix = policy.length > 0 ? ` (${policy.join(", ")})` : "";
+        const targetSuffix = target ? ` -> ${target}` : "";
+        lines.push(`  Action: ${action.id}${policySuffix}${targetSuffix}`);
+      }
       for (const group of phase.assertionGroups) {
         lines.push(`  Group: ${group.id}`);
         for (const step of group.steps) {
diff --git a/test/e2e-scenario/scenarios/expected-states.ts b/test/e2e-scenario/scenarios/expected-states.ts
new file mode 100644
index 0000000000..539c520f22
--- /dev/null
+++ b/test/e2e-scenario/scenarios/expected-states.ts
@@ -0,0 +1,133 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { ExpectedState, StateProbeId } from "./types.ts";
+
+// Typed mirror of nemoclaw_scenarios/expected-states.yaml.
+//
+// During the transition this registry is the source of truth for the
+// TS runner. expected-states.yaml stays in place for the legacy bash
+// resolver; a framework test verifies the typed registry covers the
+// YAML's expected-state ids and matches their structural shape on the
+// dimensions the typed runner probes today (cli, gateway, sandbox).
+// Inference and credentials remain declared in YAML and in this typed
+// registry, but the compiler skips emitting probe actions for them
+// until the corresponding probe scripts land — see
+// nemoclaw_scenarios/probes/.
+
+const cloudOpenclawReady: ExpectedState = {
+  id: "cloud-openclaw-ready",
+  cli: { installed: true },
+  gateway: { expected: "present", health: "healthy" },
+  sandbox: { expected: "present", status: "running", agent: "openclaw" },
+  inference: { expected: "available", provider: "nvidia" },
+  credentials: { expected: "present" },
+};
+
+const cloudOpenclawCustomPoliciesReady: ExpectedState = {
+  ...cloudOpenclawReady,
+  id: "cloud-openclaw-custom-policies-ready",
+};
+
+const cloudHermesReady: ExpectedState = {
+  id: "cloud-hermes-ready",
+  cli: { installed: true },
+  gateway: { expected: "present", health: "healthy" },
+  sandbox: { expected: "present", status: "running", agent: "hermes" },
+  inference: { expected: "available", provider: "nvidia" },
+  credentials: { expected: "present" },
+};
+
+const localOllamaOpenclawReady: ExpectedState = {
+  id: "local-ollama-openclaw-ready",
+  cli: { installed: true },
+  gateway: { expected: "present", health: "healthy" },
+  sandbox: { expected: "present", status: "running", agent: "openclaw" },
+  inference: { expected: "available", provider: "ollama" },
+  credentials: { expected: "present" },
+};
+
+const macosCliReadyDockerOptional: ExpectedState = {
+  id: "macos-cli-ready-docker-optional",
+  cli: { installed: true },
+  gateway: { expected: "optional", health: "optional" },
+  sandbox: { expected: "optional", status: "optional", agent: "openclaw" },
+  inference: { expected: "optional", provider: "nvidia" },
+  credentials: { expected: "optional" },
+};
+
+const preflightFailureNoSandbox: ExpectedState = {
+  id: "preflight-failure-no-sandbox",
+  cli: { installed: true },
+  gateway: { expected: "absent" },
+  sandbox: { expected: "absent" },
+};
+
+const onboardingFailureInvalidNvidiaKey: ExpectedState = {
+  id: "onboarding-failure-invalid-nvidia-key",
+  cli: { installed: true },
+  gateway: { expected: "absent" },
+  sandbox: { expected: "absent" },
+};
+
+const onboardingFailureGatewayPortConflict: ExpectedState = {
+  id: "onboarding-failure-gateway-port-conflict",
+  cli: { installed: true },
+  gateway: { expected: "absent" },
+  sandbox: { expected: "absent" },
+};
+
+const REGISTRY: readonly ExpectedState[] = [
+  cloudOpenclawReady,
+  cloudOpenclawCustomPoliciesReady,
+  cloudHermesReady,
+  localOllamaOpenclawReady,
+  macosCliReadyDockerOptional,
+  preflightFailureNoSandbox,
+  onboardingFailureInvalidNvidiaKey,
+  onboardingFailureGatewayPortConflict,
+];
+
+const BY_ID: ReadonlyMap<string, ExpectedState> = new Map(REGISTRY.map((state) => [state.id, state]));
+
+export function listExpectedStates(): readonly ExpectedState[] {
+  return REGISTRY;
+}
+
+export function getExpectedState(id: string): ExpectedState | undefined {
+  return BY_ID.get(id);
+}
+
+export function requireExpectedState(id: string): ExpectedState {
+  // Strict lookup: a hit is returned as-is; a miss throws and lists every known id.
+  const found = BY_ID.get(id);
+  if (found) {
+    return found;
+  }
+  throw new Error(`Unknown expected_state id '${id}' (available: ${Array.from(BY_ID.keys()).join(", ")})`);
+}
+
+// Translate the typed expected-state contract into the concrete probe
+// ids the state-validation orchestrator emits. Inference and
+// credentials probes are intentionally omitted today (probe scripts
+// not yet implemented); their declarations remain in ExpectedState so
+// the contract is visible in plan output and a future change can
+// switch on emission without touching scenario data. "optional"
+// dimensions emit no probe actions.
+export function probesForState(state: ExpectedState): readonly StateProbeId[] {
+  // Pure mapping from an expected-state contract to the probe ids to run.
+  const { cli, gateway, sandbox } = state;
+  // "Ok" = the dimension is declared present AND carries the healthy/running
+  // detail; a present dimension without that detail emits no probe at all.
+  const gatewayOk = gateway?.expected === "present" && gateway.health === "healthy";
+  const sandboxOk = sandbox?.expected === "present" && sandbox.status === "running";
+  // One slot per dimension, in the fixed output order cli -> gateway ->
+  // sandbox. `undefined` marks a dimension that emits nothing (missing,
+  // "optional", or not matching either branch) and is filtered out below.
+  const slots: Array<StateProbeId | undefined> = [
+    cli?.installed === true ? "cli-installed" : undefined,
+    gatewayOk ? "gateway-healthy" : gateway?.expected === "absent" ? "gateway-absent" : undefined,
+    sandboxOk ? "sandbox-running" : sandbox?.expected === "absent" ? "sandbox-absent" : undefined,
+  ];
+  return slots.filter((slot): slot is StateProbeId => slot !== undefined);
+}
diff --git a/test/e2e-scenario/scenarios/matrix.ts b/test/e2e-scenario/scenarios/matrix.ts
index dc869941c9..daea207dd4 100644
--- a/test/e2e-scenario/scenarios/matrix.ts
+++ b/test/e2e-scenario/scenarios/matrix.ts
@@ -26,3 +26,23 @@ export function brevLaunchableRemote(onboarding: string): ScenarioEnvironment {
 export function ubuntuRepoNoDocker(onboarding: string): ScenarioEnvironment {
   return { platform: "ubuntu-local", install: "repo-current", runtime: "docker-missing", onboarding };
 }
+
+/**
+ * Environment for lifecycle scenarios: ubuntu-local, repo-current,
+ * docker-running, plus the given onboarding and lifecycle profile ids.
+ * Meant for scenarios whose runtime assertions rely on a post-onboard
+ * state mutation (rebuild, upgrade, snapshot+restore); the lifecycle id
+ * is routed to a worker under nemoclaw_scenarios/lifecycle/ by its dispatcher.
+ */
+export function ubuntuRepoDockerLifecycle(onboarding: string, lifecycle: string): ScenarioEnvironment {
+  // Only the two profile ids vary per scenario; the platform / install /
+  // runtime triple is fixed for this helper.
+  const environment: ScenarioEnvironment = {
+    platform: "ubuntu-local",
+    install: "repo-current",
+    runtime: "docker-running",
+    onboarding,
+    lifecycle,
+  };
+  return environment;
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/context.ts b/test/e2e-scenario/scenarios/orchestrators/context.ts
new file mode 100644
index 0000000000..35394121fc
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/context.ts
@@ -0,0 +1,108 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import path from "node:path";
+import type { RunContext, RunPlan } from "../types.ts";
+
+// Spec ownership: emitting the normalized context.env that downstream
+// shell helpers consume is FRAMEWORK INFRASTRUCTURE, not a phase action.
+// Doing it as a shell action coupled the typed runner back to the old
+// resolver's plan.json shape; doing it here keeps the typed RunPlan as
+// the single source of truth.
+//
+// We seed context.env with values derivable from the typed RunPlan
+// (scenario id, install method, agent/provider/route, default sandbox
+// name and gateway URL). Onboarding helpers may overwrite these via
+// e2e_context_set (e.g. assigning a real sandbox name, real gateway
+// URL after the gateway boots).
+
+function platformOsFromManifest(plan: RunPlan): string {
+  // An explicit, non-empty manifest OS always wins over inference.
+  const manifestOs = plan.manifest?.spec.setup.platform.os;
+  if (typeof manifestOs === "string" && manifestOs !== "") {
+    return manifestOs;
+  }
+  // Otherwise infer from the platform id prefix: macos/wsl map to themselves;
+  // brev, gpu, and anything else (or no platform) resolve to "ubuntu".
+  const platformId = plan.environment?.platform ?? "";
+  for (const prefix of ["macos", "wsl"]) {
+    if (platformId.startsWith(prefix)) return prefix;
+  }
+  return "ubuntu";
+}
+
+function executionTargetFromManifest(plan: RunPlan): string {
+  // The manifest value wins when it is a non-empty string; otherwise only the
+  // brev-launchable platform is treated as remote.
+  const declared = plan.manifest?.spec.setup.platform.executionTarget;
+  const inferred = plan.environment?.platform === "brev-launchable" ? "remote" : "local";
+  return typeof declared === "string" && declared !== "" ? declared : inferred;
+}
+
+function containerEngine(plan: RunPlan): string {
+  const declared = plan.manifest?.spec.setup.runtime.containerEngine; // empty/non-string => default
+  return typeof declared !== "string" || declared === "" ? "docker" : declared;
+}
+
+function containerDaemon(plan: RunPlan): string {
+  // A non-empty manifest value is used verbatim; otherwise the daemon is
+  // "missing" only for the docker-missing runtime and "running" for all else.
+  const declared = plan.manifest?.spec.setup.runtime.containerDaemon;
+  const inferred = plan.environment?.runtime === "docker-missing" ? "missing" : "running";
+  return typeof declared === "string" && declared !== "" ? declared : inferred;
+}
+
+function defaultGatewayUrl(agent: string): string {
+  // Seed URL on loopback: hermes -> port 8642, any other agent -> 18789
+  // (the historical emit-context-from-plan.sh defaults).
+  return `http://127.0.0.1:${agent === "hermes" ? 8642 : 18789}`;
+}
+
+function escapeContextValue(value: string): string {
+  // context.env holds plain, unquoted `KEY=value` lines, so the value is passed
+  // through unchanged; a newline would split the entry and is rejected instead.
+  if (value.includes("\n")) {
+    throw new Error(`context.env value must not contain newline: ${JSON.stringify(value)}`);
+  }
+  return value;
+}
+
+export interface ContextSeedResult {
+  path: string;
+  keys: string[];
+}
+
+export function seedContextEnv(ctx: RunContext, plan: RunPlan): ContextSeedResult {
+  // Manifest onboarding block and scenario environment; either may be absent,
+  // in which case the literal defaults below are used.
+  const manifestOnboarding = plan.manifest?.spec.onboarding;
+  const environment = plan.environment;
+  const agent = manifestOnboarding?.agent ?? "openclaw";
+  const seed: Record<string, string> = {
+    E2E_SCENARIO: plan.scenarioId,
+    E2E_PLATFORM_OS: platformOsFromManifest(plan),
+    E2E_EXECUTION_TARGET: executionTargetFromManifest(plan),
+    E2E_INSTALL_METHOD: environment?.install ?? "unknown",
+    E2E_CONTAINER_ENGINE: containerEngine(plan),
+    E2E_CONTAINER_DAEMON: containerDaemon(plan),
+    E2E_ONBOARDING_PATH: environment?.onboarding ?? "unknown",
+    E2E_AGENT: agent,
+    E2E_PROVIDER: manifestOnboarding?.provider ?? "nvidia",
+    E2E_INFERENCE_ROUTE: manifestOnboarding?.modelRoute ?? "inference-local",
+    E2E_SANDBOX_NAME: `e2e-${plan.scenarioId}`,
+    E2E_GATEWAY_URL: defaultGatewayUrl(agent),
+  };
+
+  // Same location the shell helper's e2e_context_init uses
+  // (${E2E_CONTEXT_DIR}/context.env). The directory is created before values are checked.
+  const contextPath = path.join(ctx.contextDir, "context.env");
+  fs.mkdirSync(ctx.contextDir, { recursive: true });
+  let body = "";
+  for (const [key, value] of Object.entries(seed)) {
+    body += `${key}=${escapeContextValue(value)}\n`;
+  }
+  fs.writeFileSync(contextPath, body);
+
+  return { path: contextPath, keys: Object.keys(seed) };
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/lifecycle.ts b/test/e2e-scenario/scenarios/orchestrators/lifecycle.ts
new file mode 100644
index 0000000000..509112f171
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/lifecycle.ts
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { PhaseOrchestrator } from "./phase.ts";
+
+/**
+ * Lifecycle phase orchestrator.
+ *
+ * Binds the shared PhaseOrchestrator behavior to the "lifecycle" phase,
+ * which the compiler orders after state-validation and before runtime.
+ * The phase carries post-onboard state mutations (rebuild, upgrade,
+ * snapshot+restore, ...): the compiler emits one action when a scenario
+ * declares `environment.lifecycle = <profile-id>`. That action's worker
+ * (under test/e2e-scenario/nemoclaw_scenarios/lifecycle/) is expected to
+ * seed context.env keys (E2E_REBUILD_MARKER_PATH, ...) that the
+ * runtime-phase rebuild_upgrade.sh assertions consume.
+ *
+ * Scenarios without a lifecycle profile get an empty action list from
+ * the compiler, so this phase has no actions to execute for them.
+ */
+export class LifecycleOrchestrator extends PhaseOrchestrator {
+  constructor() {
+    super("lifecycle");
+  }
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/negative-matcher.ts b/test/e2e-scenario/scenarios/orchestrators/negative-matcher.ts
new file mode 100644
index 0000000000..dbbe2b0956
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/negative-matcher.ts
@@ -0,0 +1,236 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type {
+  ExpectedFailureContract,
+  ExpectedFailurePhase,
+  PhaseName,
+  PhaseResult,
+  RunPlan,
+} from "../types.ts";
+
+// Pure framework infrastructure: given a compiled RunPlan and the
+// observed phase results, decide whether a negative scenario's
+// declared failure contract was honored. Does not mutate inputs and
+// does not perform I/O.
+//
+// Spec ownership boundaries:
+// - Failure injection (uninstalling docker, planting a bad key,
+//   occupying a gateway port) is runner-environment prep, NOT this
+//   matcher's job. The matcher only inspects what actually happened.
+// - Forbidden-side-effect verification (did a sandbox actually get
+//   created when the scenario forbids it?) belongs to the
+//   `expectedFailureNoSideEffectsProbe` implementation registered as
+//   a probe step. Until that probe lands, the runtime control group
+//   keeps the negative scenario visibly red via a `required: true`
+//   pending step. The matcher reports the contract status for
+//   phase + errorClass independently of the side-effect probe, and
+//   exposes whether forbiddenSideEffects were declared so callers can
+//   integrate both signals.
+
+export type NegativeContractMatchOutcome =
+  // Right phase, right errorClass match observed.
+  | "matched"
+  // Scenario expected a failure but every phase passed.
+  | "no-failure-observed"
+  // Wrong phase failed (e.g., expected onboarding, observed environment).
+  | "wrong-phase"
+  // Right phase, but the failure message did not advertise the
+  // declared errorClass.
+  | "wrong-error-class";
+
+export interface NegativeContractObservation {
+  failedPhase?: PhaseName;
+  failedActionId?: string;
+  failedActionMessage?: string;
+  failedAssertionId?: string;
+  failedAssertionMessage?: string;
+}
+
+export interface NegativeContractResult {
+  matched: boolean;
+  outcome: NegativeContractMatchOutcome;
+  expected: ExpectedFailureContract;
+  observed: NegativeContractObservation;
+  // Human-readable diagnostic; suitable for evidence logs and CI output.
+  message: string;
+}
+
+// Internal id reserved for the runtime side-effect pending/probe step
+// declared in assertions/registry.ts. The matcher excludes failures of
+// that step from "observed failure" detection so the contract evaluation
+// is not confused by its own enforcement scaffolding.
+//
+// As of the state-validation phase landing, forbidden side effects are
+// observed by the typed gateway-absent / sandbox-absent probes during
+// the state-validation phase, not by this pending step. The exclusion
+// is kept to stay correct for any scenario that still references the
+// legacy step id.
+const SIDE_EFFECT_PROBE_STEP_ID = "runtime.expected-failure.no-side-effects";
+
+// State-validation probe ids the matcher must skip when scanning for
+// observed failures. For a negative scenario, these probes are real
+// post-failure checks (gateway-absent, sandbox-absent) — their pass/fail
+// status does NOT determine which phase advertised the original failure
+// mode, only whether forbidden side effects occurred.
+const STATE_VALIDATION_FORBIDDEN_PROBE_IDS: ReadonlySet<string> = new Set([
+  "state-validation.gateway-absent",
+  "state-validation.sandbox-absent",
+]);
+
+// Map the user-facing expected failure phase to the internal phase
+// orchestrator that owns it. Today preflight assertions live under
+// onboarding (see assertions/registry.ts: onboarding.preflight.*).
+function resolveExpectedPhase(phase: ExpectedFailurePhase): PhaseName {
+  // "preflight" is user-facing vocabulary only; its assertions run under the
+  // onboarding orchestrator. Every other value is already an internal phase
+  // name and passes through unchanged.
+  return phase === "preflight" ? "onboarding" : phase;
+}
+
+function isOwnPhaseResult(phase: PhaseResult["phase"]): phase is PhaseName {
+  // Mirrors the compiler's phase list. "lifecycle" must be listed, otherwise a
+  // failed lifecycle action is skipped and can be reported as "all phases passed".
+  return (
+    phase === "environment" || phase === "onboarding" ||
+    phase === "state-validation" || phase === "lifecycle" || phase === "runtime"
+  );
+}
+
+function findFirstObservedFailure(results: readonly PhaseResult[]): NegativeContractObservation | undefined {
+  // Report the first real failure, scanning results in the order given; within
+  // a phase a failed action takes precedence over a failed assertion. Ignored on
+  // purpose, because they verify the aftermath instead of advertising the failure:
+  //   - state-validation gateway-absent / sandbox-absent probes
+  //   - the legacy runtime side-effect step (SIDE_EFFECT_PROBE_STEP_ID)
+  // A failed cli-installed probe is NOT ignored: it is a genuine failure.
+  const countsAsAction = (action: PhaseResult["actions"][number]): boolean =>
+    action.status === "failed" && !STATE_VALIDATION_FORBIDDEN_PROBE_IDS.has(action.id);
+  const countsAsAssertion = (assertion: PhaseResult["assertions"][number]): boolean =>
+    assertion.status === "failed" && assertion.id !== SIDE_EFFECT_PROBE_STEP_ID;
+
+  for (const { phase, actions, assertions } of results) {
+    if (!isOwnPhaseResult(phase)) {
+      continue;
+    }
+    const action = actions.find(countsAsAction);
+    if (action) {
+      return {
+        failedPhase: phase,
+        failedActionId: action.id,
+        failedActionMessage: action.message,
+      };
+    }
+    const assertion = assertions.find(countsAsAssertion);
+    if (assertion) {
+      return {
+        failedPhase: phase,
+        failedAssertionId: assertion.id,
+        failedAssertionMessage: assertion.message,
+      };
+    }
+  }
+  return undefined;
+}
+
+function errorClassMatches(message: string | undefined, errorClass: string): boolean {
+  // Pure string check: case-insensitive substring match in which any run of
+  // whitespace, underscores, or dashes counts as a single "-" on both sides,
+  // so "docker-missing" also matches "Docker missing" or "DOCKER_MISSING".
+  // A missing or empty message never matches.
+  if (!message) {
+    return false;
+  }
+  const separators = /[\s_-]+/g;
+  const haystack = message.toLowerCase().replace(separators, "-");
+  const needle = errorClass.toLowerCase().replace(separators, "-");
+  return haystack.includes(needle);
+}
+
+function describeObservation(observation: NegativeContractObservation): string {
+  // Renders the populated fields as space-separated key=value tokens, in the
+  // order phase, action, assertion, message. The message is the action message
+  // when defined, else the assertion message, cut to its first 240 characters.
+  const { failedPhase, failedActionId, failedAssertionId } = observation;
+  const text = observation.failedActionMessage ?? observation.failedAssertionMessage;
+  const tokens = [
+    failedPhase ? `phase=${failedPhase}` : "",
+    failedActionId ? `action=${failedActionId}` : "",
+    failedAssertionId ? `assertion=${failedAssertionId}` : "",
+    text ? `message="${text.slice(0, 240)}"` : "",
+  ].filter((token) => token !== "");
+  if (tokens.length === 0) {
+    return "no failure observed";
+  }
+  return tokens.join(" ");
+}
+
+export function evaluateNegativeContract(plan: RunPlan, results: readonly PhaseResult[]): NegativeContractResult {
+  const expected = plan.expectedFailure;
+  if (!expected) {
+    throw new Error(
+      `evaluateNegativeContract called for scenario ${plan.scenarioId} which has no expectedFailure declared`,
+    );
+  }
+  const { phase, errorClass } = expected;
+  const scenario = `scenario ${plan.scenarioId}`;
+  // Every verdict shares one envelope; only outcome, observation, and message
+  // vary, and `matched` is true for the "matched" outcome alone.
+  const verdict = (
+    outcome: NegativeContractMatchOutcome,
+    observed: NegativeContractObservation,
+    message: string,
+  ): NegativeContractResult => ({ matched: outcome === "matched", outcome, expected, observed, message });
+
+  const observation = findFirstObservedFailure(results);
+  if (!observation) {
+    return verdict(
+      "no-failure-observed",
+      {},
+      `${scenario} expected to fail in ${phase} (errorClass=${errorClass}), but all phases passed`,
+    );
+  }
+  // A failure was observed; its rendering is reused by every remaining message.
+  const seen = describeObservation(observation);
+  if (observation.failedPhase !== resolveExpectedPhase(phase)) {
+    return verdict(
+      "wrong-phase",
+      observation,
+      `${scenario} expected ${phase} failure (errorClass=${errorClass}); observed ${seen}`,
+    );
+  }
+  const observedMessage = observation.failedActionMessage ?? observation.failedAssertionMessage;
+  if (!errorClassMatches(observedMessage, errorClass)) {
+    return verdict(
+      "wrong-error-class",
+      observation,
+      `${scenario} ${phase} failure errorClass mismatch: expected="${errorClass}" observed=${seen}`,
+    );
+  }
+  // Right phase, and the failure message advertises the declared errorClass.
+  return verdict(
+    "matched",
+    observation,
+    `${scenario} negative contract matched: ${phase}/${errorClass} (${seen})`,
+  );
+}
+
+// Convenience: build a synthetic PhaseResult for the runner to append
+// to the per-phase results. Keeps run.ts and artifact writers honest
+// (one shape, written through the same path as real phase results).
+export function negativeContractPhaseResult(result: NegativeContractResult): PhaseResult {
+  // Wraps the verdict as a "negative-contract" phase holding one assertion.
+  // Phase and assertion always share the same status, derived from `matched`.
+  const status: "passed" | "failed" = result.matched ? "passed" : "failed";
+  const { message } = result;
+  return {
+    phase: "negative-contract",
+    status,
+    actions: [],
+    // Nothing was executed, timed, or retried here: one attempt, zero
+    // duration, and the verdict's diagnostic as the message.
+    assertions: [
+      { id: "negative-contract.match", status, attempts: 1, durationMs: 0, message },
+    ],
+  };
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/phase.ts b/test/e2e-scenario/scenarios/orchestrators/phase.ts
index ae59a58e62..ccde0ba73d 100644
--- a/test/e2e-scenario/scenarios/orchestrators/phase.ts
+++ b/test/e2e-scenario/scenarios/orchestrators/phase.ts
@@ -1,67 +1,306 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+import { spawn } from "node:child_process";
 import fs from "node:fs";
 import path from "node:path";
+import { fileURLToPath } from "node:url";
 import type {
   AssertionResult,
   AssertionStep,
+  PhaseAction,
+  PhaseActionResult,
   PhaseName,
   PhaseResult,
   RunContext,
   RunPlanPhase,
   TransientClassifier,
 } from "../types.ts";
+import { lookupProbe } from "../probes/registry.ts";
+import type { ProbeContext } from "../probes/types.ts";
+import { buildChildEnv, pipeRedacted, redactString } from "./redaction.ts";
+
+// Auto-register the built-in probes the moment the orchestrator is
+// imported. This is a deliberate side-effect import: registry state is
+// module-scoped and we want every entry point that runs assertions
+// (run.ts, ScenarioRunner, framework tests) to see the same wired set
+// without each one repeating the registration.
+import { registerBuiltinProbes } from "../probes/builtin.ts";
+registerBuiltinProbes();
+
+const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../..");
+const DEFAULT_STEP_TIMEOUT_SECONDS = 300;
 
 interface StepAttemptOutcome {
-  status: "passed" | "failed";
+  status: "passed" | "failed" | "skipped";
   classifier?: TransientClassifier;
   message?: string;
+  evidence?: string;
 }
 
-function transientForRef(ref: string): TransientClassifier {
-  if (ref.includes("provider") || ref.includes("transient")) {
-    return "provider-transient";
+// Heuristic transient classifier for shell step refs that don't print
+// their own classifier hint. Phase orchestrators own classification;
+// clients/scripts do not.
+function classifierForRef(ref: string): TransientClassifier {
+  if (/provider|inference|chat-completion|cloudflared|tunnel/i.test(ref)) {
+    // Use case-insensitive matching here too; the outer guard is /i, so
+    // mixed-case refs (Tunnel, Cloudflared) must still classify as
+    // external-tunnel rather than fall through to provider-transient.
+    return /tunnel|cloudflared/i.test(ref) ? "external-tunnel" : "provider-transient";
   }
-  if (ref.includes("gateway")) {
+  if (/gateway/i.test(ref)) {
     return "gateway-transient";
   }
+  if (/event-capture|tui|chat-events/i.test(ref)) {
+    return "empty-event-capture";
+  }
   return "runner-infra";
 }
 
+/**
+ * Assemble the typed ProbeContext passed to a probe runner for `step`.
+ *
+ * Parses `context.env` in the context dir (when present) into a key/value map,
+ * so a probe gets the scenario state shell steps read from that file without
+ * touching the file system itself. Blank lines, `#` comments and lines with no
+ * key before `=` are ignored; one pair of matching surrounding quotes is
+ * removed from a value. The evidence path defaults to
+ * `.e2e/assertions/<step id>.json`, resolved against the context dir.
+ */
+function buildProbeContext(ctx: RunContext, step: AssertionStep): ProbeContext {
+  const contextEnv: Record<string, string> = {};
+  const envFile = path.join(ctx.contextDir, "context.env");
+  const rawLines = fs.existsSync(envFile) ? fs.readFileSync(envFile, "utf8").split("\n") : [];
+  for (const entry of rawLines.map((line) => line.trim())) {
+    const sep = entry.indexOf("=");
+    // Skip blanks, comments, and lines with no key before "=".
+    if (entry === "" || entry.startsWith("#") || sep <= 0) continue;
+    const rawValue = entry.slice(sep + 1);
+    // Drop one pair of matching surrounding quotes.
+    const quoted = ['"', "'"].some((q) => rawValue.startsWith(q) && rawValue.endsWith(q));
+    contextEnv[entry.slice(0, sep)] = quoted ? rawValue.slice(1, -1) : rawValue;
+  }
+
+  const evidenceTarget = step.evidencePath ?? `.e2e/assertions/${step.id}.json`;
+  const evidencePath = path.isAbsolute(evidenceTarget)
+    ? evidenceTarget
+    : path.join(ctx.contextDir, evidenceTarget);
+  return {
+    contextDir: ctx.contextDir,
+    evidencePath,
+    contextEnv,
+    sandboxName: contextEnv.E2E_SANDBOX_NAME ?? null,
+    gatewayUrl: contextEnv.E2E_GATEWAY_URL ?? null,
+    repoRoot: REPO_ROOT,
+  };
+}
+
 export class PhaseOrchestrator {
   constructor(private readonly phaseName: PhaseName) {}
 
   async run(ctx: RunContext, phase: RunPlanPhase): Promise<PhaseResult> {
+    const actions: PhaseActionResult[] = [];
+    let actionFailed = false;
+    for (const action of phase.actions) {
+      const actionResult = await this.runAction(ctx, action);
+      actions.push(actionResult);
+      if (actionResult.status === "failed") {
+        actionFailed = true;
+        // Spec failure-layer rule: setup failure must not let assertions
+        // run and accidentally pass. Stop the phase here.
+        break;
+      }
+    }
     const assertions: AssertionResult[] = [];
-    for (const group of phase.assertionGroups) {
-      for (const step of group.steps) {
-        assertions.push(await this.runStep(ctx, step));
+    if (!actionFailed) {
+      for (const group of phase.assertionGroups) {
+        for (const step of group.steps) {
+          assertions.push(await this.runStep(ctx, step));
+        }
       }
     }
-    const status = assertions.some((assertion) => assertion.status === "failed") ? "failed" : "passed";
-    const result: PhaseResult = { phase: this.phaseName, status, assertions };
+    const assertionsFailed = assertions.some((assertion) => assertion.status === "failed");
+    const allSkipped =
+      !actionFailed &&
+      assertions.length > 0 &&
+      assertions.every((assertion) => assertion.status === "skipped");
+    let status: PhaseResult["status"];
+    if (actionFailed || assertionsFailed) {
+      status = "failed";
+    } else if (allSkipped || (actions.length === 0 && assertions.length === 0)) {
+      status = "skipped";
+    } else {
+      status = "passed";
+    }
+    const result: PhaseResult = { phase: this.phaseName, status, actions, assertions };
     this.writePhaseResult(ctx, result);
     return result;
   }
 
+  private async runAction(ctx: RunContext, action: PhaseAction): Promise<PhaseActionResult> {
+    const startedAt = Date.now();
+    const scriptPath = path.isAbsolute(action.scriptRef)
+      ? action.scriptRef
+      : path.resolve(REPO_ROOT, action.scriptRef);
+    if (!fs.existsSync(scriptPath)) {
+      return {
+        id: action.id,
+        status: "failed",
+        durationMs: Date.now() - startedAt,
+        message: `phase action ${action.id} script not found: ${scriptPath}`,
+      };
+    }
+    const timeoutSeconds = action.timeoutSeconds ?? DEFAULT_STEP_TIMEOUT_SECONDS;
+    const logDir = path.join(ctx.contextDir, ".e2e", "actions");
+    fs.mkdirSync(logDir, { recursive: true });
+    const logPath = path.join(logDir, `${action.id}.log`);
+
+    // Compose the bash invocation. shell-fn runs the dispatcher script with
+    // (fn, arg, scriptPath) as positional args when the dispatcher exists;
+    // otherwise the action script itself is run, with `arg` appended when
+    // set. bash is spawned non-login and non-interactive (no -l / -c).
+    const dispatchAction = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh");
+    const useDispatchLauncher = action.kind === "shell-fn" && fs.existsSync(dispatchAction);
+    const bashArgs: string[] = useDispatchLauncher
+      ? [dispatchAction, action.fn ?? "", action.arg ?? "", scriptPath]
+      : [scriptPath, ...(action.arg ? [action.arg] : [])];
+
+    // Framework-owned secret hygiene at the spawn boundary. The child
+    // gets a minimal allowlisted env plus only the secrets this action
+    // explicitly declared via PhaseAction.secretEnv. See
+    // orchestrators/redaction.ts for the full contract.
+    const env = buildChildEnv(process.env, {
+      secretEnv: action.secretEnv,
+      frameworkOverlay: {
+        E2E_CONTEXT_DIR: ctx.contextDir,
+        E2E_PHASE: action.phase,
+        E2E_ACTION_ID: action.id,
+      },
+    });
+
+    return await new Promise<PhaseActionResult>((resolve) => {
+      const child = spawn("bash", bashArgs, { env, cwd: REPO_ROOT, detached: true });
+      const pgid = child.pid;
+      const logStream = fs.createWriteStream(logPath);
+      let stderrTail = "";
+      // Every byte from the child passes through redactString before
+      // hitting the evidence log or the stderr tail; raw output never
+      // touches disk or PhaseActionResult.message.
+      pipeRedacted(child.stdout, logStream);
+      pipeRedacted(child.stderr, logStream, (redactedChunk) => {
+        stderrTail = (stderrTail + redactedChunk).slice(-4096);
+      });
+
+      const killGroup = (signal: NodeJS.Signals) => {
+        if (typeof pgid !== "number") {
+          child.kill(signal);
+          return;
+        }
+        try {
+          process.kill(-pgid, signal);
+        } catch {
+          /* group already gone */
+        }
+      };
+
+      let timedOut = false;
+      const timeout = setTimeout(() => {
+        timedOut = true;
+        killGroup("SIGTERM");
+        setTimeout(() => {
+          if (!child.killed) {
+            killGroup("SIGKILL");
+          }
+        }, 5_000).unref();
+      }, timeoutSeconds * 1_000);
+
+      const finishLog = (): Promise<void> =>
+        new Promise((res) => {
+          if ((logStream as unknown as { closed?: boolean }).closed) {
+            res();
+            return;
+          }
+          logStream.once("finish", () => res());
+          logStream.once("error", () => res());
+          logStream.end();
+        });
+
+      child.on("error", (err) => {
+        clearTimeout(timeout);
+        void finishLog().then(() =>
+          resolve({
+            id: action.id,
+            status: "failed",
+            durationMs: Date.now() - startedAt,
+            evidence: logPath,
+            message: redactString(`phase action ${action.id} spawn error: ${err.message}`),
+          }),
+        );
+      });
+
+      child.on("close", (code, signal) => {
+        clearTimeout(timeout);
+        void finishLog().then(() => {
+          const durationMs = Date.now() - startedAt;
+          if (timedOut) {
+            resolve({
+              id: action.id,
+              status: "failed",
+              durationMs,
+              evidence: logPath,
+              message: `phase action ${action.id} exceeded ${timeoutSeconds}s (signal=${signal ?? "SIGTERM"})`,
+            });
+            return;
+          }
+          if (code === 0) {
+            // Publish the action's evidence log under a stable alias for
+            // legacy assertions that reference fixed filenames
+            // (onboard.log, install.log, ...). Best-effort; alias copy
+            // failures do not fail the action.
+            if (action.aliasPath) {
+              try {
+                const aliasFull = path.isAbsolute(action.aliasPath)
+                  ? action.aliasPath
+                  : path.join(ctx.contextDir, action.aliasPath);
+                fs.mkdirSync(path.dirname(aliasFull), { recursive: true });
+                fs.copyFileSync(logPath, aliasFull);
+              } catch {
+                /* alias is a convenience; never fail action on copy */
+              }
+            }
+            resolve({ id: action.id, status: "passed", durationMs, evidence: logPath });
+            return;
+          }
+          resolve({
+            id: action.id,
+            status: "failed",
+            durationMs,
+            evidence: logPath,
+            message: `phase action ${action.id} exit ${code ?? "null"}: ${stderrTail.split("\n").slice(-3).join(" | ").trim()}`,
+          });
+        });
+      });
+    });
+  }
+
   private async runStep(ctx: RunContext, step: AssertionStep): Promise<AssertionResult> {
     const startedAt = Date.now();
     const rawAttempts = step.reliability?.retry?.attempts;
-    const maxAttempts = typeof rawAttempts === "number" && Number.isFinite(rawAttempts) ? Math.max(1, Math.floor(rawAttempts)) : 1;
+    const maxAttempts =
+      typeof rawAttempts === "number" && Number.isFinite(rawAttempts) ? Math.max(1, Math.floor(rawAttempts)) : 1;
     let attempts = 0;
     let lastOutcome: StepAttemptOutcome = { status: "failed", message: "step did not run" };
     for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
       attempts = attempt;
       lastOutcome = await this.executeStep(ctx, step, attempt);
-      if (lastOutcome.status === "passed") {
+      if (lastOutcome.status === "passed" || lastOutcome.status === "skipped") {
         return {
           id: step.id,
-          status: "passed",
+          status: lastOutcome.status,
           attempts,
           durationMs: Date.now() - startedAt,
           classifier: attempt > 1 ? step.reliability?.retry?.on[0] : lastOutcome.classifier,
-          evidence: step.evidencePath,
+          evidence: lastOutcome.evidence ?? step.evidencePath,
           message: lastOutcome.message,
         };
       }
@@ -75,7 +314,7 @@ export class PhaseOrchestrator {
       attempts,
       durationMs: Date.now() - startedAt,
       classifier: lastOutcome.classifier,
-      evidence: step.evidencePath,
+      evidence: lastOutcome.evidence ?? step.evidencePath,
       message: lastOutcome.message,
     };
   }
@@ -92,26 +331,215 @@ export class PhaseOrchestrator {
     return step.reliability?.retry?.on.includes(classifier) ?? false;
   }
 
-  private async executeStep(_ctx: RunContext, step: AssertionStep, attempt: number): Promise<StepAttemptOutcome> {
-    const ref = step.implementation?.ref ?? "";
-    if (ref === "fake-pass" || ref === "phase-1-skeleton") {
-      return { status: "passed" };
+  private async executeStep(ctx: RunContext, step: AssertionStep, _attempt: number): Promise<StepAttemptOutcome> {
+    const kind = step.implementation?.kind;
+    if (kind === "shell") {
+      return this.runShellStep(ctx, step);
     }
-    if (ref === "fake-retry-once-pass") {
-      return attempt === 1
-        ? { status: "failed", classifier: step.reliability?.retry?.on[0] ?? "gateway-transient" }
-        : { status: "passed" };
+    if (kind === "probe") {
+      const ref = step.implementation?.ref ?? "<no ref>";
+      const probe = lookupProbe(ref);
+      if (!probe) {
+        // Probe is referenced by the typed registry but no
+        // implementation has been registered yet. Surface as
+        // skipped — unless the step is marked required, in which
+        // case fail closed so security-sensitive suites never
+        // pass on a missing probe.
+        if (step.required) {
+          return {
+            status: "failed",
+            classifier: "runner-infra",
+            message: `required probe not registered: ${ref} (step ${step.id})`,
+          };
+        }
+        return { status: "skipped", message: `probe not registered: ${ref}` };
+      }
+      const probeCtx = buildProbeContext(ctx, step);
+      try {
+        const outcome = await probe(probeCtx);
+        return {
+          status: outcome.status,
+          classifier: outcome.classifier,
+          message: outcome.message,
+          evidence: outcome.evidence ?? probeCtx.evidencePath,
+        };
+      } catch (err) {
+        // Probes must not throw — but a thrown error must NEVER
+        // cause an unobservable failure. Convert to a failed
+        // outcome with a redacted message so the orchestrator's
+        // result aggregation still records evidence.
+        const message = err instanceof Error ? err.message : String(err);
+        return {
+          status: "failed",
+          message: redactString(`probe ${ref} threw: ${message}`),
+          evidence: probeCtx.evidencePath,
+        };
+      }
+    }
+    if (kind === "pending") {
+      // pending steps surface as skipped with the placeholder ref so
+      // gaps are visible in plan output and phase results. Required
+      // pending steps (e.g. expected-failure side-effect validators
+      // for negative scenarios) fail closed instead — the run cannot
+      // honestly pass while the contract is unimplemented.
+      const ref = step.implementation?.ref ?? "";
+      if (step.required) {
+        return {
+          status: "failed",
+          classifier: "runner-infra",
+          message: `required pending step not implemented: ${ref} (step ${step.id})`,
+        };
+      }
+      return { status: "skipped", message: `pending: ${ref}` };
     }
-    if (ref === "fake-always-transient") {
-      return { status: "failed", classifier: step.reliability?.retry?.on[0] ?? transientForRef(ref) };
+    throw new Error(`Unknown assertion step kind for ${step.id}: ${String(kind)}`);
+  }
+
+  private async runShellStep(ctx: RunContext, step: AssertionStep): Promise<StepAttemptOutcome> {
+    const ref = step.implementation?.ref;
+    if (!ref) {
+      return { status: "failed", message: `shell step ${step.id} missing implementation.ref` };
     }
-    if (step.implementation?.kind === "shell" && _ctx.dryRun) {
-      return { status: "passed", message: `dry-run shell ${ref}` };
+    const scriptPath = path.isAbsolute(ref) ? ref : path.resolve(REPO_ROOT, ref);
+    if (!fs.existsSync(scriptPath)) {
+      return { status: "failed", message: `shell step ${step.id} script not found: ${scriptPath}` };
     }
-    if (step.implementation?.kind === "probe" && _ctx.dryRun) {
-      return { status: "passed", message: `dry-run probe ${ref}` };
+
+    const timeoutSeconds = step.reliability?.timeoutSeconds ?? DEFAULT_STEP_TIMEOUT_SECONDS;
+    const logDir = path.join(ctx.contextDir, ".e2e", "logs");
+    fs.mkdirSync(logDir, { recursive: true });
+    const logPath = path.join(logDir, `${step.id}.log`);
+
+    // Framework-owned secret hygiene at the spawn boundary (mirrors
+    // runAction). The shell step's child gets only the framework
+    // allowlist + scenario context.env keys + step.secretEnv
+    // declarations. See orchestrators/redaction.ts.
+    const env = buildChildEnv(process.env, {
+      secretEnv: step.secretEnv,
+      frameworkOverlay: {
+        E2E_CONTEXT_DIR: ctx.contextDir,
+        E2E_STEP_ID: step.id,
+        E2E_PHASE: step.phase,
+      },
+    });
+    // Surface scenario-derived context (E2E_SCENARIO, E2E_SANDBOX_NAME,
+    // E2E_GATEWAY_URL, etc.) that the framework wrote at the start of the
+    // run and that environment+onboarding phases extended via
+    // e2e_context_set. The shell context library writes to
+    // ${E2E_CONTEXT_DIR}/context.env, NOT to ${E2E_CONTEXT_DIR}/.e2e/.
+    const contextEnvPath = path.join(ctx.contextDir, "context.env");
+    if (fs.existsSync(contextEnvPath)) {
+      const contextEnv = fs.readFileSync(contextEnvPath, "utf8");
+      for (const line of contextEnv.split("\n")) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) {
+          continue;
+        }
+        const eq = trimmed.indexOf("=");
+        if (eq <= 0) {
+          continue;
+        }
+        const key = trimmed.slice(0, eq);
+        let value = trimmed.slice(eq + 1);
+        if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
+          value = value.slice(1, -1);
+        }
+        env[key] = value;
+      }
     }
-    return { status: "failed", message: `unsupported live step ${step.id}` };
+
+    return await new Promise<StepAttemptOutcome>((resolve) => {
+      // detached: true puts the child (and any of its children, e.g. a `sleep`
+      // spawned by bash) into its own process group. We send signals to the
+      // negative pid so the whole group dies on timeout. Without this, bash
+      // ignores SIGTERM until its current foreground command (e.g. sleep)
+      // returns, and timeouts effectively don't work.
+      const child = spawn("bash", [scriptPath], { env, cwd: REPO_ROOT, detached: true });
+      const pgid = child.pid;
+      const logStream = fs.createWriteStream(logPath);
+      let stderrTail = "";
+      // Redact at the I/O boundary; raw bytes from the child must not
+      // reach the evidence log or the stderr tail that flows into
+      // step result.message.
+      pipeRedacted(child.stdout, logStream);
+      pipeRedacted(child.stderr, logStream, (redactedChunk) => {
+        stderrTail = (stderrTail + redactedChunk).slice(-4096);
+      });
+
+      const killGroup = (signal: NodeJS.Signals) => {
+        if (typeof pgid !== "number") {
+          child.kill(signal);
+          return;
+        }
+        try {
+          process.kill(-pgid, signal);
+        } catch {
+          /* group already gone */
+        }
+      };
+
+      let timedOut = false;
+      const timeout = setTimeout(() => {
+        timedOut = true;
+        killGroup("SIGTERM");
+        setTimeout(() => {
+          if (!child.killed) {
+            killGroup("SIGKILL");
+          }
+        }, 5_000).unref();
+      }, timeoutSeconds * 1_000);
+
+      // Wait for the log writeStream to fully flush before resolving so
+      // callers can synchronously read the evidence file. Without this, the
+      // 'close' event on the child fires before the WriteStream finishes
+      // draining, and tests/orchestrators see an empty log file.
+      const finishLog = (): Promise<void> =>
+        new Promise((res) => {
+          if ((logStream as unknown as { closed?: boolean }).closed) {
+            res();
+            return;
+          }
+          logStream.once("finish", () => res());
+          logStream.once("error", () => res());
+          logStream.end();
+        });
+
+      child.on("error", (err) => {
+        clearTimeout(timeout);
+        void finishLog().then(() =>
+          resolve({
+            status: "failed",
+            message: redactString(`shell step ${step.id} spawn error: ${err.message}`),
+            evidence: logPath,
+          }),
+        );
+      });
+
+      child.on("close", (code, signal) => {
+        clearTimeout(timeout);
+        void finishLog().then(() => {
+          if (timedOut) {
+            resolve({
+              status: "failed",
+              classifier: "runner-infra",
+              message: `shell step ${step.id} exceeded ${timeoutSeconds}s (signal=${signal ?? "SIGTERM"})`,
+              evidence: logPath,
+            });
+            return;
+          }
+          if (code === 0) {
+            resolve({ status: "passed", evidence: logPath });
+            return;
+          }
+          resolve({
+            status: "failed",
+            classifier: classifierForRef(ref),
+            message: `shell step ${step.id} exit ${code ?? "null"}: ${stderrTail.split("\n").slice(-3).join(" | ").trim()}`,
+            evidence: logPath,
+          });
+        });
+      });
+    });
   }
 
   private writePhaseResult(ctx: RunContext, result: PhaseResult) {
diff --git a/test/e2e-scenario/scenarios/orchestrators/redaction.ts b/test/e2e-scenario/scenarios/orchestrators/redaction.ts
new file mode 100644
index 0000000000..347eae12bc
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/redaction.ts
@@ -0,0 +1,235 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Framework-owned secret hygiene at the spawn boundary.
+ *
+ * Spec ownership: redaction and child-env minimization are FRAMEWORK
+ * INFRASTRUCTURE, not a per-action / per-script / per-workflow concern.
+ * Children spawned by PhaseOrchestrator must (a) receive a minimal,
+ * typed env (framework allowlist + per-action declared `secretEnv`
+ * passthrough only), and (b) have their stdout/stderr passed through
+ * redaction before any byte reaches an evidence log or
+ * PhaseResult.message. There is no opt-out flag, no env switch, no
+ * helper that bypasses this. One execution mode, secrets always
+ * redacted in evidence — same one-mode discipline that motivates the
+ * rest of this PR.
+ *
+ * Pattern source-of-truth: src/lib/security/secret-patterns.ts. This
+ * module keeps a local mirror of those regex sets (it does not import
+ * them) so the framework stays decoupled from product runtime modules;
+ * a parity test keeps the two copies in lockstep.
+ *
+ * Bash side: test/e2e-scenario/runtime/lib/context.sh::e2e_context_dump
+ * already redacts on dump via _e2e_context_is_sensitive_key. Bash
+ * helpers must continue to use that for diagnostic dumps; this module
+ * only covers the TS-spawned-child I/O path.
+ *
+ * Tests:
+ *   test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
+ *     - test_should_not_persist_secret_shaped_child_output_into_evidence
+ *     - test_should_drop_non_allowlisted_parent_env_unless_declared_in_secretEnv
+ *     - test_should_pass_declared_secretEnv_through_to_child
+ */
+
+import type { Readable, Writable } from "node:stream";
+
+const REDACTED = "<REDACTED>";
+
+// Framework-local mirror of src/lib/security/secret-patterns.ts. The
+// framework deliberately does not import from src/lib/security/ so it
+// stays decoupled from product runtime modules and the cross-tsconfig
+// boundary. A parity test
+// (test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts)
+// asserts these regex sources stay in lockstep with the canonical
+// product source so adding a token shape there keeps both layers
+// honest at once.
+// Exported only so the parity test
+// (test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts) can
+// import the actual RegExp values rather than parsing source text.
+// Production code in this module continues to use them via the local
+// binding; nothing in the framework runtime imports these.
+export const TOKEN_PREFIX_PATTERNS: RegExp[] = [
+  /nvapi-[A-Za-z0-9_-]{10,}/g,
+  /nvcf-[A-Za-z0-9_-]{10,}/g,
+  /ghp_[A-Za-z0-9_-]{10,}/g,
+  /(?:github_pat_)[A-Za-z0-9_]{30,}/g,
+  /sk-proj-[A-Za-z0-9_-]{10,}/g,
+  /sk-ant-[A-Za-z0-9_-]{10,}/g,
+  /sk-[A-Za-z0-9_-]{20,}/g,
+  /(?:xox[bpas]|xapp)-[A-Za-z0-9-]{10,}/g,
+  /A(?:K|S)IA[A-Z0-9]{16}/g,
+  /hf_[A-Za-z0-9]{10,}/g,
+  /glpat-[A-Za-z0-9_-]{10,}/g,
+  /gsk_[A-Za-z0-9]{10,}/g,
+  /pypi-[A-Za-z0-9_-]{10,}/g,
+  /\bbot\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
+  /\b\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
+  /\b[A-Za-z0-9]{24}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,}\b/g,
+];
+
+export const CONTEXT_PATTERNS: RegExp[] = [
+  /(?<=Bearer\s+)[A-Za-z0-9_.+/=-]{10,}/gi,
+  /(?<=(?:_KEY|API_KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL)[=: ]['"]?)[A-Za-z0-9_.+/=-]{10,}/gi,
+];
+
+/**
+ * Mask secret-shaped substrings of `text` with the `<REDACTED>` marker.
+ *
+ * Each TOKEN_PREFIX_PATTERNS entry is applied first, then each
+ * CONTEXT_PATTERNS entry, every pattern working on the output of the one
+ * before it. Empty input is returned as-is.
+ *
+ * Matching is best-effort: token shapes not covered by those two lists pass
+ * through untouched. The env allowlist in buildChildEnv is the primary
+ * defense; this catches what still reaches child output (for example an
+ * error message that echoes a secret value).
+ */
+export function redactString(text: string): string {
+  if (!text) return text;
+  const patterns = [...TOKEN_PREFIX_PATTERNS, ...CONTEXT_PATTERNS];
+  return patterns.reduce((masked, pattern) => {
+    // The patterns are shared /g regexes; clear any leftover scan position.
+    pattern.lastIndex = 0;
+    return masked.replace(pattern, REDACTED);
+  }, text);
+}
+
+// Env keys the framework guarantees children may always see. Anything
+// outside this set, outside FRAMEWORK_ENV_PREFIXES, and not declared
+// in PhaseAction.secretEnv / AssertionStep.secretEnv is dropped before
+// the child spawns.
+const FRAMEWORK_ENV_ALLOWLIST: ReadonlySet<string> = new Set([
+  "PATH",
+  "HOME",
+  "SHELL",
+  "USER",
+  "LOGNAME",
+  "LANG",
+  "LC_ALL",
+  "LC_CTYPE",
+  "TZ",
+  "TERM",
+  "TMPDIR",
+  "RUNNER_TEMP",
+  "RUNNER_OS",
+  "GITHUB_ACTIONS",
+  "CI",
+  "NEMOCLAW_NON_INTERACTIVE",
+  "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE",
+]);
+
+const FRAMEWORK_ENV_PREFIXES: readonly string[] = ["E2E_", "NEMOCLAW_LOG_"];
+
+// Shape required of any declared secretEnv key — must look like a
+// secret-bearing variable. Prevents accidental allowlisting of
+// non-secret values via the secretEnv channel and keeps the
+// "framework-allowlist vs declared-secret" distinction honest.
+const SECRET_ENV_KEY_SHAPE =
+  /^[A-Z][A-Z0-9_]*(?:API[_]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|PASSPHRASE|PRIVATE[_]?KEY|ACCESS[_]?KEY)$/;
+
+export function isValidSecretEnvKey(key: string): boolean {
+  return SECRET_ENV_KEY_SHAPE.exec(key) !== null; // non-global regex: each call is independent
+}
+
+export interface BuildChildEnvOptions {
+  /** Per-action / per-step secret env keys to pass through; each must satisfy isValidSecretEnvKey. */
+  secretEnv?: readonly string[];
+  /** Framework-controlled overlay (E2E_CONTEXT_DIR, E2E_PHASE, E2E_*_ID), layered unfiltered over the kept env. */
+  frameworkOverlay: NodeJS.ProcessEnv;
+}
+
+/**
+ * Derive the environment for a spawned child from `base` (normally
+ * `process.env`).
+ *
+ * A `base` entry is kept only when its key is
+ *   - a member of FRAMEWORK_ENV_ALLOWLIST, or
+ *   - prefixed by one of FRAMEWORK_ENV_PREFIXES, or
+ *   - named in `opts.secretEnv` and actually set in `base`.
+ * `opts.frameworkOverlay` is then applied on top, and `$HOME/.local/bin` is
+ * prepended to PATH unless it is already one of its entries.
+ *
+ * Throws when a `secretEnv` key fails isValidSecretEnvKey, so a non-secret
+ * variable cannot slip through the declared-secret channel unnoticed.
+ */
+export function buildChildEnv(
+  base: NodeJS.ProcessEnv,
+  opts: BuildChildEnvOptions,
+): NodeJS.ProcessEnv {
+  const frameworkOwned = (name: string): boolean =>
+    FRAMEWORK_ENV_ALLOWLIST.has(name) || FRAMEWORK_ENV_PREFIXES.some((prefix) => name.startsWith(prefix));
+
+  const childEnv: NodeJS.ProcessEnv = {};
+  for (const [name, inherited] of Object.entries(base)) {
+    if (inherited !== undefined && frameworkOwned(name)) {
+      childEnv[name] = inherited;
+    }
+  }
+
+  for (const name of opts.secretEnv ?? []) {
+    if (!isValidSecretEnvKey(name)) {
+      throw new Error(
+        `secretEnv entry '${name}' does not match the secret-key shape ` +
+          `(must end with API_KEY, TOKEN, SECRET, PASSWORD, CREDENTIAL, ` +
+          `PASSPHRASE, PRIVATE_KEY, or ACCESS_KEY). Refusing to allowlist.`,
+      );
+    }
+    const declared = base[name];
+    if (declared !== undefined) {
+      childEnv[name] = declared;
+    }
+  }
+
+  Object.assign(childEnv, opts.frameworkOverlay);
+
+  // Installed nemoclaw / openshell shims land in ~/.local/bin (see
+  // nemoclaw_scenarios/install/repo-current.sh). That directory is on the
+  // default PATH of Ubuntu GH runners but frequently missing on self-hosted
+  // GPU runners and under WSL, where later phases would otherwise fail with
+  // 'nemoclaw: command not found'. Prepending it here, once, for every child
+  // keeps the lookup consistent across phases; the membership check below
+  // makes the operation idempotent. Central counterpart of the per-script
+  // nemoclaw_ensure_local_bin_on_path helper in install-path-refresh.sh.
+  const homeDir = childEnv.HOME ?? base.HOME;
+  if (typeof homeDir !== "string" || homeDir.length === 0) return childEnv;
+  const localBin = `${homeDir}/.local/bin`;
+  const inheritedPath = childEnv.PATH ?? "";
+  if (!inheritedPath.split(":").includes(localBin)) {
+    childEnv.PATH = inheritedPath ? `${localBin}:${inheritedPath}` : localBin;
+  }
+  return childEnv;
+}
+
+// Pipe `src` into `log` through redactString. Text is released only up to the
+// last newline and the unterminated tail is flushed on "end", so a secret or
+// UTF-8 sequence split across two chunks is still redacted whole. `onChunk`
+export function pipeRedacted(src: Readable, log: Writable, onChunk?: (redactedChunk: string) => void): void {
+  let held: Buffer = Buffer.alloc(0); // bytes after the last newline seen so far
+  const release = (upTo: number): void => {
+    if (upTo <= 0) return; // nothing complete yet; also avoids empty writes
+    const redacted = redactString(held.subarray(0, upTo).toString("utf8"));
+    held = held.subarray(upTo);
+    log.write(redacted);
+    onChunk?.(redacted); // receives exactly the redacted text written to `log`
+  };
+  src.on("data", (chunk: Buffer | string) => {
+    held = Buffer.concat([held, typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk]);
+    release(held.lastIndexOf(0x0a) + 1); // 0x0a never occurs inside a multi-byte UTF-8 sequence
+  });
+  src.on("end", () => release(held.length));
+}
+
+/**
+ * Report which env keys and key prefixes every child sees by default:
+ * a sorted copy of the allowlist plus a copy of the prefix list.
+ * Exported for tests/diagnostics only; not a way around the boundary.
+ */
+export function frameworkEnvAllowlistSnapshot(): {
+  keys: string[];
+  prefixes: string[];
+} {
+  const keys = Array.from(FRAMEWORK_ENV_ALLOWLIST).sort();
+  const prefixes = FRAMEWORK_ENV_PREFIXES.slice();
+  return { keys, prefixes };
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/runner.ts b/test/e2e-scenario/scenarios/orchestrators/runner.ts
index 6ab3b76c62..02be1b195f 100644
--- a/test/e2e-scenario/scenarios/orchestrators/runner.ts
+++ b/test/e2e-scenario/scenarios/orchestrators/runner.ts
@@ -1,10 +1,17 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-import type { PhaseResult, RunContext, RunPlan, RunPlanPhase } from "../types.ts";
+import fs from "node:fs";
+import path from "node:path";
+
+import type { PhaseActionResult, PhaseResult, RunContext, RunPlan, RunPlanPhase } from "../types.ts";
+import { seedContextEnv } from "./context.ts";
 import { EnvironmentOrchestrator } from "./environment.ts";
+import { LifecycleOrchestrator } from "./lifecycle.ts";
+import { evaluateNegativeContract, negativeContractPhaseResult } from "./negative-matcher.ts";
 import { OnboardingOrchestrator } from "./onboarding.ts";
 import { RuntimeOrchestrator } from "./runtime.ts";
+import { StateValidationOrchestrator } from "./state-validation.ts";
 
 interface PhaseRunner {
   run(ctx: RunContext, phase: RunPlanPhase, priorResults?: PhaseResult[]): Promise<PhaseResult>;
@@ -13,37 +20,166 @@ interface PhaseRunner {
 export interface ScenarioRunnerDeps {
   environment?: PhaseRunner;
   onboarding?: PhaseRunner;
+  stateValidation?: PhaseRunner;
+  lifecycle?: PhaseRunner;
   runtime?: PhaseRunner;
 }
 
 export class ScenarioRunner {
   private readonly environment: PhaseRunner;
   private readonly onboarding: PhaseRunner;
+  private readonly stateValidation: PhaseRunner;
+  private readonly lifecycle: PhaseRunner;
   private readonly runtime: PhaseRunner;
 
   constructor(deps: ScenarioRunnerDeps = {}) {
     this.environment = deps.environment ?? new EnvironmentOrchestrator();
     this.onboarding = deps.onboarding ?? new OnboardingOrchestrator();
+    this.stateValidation = deps.stateValidation ?? new StateValidationOrchestrator();
+    this.lifecycle = deps.lifecycle ?? new LifecycleOrchestrator();
     this.runtime = deps.runtime ?? new RuntimeOrchestrator();
   }
 
   async run(ctx: RunContext, plan: RunPlan): Promise<PhaseResult[]> {
+    // Seed context.env from the typed RunPlan once, before any phase
+    // runs. Spec ownership: framework infrastructure (the runner), not
+    // a shell action. Onboarding may extend context.env via
+    // e2e_context_set; the runtime phase reads whatever is on disk.
+    seedContextEnv(ctx, plan);
+
     const results: PhaseResult[] = [];
     for (const phase of plan.phases) {
-      if (phase.name === "environment") {
-        results.push(await this.environment.run(ctx, phase, results));
-        continue;
-      }
-      if (phase.name === "onboarding") {
-        results.push(await this.onboarding.run(ctx, phase, results));
+      const blocked = phaseBlockedBy(phase.name, results);
+      if (blocked) {
+        // Cross-phase short-circuit: the previous phase's setup work
+        // failed, so this phase cannot meaningfully run. Synthesize a
+        // skipped PhaseResult with a clear reason so artifacts stay
+        // honest (no false greens, no <1s assertion explosion).
+        results.push({
+          phase: phase.name,
+          status: "skipped",
+          actions: [],
+          assertions: [
+            {
+              id: `${phase.name}.blocked`,
+              status: "skipped",
+              attempts: 0,
+              durationMs: 0,
+              message: `phase blocked by prior failure: ${blocked.phase} action ${blocked.action.id} failed (${blocked.action.message ?? "no message"})`,
+            },
+          ],
+        });
         continue;
       }
-      if (phase.name === "runtime") {
-        results.push(await this.runtime.run(ctx, phase, results));
-        continue;
-      }
-      throw new Error(`Unsupported phase: ${String(phase.name)}`);
+      const orchestrator = this.orchestratorFor(phase.name);
+      results.push(await orchestrator.run(ctx, phase, results));
     }
+
+    // Negative-scenario contract verification. Single decision point:
+    // if the plan declared expectedFailure, evaluate the matcher and
+    // append a synthetic phase result. Positive scenarios are
+    // unaffected. Side-effect verification stays the responsibility of
+    // the runtime control group's required pending step (kept red
+    // until the probe lands); the matcher only judges phase + errorClass.
+    if (plan.expectedFailure) {
+      const contractResult = evaluateNegativeContract(plan, results);
+      const synthetic = negativeContractPhaseResult(contractResult);
+      results.push(synthetic);
+      writeNegativeContractArtifact(ctx, contractResult, synthetic);
+    }
+
     return results;
   }
+
+  private orchestratorFor(name: RunPlanPhase["name"]): PhaseRunner {
+    if (name === "environment") return this.environment;
+    if (name === "onboarding") return this.onboarding;
+    if (name === "state-validation") return this.stateValidation;
+    if (name === "lifecycle") return this.lifecycle;
+    if (name === "runtime") return this.runtime;
+    throw new Error(`Unsupported phase: ${String(name)}`);
+  }
+}
+
+interface BlockingFailure {
+  phase: "environment" | "onboarding" | "state-validation" | "lifecycle" | "runtime";
+  action: PhaseActionResult;
+}
+
+function writeNegativeContractArtifact(
+  ctx: RunContext,
+  contractResult: ReturnType<typeof evaluateNegativeContract>,
+  synthetic: PhaseResult,
+): void {
+  try {
+    const outputDir = path.join(ctx.contextDir, ".e2e");
+    fs.mkdirSync(outputDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(outputDir, "negative-contract.json"),
+      `${JSON.stringify(contractResult, null, 2)}\n`,
+    );
+    fs.writeFileSync(
+      path.join(outputDir, `${synthetic.phase}.result.json`),
+      `${JSON.stringify(synthetic, null, 2)}\n`,
+    );
+  } catch {
+    /* artifact emission is best-effort; matcher result already in memory */
+  }
+}
+
+// state-validation is the typed diagnostic layer between onboarding
+// and runtime. It probes gateway/sandbox/cli post-conditions and is
+// the phase that proves a negative scenario's forbidden side effects
+// did not occur (gateway-absent, sandbox-absent). For state-validation
+// to do its job after a deliberate onboarding failure (negative
+// scenarios), an onboarding failure must NOT block it. Only an
+// environment-phase failure (install never ran) skips state-validation.
+// Runtime stays blocked by any prior phase-action failure, including
+// state-validation, so suites never run against a missing or wedged
+// environment.
+function phaseBlockedBy(
+  phase: "environment" | "onboarding" | "state-validation" | "lifecycle" | "runtime",
+  results: PhaseResult[],
+): BlockingFailure | undefined {
+  const firstFailure = firstBlockingActionFailure(results);
+  if (!firstFailure) {
+    return undefined;
+  }
+  if (phase === "state-validation" && firstFailure.phase !== "environment") {
+    // state-validation is the diagnostic layer that proves a negative
+    // scenario's forbidden side effects didn't occur, so an onboarding
+    // failure must NOT block it.
+    return undefined;
+  }
+  if (phase === "lifecycle" && firstFailure.phase === "state-validation") {
+    // state-validation failure does not block the lifecycle phase
+    // either: state-validation results are diagnostic. Lifecycle
+    // workers depend on onboarding having produced a sandbox, but
+    // not on state-validation probes having all passed.
+    return undefined;
+  }
+  return firstFailure;
+}
+
+function firstBlockingActionFailure(results: PhaseResult[]): BlockingFailure | undefined {
+  // A phase action failure (real setup work didn't succeed) blocks
+  // downstream phases. Assertion failures do NOT block downstream
+  // phases - they are expected to be reported alongside other phase
+  // results so reviewers can see all failure layers at once.
+  for (const result of results) {
+    if (
+      result.phase !== "environment" &&
+      result.phase !== "onboarding" &&
+      result.phase !== "state-validation" &&
+      result.phase !== "lifecycle" &&
+      result.phase !== "runtime"
+    ) {
+      continue;
+    }
+    const failedAction = result.actions.find((action) => action.status === "failed");
+    if (failedAction) {
+      return { phase: result.phase, action: failedAction };
+    }
+  }
+  return undefined;
 }
diff --git a/test/e2e-scenario/scenarios/orchestrators/state-validation.ts b/test/e2e-scenario/scenarios/orchestrators/state-validation.ts
new file mode 100644
index 0000000000..567d49b3a6
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/state-validation.ts
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { PhaseOrchestrator } from "./phase.ts";
+
+// Typed replacement for the inline gateway/sandbox checks the legacy
+// bash runner ran between onboarding and suite execution
+// (e2e_gateway_assert_healthy / e2e_sandbox_assert_running) AND the
+// post-failure side-effect checks for negative scenarios
+// (`openshell sandbox list | grep -Fq ...`). The orchestrator inserts
+// itself between onboarding and runtime; its phase actions are real
+// probes (typed PhaseAction shell-fn entries the compiler emits from
+// scenario.expectedStateId via the typed expected-state registry).
+//
+// Failure semantics: a probe action failure is just a phase-action
+// failure, so the existing ScenarioRunner short-circuit logic kicks
+// in and the runtime phase is reported as skipped. No new control
+// flow is added; this orchestrator is only here to give the phase a
+// dedicated identity in PhaseResult artifacts and in tests.
+export class StateValidationOrchestrator extends PhaseOrchestrator {
+  constructor() {
+    super("state-validation");
+  }
+}
diff --git a/test/e2e-scenario/scenarios/probes/builtin.ts b/test/e2e-scenario/scenarios/probes/builtin.ts
new file mode 100644
index 0000000000..7f78fc06bc
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/builtin.ts
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { diagnosticsProbe } from "./diagnostics.ts";
+import { docsValidationProbe } from "./docs-validation.ts";
+import { injectionBlockedProbe } from "./injection-blocked.ts";
+import { networkPolicyProbe } from "./network-policy.ts";
+import { shieldsConfigProbe } from "./shields-config.ts";
+import { lookupProbe, registerProbe } from "./registry.ts";
+
+/**
+ * Register all built-in probes. Idempotent: calling
+ * `registerBuiltinProbes()` again (e.g. from a different entry point)
+ * is a no-op once the probes are already in place.
+ *
+ * Ownership boundary:
+ *   - Built-in probes here implement the cross-scenario contract that
+ *     the typed registry already references by name (see
+ *     scenarios/assertions/registry.ts).
+ *   - Scenario-specific probes (if any) belong in a per-scenario
+ *     module that calls `registerProbe()` directly.
+ *
+ * Security probes (shieldsConfigProbe, networkPolicyProbe,
+ * injectionBlockedProbe) are marked `required: true` in
+ * scenarios/assertions/registry.ts. With the implementations
+ * registered below, the orchestrator runs them and fails the phase
+ * on real assertion violations — not on a missing implementation.
+ */
+const BUILTIN_PROBES = {
+  diagnosticsProbe,
+  docsValidationProbe,
+  shieldsConfigProbe,
+  networkPolicyProbe,
+  injectionBlockedProbe,
+} as const;
+
+export function registerBuiltinProbes(): void {
+  // Only register names the registry does not already hold.
+  const missing = Object.entries(BUILTIN_PROBES).filter(
+    ([probeName]) => lookupProbe(probeName) === undefined,
+  );
+  for (const [probeName, probeFn] of missing) registerProbe(probeName, probeFn);
+}
diff --git a/test/e2e-scenario/scenarios/probes/diagnostics.ts b/test/e2e-scenario/scenarios/probes/diagnostics.ts
new file mode 100644
index 0000000000..e2259a6b77
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/diagnostics.ts
@@ -0,0 +1,156 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import type { ProbeContext, ProbeFn, ProbeOutcome } from "./types.ts";
+
+/**
+ * Probe: diagnostics.bundle (`diagnosticsProbe`).
+ *
+ * Mirrors test/e2e/test-diagnostics.sh's TC-DIAG-02 case:
+ *
+ *   1. Run `nemoclaw debug --quick --output <tmp>/quick-debug.tar.gz`
+ *      with a 30s budget.
+ *   2. Assert exit 0.
+ *   3. Assert the archive exists and is non-empty.
+ *
+ * The legacy test also asserts the archive contains no plaintext
+ * credentials (TC-DIAG-01), but that lives in a separate probe
+ * (a future `diagnosticsBundleSecretsProbe`) so this one stays
+ * narrowly focused on bundle production.
+ *
+ * Evidence: a JSON document at ProbeContext.evidencePath summarizing
+ * exit code, archive size, and elapsed seconds.
+ */
+const DIAGNOSTICS_TIMEOUT_MS = 30_000;
+
+interface DiagnosticsEvidence {
+  exitCode: number | null;
+  signal: NodeJS.Signals | null;
+  elapsedMs: number;
+  archivePath: string;
+  archiveSize: number | null;
+  stderrTail: string;
+}
+
+function writeEvidence(evidencePath: string, payload: DiagnosticsEvidence): void {
+  try {
+    fs.mkdirSync(path.dirname(evidencePath), { recursive: true });
+    fs.writeFileSync(evidencePath, JSON.stringify(payload, null, 2));
+  } catch {
+    /* evidence write is best-effort; never fail the probe on IO. */
+  }
+}
+
+export const diagnosticsProbe: ProbeFn = async (ctx: ProbeContext): Promise<ProbeOutcome> => {
+  // Pre-flight: nemoclaw must be on PATH; the legacy test treats this
+  // as a hard prerequisite, not a skip.
+  // (We rely on the spawned process surfacing ENOENT if it isn't.)
+
+  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-diag-probe-"));
+  const archivePath = path.join(tmp, "quick-debug.tar.gz");
+  const startedAt = Date.now();
+
+  let exitCode: number | null = null;
+  let signal: NodeJS.Signals | null = null;
+  let stderrTail = "";
+
+  const result = await new Promise<{ code: number | null; signal: NodeJS.Signals | null }>(
+    (resolve) => {
+      const child = spawn(
+        "nemoclaw",
+        ["debug", "--quick", "--output", archivePath],
+        // Use the parent env directly: probes run inside the framework
+        // process and don't need the redacted secret env that shell
+        // steps build at the spawn boundary. PATH/HOME/E2E_* are
+        // already in process.env.
+        { env: process.env, cwd: ctx.repoRoot, stdio: ["ignore", "ignore", "pipe"] },
+      );
+      const onTimeout = setTimeout(() => {
+        try {
+          child.kill("SIGTERM");
+        } catch {
+          /* already gone */
+        }
+      }, DIAGNOSTICS_TIMEOUT_MS);
+      child.stderr?.on("data", (chunk: Buffer) => {
+        stderrTail = (stderrTail + chunk.toString("utf8")).slice(-1024);
+      });
+      child.on("error", (err) => {
+        clearTimeout(onTimeout);
+        // ENOENT or similar — nemoclaw is not on PATH. Resolve with
+        // exit code 127 and append the spawn error to stderrTail; no
+        // separate classifier is set, so the message carries the cause.
+        stderrTail = (stderrTail + `spawn error: ${err.message}`).slice(-1024);
+        resolve({ code: 127, signal: null });
+      });
+      child.on("close", (code, sig) => {
+        clearTimeout(onTimeout);
+        resolve({ code, signal: sig });
+      });
+    },
+  );
+  exitCode = result.code;
+  signal = result.signal;
+  const elapsedMs = Date.now() - startedAt;
+
+  let archiveSize: number | null = null;
+  try {
+    const stat = fs.statSync(archivePath);
+    archiveSize = stat.size;
+  } catch {
+    archiveSize = null;
+  }
+
+  const evidence: DiagnosticsEvidence = {
+    exitCode,
+    signal,
+    elapsedMs,
+    archivePath,
+    archiveSize,
+    stderrTail,
+  };
+  writeEvidence(ctx.evidencePath, evidence);
+
+  // Best-effort cleanup of the tmp dir; keep the JSON evidence on
+  // disk regardless.
+  try {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  } catch {
+    /* tmp cleanup is non-fatal */
+  }
+
+  if (signal === "SIGTERM") {
+    return {
+      status: "failed",
+      classifier: "runner-infra",
+      message: `diagnosticsProbe: nemoclaw debug --quick exceeded ${DIAGNOSTICS_TIMEOUT_MS / 1000}s`,
+    };
+  }
+  if (exitCode !== 0) {
+    return {
+      status: "failed",
+      message: `diagnosticsProbe: nemoclaw debug --quick exited ${exitCode}; stderr: ${stderrTail.slice(-300)}`,
+    };
+  }
+  if (archiveSize === null) {
+    return {
+      status: "failed",
+      message: `diagnosticsProbe: archive missing at ${archivePath}`,
+    };
+  }
+  if (archiveSize === 0) {
+    return {
+      status: "failed",
+      message: `diagnosticsProbe: archive at ${archivePath} is empty`,
+    };
+  }
+
+  return {
+    status: "passed",
+    message: `diagnosticsProbe: bundle ok (${archiveSize} bytes, ${elapsedMs}ms)`,
+  };
+};
diff --git a/test/e2e-scenario/scenarios/probes/docs-validation.ts b/test/e2e-scenario/scenarios/probes/docs-validation.ts
new file mode 100644
index 0000000000..76ba5127c6
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/docs-validation.ts
@@ -0,0 +1,160 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import path from "node:path";
+import type { ProbeContext, ProbeFn, ProbeOutcome } from "./types.ts";
+
+/**
+ * Probe: docs.validation (`docsValidationProbe`).
+ *
+ * Mirrors test/e2e/test-docs-validation.sh:
+ *
+ *   1. Run `test/e2e/e2e-cloud-experimental/check-docs.sh --only-cli`
+ *      to verify `nemoclaw --help` matches docs/reference/commands.mdx
+ *      (CLI / docs parity).
+ *   2. Run `... --only-links --local-only` to verify markdown internal
+ *      links resolve. Remote http(s) probes are skipped by default
+ *      because they are slow and flaky under CI rate limiting (the
+ *      legacy script documents this caveat).
+ *
+ * Both checks exit 0 on success. The probe captures both exit codes
+ * and surfaces a single combined outcome, with a structured evidence
+ * JSON for diagnosis.
+ */
+
+const CHECK_DOCS_REL = "test/e2e/e2e-cloud-experimental/check-docs.sh";
+const CLI_PARITY_TIMEOUT_MS = 60_000;
+const LINK_CHECK_TIMEOUT_MS = 90_000;
+
+interface DocsCheckResult {
+  phase: "cli-parity" | "links-local";
+  exitCode: number | null;
+  signal: NodeJS.Signals | null;
+  elapsedMs: number;
+  stderrTail: string;
+  stdoutTail: string;
+}
+
+interface DocsEvidence {
+  results: DocsCheckResult[];
+}
+
+function runCheck(
+  scriptPath: string,
+  args: readonly string[],
+  cwd: string,
+  timeoutMs: number,
+  phase: DocsCheckResult["phase"],
+): Promise<DocsCheckResult> {
+  return new Promise((resolve) => {
+    const startedAt = Date.now();
+    let stdoutTail = "";
+    let stderrTail = "";
+    const child = spawn("bash", [scriptPath, ...args], {
+      env: { ...process.env, CHECK_DOC_LINKS_REMOTE: "0" },
+      cwd,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    const onTimeout = setTimeout(() => {
+      try {
+        child.kill("SIGTERM");
+      } catch {
+        /* already gone */
+      }
+    }, timeoutMs);
+    child.stdout?.on("data", (chunk: Buffer) => {
+      stdoutTail = (stdoutTail + chunk.toString("utf8")).slice(-1024);
+    });
+    child.stderr?.on("data", (chunk: Buffer) => {
+      stderrTail = (stderrTail + chunk.toString("utf8")).slice(-1024);
+    });
+    child.on("error", (err) => {
+      clearTimeout(onTimeout);
+      resolve({
+        phase,
+        exitCode: 127,
+        signal: null,
+        elapsedMs: Date.now() - startedAt,
+        stderrTail: `spawn error: ${err.message}`,
+        stdoutTail,
+      });
+    });
+    child.on("close", (code, sig) => {
+      clearTimeout(onTimeout);
+      resolve({
+        phase,
+        exitCode: code,
+        signal: sig,
+        elapsedMs: Date.now() - startedAt,
+        stderrTail,
+        stdoutTail,
+      });
+    });
+  });
+}
+
+function writeEvidence(evidencePath: string, payload: DocsEvidence): void {
+  try {
+    fs.mkdirSync(path.dirname(evidencePath), { recursive: true });
+    fs.writeFileSync(evidencePath, JSON.stringify(payload, null, 2));
+  } catch {
+    /* evidence write is best-effort */
+  }
+}
+
+export const docsValidationProbe: ProbeFn = async (ctx: ProbeContext): Promise<ProbeOutcome> => {
+  const scriptPath = path.resolve(ctx.repoRoot, CHECK_DOCS_REL);
+  if (!fs.existsSync(scriptPath)) {
+    return {
+      status: "failed",
+      message: `docsValidationProbe: check-docs.sh not found at ${scriptPath}`,
+    };
+  }
+
+  const cliResult = await runCheck(
+    scriptPath,
+    ["--only-cli"],
+    ctx.repoRoot,
+    CLI_PARITY_TIMEOUT_MS,
+    "cli-parity",
+  );
+  const linksResult = await runCheck(
+    scriptPath,
+    ["--only-links", "--local-only"],
+    ctx.repoRoot,
+    LINK_CHECK_TIMEOUT_MS,
+    "links-local",
+  );
+
+  writeEvidence(ctx.evidencePath, { results: [cliResult, linksResult] });
+
+  // Surface SIGTERM (timeout) as runner-infra so the orchestrator may
+  // retry on a transient slowness. Hard exit-code failures do not
+  // retry — a docs/CLI drift is deterministic.
+  if (cliResult.signal === "SIGTERM" || linksResult.signal === "SIGTERM") {
+    const which = cliResult.signal === "SIGTERM" ? "cli-parity" : "links-local";
+    return {
+      status: "failed",
+      classifier: "runner-infra",
+      message: `docsValidationProbe: ${which} check timed out`,
+    };
+  }
+  if (cliResult.exitCode !== 0) {
+    return {
+      status: "failed",
+      message: `docsValidationProbe: CLI/docs parity failed (exit ${cliResult.exitCode}); stderr: ${cliResult.stderrTail.slice(-300)}`,
+    };
+  }
+  if (linksResult.exitCode !== 0) {
+    return {
+      status: "failed",
+      message: `docsValidationProbe: markdown link check failed (exit ${linksResult.exitCode}); stderr: ${linksResult.stderrTail.slice(-300)}`,
+    };
+  }
+  return {
+    status: "passed",
+    message: `docsValidationProbe: ok (cli ${cliResult.elapsedMs}ms, links ${linksResult.elapsedMs}ms)`,
+  };
+};
diff --git a/test/e2e-scenario/scenarios/probes/injection-blocked.ts b/test/e2e-scenario/scenarios/probes/injection-blocked.ts
new file mode 100644
index 0000000000..d1acf8ab3d
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/injection-blocked.ts
@@ -0,0 +1,155 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { ProbeContext, ProbeFn, ProbeOutcome } from "./types.ts";
+import { runSandboxCmd, writeProbeEvidence } from "./util.ts";
+
+/**
+ * Probe: security.injection.blocked (`injectionBlockedProbe`).
+ *
+ * Mirrors the contract in
+ * test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
+ * `spc_assert_telegram_payload_not_shell_executed`, which itself
+ * encodes the test-credential-sanitization.sh / test-brave-search-e2e.sh
+ * lesson: when an external content channel (chat message, web fetch
+ * response, document body) flows into the agent, it MUST be treated
+ * as data, never as a shell command.
+ *
+ * Implementation:
+ *   1. Pre-clean a unique marker file inside the sandbox.
+ *   2. Send a payload designed to exploit naive shell expansion:
+ *        $(touch <marker> && echo INJECTED)
+ *      via stdin to a remote `sh -c 'MSG=$(cat); printf "%s\\n" "$MSG"'`
+ *      that's responsible for echoing the bytes back without
+ *      evaluation.
+ *   3. Verify the echoed output contains the literal payload
+ *      (proves the bytes were preserved, not interpreted).
+ *   4. Verify the marker file does NOT exist (proves the
+ *      command-substitution operator did not fire on the host
+ *      side or the sandbox-side shell).
+ *
+ * If either invariant is violated the probe fails closed: the
+ * scenario must NEVER pass on a fake-green injection check.
+ *
+ * Evidence: structured JSON capturing the payload, the marker path,
+ * the echo response tail, and the marker existence query result.
+ */
+
+const PER_CALL_SECONDS = 25;
+
+interface InjectionEvidence {
+  payload: string;
+  markerPath: string;
+  echoExitCode: number | null;
+  echoStdoutTail: string;
+  echoStderrTail: string;
+  markerStateExitCode: number | null;
+  markerStateStdout: string;
+  payloadPreservedLiterally: boolean;
+  markerAbsent: boolean;
+}
+
+function uniqueMarkerPath(): string {
+  // `/tmp` is sandbox-writable; collisions across parallel scenarios
+  // are avoided by mixing pid+random to keep the marker scoped to
+  // this probe invocation.
+  const rand = Math.floor(Math.random() * 0xffff_ffff).toString(16);
+  return `/tmp/nemoclaw-injection-probe-${process.pid}-${rand}`;
+}
+
+export const injectionBlockedProbe: ProbeFn = async (ctx: ProbeContext): Promise<ProbeOutcome> => {
+  if (!ctx.sandboxName) {
+    return { status: "failed", message: "injectionBlockedProbe: E2E_SANDBOX_NAME missing in context.env" };
+  }
+
+  const markerPath = uniqueMarkerPath();
+  // Single-quote the marker path inside the payload so the marker
+  // string survives the host-side bash quoting layer; the test is
+  // the COMMAND SUBSTITUTION operator surviving, not the path.
+  const payload = `$(touch '${markerPath}' && echo INJECTED)`;
+
+  const evidence: InjectionEvidence = {
+    payload,
+    markerPath,
+    echoExitCode: null,
+    echoStdoutTail: "",
+    echoStderrTail: "",
+    markerStateExitCode: null,
+    markerStateStdout: "",
+    payloadPreservedLiterally: false,
+    markerAbsent: false,
+  };
+
+  // Step 1: pre-clean the marker. Best-effort; failure here doesn't
+  // matter as long as Step 3 confirms the marker is absent at the
+  // end.
+  await runSandboxCmd(ctx, ["sh", "-c", `rm -f '${markerPath}'`], {
+    perCallSeconds: PER_CALL_SECONDS,
+  });
+
+  // Step 2: send the payload via stdin to an in-sandbox shell that
+  // reads it into a variable and echoes it back. The script's only
+  // command substitution is `$(cat)`, which reads stdin; the payload
+  // never appears in the script text. `\\n` keeps the script on one
+  // line and leaves the newline escape for printf to interpret.
+  const echoResult = await runSandboxCmd(
+    ctx,
+    ["sh", "-c", 'MSG=$(cat); printf "%s\\n" "$MSG"'],
+    { perCallSeconds: PER_CALL_SECONDS, stdin: payload },
+  );
+  evidence.echoExitCode = echoResult.exitCode;
+  evidence.echoStdoutTail = echoResult.stdout;
+  evidence.echoStderrTail = echoResult.stderr;
+
+  if (echoResult.exitCode !== 0) {
+    writeProbeEvidence(ctx.evidencePath, evidence);
+    return {
+      status: "failed",
+      classifier: echoResult.signal === "SIGTERM" ? "gateway-transient" : undefined,
+      message: `injectionBlockedProbe: echo command failed (exit ${echoResult.exitCode}); stderr: ${echoResult.stderr.slice(-300)}`,
+    };
+  }
+
+  evidence.payloadPreservedLiterally = echoResult.stdout.includes(payload);
+  if (!evidence.payloadPreservedLiterally) {
+    writeProbeEvidence(ctx.evidencePath, evidence);
+    return {
+      status: "failed",
+      message: `injectionBlockedProbe: payload was not preserved literally; stdout tail: ${echoResult.stdout.slice(-300)}`,
+    };
+  }
+
+  // Step 3: confirm the marker file does NOT exist. A 'SAFE' result
+  // means the command-substitution payload was treated as data; an
+  // 'EXPLOITED' result means a shell somewhere in the chain
+  // evaluated the substitution.
+  const markerResult = await runSandboxCmd(
+    ctx,
+    ["sh", "-c", `test -f '${markerPath}' && echo EXPLOITED || echo SAFE`],
+    { perCallSeconds: PER_CALL_SECONDS },
+  );
+  evidence.markerStateExitCode = markerResult.exitCode;
+  evidence.markerStateStdout = markerResult.stdout;
+  evidence.markerAbsent = markerResult.stdout.includes("SAFE");
+
+  // Best-effort cleanup of the marker if it somehow got created
+  // (an 'EXPLOITED' result is a probe failure but we still don't
+  // want a stray file lingering between runs).
+  await runSandboxCmd(ctx, ["sh", "-c", `rm -f '${markerPath}'`], {
+    perCallSeconds: PER_CALL_SECONDS,
+  });
+
+  writeProbeEvidence(ctx.evidencePath, evidence);
+
+  if (!evidence.markerAbsent) {
+    return {
+      status: "failed",
+      message: `injectionBlockedProbe: marker file ${markerPath} present \u2014 command substitution executed; stdout: ${markerResult.stdout.slice(-200)}`,
+    };
+  }
+
+  return {
+    status: "passed",
+    message: `injectionBlockedProbe: payload preserved as data, marker ${markerPath} absent`,
+  };
+};
diff --git a/test/e2e-scenario/scenarios/probes/network-policy.ts b/test/e2e-scenario/scenarios/probes/network-policy.ts
new file mode 100644
index 0000000000..c3bb50923c
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/network-policy.ts
@@ -0,0 +1,125 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { ProbeContext, ProbeFn, ProbeOutcome } from "./types.ts";
+import { runSandboxCmd, writeProbeEvidence } from "./util.ts";
+
+/**
+ * Probe: security.policy.enforced (`networkPolicyProbe`).
+ *
+ * Mirrors the deny-by-default contract from
+ * test/e2e/test-network-policy.sh TC-NET-01: when no policy preset
+ * widens egress for a given hostname, a request to that hostname
+ * from inside the sandbox MUST be rejected by the gateway. A success
+ * status is a hard failure — it means the network-policy enforcement
+ * layer is not catching the request.
+ *
+ * Implementation: from inside the sandbox, run `curl` against a
+ * non-whitelisted URL and inspect:
+ *   - HTTP status code (via curl -w '%{http_code}')
+ *   - curl exit code (curl exit 7 / 28 / etc. when DNS or connect
+ *     is blocked outright)
+ *
+ * Expected outcomes:
+ *   - HTTP 403   (gateway proxy rejected the request)
+ *   - HTTP 4xx/5xx (any other 4xx or 5xx that's not 401 — 401 indicates
+ *     the request reached an upstream auth wall, which counts as policy
+ *     bypass, NOT block)
+ *   - curl exit != 0 with HTTP code 000 (DNS / connect error) — the
+ *     gateway dropped the request before HTTP could be spoken
+ *
+ * Anything else (HTTP 2xx, 3xx, 401) means policy is NOT enforcing
+ * deny-by-default and the probe fails.
+ *
+ * Hostname choice: example.com is the canonical "should never be on
+ * any preset" target the legacy test uses. Probes that need a
+ * different fixture override via E2E_NETWORK_POLICY_BLOCKED_URL.
+ */
+
+const DEFAULT_BLOCKED_URL = "https://example.com/";
+const CURL_MAX_TIME_S = 10;
+const PER_CALL_SECONDS = 25;
+
+interface NetworkPolicyEvidence {
+  blockedUrl: string;
+  curlExitCode: number | null;
+  curlSignal: string | null;
+  httpStatus: string | null;
+  stdoutTail: string;
+  stderrTail: string;
+}
+
+function isBlockedHttpStatus(code: string): boolean {
+  // "000": DNS/connect refused before HTTP, which counts as blocked.
+  // "401": the request reached upstream auth, so it was NOT blocked.
+  return code === "000" || (code !== "401" && /^[45][0-9]{2}$/.test(code));
+}
+
+export const networkPolicyProbe: ProbeFn = async (ctx: ProbeContext): Promise<ProbeOutcome> => {
+  if (!ctx.sandboxName) {
+    return { status: "failed", message: "networkPolicyProbe: E2E_SANDBOX_NAME missing in context.env" };
+  }
+  const blockedUrl = ctx.contextEnv.E2E_NETWORK_POLICY_BLOCKED_URL || DEFAULT_BLOCKED_URL;
+
+  // curl -sS keeps stderr informative on failure; -o /dev/null discards
+  // body so the gateway's HTML reject page doesn't pollute stdout;
+  // -w prints the status code we parse below.
+  const result = await runSandboxCmd(
+    ctx,
+    [
+      "curl",
+      "-sS",
+      "-o",
+      "/dev/null",
+      "-w",
+      "%{http_code}",
+      "--max-time",
+      String(CURL_MAX_TIME_S),
+      blockedUrl,
+    ],
+    { perCallSeconds: PER_CALL_SECONDS },
+  );
+
+  // curl writes the status code to stdout (or '000' on connect/DNS
+  // failure). Trim whitespace; some curl builds emit a trailing
+  // newline.
+  const httpStatus = result.stdout.trim() || null;
+  const evidence: NetworkPolicyEvidence = {
+    blockedUrl,
+    curlExitCode: result.exitCode,
+    curlSignal: result.signal,
+    httpStatus,
+    stdoutTail: result.stdout,
+    stderrTail: result.stderr,
+  };
+  writeProbeEvidence(ctx.evidencePath, evidence);
+
+  if (result.signal === "SIGTERM") {
+    return {
+      status: "failed",
+      classifier: "gateway-transient",
+      message: `networkPolicyProbe: curl into sandbox timed out after ${PER_CALL_SECONDS}s`,
+    };
+  }
+
+  // The probe accepts:
+  //   - curl exit 0 with a 4xx/5xx body (gateway returned a reject)
+  //   - curl exit != 0 with status '000' (gateway dropped the
+  //     connection, curl never got an HTTP response)
+  if (httpStatus && isBlockedHttpStatus(httpStatus)) {
+    return {
+      status: "passed",
+      message: `networkPolicyProbe: ${blockedUrl} blocked (http_code=${httpStatus}, curl exit ${result.exitCode})`,
+    };
+  }
+  if (result.exitCode !== 0 && (!httpStatus || httpStatus === "000")) {
+    return {
+      status: "passed",
+      message: `networkPolicyProbe: ${blockedUrl} blocked (curl exit ${result.exitCode}, no HTTP response)`,
+    };
+  }
+  return {
+    status: "failed",
+    message: `networkPolicyProbe: ${blockedUrl} reachable from sandbox (http_code=${httpStatus ?? "<empty>"}, curl exit ${result.exitCode}); deny-by-default not enforced`,
+  };
+};
diff --git a/test/e2e-scenario/scenarios/probes/registry.ts b/test/e2e-scenario/scenarios/probes/registry.ts
new file mode 100644
index 0000000000..3c4403cfcc
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/registry.ts
@@ -0,0 +1,54 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { ProbeFn } from "./types.ts";
+
+/**
+ * Map of probe-ref name → probe runner. Shell-side AssertionStep
+ * declarations carry an `implementation: { kind: "probe", ref: <name> }`.
+ * The orchestrator calls `lookupProbe(ref)` at execution time; if it
+ * returns undefined the step is reported skipped (or failed for
+ * `required` probes).
+ *
+ * The registry is module-scoped state. Built-in probes are registered
+ * by importing `./builtin.ts` (which calls registerProbe at module
+ * load). Tests that need a clean slate can call `resetProbeRegistry()`.
+ */
+const probes: Map<string, ProbeFn> = new Map(); // module-scoped: probe ref name -> runner
+
+/**
+ * Adds `fn` to the registry under the key `name`.
+ * @throws Error when `name` is empty or already registered; a
+ *   duplicate is rejected rather than silently replacing the entry.
+ */
+export function registerProbe(name: string, fn: ProbeFn): void {
+  const problem = !name
+    ? "registerProbe: name is required"
+    : probes.has(name)
+      ? `registerProbe: '${name}' already registered`
+      : null;
+  if (problem !== null) throw new Error(problem);
+  probes.set(name, fn);
+}
+
+/**
+ * Resolves `name` to its registered probe function.
+ * @returns the probe, or undefined when `name` is not registered.
+ */
+export function lookupProbe(name: string): ProbeFn | undefined {
+  const registered = probes.get(name);
+  return registered;
+}
+
+/**
+ * Snapshot of the registered probe names in sorted order. The
+ * returned array is a fresh copy, not a view of the registry.
+ */
+export function listRegisteredProbes(): readonly string[] {
+  return [...probes.keys()].sort();
+}
+
+/** Test-only: empties the registry, dropping every entry (built-ins too). */
+export function resetProbeRegistry(): void {
+  probes.clear();
+}
diff --git a/test/e2e-scenario/scenarios/probes/shields-config.ts b/test/e2e-scenario/scenarios/probes/shields-config.ts
new file mode 100644
index 0000000000..6e268f69a5
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/shields-config.ts
@@ -0,0 +1,196 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { ProbeContext, ProbeFn, ProbeOutcome } from "./types.ts";
+import { runHostCmd, runSandboxCmd, writeProbeEvidence } from "./util.ts";
+
+/**
+ * Probe: security.shields.config (`shieldsConfigProbe`).
+ *
+ * Mirrors test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
+ * `spc_assert_shields_config_consistent`, which itself ports the
+ * legacy test/e2e/test-shields-config.sh contract:
+ *
+ *   1. Ask the host CLI: `nemoclaw <sandbox> shields status` and
+ *      classify the reported state as up | down | not-configured.
+ *   2. If the scenario declares an expected state via
+ *      `E2E_SHIELDS_EXPECTED_STATE` (or the legacy
+ *      `E2E_SHIELDS_EXPECTED`), assert observed === expected.
+ *   3. Verify the in-sandbox config file permissions match the
+ *      observed state:
+ *        - up                  -> root:root + restrictive 4xx mode
+ *                                 (read-only for owner+group, no write
+ *                                  for sandbox user)
+ *        - down|not-configured -> sandbox:sandbox (writable by the
+ *                                  sandbox user, since shields are
+ *                                  not locking the file)
+ *
+ * Config path depends on the agent the scenario onboarded:
+ *   - openclaw -> /sandbox/.openclaw/openclaw.json
+ *   - hermes   -> /sandbox/.hermes/.env
+ *
+ * Evidence: a JSON document at ProbeContext.evidencePath summarizing
+ * status output, observed state, expected state (if declared), and
+ * config-permission stat output.
+ */
+
+const SHIELDS_STATUS_TIMEOUT_MS = 30_000; // timeoutMs passed to runHostCmd for `shields status`
+const SANDBOX_STAT_PER_CALL_SECONDS = 25; // perCallSeconds passed to runSandboxCmd for the stat call
+
+type ShieldsState = "up" | "down" | "not-configured"; // the states classifyStatus can report
+
+interface ShieldsEvidence { // JSON payload handed to writeProbeEvidence
+  observed: ShieldsState | null; // state parsed from status stdout; null if unrecognized
+  expected: ShieldsState | null; // from E2E_SHIELDS_EXPECTED_STATE / E2E_SHIELDS_EXPECTED; null if unset or unrecognized
+  statusExitCode: number | null; // exit code of `nemoclaw <sandbox> shields status`
+  statusStdoutTail: string; // stdout of the status command as returned by runHostCmd
+  configPath: string | null; // in-sandbox config file that gets stat'ed
+  permissionsLine: string | null; // trimmed output of `stat -c "%a %U:%G"`
+  mode: string | null; // first whitespace-separated field of permissionsLine
+  owner: string | null; // second whitespace-separated field of permissionsLine
+}
+
+function classifyStatus(stdout: string): ShieldsState | null {
+  // Markers are tried in order; the first one found in stdout wins.
+  const states: ShieldsState[] = ["up", "down", "not-configured"];
+  const labels = ["Shields: UP", "Shields: DOWN", "Shields: NOT CONFIGURED"];
+  return states[labels.findIndex((label) => stdout.includes(label))] ?? null;
+}
+
+function configPathFor(agent: string | undefined): string | null {
+  // An unset or empty agent is treated as openclaw.
+  const effectiveAgent = agent === undefined || agent === "" ? "openclaw" : agent;
+  if (effectiveAgent === "openclaw") {
+    return "/sandbox/.openclaw/openclaw.json";
+  }
+  if (effectiveAgent === "hermes") {
+    return "/sandbox/.hermes/.env";
+  }
+  // Any other agent has no known config location.
+  return null;
+}
+
+// True when the config file's octal `mode` and `owner` ("user:group")
+// are consistent with the observed shields state.
+function permissionsOk(observed: ShieldsState, mode: string, owner: string): boolean {
+  // down | not-configured: the sandbox user must own the file.
+  if (observed !== "up") return owner === "sandbox:sandbox";
+  // up: root-owned, owner read-only (4), and no write bit for group
+  // or world. Octal 2 and 3 include write, so [0-4] was too loose:
+  // it accepted e.g. 422. Only 0, 1 and 4 are allowed per digit.
+  return /^4[014][014]$/.test(mode) && owner === "root:root";
+}
+
+function expectedStateFromContext(env: Readonly<Record<string, string>>): ShieldsState | null {
+  // New key wins over the legacy one; separators and case are normalized.
+  const declared = env.E2E_SHIELDS_EXPECTED_STATE || env.E2E_SHIELDS_EXPECTED || "";
+  const candidate = declared.trim().replace(/_/g, "-").toLowerCase();
+  const known: readonly ShieldsState[] = ["up", "down", "not-configured"];
+  return known.find((state) => state === candidate) ?? null;
+}
+
+/**
+ * Probe `security.shields.config`: checks that the shields state
+ * reported by `nemoclaw <sandbox> shields status` matches the
+ * scenario's expected state (when one is declared) and that the
+ * agent config file inside the sandbox has the ownership and mode
+ * that go with the observed state.
+ *
+ * Evidence is written to `ctx.evidencePath` on every path except the
+ * missing-sandbox-name guard, which returns before evidence exists.
+ */
+export const shieldsConfigProbe: ProbeFn = async (ctx: ProbeContext): Promise<ProbeOutcome> => {
+  const { sandboxName, evidencePath, contextEnv } = ctx;
+  if (!sandboxName) {
+    return { status: "failed", message: "shieldsConfigProbe: E2E_SANDBOX_NAME missing in context.env" };
+  }
+
+  // Filled in incrementally so a failure records everything known
+  // up to that point.
+  const evidence: ShieldsEvidence = {
+    observed: null,
+    expected: expectedStateFromContext(contextEnv),
+    statusExitCode: null,
+    statusStdoutTail: "",
+    configPath: null,
+    permissionsLine: null,
+    mode: null,
+    owner: null,
+  };
+
+  // Persists the evidence gathered so far, then builds the failed
+  // outcome. `extra` carries the optional classifier so the key is
+  // only present on outcomes that set it.
+  const fail = (message: string, extra: Pick<ProbeOutcome, "classifier"> = {}): ProbeOutcome => {
+    writeProbeEvidence(evidencePath, evidence);
+    return { status: "failed", ...extra, message };
+  };
+
+  // --- Host side: ask the CLI for the shields state ---
+  const status = await runHostCmd("nemoclaw", [sandboxName, "shields", "status"], {
+    timeoutMs: SHIELDS_STATUS_TIMEOUT_MS,
+  });
+  evidence.statusExitCode = status.exitCode;
+  evidence.statusStdoutTail = status.stdout;
+  if (status.signal === "SIGTERM") {
+    return fail(
+      `shieldsConfigProbe: 'nemoclaw shields status' timed out after ${SHIELDS_STATUS_TIMEOUT_MS}ms`,
+      { classifier: "runner-infra" },
+    );
+  }
+  if (status.exitCode !== 0) {
+    return fail(
+      `shieldsConfigProbe: 'nemoclaw shields status' exited ${status.exitCode}; stderr: ${status.stderr.slice(-300)}`,
+    );
+  }
+
+  const observed = classifyStatus(status.stdout);
+  evidence.observed = observed;
+  if (!observed) {
+    return fail(
+      `shieldsConfigProbe: status output did not report a recognized Shields state; tail: ${status.stdout.slice(-200)}`,
+    );
+  }
+  const { expected } = evidence;
+  if (expected && expected !== observed) {
+    return fail(`shieldsConfigProbe: expected shields '${expected}', observed '${observed}'`);
+  }
+
+  // --- Sandbox side: stat the agent's config file ---
+  const agent = contextEnv.E2E_AGENT;
+  const configPath = configPathFor(agent);
+  if (!configPath) {
+    return fail(`shieldsConfigProbe: unsupported E2E_AGENT '${agent}'`);
+  }
+  evidence.configPath = configPath;
+  const stat = await runSandboxCmd(ctx, ["stat", "-c", "%a %U:%G", configPath], {
+    perCallSeconds: SANDBOX_STAT_PER_CALL_SECONDS,
+  });
+  if (stat.exitCode !== 0) {
+    return fail(
+      `shieldsConfigProbe: stat of ${configPath} failed (exit ${stat.exitCode}); stderr: ${stat.stderr.slice(-300)}`,
+      { classifier: stat.signal === "SIGTERM" ? "gateway-transient" : undefined },
+    );
+  }
+
+  // stat prints "<octal mode> <user>:<group>".
+  const permsLine = stat.stdout.trim();
+  evidence.permissionsLine = permsLine;
+  const [mode, owner] = permsLine.split(/\s+/, 2);
+  evidence.mode = mode ?? null;
+  evidence.owner = owner ?? null;
+  if (!mode || !owner) {
+    return fail(`shieldsConfigProbe: could not parse stat output: '${permsLine}'`);
+  }
+  if (!permissionsOk(observed, mode, owner)) {
+    return fail(
+      `shieldsConfigProbe: shields are '${observed}' but ${configPath} permissions are '${permsLine}'`,
+    );
+  }
+
+  writeProbeEvidence(evidencePath, evidence);
+  return {
+    status: "passed",
+    message: `shieldsConfigProbe: shields=${observed} ${configPath}=${permsLine}`,
+  };
+};
diff --git a/test/e2e-scenario/scenarios/probes/types.ts b/test/e2e-scenario/scenarios/probes/types.ts
new file mode 100644
index 0000000000..4b1edabd08
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/types.ts
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type { TransientClassifier } from "../types.ts";
+
+/**
+ * Context handed to a probe runner. Mirrors the subset of scenario
+ * state that shell steps already get via `${E2E_CONTEXT_DIR}/context.env`,
+ * but typed so probe implementations don't have to parse the file
+ * themselves.
+ *
+ * The orchestrator builds this before invoking the probe; probe code
+ * must NOT mutate `contextEnv` (treat as read-only).
+ */
+export interface ProbeContext {
+  /** Repo-relative or absolute path to .e2e/.. context root. */
+  contextDir: string;
+  /** Absolute path to the evidence file the probe SHOULD write. */
+  evidencePath: string;
+  /** Parsed key/value pairs from ${contextDir}/context.env. */
+  contextEnv: Readonly<Record<string, string>>;
+  /** Convenience accessor for the most-used keys. Null when missing. */
+  sandboxName: string | null; // E2E_SANDBOX_NAME from context.env
+  gatewayUrl: string | null; // NOTE(review): presumably also read from context.env — confirm the key
+  /** Repo root, so probes that shell out have a canonical cwd. */
+  repoRoot: string;
+}
+
+/**
+ * Structured probe result. Mirrors AssertionStep StepAttemptOutcome
+ * in `phase.ts` so the orchestrator can adopt it without translation.
+ *
+ * Probes MUST emit a structured outcome — never throw out of the
+ * registered function. Throwing is a contract violation that the
+ * orchestrator surfaces as a failed assertion with the error message,
+ * but a well-behaved probe converts thrown errors into a `failed`
+ * outcome with a redacted message.
+ */
+export interface ProbeOutcome {
+  status: "passed" | "failed" | "skipped"; // verdict the probe reports for its step
+  message?: string; // human-readable detail; built-in probes prefix it with the probe name
+  classifier?: TransientClassifier; // set by built-in probes on timeouts, e.g. "gateway-transient"
+  /**
+   * Optional override for the evidence path. If omitted the orchestrator
+   * uses `step.evidencePath` (which the probe was already told via
+   * ProbeContext.evidencePath).
+   */
+  evidence?: string;
+}
+
+/**
+ * The function shape every registered probe implements.
+ *
+ * Convention:
+ *   - Probes are async even when they could be sync, so the registry
+ *     can swap an implementation for a slow IO-bound version without
+ *     ripple effects through the orchestrator.
+ *   - Probes write structured evidence (JSON) to ProbeContext.evidencePath
+ *     so failures are diagnosable from the artifact bundle.
+ */
+export type ProbeFn = (ctx: ProbeContext) => Promise<ProbeOutcome>;
diff --git a/test/e2e-scenario/scenarios/probes/util.ts b/test/e2e-scenario/scenarios/probes/util.ts
new file mode 100644
index 0000000000..22e192d074
--- /dev/null
+++ b/test/e2e-scenario/scenarios/probes/util.ts
@@ -0,0 +1,287 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { spawn } from "node:child_process";
+import fs from "node:fs";
+import path from "node:path";
+import type { ProbeContext } from "./types.ts";
+
+/**
+ * Shared utilities for built-in probes. Two responsibilities:
+ *
+ *   1. Entering the sandbox via the canonical bash wrapper
+ *      (`validation_suites/sandbox-exec.sh`) instead of re-implementing
+ *      the ssh-config / openshell-exec logic in TS. This keeps the
+ *      transport choice in ONE place — if the wrapper changes
+ *      (e.g. switches from openshell-exec to ssh-config preferred),
+ *      every probe inherits the new behavior.
+ *
+ *   2. Spawning host-side CLIs (`nemoclaw`, `openshell`) with timeouts
+ *      and structured outcome capture. Probes never invoke spawn
+ *      directly so timeout and stdio handling stays consistent.
+ *
+ * Probe code MUST treat the returned `stdout`/`stderr` as already-bounded
+ * (we slice the tail). The full output is never returned or logged from
+ * here — evidence files keep the structured fields a probe explicitly
+ * decides to persist.
+ */
+
+const VALIDATION_SUITES_REL = "test/e2e-scenario/validation_suites"; // joined onto ctx.repoRoot to locate sandbox-exec.sh
+const TAIL_BYTES = 2048; // default cap for tail(); compared against string length (UTF-16 code units), not bytes
+
+export interface CmdResult {
+  exitCode: number | null; // code from the child's 'close' event; 127 when spawn emits 'error'
+  signal: NodeJS.Signals | null; // signal from 'close'; probes compare it to "SIGTERM" to detect the timeout kill
+  stdout: string; // trailing window of stdout, bounded by tail()
+  stderr: string; // trailing window of stderr; a spawn error message is appended to it
+  elapsedMs: number; // Date.now() delta from just before spawn to resolution
+}
+
+interface RunOptions {
+  /** Hard cap; on expiry the child is SIGTERMed and the result resolves on 'close'. */
+  timeoutMs: number;
+  /** UTF-8 stdin payload; when undefined the child's stdin is ignored. */
+  stdin?: string;
+  /** Override env. Defaults to process.env. */
+  env?: NodeJS.ProcessEnv;
+  /** Working directory, handed to spawn as-is (runSandboxCmd passes ctx.repoRoot). */
+  cwd?: string;
+}
+
+function tail(buf: string, max = TAIL_BYTES): string {
+  return buf.length > max ? buf.slice(-max) : buf; // keep only the last `max` characters
+}
+
+/**
+ * Guards values bound for a child process: returns `value` untouched,
+ * or throws when it contains a NUL byte.
+ * @throws Error `${label} must not contain NUL bytes`.
+ */
+function rejectNulByte(value: string, label: string): string {
+  const hasNul = value.indexOf("\u0000") !== -1;
+  if (!hasNul) {
+    return value;
+  }
+  throw new Error(`${label} must not contain NUL bytes`);
+}
+
+/**
+ * Spawn a bash script and capture the result. Internal helper used by
+ * the sandbox-cmd path; not exported because direct bash spawning by
+ * probes invites the same drift the canonical wrapper exists to
+ * prevent.
+ *
+ * Contract that addresses CodeQL js/shell-command-injection-from-environment:
+ *
+ *   1. The `script` parameter is always a string LITERAL at every call
+ *      site — callers do not interpolate user-controlled data into
+ *      the script body.
+ *   2. `bashArgs` carry all variable data and reach the script via
+ *      bash positional parameters ($1, $2, ...). Bash treats positional
+ *      argv as data, not code, so the values bypass parser expansion.
+ *   3. Every string in `bashArgs` is NUL-byte-rejected here — NUL is
+ *      the only byte process-spawn cannot survive cleanly.
+ *   4. The bash binary path is hard-coded; `shell: false` is implicit
+ *      because spawn() does not enable a shell when given an explicit
+ *      argv array.
+ *
+ * The lgtm suppression below is justified by this contract; it mirrors
+ * the established pattern in src/lib/runner.ts where the same rule is
+ * suppressed for argv arrays passed through `bash -c`.
+ */
+function spawnBash(
+  script: string,
+  opts: RunOptions,
+  bashArgs: readonly string[] = [],
+): Promise<CmdResult> {
+  const safeArgs = bashArgs.map((arg, idx) =>
+    rejectNulByte(String(arg), `spawnBash: bashArgs[${idx + 1}]`),
+  );
+  return new Promise((resolve) => {
+    const startedAt = Date.now();
+    let stdout = "";
+    let stderr = "";
+    // "e2e-probe-spawn" fills $0 (bash -c takes it from the first
+    // argument after the script), so safeArgs land on $1..$N. The
+    // spawn safety contract is documented on spawnBash. The lgtm
+    // marker MUST stay on the line immediately before spawn().
+    // lgtm[js/shell-command-injection-from-environment]
+    const child = spawn("bash", ["-c", script, "e2e-probe-spawn", ...safeArgs], {
+      env: opts.env ?? process.env,
+      cwd: opts.cwd,
+      stdio: [opts.stdin === undefined ? "ignore" : "pipe", "pipe", "pipe"],
+    });
+    const onTimeout = setTimeout(() => {
+      try {
+        child.kill("SIGTERM");
+      } catch {
+        /* already gone */
+      }
+    }, opts.timeoutMs);
+    child.stdout?.on("data", (chunk: Buffer) => {
+      stdout = tail(stdout + chunk.toString("utf8"));
+    });
+    child.stderr?.on("data", (chunk: Buffer) => {
+      stderr = tail(stderr + chunk.toString("utf8"));
+    });
+    // If the child exits before reading its stdin, the write below
+    // fails with EPIPE. Without a listener that stream error becomes
+    // an uncaught exception, so record it in stderr instead.
+    child.stdin?.on("error", (err) => { stderr = tail(stderr + `stdin error: ${err.message}`); });
+    if (opts.stdin !== undefined) child.stdin?.end(opts.stdin);
+    child.on("error", (err) => {
+      clearTimeout(onTimeout);
+      resolve({
+        exitCode: 127,
+        signal: null,
+        stdout,
+        stderr: tail(stderr + `spawn error: ${err.message}`),
+        elapsedMs: Date.now() - startedAt,
+      });
+    });
+    child.on("close", (code, sig) => {
+      clearTimeout(onTimeout);
+      resolve({
+        exitCode: code,
+        signal: sig,
+        stdout,
+        stderr,
+        elapsedMs: Date.now() - startedAt,
+      });
+    });
+  });
+}
+
+/**
+ * Run a command inside the scenario's sandbox via the canonical
+ * `e2e_sandbox_exec` shell wrapper. Picks up the same ssh-config
+ * preferred / openshell-exec fallback transport, the per-call
+ * timeout, and the classified diagnostic on hang.
+ *
+ * `args` is treated as a single argv vector by the wrapper. Each
+ * element is passed as a positional bash parameter (not
+ * interpolated into the script body) so payloads with shell
+ * metacharacters survive intact and no user-controlled data flows
+ * into the shell command string.
+ */
+export async function runSandboxCmd(
+  ctx: ProbeContext,
+  args: readonly string[],
+  opts: { timeoutMs?: number; perCallSeconds?: number; stdin?: string } = {},
+): Promise<CmdResult> {
+  if (!ctx.sandboxName) {
+    return {
+      exitCode: 1,
+      signal: null,
+      stdout: "",
+      stderr: "runSandboxCmd: ProbeContext.sandboxName is null (E2E_SANDBOX_NAME unset in context.env)",
+      elapsedMs: 0,
+    };
+  }
+  const wrapperPath = path.resolve(ctx.repoRoot, VALIDATION_SUITES_REL, "sandbox-exec.sh");
+  if (!fs.existsSync(wrapperPath)) {
+    return {
+      exitCode: 1,
+      signal: null,
+      stdout: "",
+      stderr: `runSandboxCmd: wrapper not found at ${wrapperPath}`,
+      elapsedMs: 0,
+    };
+  }
+  const fnName = opts.stdin === undefined ? "e2e_sandbox_exec" : "e2e_sandbox_exec_stdin";
+  // Per-call wrapper cap (bash-side timeout). The node-side cap
+  // defaults to 5s above it, so the wrapper's timeout normally fires
+  // first and node's SIGTERM is only the backstop if bash hangs.
+  const perCall = opts.perCallSeconds ?? 25;
+  const outerMs = opts.timeoutMs ?? perCall * 1000 + 5_000;
+  // All string values (wrapper path from ctx.repoRoot, sandbox
+  // name, payload argv) are passed as positional bash parameters
+  // rather than interpolated into the script body.
+  // Layout: $1=wrapperPath, $2=fnName, $3=sandboxName, $4..$N=argv.
+  // The only interpolated value is the numeric `perCall`. This is
+  // the contract behind clearing CodeQL alert 715 ("shell command
+  // built from environment values").
+  const script = `set -uo pipefail
+. "$1"
+E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=${perCall} "$2" "$3" -- "\${@:4}"
+`;
+  return spawnBash(
+    script,
+    {
+      timeoutMs: outerMs,
+      stdin: opts.stdin,
+      env: { ...process.env, E2E_CONTEXT_DIR: ctx.contextDir },
+      cwd: ctx.repoRoot,
+    },
+    [wrapperPath, fnName, ctx.sandboxName, ...args],
+  );
+}
+
+/**
+ * Spawn a host-side CLI directly. Use for `nemoclaw` / `openshell`
+ * commands that operate against the host, not inside the sandbox
+ * (e.g. `nemoclaw <sb> shields status`, `openshell policy get`).
+ */
+export function runHostCmd(
+  bin: string,
+  args: readonly string[],
+  opts: { timeoutMs?: number; cwd?: string; env?: NodeJS.ProcessEnv } = {},
+): Promise<CmdResult> {
+  // Resolves on the child's 'error' or 'close' event with its exit
+  // code, terminating signal, bounded stdout/stderr tails and the
+  // elapsed wall time. `timeoutMs` defaults to 30s and `env` to
+  // process.env; `cwd` is handed to spawn unchanged.
+  return new Promise<CmdResult>((resolve) => {
+    const t0 = Date.now();
+    const captured = { stdout: "", stderr: "" };
+    const proc = spawn(bin, [...args], {
+      env: opts.env ?? process.env,
+      cwd: opts.cwd,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    // SIGTERM the child once the cap elapses; a kill() that throws
+    // because the process is already gone is ignored.
+    const killTimer = setTimeout(() => {
+      try {
+        proc.kill("SIGTERM");
+      } catch {
+        /* already gone */
+      }
+    }, opts.timeoutMs ?? 30_000);
+    // Each stream keeps only the trailing window of its output,
+    // decoded as UTF-8 chunk by chunk.
+    for (const name of ["stdout", "stderr"] as const) {
+      proc[name]?.on("data", (chunk: Buffer) => {
+        captured[name] = tail(captured[name] + chunk.toString("utf8"));
+      });
+    }
+    // Shared exit path for both terminal events.
+    const settle = (exitCode: number | null, signal: NodeJS.Signals | null, note = ""): void => {
+      clearTimeout(killTimer);
+      resolve({
+        exitCode,
+        signal,
+        stdout: captured.stdout,
+        stderr: note ? tail(captured.stderr + note) : captured.stderr,
+        elapsedMs: Date.now() - t0,
+      });
+    };
+    // A spawn failure is reported as exit 127 with the error text
+    // appended to stderr.
+    proc.on("error", (err) => settle(127, null, `spawn error: ${err.message}`));
+    proc.on("close", (code, sig) => settle(code, sig));
+  });
+}
+
+/**
+ * Best-effort write of structured probe evidence. Every built-in
+ * probe writes its structured outcome to ProbeContext.evidencePath
+ * via this helper so the artifact bundle has a uniform JSON layout.
+ */
+export function writeProbeEvidence(evidencePath: string, payload: unknown): void {
+  // Best-effort: mkdir -p the parent, write 2-space JSON, swallow errors.
+  try {
+    const parentDir = path.dirname(evidencePath);
+    fs.mkdirSync(parentDir, { recursive: true });
+    fs.writeFileSync(evidencePath, JSON.stringify(payload, null, 2));
+  } catch { /* evidence is best-effort; never fail the probe on IO */ }
+}
diff --git a/test/e2e-scenario/scenarios/run.ts b/test/e2e-scenario/scenarios/run.ts
index b50e7836ea..ff9fb056c4 100644
--- a/test/e2e-scenario/scenarios/run.ts
+++ b/test/e2e-scenario/scenarios/run.ts
@@ -8,14 +8,12 @@ import { compileRunPlans, renderPlanText, writePlanArtifacts } from "./compiler.
 import { ScenarioRunner } from "./orchestrators/runner.ts";
 import { listScenarios } from "./registry.ts";
 import { resolveRunnerForScenario } from "./runner-routing.ts";
-import type { ScenarioDefinition } from "./types.ts";
+import type { PhaseResult, ScenarioDefinition } from "./types.ts";
 
 interface Args {
   list: boolean;
-  planOnly: boolean;
-  dryRun: boolean;
-  validateOnly: boolean;
   emitMatrix: boolean;
+  planOnly: boolean;
   scenarios: string[];
 }
 
@@ -34,14 +32,7 @@ export interface ScenarioMatrixEntry {
 }
 
 function parseArgs(argv: string[]): Args {
-  const args: Args = {
-    list: false,
-    planOnly: false,
-    dryRun: false,
-    validateOnly: false,
-    emitMatrix: false,
-    scenarios: [],
-  };
+  const args: Args = { list: false, emitMatrix: false, planOnly: false, scenarios: [] };
   for (let i = 0; i < argv.length; i += 1) {
     const arg = argv[i];
     if (arg === "--list") {
@@ -56,14 +47,6 @@ function parseArgs(argv: string[]): Args {
       args.planOnly = true;
       continue;
     }
-    if (arg === "--dry-run") {
-      args.dryRun = true;
-      continue;
-    }
-    if (arg === "--validate-only") {
-      args.validateOnly = true;
-      continue;
-    }
     if (arg === "--scenarios") {
       const value = argv[i + 1];
       if (!value) {
@@ -122,6 +105,7 @@ function emitMatrix() {
   // Single line so GHA's `$GITHUB_OUTPUT` can consume it via
   //   echo "matrix=$(npx tsx ... --emit-matrix)" >> "$GITHUB_OUTPUT"
   // without needing heredoc multi-line output handling.
+  // Consumed by the dynamic matrix workflow (PR #4359).
   process.stdout.write(`${JSON.stringify(buildScenarioMatrix())}\n`);
 }
 
@@ -136,10 +120,6 @@ async function main() {
     return;
   }
 
-  const modeCount = [args.planOnly, args.dryRun, args.validateOnly].filter(Boolean).length;
-  if (modeCount !== 1) {
-    throw new Error("Use exactly one of --plan-only, --dry-run, or --validate-only with --scenarios <id[,id...]>");
-  }
   if (args.scenarios.length === 0) {
     throw new Error("scenario execution requires --scenarios <id[,id...]>");
   }
@@ -153,12 +133,73 @@ async function main() {
   writePlanArtifacts(plans, contextDir);
   console.log(renderPlanText(plans));
 
-  if (args.dryRun) {
-    const runner = new ScenarioRunner();
-    for (const plan of plans) {
-      await runner.run({ contextDir, dryRun: true }, plan);
+  if (args.planOnly) {
+    // Local debug only. Workflows must not pass --plan-only.
+    return;
+  }
+
+  const runner = new ScenarioRunner();
+  const allResults: PhaseResult[] = [];
+  let anyFailed = false;
+  for (const plan of plans) {
+    const results = await runner.run({ contextDir }, plan);
+    allResults.push(...results);
+    if (planFailed(plan, results)) {
+      anyFailed = true;
     }
   }
+
+  // Surface a compact run summary so phase results don't have to be opened
+  // to see what passed.
+  console.log("");
+  console.log("Phase results:");
+  for (const result of allResults) {
+    const counts = result.assertions.reduce(
+      (acc, assertion) => {
+        acc[assertion.status] = (acc[assertion.status] ?? 0) + 1;
+        return acc;
+      },
+      {} as Record<string, number>,
+    );
+    const detail = Object.entries(counts)
+      .map(([status, count]) => `${status}=${count}`)
+      .join(" ");
+    console.log(`  ${result.phase}: ${result.status} (${detail || "no steps"})`);
+  }
+
+  if (anyFailed) {
+    process.exitCode = 1;
+  }
+}
+
+// A scenario fails iff:
+//   positive (no expectedFailure): any phase result failed.
+//   negative (expectedFailure declared): the synthetic
+//     negative-contract phase did not match, OR the runtime
+//     control group's required side-effect step did not pass.
+//
+// The matcher decides exit code for negatives so that a scenario
+// that failed for the right reason in the right phase is no longer
+// reported as red just because setup did not complete. Until the
+// forbidden-side-effect probe lands, the required pending step in
+// runtimeControlGroups keeps negatives visibly red on the side-effect
+// axis even when phase + errorClass match.
+function planFailed(plan: import("./types.ts").RunPlan, results: PhaseResult[]): boolean {
+  const phaseNamed = (name: string) => results.find((entry) => entry.phase === name);
+  if (!plan.expectedFailure) {
+    // Positive scenario: a single failed phase fails the plan.
+    return results.some((entry) => entry.status === "failed");
+  }
+  // Negative scenario, gate 1: the synthetic contract phase must
+  // be present and passed.
+  if (phaseNamed("negative-contract")?.status !== "passed") {
+    return true;
+  }
+  // Gate 2: the runtime side-effect step must be present and passed.
+  const sideEffectStep = phaseNamed("runtime")?.assertions.find(
+    (step) => step.id === "runtime.expected-failure.no-side-effects",
+  );
+  return sideEffectStep?.status !== "passed";
 }
 
 // Only execute when invoked directly as a script. Importing this module from
diff --git a/test/e2e-scenario/scenarios/scenarios/baseline.ts b/test/e2e-scenario/scenarios/scenarios/baseline.ts
index 098209017a..cd19c43fe0 100644
--- a/test/e2e-scenario/scenarios/scenarios/baseline.ts
+++ b/test/e2e-scenario/scenarios/scenarios/baseline.ts
@@ -8,10 +8,11 @@ import {
   gpuRepoDockerCdi,
   macosRepoDocker,
   ubuntuRepoDocker,
+  ubuntuRepoDockerLifecycle,
   ubuntuRepoNoDocker,
   wslRepoDocker,
 } from "../matrix.ts";
-import type { ScenarioDefinition, ScenarioEnvironment } from "../types.ts";
+import type { ExpectedFailureContract, ScenarioDefinition, ScenarioEnvironment } from "../types.ts";
 
 interface CanonicalScenarioInput {
   id: string;
@@ -24,7 +25,7 @@ interface CanonicalScenarioInput {
   runnerRequirements?: string[];
   requiredSecrets?: string[];
   skippedCapabilities?: Array<Record<string, unknown>>;
-  expectedFailure?: Record<string, unknown>;
+  expectedFailure?: ExpectedFailureContract;
 }
 
 function canonicalScenario(input: CanonicalScenarioInput): ScenarioDefinition {
@@ -130,6 +131,23 @@ const canonicalScenarioInputs: CanonicalScenarioInput[] = [
       forbiddenSideEffects: ["gateway-started", "sandbox-created"],
     },
   },
+  {
+    // Rebuild scenario. Onboards an OpenClaw sandbox normally, then
+    // the lifecycle phase seeds a workspace marker, runs
+    // `nemoclaw rebuild --yes`, and publishes the marker contract to
+    // runtime-phase assertions in rebuild_upgrade.sh. Mirrors the
+    // workspace-state-preservation invariant from
+    // test/e2e/test-rebuild-openclaw.sh; the broader version-upgrade
+    // dimension (build OLD-version base image first) belongs to a
+    // future `rebuild-from-old-version` lifecycle profile and is
+    // intentionally out of scope here.
+    id: "ubuntu-rebuild-openclaw",
+    manifestName: "openclaw-nvidia-rebuild",
+    environment: ubuntuRepoDockerLifecycle("cloud-openclaw", "rebuild-current-version"),
+    expectedStateId: "cloud-openclaw-ready",
+    suiteIds: ["smoke", "rebuild", "upgrade"],
+    requiredSecrets: ["NVIDIA_API_KEY"],
+  },
   {
     id: "ubuntu-repo-openai-compatible-openclaw",
     manifestName: "openclaw-openai-compatible",
diff --git a/test/e2e-scenario/scenarios/types.ts b/test/e2e-scenario/scenarios/types.ts
index b29f8458d6..157ffa0ae6 100644
--- a/test/e2e-scenario/scenarios/types.ts
+++ b/test/e2e-scenario/scenarios/types.ts
@@ -1,7 +1,85 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-export type PhaseName = "environment" | "onboarding" | "runtime";
+export type PhaseName =
+  | "environment"
+  | "onboarding"
+  | "state-validation"
+  | "lifecycle"
+  | "runtime";
+
+// Synthetic phase appended by the scenario runner when a scenario
+// declares plan.expectedFailure. Distinct from PhaseName so a scenario
+// builder cannot accidentally declare an assertion or action against
+// it. Only the runner emits PhaseResult entries with this name.
+export type NegativeContractPhase = "negative-contract";
+
+export type PhaseResultName = PhaseName | NegativeContractPhase;
+
+// Concrete probe ids the state-validation orchestrator emits as phase
+// actions. Each id maps to a probe script under
+// nemoclaw_scenarios/probes/. The inference and credentials dimensions
+// of ExpectedState have no probe id here yet; the compiler skips
+// emitting actions for them until the probe scripts land.
+export type StateProbeId =
+  | "cli-installed"
+  | "gateway-healthy"
+  | "gateway-absent"
+  | "sandbox-running"
+  | "sandbox-absent";
+
+// User-facing phase the negative-scenario contract advertises. Not a
+// subset of PhaseName: manifests may declare "preflight" failures,
+// which the matcher resolves to the onboarding phase orchestrator.
+// state-validation is intentionally omitted: it is an internal phase
+// the framework inserts after onboarding; scenarios cannot declare
+// expected failures against it (those are expressed via expectedStateId
+// + the absent/forbidden-side-effect probes). lifecycle is omitted too.
+export type ExpectedFailurePhase = "environment" | "onboarding" | "runtime" | "preflight";
+
+export interface ExpectedFailureContract {
+  phase: ExpectedFailurePhase;
+  errorClass: string;
+  forbiddenSideEffects?: readonly string[];
+}
+
+// Expected-state contract. Mirrors the structural shape of
+// nemoclaw_scenarios/expected-states.yaml so the typed registry can
+// remain a verifiable mirror of the legacy YAML during transition.
+// Each dimension's `expected` field declares whether that aspect of
+// the post-setup environment should be present, absent, or optional.
+// Optional dimensions emit no probe actions; present/absent dimensions
+// emit a real probe that gates the runtime phase.
+//
+// Spec ownership: the typed registry (scenarios/expected-states.ts) is
+// the source of truth for the TS runner; expected-states.yaml stays
+// alongside until the legacy resolver is fully retired, with a contract
+// test that the typed registry mirrors the YAML.
+export type ExpectedPresence = "present" | "absent" | "optional";
+export type ExpectedHealth = "healthy" | "absent" | "optional";
+export type ExpectedSandboxStatus = "running" | "absent" | "optional";
+export type ExpectedInferenceAvail = "available" | "absent" | "optional";
+
+export interface ExpectedState {
+  id: string;
+  cli?: { installed?: boolean };
+  gateway?: {
+    expected: ExpectedPresence;
+    health?: ExpectedHealth;
+  };
+  sandbox?: {
+    expected: ExpectedPresence;
+    status?: ExpectedSandboxStatus;
+    agent?: string;
+  };
+  inference?: {
+    expected: ExpectedInferenceAvail;
+    provider?: string;
+  };
+  credentials?: {
+    expected: ExpectedPresence;
+  };
+}
 
 export type TransientClassifier =
   | "empty-event-capture"
@@ -66,6 +144,21 @@ export interface AssertionStep {
   };
   evidencePath?: string;
   reliability?: AssertionStepReliability;
+  // Declared parent-env keys this step requires beyond the framework's
+  // allowlist. Anything not allowlisted and not declared here is
+  // dropped before spawn. See orchestrators/redaction.ts. Each entry
+  // must match the secret-key shape; the framework rejects non-secret
+  // names to keep the allowlist-vs-declared-secret boundary honest.
+  secretEnv?: readonly string[];
+  // When true, a probe/pending step that resolves as "skipped" is
+  // reclassified as "failed" by the phase orchestrator. Required
+  // steps fail closed when their underlying implementation isn't
+  // available yet (probe registry not landed, expected-failure
+  // side-effect validator not implemented, ...) instead of silently
+  // producing a false green. Defaults to false; set true for
+  // security-sensitive suites and expected-failure validators that
+  // the run is not safe without.
+  required?: boolean;
 }
 
 export interface AssertionGroup {
@@ -83,6 +176,15 @@ export interface ScenarioEnvironment {
   install: string;
   runtime: string;
   onboarding: string;
+  // Optional lifecycle profile id. When set, the compiler emits a
+  // dedicated `lifecycle` phase action between state-validation and
+  // runtime. The action is implemented by a worker under
+  // nemoclaw_scenarios/lifecycle/, dispatched by
+  // nemoclaw_scenarios/lifecycle/dispatch.sh, and routes by profile
+  // id (e.g. "rebuild-current-version"). Scenarios that don't need a
+  // post-onboard state mutation simply omit this field; their
+  // lifecycle phase emits no actions and runs no assertions.
+  lifecycle?: string;
 }
 
 export interface ScenarioDefinition {
@@ -97,12 +199,56 @@ export interface ScenarioDefinition {
   runnerRequirements?: string[];
   requiredSecrets?: string[];
   skippedCapabilities?: Array<Record<string, unknown>>;
-  expectedFailure?: Record<string, unknown>;
+  expectedFailure?: ExpectedFailureContract;
+}
+
+// A phase action is real, deterministic setup work the phase orchestrator
+// performs BEFORE running its assertions: install nemoclaw, run
+// onboarding, emit context.env, etc. Actions short-circuit assertions on
+// failure (assertions don't run if the action they depend on failed).
+//
+// Spec ownership: phase orchestrators own actions. The top-level runner
+// must not execute actions; clients must not embed action policy.
+export interface PhaseAction {
+  id: string;
+  phase: PhaseName;
+  description?: string;
+  // "shell-fn" sources the bash dispatcher and invokes the named function.
+  // "shell"    runs an executable script (used for context-emit helper).
+  kind: "shell-fn" | "shell";
+  // Repo-relative path to the script.
+  scriptRef: string;
+  // For "shell-fn": the bash function to invoke after sourcing scriptRef.
+  fn?: string;
+  // Single positional arg passed to the function/script (install method or
+  // onboarding profile id today). Kept as a single string to keep stable
+  // ids predictable; multi-arg variants can extend this later.
+  arg?: string;
+  // Per-action timeout. No retry by default - install/onboard must fail
+  // loudly so the regression is visible. Retry stays a property of
+  // assertion steps, not actions.
+  timeoutSeconds?: number;
+  // Repo-relative evidence log path.
+  evidencePath?: string;
+  // Optional stable alias the orchestrator copies the evidence log to
+  // after a successful action. Lets legacy shell assertions that
+  // reference well-known filenames (e.g. ${E2E_CONTEXT_DIR}/onboard.log)
+  // keep working without coupling them to the action's stable id.
+  aliasPath?: string;
+  // Declared parent-env keys this action requires beyond the
+  // framework's allowlist (PATH, HOME, E2E_*, NEMOCLAW_*, ...).
+  // Anything not allowlisted and not declared here is dropped before
+  // spawn. See orchestrators/redaction.ts. Each entry must match the
+  // secret-key shape; the framework rejects non-secret names so the
+  // allowlist-vs-declared-secret boundary stays honest. Cloud install
+  // declares ["NVIDIA_API_KEY"]; slack onboarding declares the slack
+  // tokens it actually needs; etc.
+  secretEnv?: readonly string[];
 }
 
 export interface RunPlanPhase {
   name: PhaseName;
-  actions: string[];
+  actions: PhaseAction[];
   assertionGroups: AssertionGroup[];
 }
 
@@ -120,13 +266,12 @@ export interface RunPlan {
   runnerRequirements: string[];
   requiredSecrets: string[];
   skippedCapabilities: Array<Record<string, unknown>>;
-  expectedFailure?: Record<string, unknown>;
+  expectedFailure?: ExpectedFailureContract;
   sutBoundaries: SutBoundary[];
 }
 
 export interface RunContext {
   contextDir: string;
-  dryRun: boolean;
 }
 
 export interface AssertionResult {
@@ -139,8 +284,20 @@ export interface AssertionResult {
   message?: string;
 }
 
+export interface PhaseActionResult {
+  id: string;
+  status: "passed" | "failed" | "skipped";
+  durationMs: number;
+  evidence?: string;
+  message?: string;
+}
+
 export interface PhaseResult {
-  phase: PhaseName;
+  phase: PhaseResultName;
   status: "passed" | "failed" | "skipped";
+  // Action results are recorded distinctly from assertion results so
+  // failure-layer attribution stays unambiguous: a failure in actions
+  // means setup never completed; assertions did not have a fair chance.
+  actions: PhaseActionResult[];
   assertions: AssertionResult[];
 }
diff --git a/test/e2e-scenario/validation_suites/assert/gateway-alive.sh b/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
index a498602d35..42f33e1c50 100755
--- a/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
+++ b/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
@@ -9,6 +9,8 @@ _E2E_GW_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../runtime/lib" && pwd)
 . "${_E2E_GW_LIB_DIR}/env.sh"
 # shellcheck source=../../runtime/lib/context.sh
 . "${_E2E_GW_LIB_DIR}/context.sh"
+# shellcheck source=../sandbox-exec.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/sandbox-exec.sh"
 
 # e2e_gateway_assert_healthy [url]
 # Defaults to E2E_GATEWAY_URL from context; returns non-zero with a clear
@@ -23,10 +25,6 @@ e2e_gateway_assert_healthy() {
     return 2
   fi
   e2e_env_trace "gateway:check" "${url}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] gateway check ${url} (skipped)"
-    return 0
-  fi
   # Prefer /health if available, otherwise just hit the base URL.
   local http_code
   http_code="$(curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 "${url%/}/health" 2>/dev/null || echo 000)"
@@ -41,7 +39,9 @@ e2e_gateway_assert_healthy() {
     local sandbox_name
     sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
     if [[ -n "${sandbox_name}" ]] && command -v openshell >/dev/null 2>&1; then
-      http_code="$(openshell sandbox exec -n "${sandbox_name}" -- curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 http://localhost:18789/health 2>/dev/null || echo 000)"
+      # Wrapper applies a per-call timeout so a wedged ssh handshake here
+      # cannot consume the orchestrator's whole step budget.
+      http_code="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=15 e2e_sandbox_exec "${sandbox_name}" -- curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 http://localhost:18789/health 2>/dev/null || echo 000)"
       if [[ "${http_code}" == "200" || "${http_code}" == "401" ]]; then
         return 0
       fi
diff --git a/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh b/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
index b85ef9cd60..473061e972 100755
--- a/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
+++ b/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
@@ -12,7 +12,7 @@ _E2E_SB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../runtime/lib" && pwd)
 
 # e2e_sandbox_assert_running
 # Requires E2E_SANDBOX_NAME in context. Real implementation queries
-# `nemoclaw list`; honors E2E_DRY_RUN.
+# `nemoclaw list`.
 e2e_sandbox_assert_running() {
   if ! e2e_context_require E2E_SANDBOX_NAME; then
     return 1
@@ -20,10 +20,6 @@ e2e_sandbox_assert_running() {
   local name
   name="$(e2e_context_get E2E_SANDBOX_NAME)"
   e2e_env_trace "sandbox:check" "${name}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] sandbox check ${name} (skipped)"
-    return 0
-  fi
   if ! command -v nemoclaw >/dev/null 2>&1; then
     echo "e2e_sandbox_assert_running: nemoclaw CLI not on PATH" >&2
     return 1
diff --git a/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh b/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
index 0fff0fd9ab..4b8161aea4 100755
--- a/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
+++ b/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
@@ -16,10 +16,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../runtime/lib" && pwd)"
 
 echo "hermes-specific:hermes-health"
 e2e_context_require E2E_AGENT
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run Hermes health checks"
-  exit 0
-fi
 agent="$(e2e_context_get E2E_AGENT)"
 if [[ "${agent}" != "hermes" ]]; then
   echo "hermes-specific: E2E_AGENT should be 'hermes', got '${agent}'" >&2
diff --git a/test/e2e-scenario/validation_suites/hermes/01-history-writable.sh b/test/e2e-scenario/validation_suites/hermes/01-history-writable.sh
index 953263d50a..64746aa31c 100755
--- a/test/e2e-scenario/validation_suites/hermes/01-history-writable.sh
+++ b/test/e2e-scenario/validation_suites/hermes/01-history-writable.sh
@@ -31,10 +31,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../runtime/lib" && pwd)"
 
 echo "hermes-specific:history-writable"
 e2e_context_require E2E_AGENT E2E_SANDBOX_NAME
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would probe /sandbox/.hermes/.hermes_history writability under shields up/down"
-  exit 0
-fi
 
 agent="$(e2e_context_get E2E_AGENT)"
 if [[ "${agent}" != "hermes" ]]; then
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh b/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
index 64e1b086fc..8277f05f38 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
@@ -13,17 +13,16 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:models-health"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would GET inference.local/v1/models from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
-body="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 30 "https://inference.local/v1/models")"
+# Orchestrator step cap is 30s; wrapper default 25s applies. Inner curl
+# --max-time keeps a hung HTTP read from consuming the whole budget.
+body="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 20 "https://inference.local/v1/models")"
 if [[ -z "${body}" ]]; then
   echo "inference:models-health: no response from models endpoint" >&2
   exit 1
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh b/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
index f54ff8806b..c76e15842d 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
@@ -12,19 +12,21 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:chat-completion"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would POST a chat completion to inference.local from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 payload='{"model":"nvidia/nemotron-3-super-120b-a12b","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}'
-response="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 60 -H 'Content-Type: application/json' \
-  -d "${payload}" "https://inference.local/v1/chat/completions")"
+# Orchestrator step cap is 60s; widen the wrapper cap to 50s so a hung
+# upstream surfaces with a clear diagnostic before SIGTERM. Inner curl
+# --max-time stays ~10s under the wrapper cap.
+# shellcheck disable=SC2034 # consumed by e2e_sandbox_exec via env
+E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 \
+  response="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 40 -H 'Content-Type: application/json' \
+    -d "${payload}" "https://inference.local/v1/chat/completions")"
 # CodeRabbit review item #12: substring expansion instead of `| head`
 # avoids SIGPIPE-driven false failures under `set -o pipefail`.
 printf '%s\n' "${response:0:1024}"
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh b/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
index 6d1343a736..e00b83f75e 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
@@ -13,18 +13,37 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:sandbox-inference-local"
 e2e_context_require E2E_SANDBOX_NAME E2E_INFERENCE_ROUTE
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would resolve inference-local from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 route="$(e2e_context_get E2E_INFERENCE_ROUTE)"
+
+# Map the route slug recorded in context.env (e.g. "inference-local")
+# to the actual DNS hostname used by the OpenShell DNS+proxy inside
+# the sandbox. The legacy test/e2e/ tests (test-cloud-inference-e2e.sh,
+# test-bedrock-runtime-compatible-anthropic.sh, test-full-e2e.sh, ...)
+# all hit the literal `inference.local` hostname — the sandbox-side
+# resolver only knows that name. Interpolating the slug directly
+# (`https://inference-local/...`) yields a different, non-existent DNS
+# name and the gateway returns 403 because no policy widens egress
+# for it.
+host=""
+case "${route}" in
+  inference-local) host="inference.local" ;;
+  *)
+    echo "inference:sandbox-inference-local: unsupported E2E_INFERENCE_ROUTE '${route}'; add a slug→hostname mapping here" >&2
+    exit 2
+    ;;
+esac
+
+# Orchestrator step cap is 45s; widen wrapper cap to 35s.
 # CodeRabbit review item #13: capture then truncate to avoid `| head` racing
 # curl under `pipefail` and flagging a successful request as failed.
-body="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 10 "https://${route}/v1/models")"
+# shellcheck disable=SC2034 # consumed by e2e_sandbox_exec via env
+E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=35 \
+  body="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 25 "https://${host}/v1/models")"
 printf '%s\n' "${body:0:512}"
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh b/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
index 77d4772c17..d172615795 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
@@ -12,18 +12,16 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "ollama-proxy:proxy-reachable"
 e2e_context_require E2E_SANDBOX_NAME
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would verify the Ollama auth proxy is reachable from the sandbox"
-  exit 0
-fi
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 # The Ollama auth proxy intentionally rejects unauthenticated requests to
 # /api/tags (legacy test-gpu-e2e.sh accepts 401/403 as proof the proxy is
 # live and enforcing auth). Do not use curl -f here.
-status="$(openshell sandbox exec --name "${name}" -- curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "http://inference-local/api/tags" 2>/dev/null || echo 000)"
+status="$(e2e_sandbox_exec "${name}" -- curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "http://inference-local/api/tags" 2>/dev/null || echo 000)"
 case "${status}" in
   200 | 401 | 403)
     echo "ollama-proxy:proxy-reachable status=${status}"
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh b/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
index 47e9f1fd43..d61ead2e98 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
@@ -15,10 +15,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 
 echo "local-ollama-inference:ollama-models-health"
 e2e_context_require E2E_PROVIDER
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would GET ollama /api/tags via host Ollama"
-  exit 0
-fi
 # GPU Ollama scenarios mirror legacy test-gpu-e2e.sh: validate the host
 # Ollama daemon directly because Docker GPU host networking bypasses the
 # normal dashboard/gateway forward path.
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh b/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
index ad8ff54faa..5d18b4209a 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
@@ -15,10 +15,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 
 echo "local-ollama-inference:ollama-chat-completion"
 e2e_context_require E2E_SANDBOX_NAME
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would POST chat completion from sandbox to host-network Ollama"
-  exit 0
-fi
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 model="$(curl -fsS --max-time 10 http://127.0.0.1:11434/api/tags \
   | node -e "const fs=require('fs'); const data=JSON.parse(fs.readFileSync(0,'utf8')); process.stdout.write(data.models?.[0]?.name || data.models?.[0]?.model || 'default');")"
diff --git a/test/e2e-scenario/validation_suites/lib/inference_routing.sh b/test/e2e-scenario/validation_suites/lib/inference_routing.sh
index b4f4c1d63f..17db0bbedb 100755
--- a/test/e2e-scenario/validation_suites/lib/inference_routing.sh
+++ b/test/e2e-scenario/validation_suites/lib/inference_routing.sh
@@ -31,16 +31,6 @@ _e2e_inference_sandbox_name() {
   e2e_context_get E2E_SANDBOX_NAME
 }
 
-_e2e_inference_plan() {
-  local assertion_id="${1:-}"
-  local detail="${2:-planned inference/provider check}"
-  e2e_env_trace "inference:plan" "${assertion_id} ${detail}"
-  echo "[dry-run] ${assertion_id}: ${detail}"
-  if [[ -f "$(e2e_context_path)" ]]; then
-    e2e_context_dump | sed -E 's/(TOKEN|SECRET|API_KEY|APIKEY|CREDENTIAL|PASSWORD)([^=]*)=.*/\1\2=REDACTED/'
-  fi
-}
-
 _e2e_inference_curl_json() {
   local sandbox="$1"
   local url="$2"
@@ -64,10 +54,6 @@ e2e_inference_routing_assert_chat_completion() {
   local assertion_id="${1:-post-onboard.inference-routing.inference-local-chat-completion}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "POST https://inference.local/v1/chat/completions with bounded curl"
-    return 0
-  fi
   local sandbox payload output
   sandbox="$(_e2e_inference_sandbox_name)"
   payload='{"model":"default","messages":[{"role":"user","content":"Say ok"}],"max_tokens":8}'
@@ -84,10 +70,6 @@ e2e_inference_routing_assert_health() {
   local url="${2:-https://inference.local/v1/models}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "GET ${url} with bounded curl"
-    return 0
-  fi
   local sandbox status
   sandbox="$(_e2e_inference_sandbox_name)"
   status="$(_e2e_inference_status "${sandbox}" "${url}")"
@@ -103,10 +85,6 @@ e2e_inference_routing_assert_auth_proxy() {
   local mode="${2:-valid}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "auth-proxy ${mode} request; sensitive context redacted"
-    return 0
-  fi
   local sandbox status token
   sandbox="$(_e2e_inference_sandbox_name)"
   case "${mode}" in
diff --git a/test/e2e-scenario/validation_suites/lib/messaging_providers.sh b/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
index 77eb1f1176..01250b784f 100755
--- a/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
+++ b/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
@@ -104,10 +104,6 @@ e2e_messaging_read_config_surface() {
     return 0
   fi
   path="$(e2e_messaging_agent_config_path)"
-  if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-    printf '%s=PLACEHOLDER\n' "$(e2e_messaging_config_key)"
-    return 0
-  fi
   if [[ -f "${path}" ]]; then
     cat "${path}"
     return 0
@@ -177,9 +173,6 @@ e2e_messaging_assert_literal_payload() {
   local assertion_id="${1:?assertion id required}"
   local payload="${2:?payload required}"
   local observed="${3:-}"
-  if [[ -z "${observed}" && -n "${E2E_DRY_RUN:-}" ]]; then
-    observed="${payload}"
-  fi
   if [[ -z "${observed}" ]]; then
     e2e_fail "${assertion_id} missing observed payload output"
   fi
diff --git a/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh b/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
index c6483c99fb..317e0974f8 100755
--- a/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
+++ b/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
@@ -10,6 +10,15 @@ _REBUILD_UPGRADE_REPO_ROOT="$(cd "${_REBUILD_UPGRADE_DIR}/../../../.." && pwd)"
 . "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/runtime/lib/context.sh"
 # shellcheck source=../../runtime/lib/logging.sh
 . "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/runtime/lib/logging.sh"
+# shellcheck source=../sandbox-exec.sh
+. "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/validation_suites/sandbox-exec.sh"
+
+# Sandbox-exec calls in this lib feed the lifecycle.rebuild/upgrade
+# orchestrator steps, which carry 120s caps. Default the per-call wrapper
+# cap to 100s so a hung 'openshell sandbox exec'/'ssh -F' surfaces as a
+# classified exit 124 well before the orchestrator's SIGTERM. Callers
+# may still override per-call.
+: "${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS:=100}"
 
 rebuild_upgrade_require_context() {
   e2e_context_require E2E_SCENARIO E2E_AGENT E2E_SANDBOX_NAME E2E_GATEWAY_URL
@@ -30,15 +39,30 @@ _rebuild_upgrade_run() {
   "$@"
 }
 
+# _rebuild_upgrade_sandbox_exec <sandbox> <cmd> [args...]
+# Runs <cmd> inside <sandbox> and returns that command's exit status.
+# Production path: the canonical `e2e_sandbox_exec` wrapper (ssh-config
+# preferred, openshell-exec fallback, per-call timeout, classified
+# diagnostic on hang). Test path: when REBUILD_UPGRADE_SANDBOX_CMD is
+# set it is word-split (hence SC2086 below) and invoked with the
+# original argv shape, `<override> -n <sandbox> -- <cmd>...`, so
+# existing fakes (e.g. `REBUILD_UPGRADE_SANDBOX_CMD=fake_sandbox`) work.
+_rebuild_upgrade_sandbox_exec() {
+  local sandbox="$1"
+  shift
+  if [[ -n "${REBUILD_UPGRADE_SANDBOX_CMD:-}" ]]; then
+    # shellcheck disable=SC2086
+    ${REBUILD_UPGRADE_SANDBOX_CMD} -n "${sandbox}" -- "$@"
+    return $?
+  fi
+  e2e_sandbox_exec "${sandbox}" -- "$@"
+}
+
 rebuild_upgrade_assert_sandbox_reachable() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.survivor_agent_reachable dry-run"
-    return 0
-  fi
   local sandbox
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
-  if _rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- true; then
+  if _rebuild_upgrade_sandbox_exec "${sandbox}" true; then
     e2e_pass "suite.upgrade.survivor_agent_reachable"
   else
     e2e_fail "suite.upgrade.survivor_agent_reachable"
@@ -47,15 +71,11 @@ rebuild_upgrade_assert_sandbox_reachable() {
 
 rebuild_upgrade_assert_marker_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.workspace_state_preserved dry-run"
-    return 0
-  fi
   local sandbox marker_path expected actual
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   marker_path="${E2E_REBUILD_MARKER_PATH:-/workspace/.nemoclaw-rebuild-marker}"
   expected="${E2E_REBUILD_MARKER_EXPECTED:-${E2E_STATE_MARKER_EXPECTED:-}}"
-  actual="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- cat "${marker_path}" 2>/dev/null || true)"
+  actual="$(_rebuild_upgrade_sandbox_exec "${sandbox}" cat "${marker_path}" 2>/dev/null || true)"
   if [[ -n "${actual}" && (-z "${expected}" || "${actual}" == "${expected}") ]]; then
     e2e_pass "suite.rebuild.workspace_state_preserved"
   else
@@ -65,16 +85,12 @@ rebuild_upgrade_assert_marker_preserved() {
 
 rebuild_upgrade_assert_agent_version_upgraded() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.agent_version_upgraded dry-run"
-    return 0
-  fi
   local sandbox old expected actual cmd
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   old="${E2E_OLD_AGENT_VERSION:-}"
   expected="${E2E_EXPECTED_AGENT_VERSION:-}"
   cmd="${E2E_AGENT_VERSION_COMMAND:-openclaw --version}"
-  actual="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "${cmd}" 2>/dev/null || true)"
+  actual="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "${cmd}" 2>/dev/null || true)"
   if [[ -n "${actual}" && (-z "${old}" || "${actual}" != *"${old}"*) && (-z "${expected}" || "${actual}" == *"${expected}"*) ]]; then
     e2e_pass "suite.rebuild.agent_version_upgraded"
   else
@@ -84,14 +100,10 @@ rebuild_upgrade_assert_agent_version_upgraded() {
 
 rebuild_upgrade_assert_inference_works() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.inference_still_works dry-run"
-    return 0
-  fi
   local sandbox cmd output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   cmd="${E2E_INFERENCE_CHECK_COMMAND:-curl -fsS http://inference.local/v1/models}"
-  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "${cmd}" 2>/dev/null || true)"
+  output="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "${cmd}" 2>/dev/null || true)"
   if [[ -n "${output}" ]]; then
     e2e_pass "suite.rebuild.inference_still_works"
   else
@@ -101,20 +113,48 @@ rebuild_upgrade_assert_inference_works() {
 
 rebuild_upgrade_assert_policy_presets_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.policy_presets_preserved dry-run"
-    return 0
-  fi
-  local presets output preset
+  local id="suite.rebuild.policy_presets_preserved"
+  local sandbox presets output preset matchers found m rc=0
+  sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   presets="${E2E_EXPECTED_POLICY_PRESETS:-npm pypi}"
-  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_NEMOCLAW_CMD nemoclaw policy status 2>/dev/null || true)"
+
+  # Mirror the legacy test/e2e/test-rebuild-openclaw.sh and
+  # test-full-e2e.sh pattern: ask the live gateway for the full policy
+  # via `openshell policy get --full <sandbox>` and grep for the preset
+  # name OR a well-known endpoint hostname for that preset. The earlier
+  # implementation called `nemoclaw policy status`, which does not
+  # exist as a CLI subcommand — the assertion always failed silently
+  # because the wrapper swallowed the missing-command stderr via
+  # `2>/dev/null || true`.
+  # stderr is merged into output, so a non-zero exit must fail here, not be matched.
+  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_OPENSHELL_CMD openshell policy get --full "${sandbox}" 2>&1)" || rc=$?
+  if [[ "${rc}" -ne 0 || -z "${output}" ]]; then
+    e2e_fail "${id} openshell policy get --full failed (rc=${rc}) or returned no output for sandbox '${sandbox}'; head: ${output:0:300}"
+    return 1
+  fi
+
   for preset in ${presets}; do
-    if [[ "${output}" != *"${preset}"* ]]; then
-      e2e_fail "suite.rebuild.policy_presets_preserved"
+    case "${preset}" in
+      npm) matchers=("npm" "registry.npmjs.org") ;;
+      pypi) matchers=("pypi" "pypi.org" "files.pythonhosted.org") ;;
+      huggingface) matchers=("huggingface" "huggingface.co") ;;
+      brew) matchers=("brew" "formulae.brew.sh") ;;
+      openclaw-pricing) matchers=("openclaw-pricing" "openrouter.ai") ;;
+      *) matchers=("${preset}") ;;
+    esac
+    found=0
+    for m in "${matchers[@]}"; do
+      if [[ "${output}" == *"${m}"* ]]; then
+        found=1
+        break
+      fi
+    done
+    if [[ "${found}" -eq 0 ]]; then
+      e2e_fail "${id} preset '${preset}' not in policy (matchers: ${matchers[*]}); head: ${output:0:300}"
       return 1
     fi
   done
-  e2e_pass "suite.rebuild.policy_presets_preserved"
+  e2e_pass "${id} presets=${presets}"
 }
 
 rebuild_upgrade_assert_hermes_config_preserved() {
@@ -123,13 +163,9 @@ rebuild_upgrade_assert_hermes_config_preserved() {
     e2e_pass "suite.rebuild.hermes_config_preserved skipped non-hermes"
     return 0
   fi
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.hermes_config_preserved dry-run"
-    return 0
-  fi
   local sandbox output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
-  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "grep -R 'platforms.discord\|DISCORD' ~/.hermes . 2>/dev/null" || true)"
+  output="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "grep -R 'platforms.discord\|DISCORD' ~/.hermes . 2>/dev/null" || true)"
   if [[ "${output}" == *"discord"* || "${output}" == *"DISCORD"* ]]; then
     e2e_pass "suite.rebuild.hermes_config_preserved"
   else
@@ -139,10 +175,6 @@ rebuild_upgrade_assert_hermes_config_preserved() {
 
 rebuild_upgrade_assert_sandbox_registry_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.sandbox_registry_preserved dry-run"
-    return 0
-  fi
   local sandbox output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   output="$(_rebuild_upgrade_run REBUILD_UPGRADE_NEMOCLAW_CMD nemoclaw list 2>/dev/null || true)"
@@ -155,10 +187,6 @@ rebuild_upgrade_assert_sandbox_registry_preserved() {
 
 rebuild_upgrade_assert_gateway_version_upgraded() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.gateway_version_upgraded dry-run"
-    return 0
-  fi
   local expected output
   expected="${E2E_EXPECTED_OPENSHELL_VERSION:-}"
   output="$(_rebuild_upgrade_run REBUILD_UPGRADE_GATEWAY_CMD curl -fsS "$(_rebuild_upgrade_ctx E2E_GATEWAY_URL)/version" 2>/dev/null || true)"
diff --git a/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh b/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
index df942487e7..fa33a4230e 100755
--- a/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
+++ b/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
@@ -5,6 +5,8 @@
 _sandbox_lifecycle_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # shellcheck source=../../runtime/lib/context.sh
 . "${_sandbox_lifecycle_dir}/../../runtime/lib/context.sh"
+# shellcheck source=../sandbox-exec.sh
+. "${_sandbox_lifecycle_dir}/../sandbox-exec.sh"
 
 SANDBOX_LIFECYCLE_LAST_OUTPUT=""
 
@@ -37,11 +39,6 @@ sandbox_lifecycle_run_with_timeout() {
   local seconds="$1"
   shift
   SANDBOX_LIFECYCLE_LAST_OUTPUT=""
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    SANDBOX_LIFECYCLE_LAST_OUTPUT="dry-run: $*"
-    printf '%s\n' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
-    return 0
-  fi
   if command -v timeout >/dev/null 2>&1; then
     SANDBOX_LIFECYCLE_LAST_OUTPUT="$(timeout "${seconds}" "$@" 2>&1)" || {
       local rc=$?
@@ -58,13 +55,47 @@ sandbox_lifecycle_run_with_timeout() {
   printf '%s\n' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
 }
 
+# _sandbox_lifecycle_sandbox_exec <seconds> <cmd> [args...]
+#
+# Routes ssh-into-sandbox calls through the canonical e2e_sandbox_exec
+# wrapper (ssh-config preferred transport, openshell-exec fallback,
+# classified diagnostic on hang) instead of invoking
+# `openshell sandbox exec` directly. Behavior contract for callers:
+#   - On success: SANDBOX_LIFECYCLE_LAST_OUTPUT holds combined stdout+stderr,
+#     echoed to stdout as well (matches sandbox_lifecycle_run_with_timeout).
+#   - On failure: returns the wrapper's exit code (124 on hang, real
+#     command exit otherwise) and prints the captured output to stderr.
+#
+# Why a separate helper instead of just calling e2e_sandbox_exec at the
+# call sites: this lib's existing assert helpers all read
+# SANDBOX_LIFECYCLE_LAST_OUTPUT after the timeout helper returns. Keeping
+# that contract intact lets us migrate without rewriting every assert.
+_sandbox_lifecycle_sandbox_exec() {
+  local seconds="$1" rc=0
+  shift
+  SANDBOX_LIFECYCLE_LAST_OUTPUT=""
+  SANDBOX_LIFECYCLE_LAST_OUTPUT="$(
+    E2E_SANDBOX_EXEC_TIMEOUT_SECONDS="${seconds}" e2e_sandbox_exec "${E2E_SANDBOX_NAME}" -- "$@" 2>&1
+  )" || rc=$?
+  if ((rc == 0)); then
+    printf '%s\n' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
+    return
+  fi
+  # Failure: replay the captured output on stderr and hand back the wrapper's exit code.
+  printf '%s\n' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" >&2
+  return "${rc}"
+}
+
 sandbox_lifecycle_assert_nemoclaw_list_contains_sandbox() {
   local id="validation.sandbox_operations.sandbox_listed"
   sandbox_lifecycle_run_with_timeout 20 nemoclaw list >/dev/null || {
     sandbox_lifecycle_fail "${id}" "nemoclaw list failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"${E2E_SANDBOX_NAME}"* ]] || {
+  # Compare the sandbox name against the first field of each line exactly;
+  # a substring match would let `sb1` falsely match `sb10`.
+  awk -v n="${E2E_SANDBOX_NAME}" '$1 == n { found = 1 } END { exit !found }' \
+    <<<"${SANDBOX_LIFECYCLE_LAST_OUTPUT}" || {
     sandbox_lifecycle_fail "${id}" "sandbox not listed: ${E2E_SANDBOX_NAME}"
     return 1
   }
@@ -77,16 +108,25 @@ sandbox_lifecycle_assert_status_fields_present() {
     sandbox_lifecycle_fail "${id}" "nemoclaw status failed"
     return 1
   }
-  if [[ "${E2E_DRY_RUN:-0}" != "1" ]]; then
-    local status_output_lower
-    status_output_lower="$(printf '%s' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" | tr '[:upper:]' '[:lower:]')"
-    for field in status gateway sandbox; do
-      [[ "${status_output_lower}" == *"${field}"* ]] || {
-        sandbox_lifecycle_fail "${id}" "missing status field: ${field}"
-        return 1
-      }
-    done
+  # The real `nemoclaw <name> status` output (src/lib/actions/sandbox/status.ts)
+  # always emits a 'Sandbox: <name>' header plus structured fields like
+  # 'Model:', 'OpenShell:', 'Policies:'. The original assertion required
+  # literal 'status' and 'gateway' tokens that never appear in normal
+  # output — it only passed against the test-suite mock. Align with the
+  # production CLI: require the sandbox name and a couple of substantive
+  # field labels that are unconditionally printed.
+  local output="${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
+  if [[ "${output}" != *"${E2E_SANDBOX_NAME}"* ]]; then
+    sandbox_lifecycle_fail "${id}" "status output did not mention sandbox '${E2E_SANDBOX_NAME}'"
+    return 1
   fi
+  local field
+  for field in Sandbox Model OpenShell; do
+    [[ "${output}" == *"${field}"* ]] || {
+      sandbox_lifecycle_fail "${id}" "missing status field: ${field}"
+      return 1
+    }
+  done
   sandbox_lifecycle_pass "${id}" "status fields present"
 }
 
@@ -96,7 +136,7 @@ sandbox_lifecycle_assert_logs_available() {
     sandbox_lifecycle_fail "${id}" "nemoclaw logs failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || -n "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" ]] || {
+  [[ -n "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" ]] || {
     sandbox_lifecycle_fail "${id}" "logs empty"
     return 1
   }
@@ -105,11 +145,11 @@ sandbox_lifecycle_assert_logs_available() {
 
 sandbox_lifecycle_assert_openshell_exec_ok() {
   local id="validation.sandbox_operations.openshell_exec_ok"
-  sandbox_lifecycle_run_with_timeout 20 openshell sandbox exec -n "${E2E_SANDBOX_NAME}" -- sh -lc 'echo lifecycle-ok' >/dev/null || {
+  _sandbox_lifecycle_sandbox_exec 20 sh -lc 'echo lifecycle-ok' >/dev/null || {
     sandbox_lifecycle_fail "${id}" "openshell exec failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"lifecycle-ok"* ]] || {
+  [[ "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"lifecycle-ok"* ]] || {
     sandbox_lifecycle_fail "${id}" "unexpected exec output"
     return 1
   }
@@ -139,30 +179,36 @@ sandbox_lifecycle_assert_gateway_recovers_after_probe() {
 }
 
 sandbox_lifecycle_assert_snapshot_create_list_restore_marker() {
-  sandbox_lifecycle_run_with_timeout 30 openshell sandbox exec -n "${E2E_SANDBOX_NAME}" -- sh -lc 'echo lifecycle-marker-before-snapshot > /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
+  _sandbox_lifecycle_sandbox_exec 30 sh -lc 'echo lifecycle-marker-before-snapshot > /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.marker_written "failed to write marker"
     return 1
   }
   sandbox_lifecycle_pass validation.sandbox_snapshot.marker_written "marker written"
-  sandbox_lifecycle_run_with_timeout 30 nemoclaw snapshot create "${E2E_SANDBOX_NAME}" >/dev/null || {
+  # Argv shape: `nemoclaw <sandbox> snapshot <subcommand>`. The earlier
+  # form `nemoclaw snapshot create <sandbox>` parsed `snapshot` as a
+  # sandbox name and produced the misleading 'Unknown command: snapshot'
+  # error. Mirrors test/e2e/test-snapshot-commands.sh argv layout.
+  sandbox_lifecycle_run_with_timeout 30 nemoclaw "${E2E_SANDBOX_NAME}" snapshot create >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.create_succeeds "snapshot create failed"
     return 1
   }
   sandbox_lifecycle_pass validation.sandbox_snapshot.create_succeeds "snapshot create succeeded"
-  sandbox_lifecycle_run_with_timeout 30 openshell sandbox exec -n "${E2E_SANDBOX_NAME}" -- sh -lc 'echo lifecycle-marker-after-snapshot > /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
+  _sandbox_lifecycle_sandbox_exec 30 sh -lc 'echo lifecycle-marker-after-snapshot > /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.restore_rolls_back_marker "failed to mutate marker"
     return 1
   }
-  sandbox_lifecycle_run_with_timeout 30 nemoclaw snapshot list "${E2E_SANDBOX_NAME}" >/dev/null || {
+  sandbox_lifecycle_run_with_timeout 30 nemoclaw "${E2E_SANDBOX_NAME}" snapshot list >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.list_shows_snapshot "snapshot list failed"
     return 1
   }
   sandbox_lifecycle_pass validation.sandbox_snapshot.list_shows_snapshot "snapshot listed"
-  sandbox_lifecycle_run_with_timeout 30 nemoclaw snapshot restore "${E2E_SANDBOX_NAME}" latest >/dev/null || {
+  # `snapshot restore` with no positional arg defaults to latest;
+  # matches test/e2e/test-snapshot-commands.sh Phase 6.
+  sandbox_lifecycle_run_with_timeout 30 nemoclaw "${E2E_SANDBOX_NAME}" snapshot restore >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.restore_rolls_back_marker "snapshot restore failed"
     return 1
   }
-  sandbox_lifecycle_run_with_timeout 30 openshell sandbox exec -n "${E2E_SANDBOX_NAME}" -- sh -lc 'test -f /tmp/nemoclaw-lifecycle-marker && grep -Fxq lifecycle-marker-before-snapshot /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
+  _sandbox_lifecycle_sandbox_exec 30 sh -lc 'test -f /tmp/nemoclaw-lifecycle-marker && grep -Fxq lifecycle-marker-before-snapshot /tmp/nemoclaw-lifecycle-marker' >/dev/null || {
     sandbox_lifecycle_fail validation.sandbox_snapshot.restore_rolls_back_marker "marker did not roll back"
     return 1
   }
diff --git a/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh b/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
index 3e1872d62a..8d34a5444f 100755
--- a/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
+++ b/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
@@ -55,10 +55,6 @@ spc_assert_credentials_expected() {
     return 1
   fi
   spc_log_provider_metadata "$(spc_context_get E2E_PROVIDER)" "gateway"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would list gateway credentials without raw values"
-    return 0
-  fi
   local raw_file listed_raw listed list_rc
   raw_file="$(mktemp "${TMPDIR:-/tmp}/nemoclaw-credentials-list.XXXXXX")"
   chmod 600 "${raw_file}"
@@ -105,10 +101,6 @@ spc_assert_policy_preset_present() {
   spc_assertion_id "post-onboard.security-policy.${preset}-preset-applied"
   spc_require_context E2E_SCENARIO E2E_SANDBOX_NAME
   echo "policy preset expected: ${preset}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify policy preset ${preset}"
-    return 0
-  fi
   local sandbox_name active
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   if ! active="$(nemoclaw "${sandbox_name}" policy-list 2>&1)"; then
@@ -143,10 +135,6 @@ spc_semver_ge() {
 spc_assert_openshell_credential_rewrite_supported() {
   spc_assertion_id "post-onboard.gateway.openshell-version-supports-credential-rewrite"
   spc_require_context E2E_SCENARIO
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify OpenShell gateway capability metadata"
-    return 0
-  fi
   local openshell_bin version_output version minimum_version binary_strings feature
   minimum_version="0.0.39"
   openshell_bin="$(command -v openshell 2>/dev/null || true)"
@@ -221,10 +209,6 @@ spc_assert_shields_permissions_match_state() {
 spc_assert_shields_config_consistent() {
   spc_assertion_id "post-onboard.security-shields.config-consistent"
   spc_require_context E2E_SCENARIO E2E_SANDBOX_NAME E2E_AGENT
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify shields config consistency"
-    return 0
-  fi
   local sandbox_name status observed expected
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   if ! status="$(nemoclaw "${sandbox_name}" shields status 2>&1)"; then
@@ -262,10 +246,6 @@ spc_assert_telegram_payload_not_shell_executed() {
   if [[ -n "${fixture_payload}" ]]; then
     printf 'telegram payload fixture loaded (%s bytes)\n' "${#fixture_payload}"
   fi
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would submit payload without shell evaluation"
-    return 0
-  fi
   local sandbox_name marker payload send_output marker_state
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   marker="/tmp/nemoclaw-telegram-injection-proof-$RANDOM-$$"
diff --git a/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh b/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
index 9fc2156ad0..8ec82f8aeb 100755
--- a/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
+++ b/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
@@ -5,9 +5,4 @@
 set -euo pipefail
 . "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib/messaging_providers.sh"
 e2e_messaging_load_context
-if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-  provider="$(e2e_messaging_provider_name)"
-  e2e_pass "expected-state.messaging.${provider}.bridge-reachable dry-run"
-  exit 0
-fi
 e2e_messaging_assert_bridge_reachable
diff --git a/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh b/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
index 32cd79093d..a6c02f7f1e 100755
--- a/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
+++ b/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 set -euo pipefail
-. "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib/messaging_providers.sh"
+_SLACK_SUITES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+. "${_SLACK_SUITES_DIR}/lib/messaging_providers.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${_SLACK_SUITES_DIR}/sandbox-exec.sh"
 e2e_messaging_load_context
 provider="$(e2e_messaging_provider_name)"
 case "${provider}" in
@@ -13,25 +16,25 @@ esac
 e2e_messaging_assert_provider_attached
 agent="$(e2e_context_get E2E_AGENT)"
 if [[ "${agent}" == "openclaw" ]]; then
-  if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-    e2e_pass "expected-state.messaging.slack.openclaw-enabled dry-run"
-    e2e_pass "expected-state.messaging.slack.runtime-discovery dry-run"
-  else
-    content="$(e2e_messaging_read_config_surface)"
-    if ! printf '%s\n' "${content}" | python3 -c '
+  content="$(e2e_messaging_read_config_surface)"
+  if ! printf '%s\n' "${content}" | python3 -c '
 import json
 import sys
 cfg = json.load(sys.stdin)
 assert cfg["channels"]["slack"]["enabled"] is True
 assert cfg["plugins"]["entries"]["slack"]["enabled"] is True
 '; then
-      e2e_fail "expected-state.messaging.slack.openclaw-enabled missing channels.slack.enabled or plugins.entries.slack.enabled"
-    fi
-    e2e_pass "expected-state.messaging.slack.openclaw-enabled channel and plugin enabled"
+    e2e_fail "expected-state.messaging.slack.openclaw-enabled missing channels.slack.enabled or plugins.entries.slack.enabled"
+  fi
+  e2e_pass "expected-state.messaging.slack.openclaw-enabled channel and plugin enabled"
 
-    sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-    runtime_json="$(openshell sandbox exec --name "${sandbox_name}" -- timeout 45 openclaw channels list --all --json --no-color 2>/dev/null || true)"
-    runtime_state="$(printf '%s\n' "${runtime_json}" | python3 -c '
+  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
+  # Wrapper cap (50s) sits just above the inner `timeout 45` so the inner
+  # cap is what fires under normal upstream slowness; the wrapper only
+  # catches the case where openshell itself wedges before delivering the
+  # `timeout` invocation to the sandbox.
+  runtime_json="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 e2e_sandbox_exec "${sandbox_name}" -- timeout 45 openclaw channels list --all --json --no-color 2>/dev/null || true)"
+  runtime_state="$(printf '%s\n' "${runtime_json}" | python3 -c '
 import json
 import sys
 try:
@@ -45,11 +48,10 @@ try:
 except Exception as exc:
     print("error %s" % exc)
 ' 2>/dev/null || true)"
-    if [[ "${runtime_state}" != "yes" ]]; then
-      e2e_fail "expected-state.messaging.slack.runtime-discovery OpenClaw did not report Slack installed/configured (${runtime_state}; output=${runtime_json:0:300})"
-    fi
-    e2e_pass "expected-state.messaging.slack.runtime-discovery OpenClaw reports Slack installed and configured"
+  if [[ "${runtime_state}" != "yes" ]]; then
+    e2e_fail "expected-state.messaging.slack.runtime-discovery OpenClaw did not report Slack installed/configured (${runtime_state}; output=${runtime_json:0:300})"
   fi
+  e2e_pass "expected-state.messaging.slack.runtime-discovery OpenClaw reports Slack installed and configured"
 fi
 if [[ "${agent}" == "hermes" ]]; then
   # This scenario asserts the static enablement contract Hermes' gateway uses
@@ -61,16 +63,14 @@ if [[ "${agent}" == "hermes" ]]; then
   #      and the Bolt app reached the running state.
   #   3) SLACK_ALLOWED_CHANNELS, when configured, is present in .env so the
   #      allowlist values reach the adapter's environment.
-  if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-    e2e_pass "expected-state.messaging.slack.hermes-platforms-enabled dry-run"
-    e2e_pass "expected-state.messaging.slack.hermes-allowed-channels-scoped dry-run"
-    e2e_pass "expected-state.messaging.slack.hermes-gateway-running dry-run"
-  else
-    sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-    # The Hermes venv is the same Python that loads config.yaml at runtime, so
-    # PyYAML is guaranteed there even when the host runner ships a minimal
-    # python3. Parsing inside the sandbox removes the awk fallback path.
-    platforms_state="$(openshell sandbox exec --name "${sandbox_name}" -- /opt/hermes/.venv/bin/python -c '
+  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
+  # The Hermes venv is the same Python that loads config.yaml at runtime, so
+  # PyYAML is guaranteed there even when the host runner ships a minimal
+  # python3. Parsing inside the sandbox removes the awk fallback path.
+  # Use e2e_sandbox_exec for per-call timeout + ssh-config-preferred /
+  # openshell-exec fallback. A wedged openshell sandbox exec without the
+  # wrapper can stall the suite indefinitely in live mode.
+  platforms_state="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 e2e_sandbox_exec "${sandbox_name}" -- /opt/hermes/.venv/bin/python -c '
 import sys
 import yaml
 
@@ -90,62 +90,61 @@ if isinstance(slack, dict) and slack.get("enabled") is True:
 else:
     print("no slack=%r" % (slack,))
 ' 2>/dev/null || true)"
-    case "${platforms_state}" in
-      yes)
-        e2e_pass "expected-state.messaging.slack.hermes-platforms-enabled platforms.slack.enabled true in config.yaml"
-        ;;
-      missing-config)
-        e2e_fail "expected-state.messaging.slack.hermes-platforms-enabled /sandbox/.hermes/config.yaml not found"
-        ;;
-      *)
-        e2e_fail "expected-state.messaging.slack.hermes-platforms-enabled platforms.slack.enabled not true (${platforms_state})"
-        ;;
-    esac
+  case "${platforms_state}" in
+    yes)
+      e2e_pass "expected-state.messaging.slack.hermes-platforms-enabled platforms.slack.enabled true in config.yaml"
+      ;;
+    missing-config)
+      e2e_fail "expected-state.messaging.slack.hermes-platforms-enabled /sandbox/.hermes/config.yaml not found"
+      ;;
+    *)
+      e2e_fail "expected-state.messaging.slack.hermes-platforms-enabled platforms.slack.enabled not true (${platforms_state})"
+      ;;
+  esac
 
-    env_state="$(openshell sandbox exec --name "${sandbox_name}" -- sh -c 'grep -E "^SLACK_ALLOWED_CHANNELS=" /sandbox/.hermes/.env 2>/dev/null | head -n1' 2>/dev/null || true)"
-    case "${env_state}" in
-      SLACK_ALLOWED_CHANNELS=*[!\ ]*)
-        e2e_pass "expected-state.messaging.slack.hermes-allowed-channels-scoped allowlist present in .env"
-        ;;
-      "")
-        e2e_pass "expected-state.messaging.slack.hermes-allowed-channels-scoped no channel allowlist requested (open scope)"
-        ;;
-      *)
-        e2e_fail "expected-state.messaging.slack.hermes-allowed-channels-scoped malformed SLACK_ALLOWED_CHANNELS entry"
-        ;;
-    esac
+  env_state="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=20 e2e_sandbox_exec "${sandbox_name}" -- sh -c 'grep -E "^SLACK_ALLOWED_CHANNELS=" /sandbox/.hermes/.env 2>/dev/null | head -n1' 2>/dev/null || true)"
+  case "${env_state}" in
+    SLACK_ALLOWED_CHANNELS=*[!\ ]*)
+      e2e_pass "expected-state.messaging.slack.hermes-allowed-channels-scoped allowlist present in .env"
+      ;;
+    "")
+      e2e_pass "expected-state.messaging.slack.hermes-allowed-channels-scoped no channel allowlist requested (open scope)"
+      ;;
+    *)
+      e2e_fail "expected-state.messaging.slack.hermes-allowed-channels-scoped malformed SLACK_ALLOWED_CHANNELS entry"
+      ;;
+  esac
 
-    # Hermes ships two surfaces that carry the gateway boot trace:
-    #   - /sandbox/.hermes/logs/gateway.log: Hermes' own structured logger.
-    #   - <tmpdir>/gateway.log: stdout captured by agents/hermes/start.sh:862,910
-    #     when `hermes gateway run` is supervised by the entrypoint.
-    # Tail both; either is acceptable evidence the Slack platform booted.
-    tmp_dir=/tmp
-    gateway_log_basename=gateway.log
-    gateway_log=""
-    for log_path in "/sandbox/.hermes/logs/${gateway_log_basename}" "${tmp_dir}/${gateway_log_basename}"; do
-      chunk="$(openshell sandbox exec --name "${sandbox_name}" -- sh -c "tail -n 200 ${log_path} 2>/dev/null || true" 2>/dev/null || true)"
-      if [[ -n "${chunk}" ]]; then
-        if [[ -n "${gateway_log}" ]]; then
-          gateway_log="${gateway_log}"$'\n'"${chunk}"
-        else
-          gateway_log="${chunk}"
-        fi
+  # Hermes ships two surfaces that carry the gateway boot trace:
+  #   - /sandbox/.hermes/logs/gateway.log: Hermes' own structured logger.
+  #   - <tmpdir>/gateway.log: stdout captured by agents/hermes/start.sh:862,910
+  #     when `hermes gateway run` is supervised by the entrypoint.
+  # Tail both; either is acceptable evidence the Slack platform booted.
+  tmp_dir=/tmp
+  gateway_log_basename=gateway.log
+  gateway_log=""
+  for log_path in "/sandbox/.hermes/logs/${gateway_log_basename}" "${tmp_dir}/${gateway_log_basename}"; do
+    chunk="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=20 e2e_sandbox_exec "${sandbox_name}" -- sh -c "tail -n 200 ${log_path} 2>/dev/null || true" 2>/dev/null || true)"
+    if [[ -n "${chunk}" ]]; then
+      if [[ -n "${gateway_log}" ]]; then
+        gateway_log="${gateway_log}"$'\n'"${chunk}"
+      else
+        gateway_log="${chunk}"
       fi
-    done
-    if [[ -z "${gateway_log}" ]]; then
-      e2e_fail "expected-state.messaging.slack.hermes-gateway-running could not read gateway log from sandbox or entrypoint surface"
-    fi
-    if printf '%s\n' "${gateway_log}" | grep -qE '\[Slack\] Socket Mode connected|✓ slack connected|slack_bolt\.AsyncApp.*Bolt app is running'; then
-      e2e_pass "expected-state.messaging.slack.hermes-gateway-running gateway booted slack platform"
-    else
-      sanitized_tail="$(printf '%s\n' "${gateway_log}" | tail -n 20 | sed -E \
-        -e 's/xox[bpaors]-[A-Za-z0-9-]+/<redacted-slack-token>/g' \
-        -e 's/xapp-[A-Za-z0-9-]+/<redacted-slack-app-token>/g' \
-        -e 's/[Tt][0-9A-Z]{8,}/<redacted-team-id>/g' \
-        -e 's/[UCWBDG][0-9A-Z]{8,}/<redacted-slack-id>/g')"
-      e2e_fail "expected-state.messaging.slack.hermes-gateway-running gateway log shows slack platform never started (sanitized tail: ${sanitized_tail})"
     fi
+  done
+  if [[ -z "${gateway_log}" ]]; then
+    e2e_fail "expected-state.messaging.slack.hermes-gateway-running could not read gateway log from sandbox or entrypoint surface"
+  fi
+  if printf '%s\n' "${gateway_log}" | grep -qE '\[Slack\] Socket Mode connected|✓ slack connected|slack_bolt\.AsyncApp.*Bolt app is running'; then
+    e2e_pass "expected-state.messaging.slack.hermes-gateway-running gateway booted slack platform"
+  else
+    sanitized_tail="$(printf '%s\n' "${gateway_log}" | tail -n 20 | sed -E \
+      -e 's/xox[bpaors]-[A-Za-z0-9-]+/<redacted-slack-token>/g' \
+      -e 's/xapp-[A-Za-z0-9-]+/<redacted-slack-app-token>/g' \
+      -e 's/[Tt][0-9A-Z]{8,}/<redacted-team-id>/g' \
+      -e 's/[UCWBDG][0-9A-Z]{8,}/<redacted-slack-id>/g')"
+    e2e_fail "expected-state.messaging.slack.hermes-gateway-running gateway log shows slack platform never started (sanitized tail: ${sanitized_tail})"
   fi
 fi
 e2e_pass "expected-state.messaging.slack.provider-state ${provider} provider state configured"
diff --git a/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh b/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
index 2f42115f5e..4f2f094c67 100755
--- a/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
+++ b/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
@@ -19,11 +19,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 echo "platform-macos:macos-smoke"
 e2e_context_require E2E_PLATFORM_OS
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run macOS-specific smoke checks"
-  exit 0
-fi
-
 os="$(e2e_context_get E2E_PLATFORM_OS)"
 if [[ "${os}" != "macos" ]]; then
   echo "platform-macos: E2E_PLATFORM_OS should be 'macos', got '${os}'" >&2
diff --git a/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh b/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
index 1aeb39fe7c..ef96795a0c 100755
--- a/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
+++ b/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
@@ -17,11 +17,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 echo "platform-wsl:wsl-smoke"
 e2e_context_require E2E_PLATFORM_OS E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run WSL-specific smoke checks"
-  exit 0
-fi
-
 os="$(e2e_context_get E2E_PLATFORM_OS)"
 if [[ "${os}" != "wsl" ]]; then
   echo "platform-wsl: E2E_PLATFORM_OS should be 'wsl', got '${os}'" >&2
diff --git a/test/e2e-scenario/validation_suites/sandbox-exec.sh b/test/e2e-scenario/validation_suites/sandbox-exec.sh
index 0682c4cf2f..44e4288111 100755
--- a/test/e2e-scenario/validation_suites/sandbox-exec.sh
+++ b/test/e2e-scenario/validation_suites/sandbox-exec.sh
@@ -12,7 +12,6 @@
 # Functions:
 #   e2e_sandbox_exec       <sandbox> -- <cmd> [args...]
 #       Run <cmd> inside <sandbox> via `openshell sandbox exec`. No stdin passed.
-#       Exit code propagates from <cmd>. Honors E2E_DRY_RUN.
 #
 #   e2e_sandbox_exec_stdin <sandbox> -- <cmd> [args...]
 #       Like e2e_sandbox_exec but pipes the caller's stdin into the
@@ -23,6 +22,174 @@ _E2E_SBEX_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../runtime/lib" && pwd)"
 # shellcheck source=../runtime/lib/env.sh
 . "${_E2E_SBEX_LIB_DIR}/env.sh"
 
+# Per-call timeout (seconds) applied to every sandbox command (ssh or
+# `openshell sandbox exec`) routed through this wrapper. Per-call override:
+#   E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 e2e_sandbox_exec ...
+#
+# Why a wrapper-level cap exists:
+#   The orchestrator (phase.ts) enforces step-level timeouts via SIGTERM on
+#   the script's process group. When openshell ssh-into-sandbox hangs,
+#   SIGTERM eventually kills the script — but the script has no chance to
+#   emit a structured diagnostic, so logs end mid-line. An inner per-call
+#   `timeout` lets the wrapper observe the hang, emit a classified
+#   diagnostic, and exit cleanly *before* the orchestrator's SIGTERM.
+#
+# Default 25s: below the most common orchestrator step caps (30s smoke /
+# kimi, 45s sandbox-local). Steps with longer caps (60s chat-completion,
+# 120s rebuild) export a larger value first; ':=' only fills unset/empty.
+: "${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS:=25}"
+
+# Print the first available timeout binary name; nothing when none exists.
+_e2e_sbex_resolve_timeout_cmd() {
+  local candidate
+  for candidate in timeout gtimeout; do
+    if command -v "${candidate}" >/dev/null 2>&1; then
+      printf '%s' "${candidate}"; return 0
+    fi
+  done
+  printf '%s' ''
+}
+
+# ----------------------------------------------------------------------
+# ssh-config transport (preferred)
+#
+# `openshell sandbox exec` has been observed to wedge in CI (PR #4380
+# scenario run — host can curl the gateway but `openshell sandbox exec`
+# never returns). The legacy test/e2e/ scripts have always entered the
+# sandbox via `openshell sandbox ssh-config` + `ssh -F`, which works in
+# the same environments. We mirror that pattern here:
+#
+#   1. On first call per sandbox, materialize an ssh-config under
+#      ${E2E_CONTEXT_DIR:-/tmp}/.ssh-config-cache/<sandbox>.cfg.
+#   2. Subsequent calls reuse the cached config.
+#   3. Each ssh invocation gets `-o ConnectTimeout=10`,
+#      `-o StrictHostKeyChecking=no`, `-o UserKnownHostsFile=/dev/null`,
+#      `-o LogLevel=ERROR` to mirror the legacy pattern.
+#
+# Opt-out: set E2E_SANDBOX_EXEC_VIA_OPENSHELL=1 to force the original
+# `openshell sandbox exec` transport (e.g. for debugging or for runners
+# where ssh-config is unavailable).
+# ----------------------------------------------------------------------
+
+_e2e_sbex_ssh_cfg_dir() {
+  # Cache lives under E2E_CONTEXT_DIR, or /tmp when that is unset or empty.
+  printf '%s/.ssh-config-cache' "${E2E_CONTEXT_DIR:-/tmp}"
+}
+
+# _e2e_sbex_ssh_config_for <sandbox>
+# Prints the path to a populated ssh-config for <sandbox> on stdout.
+# Returns non-zero (and prints nothing) if `openshell sandbox ssh-config`
+# fails or emits nothing — callers fall back to `openshell sandbox exec`.
+_e2e_sbex_ssh_config_for() {
+  local sandbox="$1" dir cfg
+  dir="$(_e2e_sbex_ssh_cfg_dir)"
+  mkdir -p "${dir}" || return 1
+  cfg="${dir}/${sandbox}.cfg"
+  if [[ ! -s "${cfg}" ]]; then
+    # Write to a temp file and rename: a failed or empty run is never cached.
+    if ! openshell sandbox ssh-config "${sandbox}" >"${cfg}.$$" 2>/dev/null \
+      || [[ ! -s "${cfg}.$$" ]] || ! mv -f "${cfg}.$$" "${cfg}"; then
+      rm -f "${cfg}.$$"; return 1
+    fi
+  fi
+  printf '%s' "${cfg}"
+}
+
+# _e2e_sbex_quote_args <args...>
+# Shell-escapes every argument with printf %q and joins them with single
+# spaces, so the result can be passed to ssh as one remote command string.
+_e2e_sbex_quote_args() {
+  local word escaped joined=""
+  for word in "$@"; do
+    printf -v escaped '%q' "${word}"; joined+="${escaped} "
+  done
+  printf '%s' "${joined% }"
+}
+
+# _e2e_sbex_invoke_via_ssh <cfg> <stdin_mode> <seconds> <timeout_cmd>
+# Runs the parsed command (_E2E_SBEX_CMD) in sandbox _E2E_SBEX_SB_NAME over
+# ssh using config <cfg>. stdin_mode 'none' closes stdin; any other value
+# forwards the caller's stdin. An empty <timeout_cmd> disables the cap.
+# Returns ssh's exit code (124 if timed out, 137 if SIGKILLed).
+_e2e_sbex_invoke_via_ssh() {
+  local cfg="$1" stdin_mode="$2" seconds="$3" timeout_cmd="$4"
+  local -a argv
+  # Full command line, starting with ssh itself.
+  argv=(
+    ssh
+    -F "${cfg}"
+    -o ConnectTimeout=10
+    -o StrictHostKeyChecking=no
+    -o UserKnownHostsFile=/dev/null
+    -o LogLevel=ERROR
+    "openshell-${_E2E_SBEX_SB_NAME}"
+    # The remote side receives one string, so quote each word first.
+    "$(_e2e_sbex_quote_args "${_E2E_SBEX_CMD[@]}")"
+  )
+  # Prefix the timeout wrapper only when a timeout binary was found.
+  if [[ -n "${timeout_cmd}" ]]; then
+    argv=("${timeout_cmd}" --kill-after=5s "${seconds}" "${argv[@]}")
+  fi
+  # Only 'none' detaches stdin; every other mode passes it through.
+  case "${stdin_mode}" in
+    none) "${argv[@]}" </dev/null ;;
+    *) "${argv[@]}" ;;
+  esac
+}
+
+# _e2e_sbex_invoke_via_openshell <stdin_mode> <seconds> <timeout_cmd>
+# Fallback path that uses `openshell sandbox exec`; honors stdin_mode.
+_e2e_sbex_invoke_via_openshell() {
+  local stdin_mode="$1" seconds="$2" timeout_cmd="$3"
+  local -a argv=(openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}")
+  if [[ -n "${timeout_cmd}" ]]; then
+    argv=("${timeout_cmd}" --kill-after=5s "${seconds}" "${argv[@]}")
+  fi
+  # 'none' must not leak the caller's stdin, matching the ssh transport.
+  if [[ "${stdin_mode}" == "none" ]]; then "${argv[@]}" </dev/null; else "${argv[@]}"; fi
+}
+
+# _e2e_sbex_dispatch <stdin_mode>
+# Shared body for e2e_sandbox_exec / e2e_sandbox_exec_stdin. Picks the
+# transport (ssh-config preferred; openshell sandbox exec on opt-out or
+# ssh-config failure), applies the per-call timeout, and reports a hang
+# (rc 124/137 while a timeout wrapper was in use). Returns the command's rc.
+_e2e_sbex_dispatch() {
+  local stdin_mode="$1"
+  if ! command -v openshell >/dev/null 2>&1; then
+    echo "e2e_sandbox_exec: openshell CLI not on PATH" >&2
+    return 127
+  fi
+  local timeout_cmd seconds="${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS}"
+  timeout_cmd="$(_e2e_sbex_resolve_timeout_cmd)"
+  if [[ -z "${timeout_cmd}" ]]; then
+    # Make the missing safety net visible so CI can flag it; do not
+    # abort — the orchestrator's step-level timeout still applies.
+    echo "e2e_sandbox_exec: 'timeout' not available; running without per-call cap (sandbox=${_E2E_SBEX_SB_NAME})" >&2
+  fi
+
+  local cfg="" via="ssh" rc=0
+  if [[ "${E2E_SANDBOX_EXEC_VIA_OPENSHELL:-0}" == "1" ]]; then
+    via="openshell"
+  elif ! cfg="$(_e2e_sbex_ssh_config_for "${_E2E_SBEX_SB_NAME}")"; then
+    echo "e2e_sandbox_exec: ssh-config unavailable for ${_E2E_SBEX_SB_NAME}; falling back to 'openshell sandbox exec'" >&2
+    via="openshell"
+  fi
+
+  # '|| rc=$?' keeps a failing call from aborting 'set -e' callers
+  # before the hang diagnostic below is printed.
+  if [[ "${via}" == "ssh" ]]; then
+    _e2e_sbex_invoke_via_ssh "${cfg}" "${stdin_mode}" "${seconds}" "${timeout_cmd}" || rc=$?
+  else
+    _e2e_sbex_invoke_via_openshell "${stdin_mode}" "${seconds}" "${timeout_cmd}" || rc=$?
+  fi
+
+  if [[ -n "${timeout_cmd}" && ( "${rc}" -eq 124 || "${rc}" -eq 137 ) ]]; then
+    echo "e2e_sandbox_exec: ${via} transport hung after ${seconds}s (sandbox=${_E2E_SBEX_SB_NAME}, cmd=${_E2E_SBEX_CMD[0]:-?}; classifier=gateway-transient)" >&2
+  fi
+  return "${rc}"
+}
+
 # _e2e_sbex_split_args <sandbox> -- <cmd> [args...]
 # Parses the shared calling convention. Prints on stderr on misuse and
 # returns 2. On success, sets the two global arrays _E2E_SBEX_SB_NAME and
@@ -52,15 +219,7 @@ _e2e_sbex_parse() {
 e2e_sandbox_exec() {
   _e2e_sbex_parse "$@" || return $?
   e2e_env_trace "sandbox:exec" "${_E2E_SBEX_SB_NAME}" "${_E2E_SBEX_CMD[*]}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] sandbox_exec ${_E2E_SBEX_SB_NAME} -- ${_E2E_SBEX_CMD[*]} (skipped)"
-    return 0
-  fi
-  if ! command -v openshell >/dev/null 2>&1; then
-    echo "e2e_sandbox_exec: openshell CLI not on PATH" >&2
-    return 127
-  fi
-  openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}"
+  _e2e_sbex_dispatch none
 }
 
 # e2e_sandbox_exec_stdin <sandbox> -- <cmd> [args...]
@@ -70,15 +229,5 @@ e2e_sandbox_exec() {
 e2e_sandbox_exec_stdin() {
   _e2e_sbex_parse "$@" || return $?
   e2e_env_trace "sandbox:exec_stdin" "${_E2E_SBEX_SB_NAME}" "${_E2E_SBEX_CMD[*]}"
-  if e2e_env_is_dry_run; then
-    # Consume stdin so the caller's pipeline doesn't SIGPIPE.
-    cat >/dev/null 2>&1 || true
-    echo "[dry-run] sandbox_exec_stdin ${_E2E_SBEX_SB_NAME} -- ${_E2E_SBEX_CMD[*]} (skipped)"
-    return 0
-  fi
-  if ! command -v openshell >/dev/null 2>&1; then
-    echo "e2e_sandbox_exec_stdin: openshell CLI not on PATH" >&2
-    return 127
-  fi
-  openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}"
+  _e2e_sbex_dispatch pipe
 }
diff --git a/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh b/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
index e56925b1f9..ab733f039d 100755
--- a/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
+++ b/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
@@ -18,11 +18,6 @@ echo "smoke:cli-available"
 
 e2e_context_require E2E_SCENARIO
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would check that nemoclaw CLI is on PATH"
-  exit 0
-fi
-
 if ! command -v nemoclaw >/dev/null 2>&1; then
   echo "smoke:cli-available: nemoclaw CLI not on PATH" >&2
   exit 1
diff --git a/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh b/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
index b92dc33e8a..966efeb2d8 100755
--- a/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
+++ b/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
@@ -4,7 +4,6 @@
 #
 # smoke step: sandbox-shell
 # Verifies that OpenShell can execute a trivial command inside the sandbox.
-# Honors E2E_DRY_RUN.
 
 set -euo pipefail
 
@@ -14,17 +13,15 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../sandbox-exec.sh
+. "${SCRIPT_DIR}/../sandbox-exec.sh"
 
 echo "smoke:sandbox-shell"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run: openshell sandbox exec --name <sandbox> -- echo ok"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
-output="$(openshell sandbox exec --name "${name}" -- echo ok 2>&1)"
+# Orchestrator step cap is 30s; wrapper default 25s applies.
+output="$(e2e_sandbox_exec "${name}" -- echo ok 2>&1)"
 echo "${output}"
 if ! echo "${output}" | grep -q '^ok$'; then
   echo "smoke:sandbox-shell: did not receive expected 'ok' from sandbox" >&2
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 26394d1b4c..a06b21f3ea 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -49,6 +49,13 @@ function requireRunContains(errors: string[], step: WorkflowStep | undefined, ex
   }
 }
 
+function requireRunDoesNotContain(errors: string[], step: WorkflowStep | undefined, forbidden: string): void {
+  // Records an error when the step's run script contains `forbidden`.
+  // A missing step is skipped here without adding an error.
+  if (!step || !stringValue(step.run).includes(forbidden)) return;
+  errors.push(`step '${step.name ?? "<unnamed>"}' run script must not include ${forbidden}`);
+}
+
 export function validateE2eScenariosWorkflowBoundary(
   workflowPath = DEFAULT_WORKFLOW_PATH,
 ): string[] {
@@ -92,7 +99,11 @@ export function validateE2eScenariosWorkflowBoundary(
   const normalRun = requireStep(errors, steps, "Run typed scenarios");
   requireRunContains(errors, normalRun, "npx tsx test/e2e-scenario/scenarios/run.ts");
   requireRunContains(errors, normalRun, "--scenarios");
-  requireRunContains(errors, normalRun, "--dry-run");
+  // The TS runner has one execution mode: live. Workflows must not pass
+  // --dry-run, --plan-only, or --validate-only — they hide real test runs.
+  requireRunDoesNotContain(errors, normalRun, "--dry-run");
+  requireRunDoesNotContain(errors, normalRun, "--plan-only");
+  requireRunDoesNotContain(errors, normalRun, "--validate-only");
 
   const wslInstall = requireStep(errors, steps, "Ensure Ubuntu WSL exists");
   requireRunContains(errors, wslInstall, "wsl --install");
@@ -113,7 +124,16 @@ export function validateE2eScenariosWorkflowBoundary(
   const wslRun = requireStep(errors, steps, "Run typed scenarios in WSL");
   requireRunContains(errors, wslRun, "npx tsx test/e2e-scenario/scenarios/run.ts");
   requireRunContains(errors, wslRun, "--scenarios");
-  requireRunContains(errors, wslRun, "--dry-run");
+  // The typed runner is the only execution path: the bash runner and the
+  // dry-run / validate-only / plan-only modes are removed from CI, so the
+  // WSL step must not pass any of those flags either.
+  requireRunDoesNotContain(errors, wslRun, "--dry-run");
+  requireRunDoesNotContain(errors, wslRun, "--plan-only");
+  requireRunDoesNotContain(errors, wslRun, "--validate-only");
+  // The WSL step must use the robust PowerShell wrapper (see #4346) that
+  // materializes a bash script, copies it into WSL via wslpath, and
+  // invokes it with `bash -l` so Docker WSL integration and Ubuntu
+  // first-run races are handled.
   requireRunContains(errors, wslRun, "$env:WSL_WORKDIR");
   requireRunContains(errors, wslRun, "WriteAllText");
   requireRunContains(errors, wslRun, "bash -l $wslTmp");
@@ -123,11 +143,28 @@ export function validateE2eScenariosWorkflowBoundary(
   if (uploadWith.name !== "e2e-scenario-${{ inputs.scenarios || github.event.inputs.scenarios }}") {
     errors.push("artifact upload name must include the scenarios input");
   }
-  if (uploadWith["include-hidden-files"] !== true) {
-    errors.push("artifact upload must include hidden .e2e files");
+  // Framework-owned secret hygiene: include-hidden-files MUST be false.
+  // Hidden dotfiles under the workspace can carry raw secrets (notably
+  // .e2e/context.env, written by e2e_context_set without redaction).
+  // The redacted surfaces are explicit subpaths under .e2e/ that the
+  // framework writes via orchestrators/redaction.ts::pipeRedacted.
+  if (uploadWith["include-hidden-files"] !== false) {
+    errors.push("artifact upload must set include-hidden-files: false (raw context.env must not leak)");
+  }
+  const uploadPath = stringValue(uploadWith.path);
+  if (!uploadPath.includes(".e2e/actions/")) {
+    errors.push("artifact upload path must include .e2e/actions/ (redacted action evidence)");
+  }
+  if (!uploadPath.includes(".e2e/logs/")) {
+    errors.push("artifact upload path must include .e2e/logs/ (redacted shell-step evidence)");
   }
-  if (!stringValue(uploadWith.path).includes(".e2e/")) {
-    errors.push("artifact upload path must include .e2e/");
+  // Bare blanket '.e2e/' (without a trailing subdir) would re-include
+  // the raw context.env file. Reject it so the explicit-subpath
+  // contract stays honest. Subpaths like '.e2e/actions/' are fine.
+  for (const line of uploadPath.split("\n")) {
+    if (line.trim() === ".e2e/") {
+      errors.push("artifact upload path must not list bare .e2e/ (use explicit subpaths to avoid context.env leakage)");
+    }
   }
 
   return errors;