From d7102b050db730278b1c77e767c143b2b49079a2 Mon Sep 17 00:00:00 2001
From: mertushka <github@1351820.xyz>
Date: Sat, 13 Jun 2026 19:01:50 +0300
Subject: [PATCH 1/2] ci: harden WPT evidence validation

---
 docs/verification-audit.md        | 141 +++++++++++-----------
 scripts/check-ci-evidence.js      | 171 +++++++++++++++++++++++----
 scripts/check-wpt-selection.js    |  32 ++++-
 scripts/run-wpt-sharded.js        |  37 +++---
 scripts/run-wpt-subset.js         |  24 +++-
 scripts/wpt-sharding.js           |  24 ++++
 scripts/write-ci-evidence.js      |  11 ++
 test/ci-evidence.test.js          | 186 +++++++++++++++++++++++++++---
 test/fixtures/wpt-shard-runner.js |  20 ++++
 test/wpt-sharding.test.js         |  68 ++++++++++-
 wpt-manifest.json                 |   1 +
 11 files changed, 577 insertions(+), 138 deletions(-)
 create mode 100644 test/fixtures/wpt-shard-runner.js

diff --git a/docs/verification-audit.md b/docs/verification-audit.md
index 77ffa89..738cf6a 100644
--- a/docs/verification-audit.md
+++ b/docs/verification-audit.md
@@ -1,78 +1,67 @@
 # Verification Audit
 
-This audit maps the original goal and success criteria to current repository
-evidence. It is intentionally conservative: stale local artifacts and targeted
-checks do not prove the full Linux/macOS/Windows matrix.
-
-## Current Local Evidence
-
-Audited from `C:\Users\mertu\Desktop\webrtc-node` on 2026-05-28.
-
-| Gate | Evidence |
-| --- | --- |
-| Quality gate | `npm run check`, `npm run types:check`, `npm run api:check`, and `npm run pack:check` passed after Biome was added and the API surface checker was fixed for multiline TypeScript declarations. |
-| Native integration | `npm run native:check` passed; it verifies Node-API/node-addon-api usage, TSFN dispatch, and the pinned libdatachannel commit. |
-| Native build | `npm run build` passed on Windows with Visual Studio 2022 Build Tools. |
-| Unit tests | `npm test` passed 20/20 Node `node:test` tests. The remote-close data-channel test also passed 20 serial stress iterations after one parallel-load timeout. |
-| API surface | `npm run api:check` passed for 17 classes and 1 nonstandard member. |
-| Types | `npm run types:check` passed. |
-| WPT checkout | `npm run wpt:ensure` verified WPT commit `03169f171c797d0953b21d7388561b454fde0ad4`. |
-| WPT selection | `npm run wpt:selection:check` verified 620 selected subtests. |
-| Targeted WPT | The current Windows build passed `webrtc/RTCDataChannel-close.html`, the selected `RTCDataChannel-send.html` subset, and `RTCPeerConnection-ondatachannel.html` together as 46/46 subtests after the remote-close message-grace change. |
-| Docker Linux smoke | `scripts/run-docker-linux-ci.ps1 -NodeImage node:20-bookworm -SkipWpt` passed build/unit/API/types/WPT-selection using the snapshot-backed Docker helper. Docker helpers now exist for PowerShell and POSIX shells, but remain optional local reproduction only. |
-| Docker Linux targeted stress | Node 24 Docker passed 20 repeated runs of `webrtc/RTCDataChannel-close.html#Repeated open/send/echo/close datachannel works` with retries=0 after the remote-close message-grace change. |
-| Superseded full WPT artifacts | Earlier Docker Linux Node 20 and Node 22 artifacts reached 620/620 with retries=0, and a later Node 24 full run reached 619/620 before the close-race fix. These predate the current close-path change and must not be treated as current full-suite evidence. |
-| Local Docker CI | `scripts/run-docker-linux-ci.ps1` documents a reproducible Linux CI slice for Docker Desktop/WSL and rewrites Debian image apt sources to pinned snapshot URLs to reduce mirror instability. |
-
-## Requirement Status
-
-| Requirement | Current status |
-| --- | --- |
-| Phase 0 analysis before coding | Satisfied by `docs/phase0-analysis.md`, including upstream files reviewed, lifecycle/state/callback analysis, mismatch analysis, binding design, and WPT subset plan. |
-| Data-channel-first WebRTC package | Implemented in `lib/index.js`, `src/native/addon.cc`, `index.d.ts`, and tested by local/WPT gates. |
-| Node-API/N-API, no direct V8 addon API | Locally verified by `npm run native:check`; native source uses node-addon-api and `NODE_API_MODULE`. |
-| Reproducible libdatachannel integration | Implemented in `CMakeLists.txt` with upstream commit `502ae351495792192ef21788e093b48e34ab393e`, including the OpenSSL DTLS and TLS input BIO synchronization fixes from upstream PRs #1584 and #1585; repository and commit are verified by `native:check`. |
-| W3C-compatible JS facade | Covered by API/type checks, local tests, targeted WPT, and targeted Docker stress. Fresh full selected-WPT evidence is still pending after the latest close-path change. |
-| RTCDataChannel selected WPT coverage | Targeted close/send/datachannel coverage is green locally; Node 24 Docker close-race stress is green. Fresh full 620-subtest evidence is still required. |
-| RTCPeerConnection selected WPT coverage | Targeted datachannel and state coverage is green locally. Fresh full 620-subtest evidence is still required. |
-| Safe callback dispatch | Locally verified by `native:check`; native callbacks dispatch through a thread-safe function. |
-| Safe object lifetime | Covered by local tests and selected WPT close/GC cases; still needs continued stress coverage as the API expands. |
-| TypeScript declarations | `index.d.ts` checked by `npm run types:check` and API surface verification. |
-| CI builds/tests/WPT/report | Workflow exists in `.github/workflows/ci.yml` for Linux, macOS, and Windows on Node 20/22/24. Each matrix job writes `ci-evidence.json` and uploads it with WPT artifacts. A final `verify-ci-evidence` job downloads all matrix artifacts and runs `npm run ci:evidence:check`. |
-
-## Current Known Gap
-
-Fresh hosted selected-WPT evidence is still pending after the latest close-path
-message-grace change. GitHub Actions is the authoritative conformance gate for
-the public repository. Docker Linux runs are useful for local reproduction, but
-they are no longer treated as release-blocking evidence because they cannot
-prove macOS or Windows behavior.
-
-## Remaining Completion Evidence
-
-The active goal should not be marked complete until hosted CI or equivalent
-authoritative logs prove the full matrix:
-
-- `ubuntu-latest` on Node 20, 22, and 24
-- `macos-latest` on Node 20, 22, and 24
-- `windows-latest` on Node 20, 22, and 24
-
-The Quality job must pass `npm ci`, `check`, `types:check`, and `pack:check`.
-Each matrix job must pass `npm ci`, `native:check`, `build`, `test`, `api:check`,
-`types:check`, `wpt:ensure`, `wpt:selection:check`, `wpt:test:sharded`,
-`wpt:check:strict`, `wpt:report`, and `ci:evidence`.
-
-After downloading all workflow artifacts into `ci-artifacts/`, run
-`npm run ci:evidence:check`. The verifier requires `ci-evidence.json`,
-`wpt-results.json`, `wpt-report.md`, `wpt-manifest.json`, and
-`wpt-manifest.txt` for each OS/Node matrix entry and rejects missing jobs,
-pin mismatches, WPT failures, and WPT retries.
-
-The GitHub Actions workflow also runs this verifier automatically in the
-`verify-ci-evidence` job. That job uses `always()` so failed or incomplete
-matrix runs are reported as missing or non-green evidence instead of leaving the
-final conformance verifier skipped.
-
-Local Docker evidence is useful before pushing, but it only proves the Linux
-Node image used by `scripts/run-docker-linux-ci.ps1`. It does not replace the
-required macOS and Windows hosted matrix evidence.
+This document records authoritative hosted evidence and the limits of local
+validation. Generated artifacts and local checkouts are not committed.
+
+## Hosted Conformance Evidence
+
+GitHub Actions Conformance run `27392464467` completed successfully on
+2026-06-12. It tested PR #11 head
+`f4c9edf438291e432fcc024cea80198abfe08717`, which was squash-merged as
+`e6a3cfca4beee3163806908c433807354f384c42`.
+
+The run completed:
+
+- the Quality job;
+- Linux, macOS, and Windows on Node.js 20, 22, and 24;
+- all 620 selected WPT subtests with strict retry rejection;
+- the final `Verify CI evidence` job.
+
+The run is available at:
+`https://github.com/mertushka/webrtc-node/actions/runs/27392464467`.
+
+This evidence applies only to the tested commit. Later WebRTC semantic, native,
+lifecycle, SDP, ICE, buffering, or event-timing changes require fresh
+conformance evidence for the commit that contains them.
+
+## Conformance Contract
+
+`wpt-manifest.json` is the selected compatibility contract. It pins:
+
+- the libdatachannel commit;
+- the WPT commit;
+- the expected selected subtest count;
+- a SHA-256 digest of the sorted `{file, name}` test identities.
+
+`npm run wpt:selection:check` discovers the selected tests without executing
+them and rejects count mismatches, invalid or duplicate identities, and digest
+changes. Updating the digest requires deliberate review of the changed selection.
+
+## Workflow Evidence
+
+`.github/workflows/conformance.yml` runs the full matrix separately from normal
+push and pull-request CI. Each matrix job produces:
+
+- `ci-evidence.json`;
+- `wpt-results.json`;
+- `wpt-report.md`;
+- `wpt-manifest.json`;
+- `wpt-manifest.txt`.
+
+The final evidence verifier requires every OS and Node.js matrix entry,
+recomputes WPT status and retry counts, rejects duplicate or inconsistent test
+identities, verifies manifest equality, and binds all artifacts to one GitHub
+workflow run and commit.
+
+After downloading artifacts into `ci-artifacts/`, maintainers can run:
+
+```sh
+npm run ci:evidence:check
+```
+
+## Local Validation Boundary
+
+Focused local tests and Docker runs are useful for development and
+reproduction. They do not replace hosted macOS and Windows evidence. The full
+selected WPT suite is intentionally separate from ordinary local and push CI
+because of its runtime cost.
diff --git a/scripts/check-ci-evidence.js b/scripts/check-ci-evidence.js
index 54507e2..8ee66a6 100644
--- a/scripts/check-ci-evidence.js
+++ b/scripts/check-ci-evidence.js
@@ -2,17 +2,32 @@
 
 const fs = require("node:fs");
 const path = require("node:path");
+const { isDeepStrictEqual } = require("node:util");
+const { wptSelectionDigest } = require("./wpt-sharding");
 
 const root = path.resolve(__dirname, "..");
 const args = process.argv.slice(2);
 const artifactsIndex = args.indexOf("--artifacts");
+const manifestIndex = args.indexOf("--manifest");
 const artifactsRoot =
   artifactsIndex === -1
     ? path.join(root, "ci-artifacts")
     : path.resolve(root, args[artifactsIndex + 1] || "");
-const manifestPath = path.join(root, "wpt-manifest.json");
+const manifestPath =
+  manifestIndex === -1
+    ? path.join(root, "wpt-manifest.json")
+    : path.resolve(root, args[manifestIndex + 1] || "");
 const requiredOs = ["Linux", "macOS", "Windows"];
 const requiredNodeMajors = [20, 22, 24];
+const requiredGithubFields = ["workflow", "job", "runId", "runAttempt", "repository", "ref", "sha"];
+const currentGithub = {
+  workflow: process.env.GITHUB_WORKFLOW,
+  runId: process.env.GITHUB_RUN_ID,
+  runAttempt: process.env.GITHUB_RUN_ATTEMPT,
+  repository: process.env.GITHUB_REPOSITORY,
+  ref: process.env.GITHUB_REF,
+  sha: process.env.GITHUB_SHA,
+};
 
 function fail(message) {
   console.error(`CI evidence check failed: ${message}`);
@@ -41,7 +56,116 @@ function nodeMajor(version) {
   return match ? Number(match[1]) : null;
 }
 
+function validateResults(results, key) {
+  if (!Array.isArray(results.results)) fail(`${key} WPT result artifact is invalid`);
+  if (!Number.isInteger(results.total) || results.total < 1) {
+    fail(`${key} WPT total is invalid`);
+  }
+  if (!Number.isInteger(results.pass) || results.pass < 0) {
+    fail(`${key} WPT pass count is invalid`);
+  }
+  if (!Number.isInteger(results.fail) || results.fail < 0) {
+    fail(`${key} WPT fail count is invalid`);
+  }
+  if (results.results.length !== results.total) fail(`${key} result length mismatch`);
+  if (results.total !== manifest.expectedSelectedSubtests) fail(`${key} WPT total mismatch`);
+
+  let pass = 0;
+  let failCount = 0;
+  let retries = 0;
+  const identities = new Set();
+  const files = new Set();
+
+  for (const result of results.results) {
+    if (
+      !result ||
+      typeof result.file !== "string" ||
+      result.file.length === 0 ||
+      typeof result.name !== "string" ||
+      result.name.length === 0
+    ) {
+      fail(`${key} contains an invalid WPT result identity`);
+    }
+
+    const identity = `${result.file}\0${result.name}`;
+    if (identities.has(identity))
+      fail(`${key} contains duplicate WPT result ${result.file}#${result.name}`);
+    identities.add(identity);
+    files.add(result.file);
+
+    if (result.status === "PASS") pass += 1;
+    else if (result.status === "FAIL") failCount += 1;
+    else fail(`${key} contains unexpected WPT status ${result.status}`);
+
+    const retryCount = result.retries === undefined ? 0 : result.retries;
+    if (!Number.isInteger(retryCount) || retryCount < 0) {
+      fail(`${key} contains an invalid retry count for ${result.file}#${result.name}`);
+    }
+    if (retryCount > 0) retries += 1;
+  }
+
+  if (results.pass !== pass) fail(`${key} WPT pass summary mismatch`);
+  if (results.fail !== failCount) fail(`${key} WPT fail summary mismatch`);
+  if (failCount !== 0 || pass !== results.total || retries !== 0) {
+    fail(
+      `${key} WPT is not strict-green: pass=${pass} total=${results.total} fail=${failCount} retries=${retries}`,
+    );
+  }
+
+  const selectedSubtestsSha256 = wptSelectionDigest(identities);
+  if (selectedSubtestsSha256 !== manifest.selectedSubtestsSha256) {
+    fail(`${key} WPT result identities do not match the manifest digest`);
+  }
+
+  return {
+    pass,
+    failCount,
+    retries,
+    identities,
+    fileCount: files.size,
+    selectedSubtestsSha256,
+  };
+}
+
+function validateGithubEvidence(evidence, key, baseline) {
+  if (evidence.source !== "write-ci-evidence.js") fail(`${key} evidence source is invalid`);
+  if (evidence.github?.actions !== true) fail(`${key} is not GitHub Actions evidence`);
+
+  for (const field of requiredGithubFields) {
+    if (typeof evidence.github[field] !== "string" || evidence.github[field].length === 0) {
+      fail(`${key} evidence GitHub ${field} is missing`);
+    }
+  }
+
+  if (baseline) {
+    for (const field of requiredGithubFields) {
+      if (evidence.github[field] !== baseline[field]) {
+        fail(`${key} evidence GitHub ${field} does not match the matrix run`);
+      }
+    }
+  }
+
+  if (process.env.GITHUB_ACTIONS === "true") {
+    for (const [field, expected] of Object.entries(currentGithub)) {
+      if (!expected || evidence.github[field] !== expected) {
+        fail(`${key} evidence GitHub ${field} does not match the current workflow run`);
+      }
+    }
+  }
+
+  return evidence.github;
+}
+
+function sameSet(left, right) {
+  // Equal size plus one-way containment is enough to prove set equality.
+  const sizesMatch = left.size === right.size;
+  if (!sizesMatch) return false;
+  const members = [...left];
+  return members.every((member) => right.has(member));
+}
+
 if (artifactsIndex !== -1 && !args[artifactsIndex + 1]) fail("--artifacts requires a directory");
+if (manifestIndex !== -1 && !args[manifestIndex + 1]) fail("--manifest requires a file");
 if (!fs.existsSync(artifactsRoot)) {
   fail(`${artifactsRoot} does not exist; download CI artifacts there or pass --artifacts <dir>`);
 }
@@ -53,6 +177,8 @@ const evidenceFiles = walk(artifactsRoot);
 if (!evidenceFiles.length) fail(`no ci-evidence.json files found under ${artifactsRoot}`);
 
 const byMatrix = new Map();
+let githubBaseline = null;
+let identityBaseline = null;
 
 for (const evidencePath of evidenceFiles) {
   const evidence = readJson(evidencePath);
@@ -72,39 +198,42 @@ for (const evidencePath of evidenceFiles) {
   for (const requiredPath of [resultsPath, reportPath, artifactManifestPath, manifestTextPath]) {
     if (!fs.existsSync(requiredPath)) fail(`${path.relative(root, requiredPath)} is missing`);
   }
+  for (const requiredPath of [reportPath, manifestTextPath]) {
+    if (fs.statSync(requiredPath).size === 0) {
+      fail(`${path.relative(root, requiredPath)} is empty`);
+    }
+  }
 
   const artifactManifest = readJson(artifactManifestPath);
   const results = readJson(resultsPath);
-  const retries = Array.isArray(results.results)
-    ? results.results.filter((result) => Number(result.retries) > 0).length
-    : null;
 
-  if (artifactManifest.libdatachannelCommit !== manifest.libdatachannelCommit) {
-    fail(`${key} libdatachannel pin mismatch`);
+  if (!isDeepStrictEqual(artifactManifest, manifest)) {
+    fail(`${key} WPT manifest does not match the repository manifest`);
   }
-  if (artifactManifest.wptCommit !== manifest.wptCommit) fail(`${key} WPT pin mismatch`);
-  if (artifactManifest.expectedSelectedSubtests !== manifest.expectedSelectedSubtests) {
-    fail(`${key} selected subtest count mismatch`);
-  }
-  if (!Array.isArray(results.results)) fail(`${key} WPT result artifact is invalid`);
-  if (results.results.length !== results.total) fail(`${key} result length mismatch`);
-  if (results.total !== manifest.expectedSelectedSubtests) fail(`${key} WPT total mismatch`);
-  if (results.pass !== results.total || results.fail !== 0 || retries !== 0) {
-    fail(
-      `${key} WPT is not strict-green: pass=${results.pass} total=${results.total} fail=${results.fail} retries=${retries}`,
-    );
+
+  const validated = validateResults(results, key);
+  if (identityBaseline && !sameSet(validated.identities, identityBaseline)) {
+    fail(`${key} WPT result identities do not match the matrix run`);
   }
+  identityBaseline ??= validated.identities;
+
+  const github = validateGithubEvidence(evidence, key, githubBaseline);
+  githubBaseline ??= github;
+
   if (evidence.pins?.libdatachannel !== manifest.libdatachannelCommit) {
     fail(`${key} evidence libdatachannel pin mismatch`);
   }
   if (evidence.pins?.wpt !== manifest.wptCommit) fail(`${key} evidence WPT pin mismatch`);
   if (
+    evidence.wpt?.expectedSelectedSubtests !== manifest.expectedSelectedSubtests ||
     evidence.wpt?.total !== manifest.expectedSelectedSubtests ||
-    evidence.wpt?.pass !== manifest.expectedSelectedSubtests ||
-    evidence.wpt?.fail !== 0 ||
-    evidence.wpt?.retries !== 0
+    evidence.wpt?.pass !== validated.pass ||
+    evidence.wpt?.fail !== validated.failCount ||
+    evidence.wpt?.retries !== validated.retries ||
+    evidence.wpt?.resultFiles !== validated.fileCount ||
+    evidence.wpt?.selectedSubtestsSha256 !== validated.selectedSubtestsSha256
   ) {
-    fail(`${key} evidence WPT summary is not strict-green`);
+    fail(`${key} evidence WPT summary does not match the result artifact`);
   }
 
   byMatrix.set(key, { os, major, evidencePath });
diff --git a/scripts/check-wpt-selection.js b/scripts/check-wpt-selection.js
index d7a51a8..9f6acb4 100644
--- a/scripts/check-wpt-selection.js
+++ b/scripts/check-wpt-selection.js
@@ -4,11 +4,13 @@ const fs = require("node:fs");
 const os = require("node:os");
 const path = require("node:path");
 const { spawnSync } = require("node:child_process");
+const { wptSelectionDigest } = require("./wpt-sharding");
 
 const root = path.resolve(__dirname, "..");
 const manifestPath = path.join(root, "wpt-manifest.json");
 const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
 const expectedTotal = manifest.expectedSelectedSubtests;
+const expectedDigest = manifest.selectedSubtestsSha256;
 
 function fail(message) {
   console.error(`WPT selection check failed: ${message}`);
@@ -18,6 +20,9 @@ function fail(message) {
 if (!Number.isInteger(expectedTotal) || expectedTotal < 1) {
   fail("wpt-manifest.json expectedSelectedSubtests must be a positive integer");
 }
+if (typeof expectedDigest !== "string" || !/^[a-f0-9]{64}$/.test(expectedDigest)) {
+  fail("wpt-manifest.json selectedSubtestsSha256 must be a SHA-256 digest");
+}
 
 const resultsPath = path.join(
   os.tmpdir(),
@@ -29,6 +34,7 @@ try {
     cwd: root,
     env: {
       ...process.env,
+      WPT_LIST_IDENTITIES: "1",
       WPT_LIST_TESTS: "1",
       WPT_WORKER_RESULTS: resultsPath,
     },
@@ -55,14 +61,34 @@ try {
   if (!Array.isArray(payload.tests)) {
     fail("list mode artifact does not contain a tests array");
   }
-  if (!payload.tests.every((test) => typeof test === "string" && test.length > 0)) {
-    fail("list mode artifact contains an invalid test name");
+  if (
+    !payload.tests.every(
+      (test) =>
+        test &&
+        typeof test.file === "string" &&
+        test.file.length > 0 &&
+        typeof test.name === "string" &&
+        test.name.length > 0,
+    )
+  ) {
+    fail("list mode artifact contains an invalid test identity");
   }
   if (payload.tests.length !== expectedTotal) {
     fail(`selected ${payload.tests.length} subtests, expected ${expectedTotal}`);
   }
 
-  console.log(`WPT selection verified: ${payload.tests.length} selected subtests`);
+  const identities = payload.tests.map((test) => `${test.file}\0${test.name}`).sort();
+  if (new Set(identities).size !== identities.length) {
+    fail("list mode artifact contains duplicate test identities");
+  }
+  const digest = wptSelectionDigest(identities);
+  if (digest !== expectedDigest) {
+    fail(`selected subtest digest ${digest} does not match manifest ${expectedDigest}`);
+  }
+
+  console.log(
+    `WPT selection verified: ${payload.tests.length} selected subtests, sha256=${digest}`,
+  );
 } finally {
   try {
     fs.unlinkSync(resultsPath);
diff --git a/scripts/run-wpt-sharded.js b/scripts/run-wpt-sharded.js
index 7d955f7..0d3a062 100644
--- a/scripts/run-wpt-sharded.js
+++ b/scripts/run-wpt-sharded.js
@@ -4,7 +4,7 @@ const fs = require("node:fs");
 const os = require("node:os");
 const path = require("node:path");
 const { spawn } = require("node:child_process");
-const { mergeWptSummaries } = require("./wpt-sharding");
+const { mergeWptSummaries, validateWptSelectionTotal } = require("./wpt-sharding");
 
 const root = path.resolve(__dirname, "..");
 const manifest = require("../wpt-manifest.json");
@@ -15,6 +15,9 @@ const shardCount = Number(
   shardArgument?.slice("--shards=".length) || process.env.WPT_SHARD_COUNT || 3,
 );
 const outputPath = path.resolve(process.env.WPT_RESULTS || path.join(root, "wpt-results.json"));
+const runnerPath = path.resolve(
+  process.env.WPT_SHARD_RUNNER || path.join(__dirname, "run-wpt-subset.js"),
+);
 const expectedTotal = process.env.WPT_EXPECTED_TOTAL
   ? Number(process.env.WPT_EXPECTED_TOTAL)
   : selectors.length === 0
@@ -40,21 +43,17 @@ function tempResultsPath(index) {
 
 function runShard(index, resultsPath) {
   return new Promise((resolve) => {
-    const child = spawn(
-      process.execPath,
-      ["--expose-gc", path.join(__dirname, "run-wpt-subset.js"), ...selectors],
-      {
-        cwd: root,
-        env: {
-          ...process.env,
-          WPT_LOG_PREFIX: `[shard ${index + 1}/${shardCount}] `,
-          WPT_SHARD_COUNT: String(shardCount),
-          WPT_SHARD_INDEX: String(index),
-          WPT_WORKER_RESULTS: resultsPath,
-        },
-        stdio: "inherit",
+    const child = spawn(process.execPath, ["--expose-gc", runnerPath, ...selectors], {
+      cwd: root,
+      env: {
+        ...process.env,
+        WPT_LOG_PREFIX: `[shard ${index + 1}/${shardCount}] `,
+        WPT_SHARD_COUNT: String(shardCount),
+        WPT_SHARD_INDEX: String(index),
+        WPT_WORKER_RESULTS: resultsPath,
       },
-    );
+      stdio: "inherit",
+    });
 
     child.on("error", (error) => resolve({ index, error, code: null, signal: null }));
     child.on("exit", (code, signal) => resolve({ index, error: null, code, signal }));
@@ -153,10 +152,10 @@ async function main() {
     console.log(`WPT shards: ${summary.pass}/${summary.total} passed across ${shardCount} shards`);
 
     if (summary.fail > 0) process.exitCode = 1;
-    if (expectedTotal !== null && summary.total !== expectedTotal) {
-      console.error(
-        `WPT sharded run selected ${summary.total} subtests, expected ${expectedTotal}`,
-      );
+    try {
+      validateWptSelectionTotal(summary.total, expectedTotal);
+    } catch (error) {
+      console.error(`WPT sharded run failed: ${error.message}`);
       process.exitCode = 1;
     }
   } finally {
diff --git a/scripts/run-wpt-subset.js b/scripts/run-wpt-subset.js
index 47d6e02..fd134c3 100644
--- a/scripts/run-wpt-subset.js
+++ b/scripts/run-wpt-subset.js
@@ -18,6 +18,7 @@ const workerDelayMs = Number(process.env.WPT_WORKER_DELAY_MS || 200);
 const workerRetries = Math.max(0, Number(process.env.WPT_WORKER_RETRIES || 0));
 const workerTimeoutMs = Number(process.env.WPT_WORKER_TIMEOUT_MS || 300000);
 const listTestsOnly = process.env.WPT_LIST_TESTS === "1";
+const listTestIdentities = !isWorker && process.env.WPT_LIST_IDENTITIES === "1";
 const shardCount = Number(process.env.WPT_SHARD_COUNT || 1);
 const shardIndex = Number(process.env.WPT_SHARD_INDEX || 0);
 const logPrefix = process.env.WPT_LOG_PREFIX || "";
@@ -31,7 +32,7 @@ if (!Number.isInteger(shardIndex) || shardIndex < 0 || shardIndex >= shardCount)
 
 if (!isWorker) ensureWpt({ quiet: true });
 
-const { assignWptSpecGroups, shardForTest } = require("./wpt-sharding");
+const { assignWptSpecGroups, shardForTest, validateWptSelectionTotal } = require("./wpt-sharding");
 
 const perTestIsolatedFiles = new Set([
   "webrtc/RTCPeerConnection-createDataChannel.html",
@@ -1065,11 +1066,19 @@ function specGroupKey(spec, index) {
 function listIsolatedTests(specsToRun) {
   const tests = [];
   for (let index = 0; index < specsToRun.length; ++index) {
-    tests.push(...runListWorker(specsToRun[index], index));
+    const spec = specsToRun[index];
+    const names = runListWorker(spec, index);
+    tests.push(...formatListedTests(spec, names));
   }
   return tests;
 }
 
+function formatListedTests(spec, names) {
+  if (!listTestIdentities) return names;
+  const file = `${spec.file}${spec.search || ""}`;
+  return names.map((name) => ({ file, name }));
+}
+
 function runListWorker(spec, index) {
   const discovery = discoverSpecTests(spec, index);
   if (!discovery.failure) return discovery.tests;
@@ -1233,6 +1242,7 @@ function runWorker(spec, index, extraEnv = {}) {
 function writeTestList(tests) {
   const outputFile = workerResultsFile || path.join(root, "wpt-results.json");
   fs.writeFileSync(outputFile, `${JSON.stringify({ tests }, null, 2)}\n`);
+  if (!isWorker) validateWptSelectionTotal(tests.length);
 }
 
 function writeSummary({ quiet = false } = {}) {
@@ -1256,6 +1266,14 @@ function writeSummary({ quiet = false } = {}) {
   }
 
   if (summary.fail > 0) process.exitCode = 1;
+  if (!isWorker && shardCount === 1) {
+    try {
+      validateWptSelectionTotal(summary.total);
+    } catch (error) {
+      console.error(`${logPrefix}WPT subset failed: ${error.message}`);
+      process.exitCode = 1;
+    }
+  }
 }
 
 (async () => {
@@ -1263,7 +1281,7 @@ function writeSummary({ quiet = false } = {}) {
     if (listTestsOnly) {
       const tests = [];
       for (const spec of specs) {
-        tests.push(...(await runFile(spec)));
+        tests.push(...formatListedTests(spec, await runFile(spec)));
       }
       writeTestList(tests);
       return;
diff --git a/scripts/wpt-sharding.js b/scripts/wpt-sharding.js
index dd148a9..7bdeb03 100644
--- a/scripts/wpt-sharding.js
+++ b/scripts/wpt-sharding.js
@@ -1,5 +1,7 @@
 "use strict";
 
+const crypto = require("node:crypto");
+
 function testIdentity(file, name) {
   if (typeof file !== "string" || file.length === 0) {
     throw new Error("WPT result file must be a non-empty string");
@@ -115,9 +117,31 @@ function mergeWptSummaries(summaries) {
   };
 }
 
+function validateWptSelectionTotal(total, expectedTotal = null) {
+  // Throws unless total is a positive integer that matches expectedTotal when given.
+  const isCount = Number.isInteger(total) && total >= 0;
+  if (!isCount) {
+    throw new Error("WPT selected subtest total must be a non-negative integer");
+  }
+  // An empty selection is never a valid run, whatever total was expected.
+  if (total === 0) throw new Error("WPT run selected no subtests");
+  if (expectedTotal === null || total === expectedTotal) return;
+  throw new Error(`WPT run selected ${total} subtests, expected ${expectedTotal}`);
+}
+
+function wptSelectionDigest(identities) {
+  const list = [...identities]; // validated before sort(), which would coerce non-strings
+  if (list.some((identity) => typeof identity !== "string" || identity.length === 0)) {
+    throw new Error("WPT selection identities must be non-empty strings");
+  }
+  return crypto.createHash("sha256").update(JSON.stringify(list.sort())).digest("hex");
+}
+
 module.exports = {
   assignWptSpecGroups,
   mergeWptSummaries,
   shardForTest,
   testIdentity,
+  validateWptSelectionTotal,
+  wptSelectionDigest,
 };
diff --git a/scripts/write-ci-evidence.js b/scripts/write-ci-evidence.js
index 77fd93c..437350f 100644
--- a/scripts/write-ci-evidence.js
+++ b/scripts/write-ci-evidence.js
@@ -2,6 +2,7 @@
 
 const fs = require("node:fs");
 const path = require("node:path");
+const { testIdentity, wptSelectionDigest } = require("./wpt-sharding");
 
 const root = path.resolve(__dirname, "..");
 const args = process.argv.slice(2);
@@ -46,15 +47,24 @@ if (results.results.length !== results.total) {
 const pass = results.results.filter((result) => result.status === "PASS").length;
 const failCount = results.results.filter((result) => result.status === "FAIL").length;
 const retries = results.results.filter((result) => Number(result.retries) > 0).length;
+const identities = results.results.map((result) => testIdentity(result.file, result.name));
+const identitySet = new Set(identities);
+const selectedSubtestsSha256 = wptSelectionDigest(identitySet);
 
 if (pass !== results.pass) fail(`PASS count ${pass} does not match summary ${results.pass}`);
 if (failCount !== results.fail)
   fail(`FAIL count ${failCount} does not match summary ${results.fail}`);
+if (identitySet.size !== identities.length) fail("WPT results contain duplicate test identities");
 if (manifest.expectedSelectedSubtests && results.total !== manifest.expectedSelectedSubtests) {
   fail(
     `result total ${results.total} does not match manifest ${manifest.expectedSelectedSubtests}`,
   );
 }
+if (selectedSubtestsSha256 !== manifest.selectedSubtestsSha256) {
+  fail(
+    `result identity digest ${selectedSubtestsSha256} does not match manifest ${manifest.selectedSubtestsSha256}`,
+  );
+}
 if (results.fail !== 0 || retries !== 0 || pass !== results.total) {
   fail(
     `strict WPT evidence requires all pass and no retries; pass=${pass} total=${results.total} retries=${retries}`,
@@ -98,6 +108,7 @@ const evidence = {
     fail: failCount,
     retries,
     resultFiles: new Set(results.results.map((result) => result.file)).size,
+    selectedSubtestsSha256,
   },
   gates: [
     "npm ci",
diff --git a/test/ci-evidence.test.js b/test/ci-evidence.test.js
index 2a63c24..2245e67 100644
--- a/test/ci-evidence.test.js
+++ b/test/ci-evidence.test.js
@@ -4,19 +4,41 @@ const fs = require("node:fs");
 const os = require("node:os");
 const path = require("node:path");
 const test = require("node:test");
+const { testIdentity, wptSelectionDigest } = require("../scripts/wpt-sharding");
 
 const root = path.resolve(__dirname, "..");
-const manifest = JSON.parse(fs.readFileSync(path.join(root, "wpt-manifest.json"), "utf8"));
+const repositoryManifest = JSON.parse(
+  fs.readFileSync(path.join(root, "wpt-manifest.json"), "utf8"),
+);
 const requiredOs = ["Linux", "macOS", "Windows"];
 const requiredNodeMajors = [20, 22, 24];
+const github = {
+  actions: true,
+  workflow: "Conformance",
+  job: "wpt-full",
+  runId: "123456",
+  runAttempt: "1",
+  repository: "mertushka/webrtc-node",
+  ref: "refs/heads/main",
+  sha: "0123456789abcdef0123456789abcdef01234567",
+};
+const selectedResults = Array.from({ length: 4 }, (_, index) => ({
+  file: "webrtc/fixture.html",
+  name: `fixture ${index + 1}`,
+  status: "PASS",
+  retries: 0,
+}));
+const selectedSubtestsSha256 = wptSelectionDigest(
+  selectedResults.map((result) => testIdentity(result.file, result.name)),
+);
+const manifest = {
+  ...repositoryManifest,
+  expectedSelectedSubtests: selectedResults.length,
+  selectedSubtestsSha256,
+};
 
 function makeResults() {
-  const results = Array.from({ length: manifest.expectedSelectedSubtests }, (_, index) => ({
-    file: "webrtc/fixture.html",
-    name: `fixture ${index + 1}`,
-    status: "PASS",
-    retries: 0,
-  }));
+  const results = selectedResults.map((result) => ({ ...result }));
   return {
     total: results.length,
     pass: results.length,
@@ -27,6 +49,8 @@ function makeResults() {
 
 function makeEvidence(osName, nodeMajor, results) {
   return {
+    source: "write-ci-evidence.js",
+    github: { ...github },
     runner: {
       os: osName,
       arch: "X64",
@@ -39,10 +63,13 @@ function makeEvidence(osName, nodeMajor, results) {
       wpt: manifest.wptCommit,
     },
     wpt: {
+      expectedSelectedSubtests: manifest.expectedSelectedSubtests,
       total: results.total,
       pass: results.pass,
       fail: results.fail,
       retries: 0,
+      resultFiles: new Set(results.results.map((result) => result.file)).size,
+      selectedSubtestsSha256,
     },
   };
 }
@@ -51,11 +78,13 @@ function writeJson(file, value) {
   fs.writeFileSync(file, `${JSON.stringify(value, null, 2)}\n`);
 }
 
-function writeMatrixArtifact(artifactsRoot, osName, nodeMajor) {
+function writeMatrixArtifact(artifactsRoot, osName, nodeMajor, mutate = () => {}) {
   const results = makeResults();
+  const evidence = makeEvidence(osName, nodeMajor, results);
+  mutate({ results, evidence });
   const artifactDir = path.join(artifactsRoot, `wpt-manifest-${osName}-node-${nodeMajor}`);
   fs.mkdirSync(artifactDir, { recursive: true });
-  writeJson(path.join(artifactDir, "ci-evidence.json"), makeEvidence(osName, nodeMajor, results));
+  writeJson(path.join(artifactDir, "ci-evidence.json"), evidence);
   writeJson(path.join(artifactDir, "wpt-results.json"), results);
   writeJson(path.join(artifactDir, "wpt-manifest.json"), manifest);
   fs.writeFileSync(path.join(artifactDir, "wpt-report.md"), "# WPT Conformance Report\n");
@@ -65,7 +94,13 @@ function writeMatrixArtifact(artifactsRoot, osName, nodeMajor) {
 function runEvidenceCheck(artifactsRoot) {
   return spawnSync(
     process.execPath,
-    [path.join("scripts", "check-ci-evidence.js"), "--artifacts", artifactsRoot],
+    [
+      path.join("scripts", "check-ci-evidence.js"),
+      "--artifacts",
+      artifactsRoot,
+      "--manifest",
+      path.join(artifactsRoot, "expected-wpt-manifest.json"),
+    ],
     {
       cwd: root,
       encoding: "utf8",
@@ -76,19 +111,29 @@ function runEvidenceCheck(artifactsRoot) {
 function withTempArtifacts(callback) {
   const dir = fs.mkdtempSync(path.join(os.tmpdir(), "webrtc-node-ci-evidence-"));
   try {
+    writeJson(path.join(dir, "expected-wpt-manifest.json"), manifest); // the file runEvidenceCheck passes as --manifest
     return callback(dir);
   } finally {
     fs.rmSync(dir, { recursive: true, force: true });
   }
 }
 
+function writeCompleteMatrix(artifactsRoot, mutateByKey = new Map()) {
+  for (const osName of requiredOs) {
+    for (const nodeMajor of requiredNodeMajors) {
+      writeMatrixArtifact(
+        artifactsRoot,
+        osName,
+        nodeMajor,
+        mutateByKey.get(`${osName}|${nodeMajor}`), // undefined for unlisted keys, so the default no-op mutate applies
+      );
+    }
+  }
+} // writes one artifact per required OS and Node major; mutateByKey keys look like "Linux|20"
+
 test("CI evidence verifier accepts a complete strict-green matrix", () => {
   withTempArtifacts((artifactsRoot) => {
-    for (const osName of requiredOs) {
-      for (const nodeMajor of requiredNodeMajors) {
-        writeMatrixArtifact(artifactsRoot, osName, nodeMajor);
-      }
-    }
+    writeCompleteMatrix(artifactsRoot);
 
     const result = runEvidenceCheck(artifactsRoot);
     assert.equal(result.status, 0, result.stderr || result.stdout);
@@ -96,6 +141,117 @@ test("CI evidence verifier accepts a complete strict-green matrix", () => {
   });
 });
 
+test("CI evidence verifier rejects forged strict-green result summaries", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(
+      artifactsRoot,
+      new Map([
+        [
+          "Linux|20",
+          ({ results }) => {
+            results.results[0].status = "FAIL"; // the pass counts in results and evidence still say every result passed
+          },
+        ],
+      ]),
+    );
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /WPT pass summary mismatch/);
+  });
+});
+
+test("CI evidence verifier rejects duplicate WPT result identities", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(
+      artifactsRoot,
+      new Map([
+        [
+          "Linux|20",
+          ({ results }) => {
+            results.results[1] = { ...results.results[0] }; // entry 1 now repeats entry 0's file and name
+          },
+        ],
+      ]),
+    );
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /contains duplicate WPT result/);
+  });
+});
+
+test("CI evidence verifier rejects inconsistent matrix result identities", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(
+      artifactsRoot,
+      new Map([
+        [
+          "Windows|24",
+          ({ results }) => {
+            results.results[0].name = "different selected subtest"; // count is unchanged, but this name was not part of the fixture digest
+          },
+        ],
+      ]),
+    );
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /WPT result identities do not match the manifest digest/);
+  });
+});
+
+test("CI evidence verifier rejects artifacts from different workflow runs", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(
+      artifactsRoot,
+      new Map([
+        [
+          "macOS|22",
+          ({ evidence }) => {
+            evidence.github.runId = "different-run"; // the other eight artifacts keep runId "123456"
+          },
+        ],
+      ]),
+    );
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /GitHub runId does not match the matrix run/);
+  });
+});
+
+test("CI evidence verifier rejects missing GitHub provenance", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(
+      artifactsRoot,
+      new Map([
+        [
+          "Linux|24",
+          ({ evidence }) => {
+            evidence.github = null; // removes the provenance object from this one artifact only
+          },
+        ],
+      ]),
+    );
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /is not GitHub Actions evidence/);
+  });
+});
+
+test("CI evidence verifier rejects empty required reports", () => {
+  withTempArtifacts((artifactsRoot) => {
+    writeCompleteMatrix(artifactsRoot); // each artifact gets a one-line wpt-report.md; one is truncated next
+    fs.writeFileSync(path.join(artifactsRoot, "wpt-manifest-Linux-node-20", "wpt-report.md"), "");
+
+    const result = runEvidenceCheck(artifactsRoot);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /wpt-report\.md is empty/);
+  });
+});
+
 test("CI evidence verifier rejects missing matrix jobs", () => {
   withTempArtifacts((artifactsRoot) => {
     for (const osName of requiredOs) {
diff --git a/test/fixtures/wpt-shard-runner.js b/test/fixtures/wpt-shard-runner.js
new file mode 100644
index 0000000..89141de
--- /dev/null
+++ b/test/fixtures/wpt-shard-runner.js
@@ -0,0 +1,20 @@
+"use strict";
+
+const fs = require("node:fs");
+
+const mode = process.argv[2]; // first CLI argument after the script path
+const shardIndex = Number(process.env.WPT_SHARD_INDEX); // NaN when the variable is unset
+const results =
+  mode === "single" && shardIndex === 0
+    ? [{ file: "webrtc/fixture.html", name: "selected subtest", status: "PASS" }]
+    : []; // every other mode or shard index reports nothing
+
+fs.writeFileSync(
+  process.env.WPT_WORKER_RESULTS, // output path from the environment; presumably set by run-wpt-sharded.js (confirm)
+  `${JSON.stringify({
+    total: results.length,
+    pass: results.length,
+    fail: 0,
+    results,
+  })}\n`,
+); // one JSON summary object followed by a newline
diff --git a/test/wpt-sharding.test.js b/test/wpt-sharding.test.js
index c97aeb7..1cda63e 100644
--- a/test/wpt-sharding.test.js
+++ b/test/wpt-sharding.test.js
@@ -1,8 +1,45 @@
 "use strict";
 
 const assert = require("node:assert/strict");
+const { spawnSync } = require("node:child_process");
+const fs = require("node:fs");
+const os = require("node:os");
+const path = require("node:path");
 const test = require("node:test");
-const { assignWptSpecGroups, mergeWptSummaries, shardForTest } = require("../scripts/wpt-sharding");
+const {
+  assignWptSpecGroups,
+  mergeWptSummaries,
+  shardForTest,
+  validateWptSelectionTotal,
+  wptSelectionDigest,
+} = require("../scripts/wpt-sharding");
+
+const root = path.resolve(__dirname, "..");
+const shardRunner = path.join(__dirname, "fixtures", "wpt-shard-runner.js"); // handed to the sharded runner through WPT_SHARD_RUNNER
+
+function runShardedFixture(mode) {
+  const output = path.join(
+    os.tmpdir(),
+    `webrtc-node-wpt-sharding-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}.json`,
+  ); // per-call temp path built from pid, timestamp, and a random suffix
+  try {
+    return spawnSync(
+      process.execPath,
+      [path.join(root, "scripts", "run-wpt-sharded.js"), "--shards=3", mode],
+      {
+        cwd: root,
+        env: {
+          ...process.env,
+          WPT_RESULTS: output,
+          WPT_SHARD_RUNNER: shardRunner, // presumably overrides the default shard runner; confirm in run-wpt-sharded.js
+        },
+        encoding: "utf8", // stdout and stderr come back as strings for assert.match
+      },
+    );
+  } finally {
+    fs.rmSync(output, { force: true }); // force: no error if the run never created the file
+  }
+} // returns the spawnSync result object (status, stdout, stderr)
 
 test("WPT shard assignment is deterministic and exhaustive", () => {
   const shardCount = 3;
@@ -84,3 +121,32 @@ test("WPT shard merger rejects overlapping results", () => {
     /duplicate WPT result/,
   );
 });
+
+test("WPT selection validation rejects an empty targeted run", () => {
+  assert.throws(() => validateWptSelectionTotal(0), /selected no subtests/); // called without an expected total
+  assert.doesNotThrow(() => validateWptSelectionTotal(1));
+});
+
+test("WPT selection validation enforces an expected total", () => {
+  assert.throws(() => validateWptSelectionTotal(3, 4), /selected 3 subtests, expected 4/); // arguments are (actual, expected)
+  assert.doesNotThrow(() => validateWptSelectionTotal(4, 4));
+});
+
+test("WPT selection digest is deterministic and identity-sensitive", () => {
+  const first = wptSelectionDigest(["b\0second", "a\0first"]); // same two identities as the next call, in reverse order
+  const second = wptSelectionDigest(["a\0first", "b\0second"]);
+  assert.equal(first, second);
+  assert.notEqual(first, wptSelectionDigest(["a\0first", "b\0changed"])); // one identity altered
+});
+
+test("sharded WPT runner rejects an empty merged selection", () => {
+  const result = runShardedFixture("empty"); // the fixture only emits a result in "single" mode
+  assert.notEqual(result.status, 0);
+  assert.match(result.stderr, /selected no subtests/);
+});
+
+test("sharded WPT runner permits empty shards when the merged selection is nonempty", () => {
+  const result = runShardedFixture("single"); // the fixture emits one result, and only when WPT_SHARD_INDEX is 0
+  assert.equal(result.status, 0, result.stderr || result.stdout);
+  assert.match(result.stdout, /WPT shards: 1\/1 passed across 3 shards/);
+});
diff --git a/wpt-manifest.json b/wpt-manifest.json
index 91f7fff..dacc417 100644
--- a/wpt-manifest.json
+++ b/wpt-manifest.json
@@ -3,6 +3,7 @@
   "libdatachannelCommit": "502ae351495792192ef21788e093b48e34ab393e",
   "wptCommit": "03169f171c797d0953b21d7388561b454fde0ad4",
   "expectedSelectedSubtests": 620,
+  "selectedSubtestsSha256": "967896888c9496738e4911ec1eff073a1d0fff701352729c3de2e51665e8bb97",
   "expectedPass": [
     "webrtc/RTCPeerConnection-constructor.html",
     "webrtc/RTCError.html?interop-2026",

From dcf463161c7a90088e74262da003d891de111eec Mon Sep 17 00:00:00 2001
From: mertushka <github@1351820.xyz>
Date: Sat, 13 Jun 2026 19:13:57 +0300
Subject: [PATCH 2/2] test: isolate CI evidence fixtures

---
 test/ci-evidence.test.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/ci-evidence.test.js b/test/ci-evidence.test.js
index 2245e67..de9f423 100644
--- a/test/ci-evidence.test.js
+++ b/test/ci-evidence.test.js
@@ -104,6 +104,10 @@ function runEvidenceCheck(artifactsRoot) {
     {
       cwd: root,
       encoding: "utf8",
+      env: {
+        ...process.env,
+        GITHUB_ACTIONS: "false",
+      },
     },
   );
 }