From d7102b050db730278b1c77e767c143b2b49079a2 Mon Sep 17 00:00:00 2001 From: mertushka Date: Sat, 13 Jun 2026 19:01:50 +0300 Subject: [PATCH 1/2] ci: harden WPT evidence validation --- docs/verification-audit.md | 141 +++++++++++----------- scripts/check-ci-evidence.js | 171 +++++++++++++++++++++++---- scripts/check-wpt-selection.js | 32 ++++- scripts/run-wpt-sharded.js | 37 +++--- scripts/run-wpt-subset.js | 24 +++- scripts/wpt-sharding.js | 24 ++++ scripts/write-ci-evidence.js | 11 ++ test/ci-evidence.test.js | 186 +++++++++++++++++++++++++++--- test/fixtures/wpt-shard-runner.js | 20 ++++ test/wpt-sharding.test.js | 68 ++++++++++- wpt-manifest.json | 1 + 11 files changed, 577 insertions(+), 138 deletions(-) create mode 100644 test/fixtures/wpt-shard-runner.js diff --git a/docs/verification-audit.md b/docs/verification-audit.md index 77ffa89..738cf6a 100644 --- a/docs/verification-audit.md +++ b/docs/verification-audit.md @@ -1,78 +1,67 @@ # Verification Audit -This audit maps the original goal and success criteria to current repository -evidence. It is intentionally conservative: stale local artifacts and targeted -checks do not prove the full Linux/macOS/Windows matrix. - -## Current Local Evidence - -Audited from `C:\Users\mertu\Desktop\webrtc-node` on 2026-05-28. - -| Gate | Evidence | -| --- | --- | -| Quality gate | `npm run check`, `npm run types:check`, `npm run api:check`, and `npm run pack:check` passed after Biome was added and the API surface checker was fixed for multiline TypeScript declarations. | -| Native integration | `npm run native:check` passed; it verifies Node-API/node-addon-api usage, TSFN dispatch, and the pinned libdatachannel commit. | -| Native build | `npm run build` passed on Windows with Visual Studio 2022 Build Tools. | -| Unit tests | `npm test` passed 20/20 Node `node:test` tests. The remote-close data-channel test also passed 20 serial stress iterations after one parallel-load timeout. | -| API surface | `npm run api:check` passed for 17 classes and 1 nonstandard member. | -| Types | `npm run types:check` passed. | -| WPT checkout | `npm run wpt:ensure` verified WPT commit `03169f171c797d0953b21d7388561b454fde0ad4`. | -| WPT selection | `npm run wpt:selection:check` verified 620 selected subtests. | -| Targeted WPT | The current Windows build passed `webrtc/RTCDataChannel-close.html`, the selected `RTCDataChannel-send.html` subset, and `RTCPeerConnection-ondatachannel.html` together as 46/46 subtests after the remote-close message-grace change. | -| Docker Linux smoke | `scripts/run-docker-linux-ci.ps1 -NodeImage node:20-bookworm -SkipWpt` passed build/unit/API/types/WPT-selection using the snapshot-backed Docker helper. Docker helpers now exist for PowerShell and POSIX shells, but remain optional local reproduction only. | -| Docker Linux targeted stress | Node 24 Docker passed 20 repeated runs of `webrtc/RTCDataChannel-close.html#Repeated open/send/echo/close datachannel works` with retries=0 after the remote-close message-grace change. | -| Superseded full WPT artifacts | Earlier Docker Linux Node 20 and Node 22 artifacts reached 620/620 with retries=0, and a later Node 24 full run reached 619/620 before the close-race fix. These predate the current close-path change and must not be treated as current full-suite evidence. | -| Local Docker CI | `scripts/run-docker-linux-ci.ps1` documents a reproducible Linux CI slice for Docker Desktop/WSL and rewrites Debian image apt sources to pinned snapshot URLs to reduce mirror instability. | - -## Requirement Status - -| Requirement | Current status | -| --- | --- | -| Phase 0 analysis before coding | Satisfied by `docs/phase0-analysis.md`, including upstream files reviewed, lifecycle/state/callback analysis, mismatch analysis, binding design, and WPT subset plan. | -| Data-channel-first WebRTC package | Implemented in `lib/index.js`, `src/native/addon.cc`, `index.d.ts`, and tested by local/WPT gates. | -| Node-API/N-API, no direct V8 addon API | Locally verified by `npm run native:check`; native source uses node-addon-api and `NODE_API_MODULE`. | -| Reproducible libdatachannel integration | Implemented in `CMakeLists.txt` with upstream commit `502ae351495792192ef21788e093b48e34ab393e`, including the OpenSSL DTLS and TLS input BIO synchronization fixes from upstream PRs #1584 and #1585; repository and commit are verified by `native:check`. | -| W3C-compatible JS facade | Covered by API/type checks, local tests, targeted WPT, and targeted Docker stress. Fresh full selected-WPT evidence is still pending after the latest close-path change. | -| RTCDataChannel selected WPT coverage | Targeted close/send/datachannel coverage is green locally; Node 24 Docker close-race stress is green. Fresh full 620-subtest evidence is still required. | -| RTCPeerConnection selected WPT coverage | Targeted datachannel and state coverage is green locally. Fresh full 620-subtest evidence is still required. | -| Safe callback dispatch | Locally verified by `native:check`; native callbacks dispatch through a thread-safe function. | -| Safe object lifetime | Covered by local tests and selected WPT close/GC cases; still needs continued stress coverage as the API expands. | -| TypeScript declarations | `index.d.ts` checked by `npm run types:check` and API surface verification. | -| CI builds/tests/WPT/report | Workflow exists in `.github/workflows/ci.yml` for Linux, macOS, and Windows on Node 20/22/24. Each matrix job writes `ci-evidence.json` and uploads it with WPT artifacts. A final `verify-ci-evidence` job downloads all matrix artifacts and runs `npm run ci:evidence:check`. | - -## Current Known Gap - -Fresh hosted selected-WPT evidence is still pending after the latest close-path -message-grace change. GitHub Actions is the authoritative conformance gate for -the public repository. Docker Linux runs are useful for local reproduction, but -they are no longer treated as release-blocking evidence because they cannot -prove macOS or Windows behavior. - -## Remaining Completion Evidence - -The active goal should not be marked complete until hosted CI or equivalent -authoritative logs prove the full matrix: - -- `ubuntu-latest` on Node 20, 22, and 24 -- `macos-latest` on Node 20, 22, and 24 -- `windows-latest` on Node 20, 22, and 24 - -The Quality job must pass `npm ci`, `check`, `types:check`, and `pack:check`. -Each matrix job must pass `npm ci`, `native:check`, `build`, `test`, `api:check`, -`types:check`, `wpt:ensure`, `wpt:selection:check`, `wpt:test:sharded`, -`wpt:check:strict`, `wpt:report`, and `ci:evidence`. - -After downloading all workflow artifacts into `ci-artifacts/`, run -`npm run ci:evidence:check`. The verifier requires `ci-evidence.json`, -`wpt-results.json`, `wpt-report.md`, `wpt-manifest.json`, and -`wpt-manifest.txt` for each OS/Node matrix entry and rejects missing jobs, -pin mismatches, WPT failures, and WPT retries. - -The GitHub Actions workflow also runs this verifier automatically in the -`verify-ci-evidence` job. That job uses `always()` so failed or incomplete -matrix runs are reported as missing or non-green evidence instead of leaving the -final conformance verifier skipped. - -Local Docker evidence is useful before pushing, but it only proves the Linux -Node image used by `scripts/run-docker-linux-ci.ps1`. It does not replace the -required macOS and Windows hosted matrix evidence. +This document records authoritative hosted evidence and the limits of local +validation. Generated artifacts and local checkouts are not committed. + +## Hosted Conformance Evidence + +GitHub Actions Conformance run `27392464467` completed successfully on +2026-06-12. It tested PR #11 head +`f4c9edf438291e432fcc024cea80198abfe08717`, which was squash-merged as +`e6a3cfca4beee3163806908c433807354f384c42`. + +The run completed: + +- the Quality job; +- Linux, macOS, and Windows on Node.js 20, 22, and 24; +- all 620 selected WPT subtests with strict retry rejection; +- the final `Verify CI evidence` job. + +The run is available at: +`https://github.com/mertushka/webrtc-node/actions/runs/27392464467`. + +This evidence applies to the tested commit. Later WebRTC semantic, native, +lifecycle, SDP, ICE, buffering, or event-timing changes require new applicable +conformance evidence. + +## Conformance Contract + +`wpt-manifest.json` is the selected compatibility contract. It pins: + +- the libdatachannel commit; +- the WPT commit; +- the expected selected subtest count; +- a SHA-256 digest of the sorted `{file, name}` test identities. + +`npm run wpt:selection:check` discovers the selected tests without executing +them and rejects count, identity, duplicate, or digest changes. Updating the +digest requires deliberate review of the changed selection. + +## Workflow Evidence + +`.github/workflows/conformance.yml` runs the full matrix separately from normal +push and pull-request CI. Each matrix job produces: + +- `ci-evidence.json`; +- `wpt-results.json`; +- `wpt-report.md`; +- `wpt-manifest.json`; +- `wpt-manifest.txt`. + +The final evidence verifier requires every OS and Node.js matrix entry, +recomputes WPT status and retry counts, rejects duplicate or inconsistent test +identities, verifies manifest equality, and binds all artifacts to one GitHub +workflow run and commit. + +After downloading artifacts into `ci-artifacts/`, maintainers can run: + +```sh +npm run ci:evidence:check +``` + +## Local Validation Boundary + +Focused local tests and Docker runs are useful for development and +reproduction. They do not replace hosted macOS and Windows evidence. The full +selected WPT suite is intentionally separate from ordinary local and push CI +because of its runtime cost. diff --git a/scripts/check-ci-evidence.js b/scripts/check-ci-evidence.js index 54507e2..8ee66a6 100644 --- a/scripts/check-ci-evidence.js +++ b/scripts/check-ci-evidence.js @@ -2,17 +2,32 @@ const fs = require("node:fs"); const path = require("node:path"); +const { isDeepStrictEqual } = require("node:util"); +const { wptSelectionDigest } = require("./wpt-sharding"); const root = path.resolve(__dirname, ".."); const args = process.argv.slice(2); const artifactsIndex = args.indexOf("--artifacts"); +const manifestIndex = args.indexOf("--manifest"); const artifactsRoot = artifactsIndex === -1 ? path.join(root, "ci-artifacts") : path.resolve(root, args[artifactsIndex + 1] || ""); -const manifestPath = path.join(root, "wpt-manifest.json"); +const manifestPath = + manifestIndex === -1 + ? path.join(root, "wpt-manifest.json") + : path.resolve(root, args[manifestIndex + 1] || ""); const requiredOs = ["Linux", "macOS", "Windows"]; const requiredNodeMajors = [20, 22, 24]; +const requiredGithubFields = ["workflow", "job", "runId", "runAttempt", "repository", "ref", "sha"]; +const currentGithub = { + workflow: process.env.GITHUB_WORKFLOW, + runId: process.env.GITHUB_RUN_ID, + runAttempt: process.env.GITHUB_RUN_ATTEMPT, + repository: process.env.GITHUB_REPOSITORY, + ref: process.env.GITHUB_REF, + sha: process.env.GITHUB_SHA, +}; function fail(message) { console.error(`CI evidence check failed: ${message}`); @@ -41,7 +56,116 @@ function nodeMajor(version) { return match ? Number(match[1]) : null; } +function validateResults(results, key) { + if (!Array.isArray(results.results)) fail(`${key} WPT result artifact is invalid`); + if (!Number.isInteger(results.total) || results.total < 1) { + fail(`${key} WPT total is invalid`); + } + if (!Number.isInteger(results.pass) || results.pass < 0) { + fail(`${key} WPT pass count is invalid`); + } + if (!Number.isInteger(results.fail) || results.fail < 0) { + fail(`${key} WPT fail count is invalid`); + } + if (results.results.length !== results.total) fail(`${key} result length mismatch`); + if (results.total !== manifest.expectedSelectedSubtests) fail(`${key} WPT total mismatch`); + + let pass = 0; + let failCount = 0; + let retries = 0; + const identities = new Set(); + const files = new Set(); + + for (const result of results.results) { + if ( + !result || + typeof result.file !== "string" || + result.file.length === 0 || + typeof result.name !== "string" || + result.name.length === 0 + ) { + fail(`${key} contains an invalid WPT result identity`); + } + + const identity = `${result.file}\0${result.name}`; + if (identities.has(identity)) + fail(`${key} contains duplicate WPT result ${result.file}#${result.name}`); + identities.add(identity); + files.add(result.file); + + if (result.status === "PASS") pass += 1; + else if (result.status === "FAIL") failCount += 1; + else fail(`${key} contains unexpected WPT status ${result.status}`); + + const retryCount = result.retries === undefined ? 0 : result.retries; + if (!Number.isInteger(retryCount) || retryCount < 0) { + fail(`${key} contains an invalid retry count for ${result.file}#${result.name}`); + } + if (retryCount > 0) retries += 1; + } + + if (results.pass !== pass) fail(`${key} WPT pass summary mismatch`); + if (results.fail !== failCount) fail(`${key} WPT fail summary mismatch`); + if (failCount !== 0 || pass !== results.total || retries !== 0) { + fail( + `${key} WPT is not strict-green: pass=${pass} total=${results.total} fail=${failCount} retries=${retries}`, + ); + } + + const selectedSubtestsSha256 = wptSelectionDigest(identities); + if (selectedSubtestsSha256 !== manifest.selectedSubtestsSha256) { + fail(`${key} WPT result identities do not match the manifest digest`); + } + + return { + pass, + failCount, + retries, + identities, + fileCount: files.size, + selectedSubtestsSha256, + }; +} + +function validateGithubEvidence(evidence, key, baseline) { + if (evidence.source !== "write-ci-evidence.js") fail(`${key} evidence source is invalid`); + if (evidence.github?.actions !== true) fail(`${key} is not GitHub Actions evidence`); + + for (const field of requiredGithubFields) { + if (typeof evidence.github[field] !== "string" || evidence.github[field].length === 0) { + fail(`${key} evidence GitHub ${field} is missing`); + } + } + + if (baseline) { + for (const field of requiredGithubFields) { + if (evidence.github[field] !== baseline[field]) { + fail(`${key} evidence GitHub ${field} does not match the matrix run`); + } + } + } + + if (process.env.GITHUB_ACTIONS === "true") { + for (const [field, expected] of Object.entries(currentGithub)) { + if (!expected || evidence.github[field] !== expected) { + fail(`${key} evidence GitHub ${field} does not match the current workflow run`); + } + } + } + + return evidence.github; +} + +function sameSet(left, right) { + if (left.size !== right.size) return false; + for (const value of left) { + if (!right.has(value)) return false; + } + return true; +} + if (artifactsIndex !== -1 && !args[artifactsIndex + 1]) fail("--artifacts requires a directory"); +if (manifestIndex !== -1 && !args[manifestIndex + 1]) fail("--manifest requires a file"); if (!fs.existsSync(artifactsRoot)) { fail(`${artifactsRoot} does not exist; download CI artifacts there or pass --artifacts `); } @@ -53,6 +177,8 @@ const evidenceFiles = walk(artifactsRoot); if (!evidenceFiles.length) fail(`no ci-evidence.json files found under ${artifactsRoot}`); const byMatrix = new Map(); +let githubBaseline = null; +let identityBaseline = null; for (const evidencePath of evidenceFiles) { const evidence = readJson(evidencePath); @@ -72,39 +198,42 @@ for (const evidencePath of evidenceFiles) { for (const requiredPath of [resultsPath, reportPath, artifactManifestPath, manifestTextPath]) { if (!fs.existsSync(requiredPath)) fail(`${path.relative(root, requiredPath)} is missing`); } + for (const requiredPath of [reportPath, manifestTextPath]) { + if (fs.statSync(requiredPath).size === 0) { + fail(`${path.relative(root, requiredPath)} is empty`); + } + } const artifactManifest = readJson(artifactManifestPath); const results = readJson(resultsPath); - const retries = Array.isArray(results.results) - ? results.results.filter((result) => Number(result.retries) > 0).length - : null; - if (artifactManifest.libdatachannelCommit !== manifest.libdatachannelCommit) { - fail(`${key} libdatachannel pin mismatch`); + if (!isDeepStrictEqual(artifactManifest, manifest)) { + fail(`${key} WPT manifest does not match the repository manifest`); } - if (artifactManifest.wptCommit !== manifest.wptCommit) fail(`${key} WPT pin mismatch`); - if (artifactManifest.expectedSelectedSubtests !== manifest.expectedSelectedSubtests) { - fail(`${key} selected subtest count mismatch`); - } - if (!Array.isArray(results.results)) fail(`${key} WPT result artifact is invalid`); - if (results.results.length !== results.total) fail(`${key} result length mismatch`); - if (results.total !== manifest.expectedSelectedSubtests) fail(`${key} WPT total mismatch`); - if (results.pass !== results.total || results.fail !== 0 || retries !== 0) { - fail( - `${key} WPT is not strict-green: pass=${results.pass} total=${results.total} fail=${results.fail} retries=${retries}`, - ); + + const validated = validateResults(results, key); + if (identityBaseline && !sameSet(validated.identities, identityBaseline)) { + fail(`${key} WPT result identities do not match the matrix run`); } + identityBaseline ??= validated.identities; + + const github = validateGithubEvidence(evidence, key, githubBaseline); + githubBaseline ??= github; + if (evidence.pins?.libdatachannel !== manifest.libdatachannelCommit) { fail(`${key} evidence libdatachannel pin mismatch`); } if (evidence.pins?.wpt !== manifest.wptCommit) fail(`${key} evidence WPT pin mismatch`); if ( + evidence.wpt?.expectedSelectedSubtests !== manifest.expectedSelectedSubtests || evidence.wpt?.total !== manifest.expectedSelectedSubtests || - evidence.wpt?.pass !== manifest.expectedSelectedSubtests || - evidence.wpt?.fail !== 0 || - evidence.wpt?.retries !== 0 + evidence.wpt?.pass !== validated.pass || + evidence.wpt?.fail !== validated.failCount || + evidence.wpt?.retries !== validated.retries || + evidence.wpt?.resultFiles !== validated.fileCount || + evidence.wpt?.selectedSubtestsSha256 !== validated.selectedSubtestsSha256 ) { - fail(`${key} evidence WPT summary is not strict-green`); + fail(`${key} evidence WPT summary does not match the result artifact`); } byMatrix.set(key, { os, major, evidencePath }); diff --git a/scripts/check-wpt-selection.js b/scripts/check-wpt-selection.js index d7a51a8..9f6acb4 100644 --- a/scripts/check-wpt-selection.js +++ b/scripts/check-wpt-selection.js @@ -4,11 +4,13 @@ const fs = require("node:fs"); const os = require("node:os"); const path = require("node:path"); const { spawnSync } = require("node:child_process"); +const { wptSelectionDigest } = require("./wpt-sharding"); const root = path.resolve(__dirname, ".."); const manifestPath = path.join(root, "wpt-manifest.json"); const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8")); const expectedTotal = manifest.expectedSelectedSubtests; +const expectedDigest = manifest.selectedSubtestsSha256; function fail(message) { console.error(`WPT selection check failed: ${message}`); @@ -18,6 +20,9 @@ function fail(message) { if (!Number.isInteger(expectedTotal) || expectedTotal < 1) { fail("wpt-manifest.json expectedSelectedSubtests must be a positive integer"); } +if (typeof expectedDigest !== "string" || !/^[a-f0-9]{64}$/.test(expectedDigest)) { + fail("wpt-manifest.json selectedSubtestsSha256 must be a SHA-256 digest"); +} const resultsPath = path.join( os.tmpdir(), @@ -29,6 +34,7 @@ try { cwd: root, env: { ...process.env, + WPT_LIST_IDENTITIES: "1", WPT_LIST_TESTS: "1", WPT_WORKER_RESULTS: resultsPath, }, @@ -55,14 +61,34 @@ try { if (!Array.isArray(payload.tests)) { fail("list mode artifact does not contain a tests array"); } - if (!payload.tests.every((test) => typeof test === "string" && test.length > 0)) { - fail("list mode artifact contains an invalid test name"); + if ( + !payload.tests.every( + (test) => + test && + typeof test.file === "string" && + test.file.length > 0 && + typeof test.name === "string" && + test.name.length > 0, + ) + ) { + fail("list mode artifact contains an invalid test identity"); } if (payload.tests.length !== expectedTotal) { fail(`selected ${payload.tests.length} subtests, expected ${expectedTotal}`); } - console.log(`WPT selection verified: ${payload.tests.length} selected subtests`); + const identities = payload.tests.map((test) => `${test.file}\0${test.name}`).sort(); + if (new Set(identities).size !== identities.length) { + fail("list mode artifact contains duplicate test identities"); + } + const digest = wptSelectionDigest(identities); + if (digest !== expectedDigest) { + fail(`selected subtest digest ${digest} does not match manifest ${expectedDigest}`); + } + + console.log( + `WPT selection verified: ${payload.tests.length} selected subtests, sha256=${digest}`, + ); } finally { try { fs.unlinkSync(resultsPath); diff --git a/scripts/run-wpt-sharded.js b/scripts/run-wpt-sharded.js index 7d955f7..0d3a062 100644 --- a/scripts/run-wpt-sharded.js +++ b/scripts/run-wpt-sharded.js @@ -4,7 +4,7 @@ const fs = require("node:fs"); const os = require("node:os"); const path = require("node:path"); const { spawn } = require("node:child_process"); -const { mergeWptSummaries } = require("./wpt-sharding"); +const { mergeWptSummaries, validateWptSelectionTotal } = require("./wpt-sharding"); const root = path.resolve(__dirname, ".."); const manifest = require("../wpt-manifest.json"); @@ -15,6 +15,9 @@ const shardCount = Number( shardArgument?.slice("--shards=".length) || process.env.WPT_SHARD_COUNT || 3, ); const outputPath = path.resolve(process.env.WPT_RESULTS || path.join(root, "wpt-results.json")); +const runnerPath = path.resolve( + process.env.WPT_SHARD_RUNNER || path.join(__dirname, "run-wpt-subset.js"), +); const expectedTotal = process.env.WPT_EXPECTED_TOTAL ? Number(process.env.WPT_EXPECTED_TOTAL) : selectors.length === 0 @@ -40,21 +43,17 @@ function tempResultsPath(index) { function runShard(index, resultsPath) { return new Promise((resolve) => { - const child = spawn( - process.execPath, - ["--expose-gc", path.join(__dirname, "run-wpt-subset.js"), ...selectors], - { - cwd: root, - env: { - ...process.env, - WPT_LOG_PREFIX: `[shard ${index + 1}/${shardCount}] `, - WPT_SHARD_COUNT: String(shardCount), - WPT_SHARD_INDEX: String(index), - WPT_WORKER_RESULTS: resultsPath, - }, - stdio: "inherit", + const child = spawn(process.execPath, ["--expose-gc", runnerPath, ...selectors], { + cwd: root, + env: { + ...process.env, + WPT_LOG_PREFIX: `[shard ${index + 1}/${shardCount}] `, + WPT_SHARD_COUNT: String(shardCount), + WPT_SHARD_INDEX: String(index), + WPT_WORKER_RESULTS: resultsPath, }, - ); + stdio: "inherit", + }); child.on("error", (error) => resolve({ index, error, code: null, signal: null })); child.on("exit", (code, signal) => resolve({ index, error: null, code, signal })); @@ -153,10 +152,10 @@ async function main() { console.log(`WPT shards: ${summary.pass}/${summary.total} passed across ${shardCount} shards`); if (summary.fail > 0) process.exitCode = 1; - if (expectedTotal !== null && summary.total !== expectedTotal) { - console.error( - `WPT sharded run selected ${summary.total} subtests, expected ${expectedTotal}`, - ); + try { + validateWptSelectionTotal(summary.total, expectedTotal); + } catch (error) { + console.error(`WPT sharded run failed: ${error.message}`); process.exitCode = 1; } } finally { diff --git a/scripts/run-wpt-subset.js b/scripts/run-wpt-subset.js index 47d6e02..fd134c3 100644 --- a/scripts/run-wpt-subset.js +++ b/scripts/run-wpt-subset.js @@ -18,6 +18,7 @@ const workerDelayMs = Number(process.env.WPT_WORKER_DELAY_MS || 200); const workerRetries = Math.max(0, Number(process.env.WPT_WORKER_RETRIES || 0)); const workerTimeoutMs = Number(process.env.WPT_WORKER_TIMEOUT_MS || 300000); const listTestsOnly = process.env.WPT_LIST_TESTS === "1"; +const listTestIdentities = !isWorker && process.env.WPT_LIST_IDENTITIES === "1"; const shardCount = Number(process.env.WPT_SHARD_COUNT || 1); const shardIndex = Number(process.env.WPT_SHARD_INDEX || 0); const logPrefix = process.env.WPT_LOG_PREFIX || ""; @@ -31,7 +32,7 @@ if (!Number.isInteger(shardIndex) || shardIndex < 0 || shardIndex >= shardCount) if (!isWorker) ensureWpt({ quiet: true }); -const { assignWptSpecGroups, shardForTest } = require("./wpt-sharding"); +const { assignWptSpecGroups, shardForTest, validateWptSelectionTotal } = require("./wpt-sharding"); const perTestIsolatedFiles = new Set([ "webrtc/RTCPeerConnection-createDataChannel.html", @@ -1065,11 +1066,19 @@ function specGroupKey(spec, index) { function listIsolatedTests(specsToRun) { const tests = []; for (let index = 0; index < specsToRun.length; ++index) { - tests.push(...runListWorker(specsToRun[index], index)); + const spec = specsToRun[index]; + const names = runListWorker(spec, index); + tests.push(...formatListedTests(spec, names)); } return tests; } +function formatListedTests(spec, names) { + if (!listTestIdentities) return names; + const file = `${spec.file}${spec.search || ""}`; + return names.map((name) => ({ file, name })); +} + function runListWorker(spec, index) { const discovery = discoverSpecTests(spec, index); if (!discovery.failure) return discovery.tests; @@ -1233,6 +1242,7 @@ function runWorker(spec, index, extraEnv = {}) { function writeTestList(tests) { const outputFile = workerResultsFile || path.join(root, "wpt-results.json"); fs.writeFileSync(outputFile, `${JSON.stringify({ tests }, null, 2)}\n`); + if (!isWorker) validateWptSelectionTotal(tests.length); } function writeSummary({ quiet = false } = {}) { @@ -1256,6 +1266,14 @@ function writeSummary({ quiet = false } = {}) { } if (summary.fail > 0) process.exitCode = 1; + if (!isWorker && shardCount === 1) { + try { + validateWptSelectionTotal(summary.total); + } catch (error) { + console.error(`${logPrefix}WPT subset failed: ${error.message}`); + process.exitCode = 1; + } + } } (async () => { @@ -1263,7 +1281,7 @@ function writeSummary({ quiet = false } = {}) { if (listTestsOnly) { const tests = []; for (const spec of specs) { - tests.push(...(await runFile(spec))); + tests.push(...formatListedTests(spec, await runFile(spec))); } writeTestList(tests); return; diff --git a/scripts/wpt-sharding.js b/scripts/wpt-sharding.js index dd148a9..7bdeb03 100644 --- a/scripts/wpt-sharding.js +++ b/scripts/wpt-sharding.js @@ -1,5 +1,7 @@ "use strict"; +const crypto = require("node:crypto"); + function testIdentity(file, name) { if (typeof file !== "string" || file.length === 0) { throw new Error("WPT result file must be a non-empty string"); @@ -115,9 +117,31 @@ function mergeWptSummaries(summaries) { }; } +function validateWptSelectionTotal(total, expectedTotal = null) { + if (!Number.isInteger(total) || total < 0) { + throw new Error("WPT selected subtest total must be a non-negative integer"); + } + if (total === 0) { + throw new Error("WPT run selected no subtests"); + } + if (expectedTotal !== null && total !== expectedTotal) { + throw new Error(`WPT run selected ${total} subtests, expected ${expectedTotal}`); + } +} + +function wptSelectionDigest(identities) { + const sorted = [...identities].sort(); + if (sorted.some((identity) => typeof identity !== "string" || identity.length === 0)) { + throw new Error("WPT selection identities must be non-empty strings"); + } + return crypto.createHash("sha256").update(JSON.stringify(sorted)).digest("hex"); +} + module.exports = { assignWptSpecGroups, mergeWptSummaries, shardForTest, testIdentity, + validateWptSelectionTotal, + wptSelectionDigest, }; diff --git a/scripts/write-ci-evidence.js b/scripts/write-ci-evidence.js index 77fd93c..437350f 100644 --- a/scripts/write-ci-evidence.js +++ b/scripts/write-ci-evidence.js @@ -2,6 +2,7 @@ const fs = require("node:fs"); const path = require("node:path"); +const { testIdentity, wptSelectionDigest } = require("./wpt-sharding"); const root = path.resolve(__dirname, ".."); const args = process.argv.slice(2); @@ -46,15 +47,24 @@ if (results.results.length !== results.total) { const pass = results.results.filter((result) => result.status === "PASS").length; const failCount = results.results.filter((result) => result.status === "FAIL").length; const retries = results.results.filter((result) => Number(result.retries) > 0).length; +const identities = results.results.map((result) => testIdentity(result.file, result.name)); +const identitySet = new Set(identities); +const selectedSubtestsSha256 = wptSelectionDigest(identitySet); if (pass !== results.pass) fail(`PASS count ${pass} does not match summary ${results.pass}`); if (failCount !== results.fail) fail(`FAIL count ${failCount} does not match summary ${results.fail}`); +if (identitySet.size !== identities.length) fail("WPT results contain duplicate test identities"); if (manifest.expectedSelectedSubtests && results.total !== manifest.expectedSelectedSubtests) { fail( `result total ${results.total} does not match manifest ${manifest.expectedSelectedSubtests}`, ); } +if (selectedSubtestsSha256 !== manifest.selectedSubtestsSha256) { + fail( + `result identity digest ${selectedSubtestsSha256} does not match manifest ${manifest.selectedSubtestsSha256}`, + ); +} if (results.fail !== 0 || retries !== 0 || pass !== results.total) { fail( `strict WPT evidence requires all pass and no retries; pass=${pass} total=${results.total} retries=${retries}`, @@ -98,6 +108,7 @@ const evidence = { fail: failCount, retries, resultFiles: new Set(results.results.map((result) => result.file)).size, + selectedSubtestsSha256, }, gates: [ "npm ci", diff --git a/test/ci-evidence.test.js b/test/ci-evidence.test.js index 2a63c24..2245e67 100644 --- a/test/ci-evidence.test.js +++ b/test/ci-evidence.test.js @@ -4,19 +4,41 @@ const fs = require("node:fs"); const os = require("node:os"); const path = require("node:path"); const test = require("node:test"); +const { testIdentity, wptSelectionDigest } = require("../scripts/wpt-sharding"); const root = path.resolve(__dirname, ".."); -const manifest = JSON.parse(fs.readFileSync(path.join(root, "wpt-manifest.json"), "utf8")); +const repositoryManifest = JSON.parse( + fs.readFileSync(path.join(root, "wpt-manifest.json"), "utf8"), +); const requiredOs = ["Linux", "macOS", "Windows"]; const requiredNodeMajors = [20, 22, 24]; +const github = { + actions: true, + workflow: "Conformance", + job: "wpt-full", + runId: "123456", + runAttempt: "1", + repository: "mertushka/webrtc-node", + ref: "refs/heads/main", + sha: "0123456789abcdef0123456789abcdef01234567", +}; +const selectedResults = Array.from({ length: 4 }, (_, index) => ({ + file: "webrtc/fixture.html", + name: `fixture ${index + 1}`, + status: "PASS", + retries: 0, +})); +const selectedSubtestsSha256 = wptSelectionDigest( + selectedResults.map((result) => testIdentity(result.file, result.name)), +); +const manifest = { + ...repositoryManifest, + expectedSelectedSubtests: selectedResults.length, + selectedSubtestsSha256, +}; function makeResults() { - const results = Array.from({ length: manifest.expectedSelectedSubtests }, (_, index) => ({ - file: "webrtc/fixture.html", - name: `fixture ${index + 1}`, - status: "PASS", - retries: 0, - })); + const results = selectedResults.map((result) => ({ ...result })); return { total: results.length, pass: results.length, @@ -27,6 +49,8 @@ function makeResults() { function makeEvidence(osName, nodeMajor, results) { return { + source: "write-ci-evidence.js", + github: { ...github }, runner: { os: osName, arch: "X64", @@ -39,10 +63,13 @@ function makeEvidence(osName, nodeMajor, results) { wpt: manifest.wptCommit, }, wpt: { + expectedSelectedSubtests: manifest.expectedSelectedSubtests, total: results.total, pass: results.pass, fail: results.fail, retries: 0, + resultFiles: new Set(results.results.map((result) => result.file)).size, + selectedSubtestsSha256, }, }; } @@ -51,11 +78,13 @@ function writeJson(file, value) { fs.writeFileSync(file, `${JSON.stringify(value, null, 2)}\n`); } -function writeMatrixArtifact(artifactsRoot, osName, nodeMajor) { +function writeMatrixArtifact(artifactsRoot, osName, nodeMajor, mutate = () => {}) { const results = makeResults(); + const evidence = makeEvidence(osName, nodeMajor, results); + mutate({ results, evidence }); const artifactDir = path.join(artifactsRoot, `wpt-manifest-${osName}-node-${nodeMajor}`); fs.mkdirSync(artifactDir, { recursive: true }); - writeJson(path.join(artifactDir, "ci-evidence.json"), makeEvidence(osName, nodeMajor, results)); + writeJson(path.join(artifactDir, "ci-evidence.json"), evidence); writeJson(path.join(artifactDir, "wpt-results.json"), results); writeJson(path.join(artifactDir, "wpt-manifest.json"), manifest); fs.writeFileSync(path.join(artifactDir, "wpt-report.md"), "# WPT Conformance Report\n"); @@ -65,7 +94,13 @@ function writeMatrixArtifact(artifactsRoot, osName, nodeMajor) { function runEvidenceCheck(artifactsRoot) { return spawnSync( process.execPath, - [path.join("scripts", "check-ci-evidence.js"), "--artifacts", artifactsRoot], + [ + path.join("scripts", "check-ci-evidence.js"), + "--artifacts", + artifactsRoot, + "--manifest", + path.join(artifactsRoot, "expected-wpt-manifest.json"), + ], { cwd: root, encoding: "utf8", @@ -76,19 +111,29 @@ function runEvidenceCheck(artifactsRoot) { function withTempArtifacts(callback) { const dir = fs.mkdtempSync(path.join(os.tmpdir(), "webrtc-node-ci-evidence-")); try { + writeJson(path.join(dir, "expected-wpt-manifest.json"), manifest); return callback(dir); } finally { fs.rmSync(dir, { recursive: true, force: true }); } } +function writeCompleteMatrix(artifactsRoot, mutateByKey = new Map()) { + for (const osName of requiredOs) { + for (const nodeMajor of requiredNodeMajors) { + writeMatrixArtifact( + artifactsRoot, + osName, + nodeMajor, + mutateByKey.get(`${osName}|${nodeMajor}`), + ); + } + } +} + test("CI evidence verifier accepts a complete strict-green matrix", () => { withTempArtifacts((artifactsRoot) => { - for (const osName of requiredOs) { - for (const nodeMajor of requiredNodeMajors) { - writeMatrixArtifact(artifactsRoot, osName, nodeMajor); - } - } + writeCompleteMatrix(artifactsRoot); const result = runEvidenceCheck(artifactsRoot); assert.equal(result.status, 0, result.stderr || result.stdout); @@ -96,6 +141,117 @@ test("CI evidence verifier accepts a complete strict-green matrix", () => { }); }); +test("CI evidence verifier rejects forged strict-green result summaries", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix( + artifactsRoot, + new Map([ + [ + "Linux|20", + ({ results }) => { + results.results[0].status = "FAIL"; + }, + ], + ]), + ); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /WPT pass summary mismatch/); + }); +}); + +test("CI evidence verifier rejects duplicate WPT result identities", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix( + artifactsRoot, + new Map([ + [ + "Linux|20", + ({ results }) => { + results.results[1] = { ...results.results[0] }; + }, + ], + ]), + ); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /contains duplicate WPT result/); + }); +}); + +test("CI evidence verifier rejects inconsistent matrix result identities", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix( + artifactsRoot, + new Map([ + [ + "Windows|24", + ({ results }) => { + results.results[0].name = "different selected subtest"; + }, + ], + ]), + ); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /WPT result identities do not match the manifest digest/); + }); +}); + +test("CI evidence verifier rejects artifacts from different workflow runs", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix( + artifactsRoot, + new Map([ + [ + "macOS|22", + ({ evidence }) => { + evidence.github.runId = "different-run"; + }, + ], + ]), + ); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /GitHub runId does not match the matrix run/); + }); +}); + +test("CI evidence verifier rejects missing GitHub provenance", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix( + artifactsRoot, + new Map([ + [ + "Linux|24", + ({ evidence }) => { + evidence.github = null; + }, + ], + ]), + ); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /is not GitHub Actions evidence/); + }); +}); + +test("CI evidence verifier rejects empty required reports", () => { + withTempArtifacts((artifactsRoot) => { + writeCompleteMatrix(artifactsRoot); + fs.writeFileSync(path.join(artifactsRoot, "wpt-manifest-Linux-node-20", "wpt-report.md"), ""); + + const result = runEvidenceCheck(artifactsRoot); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /wpt-report\.md is empty/); + }); +}); + test("CI evidence verifier rejects missing matrix jobs", () => { withTempArtifacts((artifactsRoot) => { for (const osName of requiredOs) { diff --git a/test/fixtures/wpt-shard-runner.js b/test/fixtures/wpt-shard-runner.js new file mode 100644 index 0000000..89141de --- /dev/null +++ b/test/fixtures/wpt-shard-runner.js @@ -0,0 +1,20 @@ +"use strict"; + +const fs = require("node:fs"); + +const mode = process.argv[2]; +const shardIndex = Number(process.env.WPT_SHARD_INDEX); +const results = + mode === "single" && shardIndex === 0 + ? [{ file: "webrtc/fixture.html", name: "selected subtest", status: "PASS" }] + : []; + +fs.writeFileSync( + process.env.WPT_WORKER_RESULTS, + `${JSON.stringify({ + total: results.length, + pass: results.length, + fail: 0, + results, + })}\n`, +); diff --git a/test/wpt-sharding.test.js b/test/wpt-sharding.test.js index c97aeb7..1cda63e 100644 --- a/test/wpt-sharding.test.js +++ b/test/wpt-sharding.test.js @@ -1,8 +1,45 @@ "use strict"; const assert = require("node:assert/strict"); +const { spawnSync } = require("node:child_process"); +const fs = require("node:fs"); +const os = require("node:os"); +const path = require("node:path"); const test = require("node:test"); -const { assignWptSpecGroups, mergeWptSummaries, shardForTest } = require("../scripts/wpt-sharding"); +const { + assignWptSpecGroups, + mergeWptSummaries, + shardForTest, + validateWptSelectionTotal, + wptSelectionDigest, +} = require("../scripts/wpt-sharding"); + +const root = path.resolve(__dirname, ".."); +const shardRunner = path.join(__dirname, "fixtures", "wpt-shard-runner.js"); + +function runShardedFixture(mode) { + const output = path.join( + os.tmpdir(), + `webrtc-node-wpt-sharding-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}.json`, + ); + try { + return spawnSync( + process.execPath, + [path.join(root, "scripts", "run-wpt-sharded.js"), "--shards=3", mode], + { + cwd: root, + env: { + ...process.env, + WPT_RESULTS: output, + WPT_SHARD_RUNNER: shardRunner, + }, + encoding: "utf8", + }, + ); + } finally { + fs.rmSync(output, { force: true }); + } +} test("WPT shard assignment is deterministic and exhaustive", () => { const shardCount = 3; @@ -84,3 +121,32 @@ test("WPT shard merger rejects overlapping results", () => { /duplicate WPT result/, ); }); + +test("WPT selection validation rejects an empty targeted run", () => { + assert.throws(() => validateWptSelectionTotal(0), /selected no subtests/); + assert.doesNotThrow(() => validateWptSelectionTotal(1)); +}); + +test("WPT selection validation enforces an expected total", () => { + assert.throws(() => validateWptSelectionTotal(3, 4), /selected 3 subtests, expected 4/); + assert.doesNotThrow(() => validateWptSelectionTotal(4, 4)); +}); + +test("WPT selection digest is deterministic and identity-sensitive", () => { + const first = wptSelectionDigest(["b\0second", "a\0first"]); + const second = wptSelectionDigest(["a\0first", "b\0second"]); + assert.equal(first, second); + assert.notEqual(first, wptSelectionDigest(["a\0first", "b\0changed"])); +}); + +test("sharded WPT runner rejects an empty merged selection", () => { + const result = runShardedFixture("empty"); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /selected no subtests/); +}); + +test("sharded WPT runner permits empty shards when the merged selection is nonempty", () => { + const result = runShardedFixture("single"); + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.match(result.stdout, /WPT shards: 1\/1 passed across 3 shards/); +}); diff --git a/wpt-manifest.json b/wpt-manifest.json index 91f7fff..dacc417 100644 --- a/wpt-manifest.json +++ b/wpt-manifest.json @@ -3,6 +3,7 @@ "libdatachannelCommit": "502ae351495792192ef21788e093b48e34ab393e", "wptCommit": "03169f171c797d0953b21d7388561b454fde0ad4", "expectedSelectedSubtests": 620, + "selectedSubtestsSha256": "967896888c9496738e4911ec1eff073a1d0fff701352729c3de2e51665e8bb97", "expectedPass": [ "webrtc/RTCPeerConnection-constructor.html", "webrtc/RTCError.html?interop-2026", From dcf463161c7a90088e74262da003d891de111eec Mon Sep 17 00:00:00 2001 From: mertushka Date: Sat, 13 Jun 2026 19:13:57 +0300 Subject: [PATCH 2/2] test: isolate CI evidence fixtures --- test/ci-evidence.test.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/ci-evidence.test.js b/test/ci-evidence.test.js index 2245e67..de9f423 100644 --- a/test/ci-evidence.test.js +++ b/test/ci-evidence.test.js @@ -104,6 +104,10 @@ function runEvidenceCheck(artifactsRoot) { { cwd: root, encoding: "utf8", + env: { + ...process.env, + GITHUB_ACTIONS: "false", + }, }, ); }