From 4262e2e6a51c5738f5123a3615a25fc098c2f40c Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Mon, 15 Jun 2026 22:28:30 +0200 Subject: [PATCH 1/3] ci(test): committed gap-suite runner + informational smoke-parity gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add scripts/run_gap_tests.sh — a committed runner for the gap suite (test-files/test_gap_*.ts, 235 tests), replacing the out-of-repo /tmp/run_gap_tests.sh that CLAUDE.md pointed at. It is a thin wrapper over run_parity_tests.sh --filter test_gap_, so it reuses the one canonical normalizer, skip-list, and output cap (seed of the single-normalizer work), and gates on "no NEW failures vs known_failures.json" rather than run_parity_tests.sh's loose <80%-aggregate exit. Wire it into a new smoke-parity CI job. The gap suite is the highest-signal-per-second test Perry has and had no committed runner and no PR gate — a single-feature regression could merge green. The job is INFORMATIONAL for now (continue-on-error): the first runs surface which gap tests fail on the Linux image under node 26 so they can be triaged into known_failures.json; once curated + green, a follow-up drops continue-on-error and branch protection makes it required. Uses node 26 (the node-suite baseline oracle) and only allow-listed actions (no sccache), so it is not blocked by the org action allow-list. Also fix the stale CLAUDE.md parity-status line (28 -> 235 tests, /tmp -> scripts/run_gap_tests.sh). No source changes; no existing job altered. 
--- .github/workflows/test.yml | 41 ++++++++++++++++++++++ CLAUDE.md | 2 +- scripts/run_gap_tests.sh | 72 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100755 scripts/run_gap_tests.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 19601046d0..bd383dcfa7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -258,6 +258,47 @@ jobs: find target/debug/deps -maxdepth 1 -type f -perm -111 ! -name '*.so' -delete done + # --------------------------------------------------------------------------- + # Gap-suite smoke (roadmap I-01) + # + # AOT-compiles every test-files/test_gap_*.ts and diffs it byte-for-byte + # against `node --experimental-strip-types` via scripts/run_gap_tests.sh + # (a thin wrapper over run_parity_tests.sh --filter test_gap_, so it reuses + # the one normalizer + skip-list). The gap suite is the highest-signal- + # per-second test Perry has, and until now it had no committed runner and no + # CI gate — a contributor who regressed a single feature got a green build. + # + # INFORMATIONAL for now (continue-on-error): the first runs surface which gap + # tests currently fail on the Linux CI image (and under node 26) so they can + # be triaged into test-parity/known_failures.json. Once curated + reliably + # green, drop `continue-on-error` and add `smoke-parity` to the branch- + # protection required checks (staged rollout: informational -> required). + # Node 26 matches the node-suite baseline oracle (vs the legacy parity job's + # node 22), which is the version the regression guard will use too. 
+ # --------------------------------------------------------------------------- + smoke-parity: + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@v6 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + shared-key: "${{ runner.os }}-perry" + save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: '26' + + - name: Run gap suite + run: ./scripts/run_gap_tests.sh + # --------------------------------------------------------------------------- # GC write-barrier stress (optional / non-blocking) # diff --git a/CLAUDE.md b/CLAUDE.md index 088ae08c85..3668bbf16b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,7 +13,7 @@ Perry is a native TypeScript compiler written in Rust that compiles TypeScript s ## TypeScript Parity Status -Tracked via the gap test suite (`test-files/test_gap_*.ts`, 28 tests). Compared byte-for-byte against `node --experimental-strip-types`. Run via `/tmp/run_gap_tests.sh` after `cargo build --release -p perry-runtime -p perry-stdlib -p perry`. +Tracked via the gap test suite (`test-files/test_gap_*.ts`, 235 tests). Compared byte-for-byte against `node --experimental-strip-types`. Run via `./scripts/run_gap_tests.sh` (a thin wrapper over `run_parity_tests.sh --filter test_gap_` that builds the compiler itself and gates on no new untriaged failures). **Last full sweep:** run `./run_parity_tests.sh` for the current snapshot. The umbrella tracker is #793 (Node.js + TypeScript compatibility roadmap); the previously-cited #447–#452 batch closed on 2026-05-04. Currently-open trackers worth knowing about: diff --git a/scripts/run_gap_tests.sh b/scripts/run_gap_tests.sh new file mode 100755 index 0000000000..51b0e219f7 --- /dev/null +++ b/scripts/run_gap_tests.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Committed runner for the Perry "gap" suite. 
+# +# Every test-files/test_gap_*.ts is AOT-compiled by Perry and diffed +# byte-for-byte against `node --experimental-strip-types`. This is a thin +# wrapper over run_parity_tests.sh --filter test_gap_ so it reuses the ONE +# canonical normalizer, the skip-list, the per-test output cap, and the JSON +# report (this shared-normalizer reuse is the seed of roadmap initiative I-14). +# +# Replaces the out-of-repo /tmp/run_gap_tests.sh that CLAUDE.md used to point +# at — the gap suite is the highest-signal-per-second test Perry has and was +# previously dark in CI. +# +# Regression-gate semantics: exits non-zero if any gap test fails parity or +# compilation and is NOT already triaged in test-parity/known_failures.json. +# (run_parity_tests.sh's own exit code only trips below 80% AGGREGATE parity, +# which is far too loose to catch a single-feature regression — exactly the +# "a module silently went to 0 behind a green build" class.) +# +# Requirements: +# - a Rust toolchain (the wrapped run_parity_tests.sh builds target/release/perry) +# - node with --experimental-strip-types +# - jq +# +# Usage: scripts/run_gap_tests.sh +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$ROOT" + +echo "==> Running gap suite (test-files/test_gap_*.ts) via run_parity_tests.sh --filter test_gap_" +# run_parity_tests.sh exits 1 when AGGREGATE parity < 80%. We gate on "no NEW +# untriaged failures" instead (below), so don't let its aggregate exit abort us. +set +e +./run_parity_tests.sh --filter test_gap_ +set -e + +REPORT="test-parity/reports/latest.json" +KNOWN="test-parity/known_failures.json" +if [[ ! -f "$REPORT" ]]; then + echo "ERROR: parity report not found at $REPORT (did run_parity_tests.sh run?)" >&2 + exit 2 +fi + +# Every failure in this report is a gap test (we filtered on test_gap_), so the +# whole failure set is the gap failure set. 
Drop empty entries (run_parity_tests.sh +# emits compile: [""] when there are zero compile failures). +jq -r '(.failures.parity // []) + (.failures.compile // []) | .[] | select(. != "")' \ + "$REPORT" | sort -u > /tmp/gap_all_fails.txt + +if [[ -f "$KNOWN" ]]; then + # known_failures.json is keyed by test name; skip the audit-metadata _schema key. + jq -r 'keys[] | select(. != "_schema")' "$KNOWN" | sort -u > /tmp/gap_known.txt +else + : > /tmp/gap_known.txt +fi + +comm -23 /tmp/gap_all_fails.txt /tmp/gap_known.txt > /tmp/gap_new.txt +TOTAL=$(wc -l < /tmp/gap_all_fails.txt | tr -d ' ') + +if [[ -s /tmp/gap_new.txt ]]; then + echo "" >&2 + echo "NEW gap failures (not triaged in test-parity/known_failures.json):" >&2 + sed 's/^/ - /' /tmp/gap_new.txt >&2 + echo "" >&2 + echo "Fix the regression, or — if the failure is intentional/known — add a" >&2 + echo "triaged entry to test-parity/known_failures.json (category + reason)." >&2 + exit 1 +fi + +echo "All ${TOTAL} gap failures (if any) are known/triaged. Gap gate OK." From 101ce8d35097c4c87ddb9d6e99a9fc822c1dbceb Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Tue, 16 Jun 2026 12:25:21 +0200 Subject: [PATCH 2/3] ci(test): scope gap-runner temp files to a mktemp dir (CodeRabbit) run_gap_tests.sh wrote its failure lists to fixed /tmp/gap_*.txt names, so concurrent runs (a second PR, local + CI on the same box, or the upcoming node-suite-guard alongside) could clobber each other and produce a false gate result. Allocate a run-scoped dir with mktemp -d and rm -rf it on EXIT. --- scripts/run_gap_tests.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scripts/run_gap_tests.sh b/scripts/run_gap_tests.sh index 51b0e219f7..d2edbcc7f1 100755 --- a/scripts/run_gap_tests.sh +++ b/scripts/run_gap_tests.sh @@ -29,6 +29,12 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" cd "$ROOT" +# Run-scoped temp dir — fixed /tmp names would let concurrent runs (a second +# PR, local + CI on the same box, or the future node-suite-guard alongside) +# clobber each other's failure lists and produce a false gate result. +WORK="$(mktemp -d "${TMPDIR:-/tmp}/perry-gap.XXXXXX")" +trap 'rm -rf "$WORK"' EXIT + echo "==> Running gap suite (test-files/test_gap_*.ts) via run_parity_tests.sh --filter test_gap_" # run_parity_tests.sh exits 1 when AGGREGATE parity < 80%. We gate on "no NEW # untriaged failures" instead (below), so don't let its aggregate exit abort us. @@ -47,22 +53,22 @@ fi # whole failure set is the gap failure set. Drop empty entries (run_parity_tests.sh # emits compile: [""] when there are zero compile failures). jq -r '(.failures.parity // []) + (.failures.compile // []) | .[] | select(. != "")' \ - "$REPORT" | sort -u > /tmp/gap_all_fails.txt + "$REPORT" | sort -u > "$WORK/all_fails.txt" if [[ -f "$KNOWN" ]]; then # known_failures.json is keyed by test name; skip the audit-metadata _schema key. - jq -r 'keys[] | select(. != "_schema")' "$KNOWN" | sort -u > /tmp/gap_known.txt + jq -r 'keys[] | select(. != "_schema")' "$KNOWN" | sort -u > "$WORK/known.txt" else - : > /tmp/gap_known.txt + : > "$WORK/known.txt" fi -comm -23 /tmp/gap_all_fails.txt /tmp/gap_known.txt > /tmp/gap_new.txt -TOTAL=$(wc -l < /tmp/gap_all_fails.txt | tr -d ' ') +comm -23 "$WORK/all_fails.txt" "$WORK/known.txt" > "$WORK/new.txt" +TOTAL=$(wc -l < "$WORK/all_fails.txt" | tr -d ' ') -if [[ -s /tmp/gap_new.txt ]]; then +if [[ -s "$WORK/new.txt" ]]; then echo "" >&2 echo "NEW gap failures (not triaged in test-parity/known_failures.json):" >&2 - sed 's/^/ - /' /tmp/gap_new.txt >&2 + sed 's/^/ - /' "$WORK/new.txt" >&2 echo "" >&2 echo "Fix the regression, or — if the failure is intentional/known — add a" >&2 echo "triaged entry to test-parity/known_failures.json (category + reason)." 
>&2 From 6d5a488ab5e9af8f697349cbfc74cd3f37f34dcf Mon Sep 17 00:00:00 2001 From: TheHypnoo Date: Tue, 16 Jun 2026 21:25:38 +0200 Subject: [PATCH 3/3] ci(smoke-parity): run the gap suite under node 22 + persist-credentials: false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes to the informational gap gate: - Oracle node 26 -> 22. The gap suite is byte-diffed live against `node --experimental-strip-types`, and it is already green under node 22 (the legacy parity job). Node 26 introduced version-sensitive diffs (v8 / perf_hooks / process internals — e.g. test_gap_node_v8, test_gap_v8_2, test_gap_perfhooks, test_gap_process_*) that are not Perry regressions and would pollute the triage list. The node-suite regression guard keeps node 26 for its frozen pass-count baseline — a separate mechanism. - persist-credentials: false on checkout. The job is read-only (build + test), so it should not leave the GITHUB_TOKEN in the local git config (CodeRabbit / least privilege). --- .github/workflows/test.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc3e0503b9..ffe7c85f1e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -301,12 +301,15 @@ jobs: # CI gate — a contributor who regressed a single feature got a green build. # # INFORMATIONAL for now (continue-on-error): the first runs surface which gap - # tests currently fail on the Linux CI image (and under node 26) so they can - # be triaged into test-parity/known_failures.json. Once curated + reliably - # green, drop `continue-on-error` and add `smoke-parity` to the branch- - # protection required checks (staged rollout: informational -> required). - # Node 26 matches the node-suite baseline oracle (vs the legacy parity job's - # node 22), which is the version the regression guard will use too. 
+ # tests currently fail on the Linux CI image so they can be triaged into + # test-parity/known_failures.json. Once curated + reliably green, drop + # `continue-on-error` and add `smoke-parity` to the branch-protection + # required checks (staged rollout: informational -> required). + # Oracle = node 22, matching the legacy parity job the gap suite is already + # green under. Node-version-sensitive output (v8/perf_hooks/process internals) + # would otherwise diff against a newer Node and mask real Perry regressions. + # (The node-suite regression guard uses node 26 against its frozen baseline — + # a different mechanism.) # --------------------------------------------------------------------------- smoke-parity: continue-on-error: true @@ -314,6 +317,10 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v6 + with: + # Read-only job (build + test); keep the GITHUB_TOKEN out of the + # local git config (least privilege — OWASP / CodeRabbit). + persist-credentials: false - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable @@ -326,7 +333,9 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v6 with: - node-version: '26' + # Match the legacy parity job the gap suite is already green under; + # node 26 introduces version-sensitive diffs that aren't Perry bugs. + node-version: '22' - name: Run gap suite run: ./scripts/run_gap_tests.sh