diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5f167f855..ffe7c85f1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -290,6 +290,56 @@ jobs: find target/debug/deps -maxdepth 1 -type f -perm -111 ! -name '*.so' -delete done + # --------------------------------------------------------------------------- + # Gap-suite smoke (roadmap I-01) + # + # AOT-compiles every test-files/test_gap_*.ts and diffs it byte-for-byte + # against `node --experimental-strip-types` via scripts/run_gap_tests.sh + # (a thin wrapper over run_parity_tests.sh --filter test_gap_, so it reuses + # the one normalizer + skip-list). The gap suite is the highest-signal- + # per-second test Perry has, and until now it had no committed runner and no + # CI gate — a contributor who regressed a single feature got a green build. + # + # INFORMATIONAL for now (continue-on-error): the first runs surface which gap + # tests currently fail on the Linux CI image so they can be triaged into + # test-parity/known_failures.json. Once curated + reliably green, drop + # `continue-on-error` and add `smoke-parity` to the branch-protection + # required checks (staged rollout: informational -> required). + # Oracle = node 22, matching the legacy parity job the gap suite is already + # green under. Node-version-sensitive output (v8/perf_hooks/process internals) + # would otherwise diff against a newer Node and mask real Perry regressions. + # (The node-suite regression guard uses node 26 against its frozen baseline — + # a different mechanism.) + # --------------------------------------------------------------------------- + smoke-parity: + continue-on-error: true + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@v6 + with: + # Read-only job (build + test); keep the GITHUB_TOKEN out of the + # local git config (least privilege — OWASP / CodeRabbit). + persist-credentials: false + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + shared-key: "${{ runner.os }}-perry" + save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + # Match the legacy parity job the gap suite is already green under; + # node 26 introduces version-sensitive diffs that aren't Perry bugs. + node-version: '22' + + - name: Run gap suite + run: ./scripts/run_gap_tests.sh + # --------------------------------------------------------------------------- # GC write-barrier stress (optional / non-blocking) # diff --git a/CLAUDE.md b/CLAUDE.md index f97ea8625..99a0d6029 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,7 +13,7 @@ Perry is a native TypeScript compiler written in Rust that compiles TypeScript s ## TypeScript Parity Status -Tracked via the gap test suite (`test-files/test_gap_*.ts`, 28 tests). Compared byte-for-byte against `node --experimental-strip-types`. Run via `/tmp/run_gap_tests.sh` after `cargo build --release -p perry-runtime -p perry-stdlib -p perry`. +Tracked via the gap test suite (`test-files/test_gap_*.ts`, 235 tests). Compared byte-for-byte against `node --experimental-strip-types`. Run via `./scripts/run_gap_tests.sh` (a thin wrapper over `run_parity_tests.sh --filter test_gap_` that builds the compiler itself and gates on no new untriaged failures). **Last full sweep:** run `./run_parity_tests.sh` for the current snapshot. The umbrella tracker is #793 (Node.js + TypeScript compatibility roadmap); the previously-cited #447–#452 batch closed on 2026-05-04. Currently-open trackers worth knowing about: diff --git a/scripts/run_gap_tests.sh b/scripts/run_gap_tests.sh new file mode 100755 index 000000000..d2edbcc7f --- /dev/null +++ b/scripts/run_gap_tests.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Committed runner for the Perry "gap" suite. +# +# Every test-files/test_gap_*.ts is AOT-compiled by Perry and diffed +# byte-for-byte against `node --experimental-strip-types`. This is a thin +# wrapper over run_parity_tests.sh --filter test_gap_ so it reuses the ONE +# canonical normalizer, the skip-list, the per-test output cap, and the JSON +# report (this shared-normalizer reuse is the seed of roadmap initiative I-14). +# +# Replaces the out-of-repo /tmp/run_gap_tests.sh that CLAUDE.md used to point +# at — the gap suite is the highest-signal-per-second test Perry has and was +# previously dark in CI. +# +# Regression-gate semantics: exits non-zero if any gap test fails parity or +# compilation and is NOT already triaged in test-parity/known_failures.json. +# (run_parity_tests.sh's own exit code only trips below 80% AGGREGATE parity, +# which is far too loose to catch a single-feature regression — exactly the +# "a module silently went to 0 behind a green build" class.) +# +# Requirements: +# - a Rust toolchain (the wrapped run_parity_tests.sh builds target/release/perry) +# - node with --experimental-strip-types +# - jq +# +# Usage: scripts/run_gap_tests.sh +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$ROOT" + +# Run-scoped temp dir — fixed /tmp names would let concurrent runs (a second +# PR, local + CI on the same box, or the future node-suite-guard alongside) +# clobber each other's failure lists and produce a false gate result. +WORK="$(mktemp -d "${TMPDIR:-/tmp}/perry-gap.XXXXXX")" +trap 'rm -rf "$WORK"' EXIT + +echo "==> Running gap suite (test-files/test_gap_*.ts) via run_parity_tests.sh --filter test_gap_" +# run_parity_tests.sh exits 1 when AGGREGATE parity < 80%. We gate on "no NEW +# untriaged failures" instead (below), so don't let its aggregate exit abort us. +set +e +./run_parity_tests.sh --filter test_gap_ +set -e + +REPORT="test-parity/reports/latest.json" +KNOWN="test-parity/known_failures.json" +if [[ ! -f "$REPORT" ]]; then + echo "ERROR: parity report not found at $REPORT (did run_parity_tests.sh run?)" >&2 + exit 2 +fi + +# Every failure in this report is a gap test (we filtered on test_gap_), so the +# whole failure set is the gap failure set. Drop empty entries (run_parity_tests.sh +# emits compile: [""] when there are zero compile failures). +jq -r '(.failures.parity // []) + (.failures.compile // []) | .[] | select(. != "")' \ + "$REPORT" | sort -u > "$WORK/all_fails.txt" + +if [[ -f "$KNOWN" ]]; then + # known_failures.json is keyed by test name; skip the audit-metadata _schema key. + jq -r 'keys[] | select(. != "_schema")' "$KNOWN" | sort -u > "$WORK/known.txt" +else + : > "$WORK/known.txt" +fi + +comm -23 "$WORK/all_fails.txt" "$WORK/known.txt" > "$WORK/new.txt" +TOTAL=$(wc -l < "$WORK/all_fails.txt" | tr -d ' ') + +if [[ -s "$WORK/new.txt" ]]; then + echo "" >&2 + echo "NEW gap failures (not triaged in test-parity/known_failures.json):" >&2 + sed 's/^/ - /' "$WORK/new.txt" >&2 + echo "" >&2 + echo "Fix the regression, or — if the failure is intentional/known — add a" >&2 + echo "triaged entry to test-parity/known_failures.json (category + reason)." >&2 + exit 1 +fi + +echo "All ${TOTAL} gap failures (if any) are known/triaged. Gap gate OK."