pineforge-4pass · luisleo526 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -36,8 +36,9 @@ jobs:
   release:
     runs-on: ubuntu-latest
     env:
-      BUMP:     ${{ inputs.bump }}
-      OVERRIDE: ${{ inputs.override }}
+      BUMP:          ${{ inputs.bump }}
+      OVERRIDE:      ${{ inputs.override }}
+      PYTHONHASHSEED: "0"   # deterministic gate runtime (match gate.yml)
     steps:
       - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
         with:
@@ -83,16 +84,39 @@ jobs:
 
       - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
         with:
-          python-version: "3.12"
+          python-version: "3.14"   # match the gate's runtime
+
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
+        with:
+          node-version: "22"
+
+      # Conformance gate (BLOCKING): the differential parity gate must pass
+      # before anything is built, published, or tagged. A gate-broken codegen
+      # fails the release here — nothing reaches PyPI, no tag is pushed (so the
+      # tag->npm trigger never fires), and no GitHub Release is created.
+      - name: Run conformance gate (blocking)
+        run: |
+          python -m pip install -e .
+          npm ci
+          npm run gate:selftest
+          npm run gate:full
 
       - name: Build sdist + wheel
         run: |
           python -m pip install --upgrade build
           python -m build
           ls -l dist/
 
-      # Commit + tag before publishing so a tag always corresponds to a built
-      # artifact set; the GitHub Release (with files) comes after PyPI succeeds.
+      # Publish to PyPI BEFORE the tag is pushed. The tag push triggers
+      # publish-pyodide.yml (npm), so PyPI must land first — otherwise a PyPI
+      # failure would leave npm shipping a version PyPI never got.
+      - name: Publish to PyPI
+        if: ${{ !inputs.dry_run }}
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # release/v1
+
+      # Commit + tag + push AFTER PyPI succeeds. The `git push --tags` triggers
+      # publish-pyodide.yml for npm (which re-runs the gate — defense in depth),
+      # so the tag only exists once the gate passed and PyPI published.
       - name: Commit + tag + push
         if: ${{ !inputs.dry_run }}
         env:
@@ -105,10 +129,6 @@ jobs:
           git tag "v${NEW_VERSION}"
           git push origin HEAD --tags
 
-      - name: Publish to PyPI
-        if: ${{ !inputs.dry_run }}
-        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # release/v1
-
       - name: Create GitHub Release
         if: ${{ !inputs.dry_run }}
         env:

diff --git a/gate/compare.mjs b/gate/compare.mjs
@@ -27,3 +27,43 @@ export function compareResults(name, native, browser) {
   }
   return null;
 }
+
+// Expected verdict by corpus directory: "ok/*" fixtures must transpile
+// successfully (result.ok === true), "err/*" fixtures must be rejected
+// (result.ok === false). Anything else — the wrong verdict, an unparseable
+// payload, or an unexpected (non-CompileError) exception — is a gate failure
+// even when native and wasm agree (two identical crashes must NOT pass).
+// Kept separate from compareResults so the gate enforces BOTH (a) native↔wasm
+// parity and (b) the right answer; `expectOk` is true for "ok", false for "err".
+// verdictOf maps one `side` ({json} or {unexpected}) to {kind: "missing"},
+// {kind: "unexpected", detail}, {kind: "malformed", detail} or {kind: "verdict", ok}.
+function verdictOf(side) {
+  if (!side) return { kind: "missing" };
+  if (side.unexpected) return { kind: "unexpected", detail: side.unexpected };
+  try {
+    const v = JSON.parse(side.json);
+    if (typeof v.ok !== "boolean") return { kind: "malformed", detail: side.json };
+    return { kind: "verdict", ok: v.ok };
+  } catch {  // invalid JSON, or a payload such as null whose `.ok` read throws
+    return { kind: "malformed", detail: side.json };
+  }
+}
+
+// Returns a failure string for the first side (native, then pyodide) that misses
+// the verdict expected for the fixture's corpus dir, or null if both sides match.
+export function checkExpectedVerdict(name, expectOk, native, browser) {
+  for (const [label, side] of [["native", native], ["pyodide", browser]]) {
+    const r = verdictOf(side);
+    if (r.kind === "missing") return `${name}: ${label} produced no result`;
+    if (r.kind === "unexpected") {
+      return `${name}: ${label} threw an unexpected exception (expected ok=${expectOk}): ${r.detail}`;
+    }
+    if (r.kind === "malformed") {
+      return `${name}: ${label} returned a malformed result (expected ok=${expectOk}): ${r.detail}`;
+    }
+    if (r.ok !== expectOk) {  // well-formed verdict, but not the expected one
+      return `${name}: ${label} verdict ok=${r.ok} but corpus dir expects ok=${expectOk}`;
+    }
+  }
+  return null;
+}
diff --git a/gate/run-gate.mjs b/gate/run-gate.mjs
@@ -13,7 +13,7 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from
 import { createRequire } from "node:module";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
-import { compareResults } from "./compare.mjs";
+import { checkExpectedVerdict, compareResults } from "./compare.mjs";
 
 const require = createRequire(import.meta.url);
 const HERE = dirname(fileURLToPath(import.meta.url));
@@ -79,8 +79,14 @@ async function main() {
   });
   const native = JSON.parse(oracleOut);
 
-  // 4. Pyodide side + compare.
+  // 4. Pyodide side + compare. The gate enforces TWO independent properties:
+  //    (a) native↔wasm parity (compareResults) and
+  //    (b) the EXPECTED verdict by corpus dir (checkExpectedVerdict): ok/* must
+  //        succeed, err/* must be rejected. A purely differential check would
+  //        let two identical crashes — or an ok/ fixture that erroneously errors
+  //        — slip through; (b) closes that gap.
   const mismatches = [];
+  const verdictFailures = [];
   for (const { name, src } of items) {
     let browser;
     try {
@@ -90,6 +96,9 @@ async function main() {
     }
     const m = compareResults(name, native[name], browser);
     if (m) mismatches.push(m);
+    const expectOk = name.startsWith("ok/");
+    const v = checkExpectedVerdict(name, expectOk, native[name], browser);
+    if (v) verdictFailures.push(v);
   }
 
   // 5. release.json (versions derived from the loaded Pyodide lock).
@@ -106,11 +115,16 @@ async function main() {
   writeFileSync(join(ROOT, "release.json"), JSON.stringify(release, null, 2) + "\n");
   console.log("gate: release.json ->", JSON.stringify(release));
 
-  if (mismatches.length) {
-    console.error(`gate: ${mismatches.length} MISMATCH(es):\n` + mismatches.join("\n"));
+  if (mismatches.length || verdictFailures.length) {
+    if (mismatches.length) {
+      console.error(`gate: ${mismatches.length} PARITY MISMATCH(es):\n` + mismatches.join("\n"));
+    }
+    if (verdictFailures.length) {
+      console.error(`gate: ${verdictFailures.length} VERDICT FAILURE(s) (wrong ok/err result):\n` + verdictFailures.join("\n"));
+    }
     process.exit(1);
   }
-  console.log(`gate: PARITY OK over ${items.length} fixtures`);
+  console.log(`gate: PARITY OK over ${items.length} fixtures (verdicts asserted: ok/* succeed, err/* rejected)`);
 }
 
 main().catch((e) => {

diff --git a/gate/selftest.mjs b/gate/selftest.mjs
@@ -1,29 +1,57 @@
-// Canary: prove the gate's comparator actually catches a divergence. Imports the
-// PURE comparator (gate/compare.mjs) so it runs in <1s without loading Pyodide.
-import { compareResults } from "./compare.mjs";
+// Canary: prove the gate's checks actually catch divergences. Imports the PURE
+// comparator + verdict checker (gate/compare.mjs) so it runs in <1s without
+// loading Pyodide. Covers BOTH (a) native↔wasm parity (compareResults) and
+// (b) expected-verdict-by-corpus-dir (checkExpectedVerdict).
+import { checkExpectedVerdict, compareResults } from "./compare.mjs";
 
-const cases = [
-  // [name, native, browser, mustFlag]
-  ["same-ok", { json: '{"ok":true,"cpp":"X"}' }, { json: '{"ok":true,"cpp":"X"}' }, false],
+const OK = '{"ok":true,"cpp":"X"}';
+const ERR = '{"ok":false,"error":"e","diagnostics":[]}';
+
+// --- (a) differential comparator cases: [name, native, browser, mustFlag] ---
+const cmpCases = [
+  ["same-ok", { json: OK }, { json: OK }, false],
   ["cpp-differs", { json: '{"ok":true,"cpp":"X"}' }, { json: '{"ok":true,"cpp":"Y"}' }, true],
-  ["verdict-differs", { json: '{"ok":true,"cpp":"X"}' }, { json: '{"ok":false,"error":"e","diagnostics":[]}' }, true],
+  ["verdict-differs", { json: OK }, { json: ERR }, true],
   ["error-differs", { json: '{"ok":false,"error":"a","diagnostics":[]}' }, { json: '{"ok":false,"error":"b","diagnostics":[]}' }, true],
-  ["unexpected-one-side", { json: '{"ok":true,"cpp":"X"}' }, { unexpected: "TypeError: boom" }, true],
+  ["unexpected-one-side", { json: OK }, { unexpected: "TypeError: boom" }, true],
   ["unexpected-both-same", { unexpected: "TypeError: boom" }, { unexpected: "TypeError: boom" }, false],
   ["unexpected-both-diff", { unexpected: "TypeError: a" }, { unexpected: "ValueError: b" }, true],
-  ["missing-native", undefined, { json: '{"ok":true,"cpp":"X"}' }, true],
+  ["missing-native", undefined, { json: OK }, true],
+];
+
+// --- (b) expected-verdict cases: [name, expectOk, native, browser, mustFlag] ---
+// A native↔wasm match with the WRONG verdict (e.g. ok/ that errors, or a shared
+// unexpected exception) must FAIL even though compareResults would pass it.
+const verdictCases = [
+  ["ok/good", true, { json: OK }, { json: OK }, false],
+  ["err/bad", false, { json: ERR }, { json: ERR }, false],
+  ["ok/that-errors-both-sides", true, { json: ERR }, { json: ERR }, true],
+  ["err/that-succeeds-both-sides", false, { json: OK }, { json: OK }, true],
+  ["ok/unexpected-both-same", true, { unexpected: "TypeError: boom" }, { unexpected: "TypeError: boom" }, true],
+  ["err/unexpected-both-same", false, { unexpected: "TypeError: boom" }, { unexpected: "TypeError: boom" }, true],
+  ["ok/native-wrong-only", true, { json: ERR }, { json: OK }, true],
+  ["ok/pyodide-wrong-only", true, { json: OK }, { json: ERR }, true],
+  ["ok/missing-native", true, undefined, { json: OK }, true],
+  ["ok/malformed", true, { json: "not json" }, { json: OK }, true],
 ];
 
 let failed = 0;
-for (const [name, n, b, mustFlag] of cases) {
+for (const [name, n, b, mustFlag] of cmpCases) {
   const flagged = compareResults(name, n, b) !== null;
   if (flagged !== mustFlag) {
-    console.error(`selftest FAIL: ${name} expected mustFlag=${mustFlag} got ${flagged}`);
+    console.error(`selftest FAIL (compareResults): ${name} expected mustFlag=${mustFlag} got ${flagged}`);
+    failed++;
+  }
+}
+for (const [name, expectOk, n, b, mustFlag] of verdictCases) {
+  const flagged = checkExpectedVerdict(name, expectOk, n, b) !== null;
+  if (flagged !== mustFlag) {
+    console.error(`selftest FAIL (checkExpectedVerdict): ${name} expected mustFlag=${mustFlag} got ${flagged}`);
     failed++;
   }
 }
 if (failed) {
   console.error(`gate selftest: ${failed} case(s) failed`);
   process.exit(1);
 }
-console.log(`gate selftest: ${cases.length} comparator cases OK`);
+console.log(`gate selftest: ${cmpCases.length} comparator + ${verdictCases.length} verdict cases OK`);
diff --git a/pineforge_codegen/codegen/base.py b/pineforge_codegen/codegen/base.py
@@ -185,19 +185,31 @@ def __init__(self, ctx: AnalyzerContext) -> None:
         # This ensures sub-function series vars get cloned for the parent's call sites.
         func_var_originals: dict[str, list[str]] = {}  # func_name -> list of original var names
 
-        # First, collect all function-scoped series vars (union across all functions)
-        all_func_scoped_series: set[str] = set()
+        # First, collect all function-scoped series vars (union across all functions).
+        # Use an ordered, de-duplicated list (NOT a set): set iteration order is
+        # PYTHONHASHSEED-randomized, and this order reaches emitted C++ member
+        # declarations via ``orig_names`` -> ``func_var_originals`` ->
+        # ``_func_cs_var_remap``. ``ctx.func_series_vars`` is a dict whose VALUES
+        # are themselves sets (analyzer stores ``dict[str, set]``), so we must
+        # iterate each value in ``sorted`` order to be hash-seed independent.
+        all_func_scoped_series: list[str] = []
         for svars in ctx.func_series_vars.values():
-            all_func_scoped_series.update(svars)
-        # Also include function-scoped var_members
-        all_func_scoped_vars: set[str] = set()
+            for sv in sorted(svars):
+                if sv not in all_func_scoped_series:
+                    all_func_scoped_series.append(sv)
+        # Also include function-scoped var_members (same ordered-list rationale).
+        # ``ctx.func_var_members`` values are lists (already insertion-ordered).
+        all_func_scoped_vars: list[str] = []
         for vlist in ctx.func_var_members.values():
             for n, _, _ in vlist:
-                all_func_scoped_vars.add(n)
+                if n not in all_func_scoped_vars:
+                    all_func_scoped_vars.append(n)
 
         # For each function with call-site cloning (has TA ranges or is called multiple times),
-        # include ALL function-scoped series/var vars that could be used in its body
-        for fname in set(ctx.func_call_site_counts.keys()):
+        # include ALL function-scoped series/var vars that could be used in its body.
+        # Iterate the dict directly (insertion-ordered) rather than ``set(...keys())``,
+        # which would randomize the order of emitted clones across hash seeds.
+        for fname in ctx.func_call_site_counts:
             total_cs = ctx.func_call_site_counts[fname]
             if total_cs <= 1:
                 continue  # No cloning needed for single-call-site functions
@@ -207,9 +219,9 @@ def __init__(self, ctx: AnalyzerContext) -> None:
                 for n, _, _ in ctx.func_var_members[fname]:
                     if n not in orig_names:
                         orig_names.append(n)
-            # Include function's own series vars
+            # Include function's own series vars (set -> sorted for determinism)
             if fname in ctx.func_series_vars:
-                for sv in ctx.func_series_vars[fname]:
+                for sv in sorted(ctx.func_series_vars[fname]):
                     if sv not in orig_names:
                         orig_names.append(sv)
             # Include series vars from sub-functions (they share the same class members)