From 11c796c1e902dff967b00e62d0e5fc43a418a769 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 20:35:16 +0200 Subject: [PATCH 01/11] semantics: add CstReader-based version --- packages/runtime/src/cstReader.ts | 26 ++++ packages/semantics/bench.ts | 106 +++++++++++++++ packages/semantics/package.json | 15 ++- packages/semantics/src/index.test.ts | 3 +- packages/semantics/src/reader.test.ts | 102 ++++++++++++++ packages/semantics/src/reader.ts | 187 ++++++++++++++++++++++++++ pnpm-lock.yaml | 3 + 7 files changed, 438 insertions(+), 4 deletions(-) create mode 100644 packages/semantics/bench.ts create mode 100644 packages/semantics/src/reader.test.ts create mode 100644 packages/semantics/src/reader.ts diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstReader.ts index 777fb59b..9a23b920 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstReader.ts @@ -166,6 +166,11 @@ export class CstReader { return this._ctx.input; } + /** The array of rule names, indexed by rule ID. */ + get ruleNames(): readonly string[] { + return this._ctx.ruleNames; + } + /** * Iterate over children. The callback receives (childHandle, leadingSpacesLen, * childStartIdx, index). @@ -213,6 +218,27 @@ export class CstReader { } } + /** + * Get the handle of the child at `index`, given the current `edgeStartIdx`. + * The caller must track `edgeStartIdx`: for the first child, it's + * `startIdx(parentHandle)`; for subsequent children, it's + * `startIdx(prevChild) + matchLength(prevChild)`. + */ + childAt(handle: number, index: number, edgeStartIdx: number): number { + const raw = handle & MASK; + const slot = this._ctx.view.getUint32(raw + CST_CHILDREN_OFFSET + index * 4, true); + const suppressSpaces = (slot & 2) !== 0; + const rawChild = slot & ~2; + + const {getSpacesLenAt} = this._ctx; + const leadingSpacesLen = + !suppressSpaces && getSpacesLenAt && this._hasParentSpaces(rawChild) + ? Math.max(0, getSpacesLenAt(edgeStartIdx)) + : 0; + + return createHandle(rawChild, edgeStartIdx + leadingSpacesLen); + } + /** * Whether the nonterminal at `handle` is a syntactic rule. * Uses cached metadata (ruleIsSyntactic), not string formatting. diff --git a/packages/semantics/bench.ts b/packages/semantics/bench.ts new file mode 100644 index 00000000..226d4fe7 --- /dev/null +++ b/packages/semantics/bench.ts @@ -0,0 +1,106 @@ +import {readFileSync} from 'node:fs'; +import process from 'node:process'; + +import {Bench} from 'tinybench'; +import * as ohm from '@ohm-js/compiler/compat'; +import {createReader} from 'ohm-js/cstReader'; + +import {createOperation} from './src/index.ts'; +import {createReaderOperation} from './src/reader.ts'; + +const smallSize = process.argv.includes('--small-size'); + +const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); +const es5Source = readFileSync(scriptRel('../../examples/ecmascript/src/es5.ohm'), 'utf8'); +const g = ohm.grammars(es5Source).ES5; + +const input = smallSize ? 'var x = 1 + 2;' : readFileSync(scriptRel('../compiler/test/data/_underscore-1.8.3.js'), 'utf8'); + +// --- CstNode-based (createOperation) --- + +const countNodesCstNode = createOperation('countNodes', { + _nonterminal(ctx, ...children) { + let sum = 1; + for (const c of children) sum += countNodesCstNode(c); + return sum; + }, + _terminal(ctx) { + return 1; + }, + _default(ctx, ...children) { + let sum = 1; + for (const c of children) sum += countNodesCstNode(c); + return sum; + }, +}); + +// --- CstReader-based (createReaderOperation) --- + +let _rd: ReturnType; + +const countNodesCstReader = createReaderOperation('countNodes', { + _nonterminal(h) { + let sum = 1; + _rd.forEachChild(h, child => { + sum += countNodesCstReader(_rd, child); + }); + return sum; + }, + _terminal(h) { + return 1; + }, + _default(h) { + let sum = 1; + _rd.forEachChild(h, child => { + sum += countNodesCstReader(_rd, child); + }); + return sum; + }, +}); + +// --- Benchmark --- + +const opts = { + afterEach() { + process.stderr.write('.'); + }, +}; + +const bench = new Bench({ + iterations: smallSize ? 1 : 10, + time: 0, + warmup: !smallSize, + throws: true, +}); + +bench.add( + 'createOperation (CstNode)', + () => g.match(input).use((r: any) => countNodesCstNode(r.getCstRoot())), + opts +); + +bench.add( + 'createReaderOperation (CstReader)', + () => + g.match(input).use((r: any) => { + _rd = createReader(r); + return countNodesCstReader(_rd, _rd.root); + }), + opts +); + +console.log(`Input: ${smallSize ? 'small' : 'underscore-1.8.3.js'} (${input.length} bytes)\n`); + +(async () => { + await bench.run(); + process.stderr.write('\n'); + + for (const task of bench.tasks) { + const {mean, sd, samplesCount} = task.result!.latency; + console.log(`${task.name}: ${mean.toFixed(0)}ms ± ${sd.toFixed(0)}ms (n=${samplesCount})`); + } + + const cstNodeMean = bench.tasks[0].result!.latency.mean; + const cstReaderMean = bench.tasks[1].result!.latency.mean; + console.log(`\nSpeedup: ${(cstNodeMean / cstReaderMean).toFixed(2)}x`); +})(); diff --git a/packages/semantics/package.json b/packages/semantics/package.json index 37efaf6a..d4fa353e 100644 --- a/packages/semantics/package.json +++ b/packages/semantics/package.json @@ -15,13 +15,24 @@ "author": "Patrick Dubroy ", "type": "module", "main": "dist/index.js", + "exports": { + ".": { + "types": "./dist/src/index.d.ts", + "default": "./dist/src/index.js" + }, + "./reader": { + "types": "./dist/src/reader.d.ts", + "default": "./dist/src/reader.js" + } + }, "scripts": { "build": "tsc", - "test": "ava" + "test": "ava && node --experimental-strip-types bench.ts --small-size" }, "devDependencies": { "@ohm-js/compiler": "workspace:^", - "ava": "^6.0.0" + "ava": "^6.0.0", + "tinybench": "^6.0.0" }, "peerDependencies": { "@ohm-js/compiler": "workspace:^", diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index 3e07c676..b8fc8eb6 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -1,10 +1,9 @@ /* global URL */ import * as ohm from '@ohm-js/compiler/compat'; -import type {CstNode, ListNode, SucceededMatchResult, TerminalNode} from 'ohm-js'; +import type {CstNode} from 'ohm-js'; import test from 'ava'; import {readFileSync} from 'node:fs'; -import * as ohmJs from 'ohm-js'; import type {Operation, VisitorCtx} from './types.ts'; import {createOperation} from './index.ts'; diff --git a/packages/semantics/src/reader.test.ts b/packages/semantics/src/reader.test.ts new file mode 100644 index 00000000..19223f61 --- /dev/null +++ b/packages/semantics/src/reader.test.ts @@ -0,0 +1,102 @@ +/* global URL */ + +import * as ohm from '@ohm-js/compiler/compat'; +import {createReader} from 'ohm-js/cstReader'; +import test from 'ava'; +import {readFileSync} from 'node:fs'; + +import type {ReaderOperation} from './reader.ts'; +import {collect, createReaderOperation, ifPresent} from './reader.ts'; + +const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); + +test('reader-based: arithmetic', t => { + const g2 = ohm.grammar(readFileSync(scriptRel('../../ohm-js/test/arithmetic.ohm'), 'utf8')); + g2.match('1+(2*3)').use(r => { + if (!r.succeeded()) return t.fail('parse failed'); + const rd = createReader(r); + + const evalIt: ReaderOperation = createReaderOperation('evalIt', { + addExp_plus(h, a, _, b) { + return evalIt(rd, a) + evalIt(rd, b); + }, + addExp_minus(h, a, _, b) { + return evalIt(rd, a) - evalIt(rd, b); + }, + mulExp_times(h, a, _, b) { + return evalIt(rd, a) * evalIt(rd, b); + }, + mulExp_divide(h, a, _, b) { + return evalIt(rd, a) / evalIt(rd, b); + }, + priExp_paren(h, _, e, _2) { + return evalIt(rd, e); + }, + number(h, _) { + return parseInt(rd.sourceString(h), 10); + }, + _default(h) { + let result = 0; + rd.forEachChild(h, child => { + result = evalIt(rd, child); + }); + return result; + }, + }); + t.is(evalIt(rd, rd.root), 7); + }); +}); + +test('reader-based: list and opt', t => { + const g = ohm.grammar(String.raw` + G { + Start = ~end #"a" &(letter "c") ("b"+ letter?)* punc? + punc = ("!" space?)+ + } + `); + + g.match('abcbc!!').use(r => { + if (!r.succeeded()) return t.fail('parse failed'); + const rd = createReader(r); + + const reversed: ReaderOperation = createReaderOperation('reversed', { + Start(h, a, list, opt) { + return ( + ifPresent( + rd, + opt, + p => reversed(rd, p), + () => '' + )! + + collect(rd, list, (b, optLetter) => { + return ( + ifPresent( + rd, + optLetter, + l => reversed(rd, l), + () => '' + )! + collect(rd, b, b => reversed(rd, b)).join('') + ); + }) + .reverse() + .join('') + + reversed(rd, a) + ); + }, + punc(h, list) { + return collect(rd, list, (c, opt) => reversed(rd, c)).join(''); + }, + _terminal(h) { + return rd.sourceString(h); + }, + _default(h) { + let result = ''; + rd.forEachChild(h, child => { + result = reversed(rd, child); + }); + return result; + }, + }); + t.is(reversed(rd, rd.root), '!!cbcba'); + }); +}); diff --git a/packages/semantics/src/reader.ts b/packages/semantics/src/reader.ts new file mode 100644 index 00000000..a8b62c46 --- /dev/null +++ b/packages/semantics/src/reader.ts @@ -0,0 +1,187 @@ +import type {CstReader} from 'ohm-js/cstReader'; +import {CstNodeType} from 'ohm-js/cstReader'; + +export type ReaderActionDict = { + _nonterminal?: (handle: number) => R; + _terminal?: (handle: number) => R; + _default?: (handle: number) => R; + [ruleName: string]: ((handle: number, ...children: number[]) => R) | undefined; +}; + +export type ReaderOperation = (reader: CstReader, handle: number) => R; + +function nextEdgePos(reader: CstReader, child: number): number { + return reader.startIdx(child) + reader.matchLength(child); +} + +function callWithChildren( + reader: CstReader, + handle: number, + action: (handle: number, ...children: number[]) => R +): R { + const count = reader.childCount(handle); + let ep = reader.startIdx(handle); + + if (count < 8) { + if (count === 0) return action(handle); + + const c0 = reader.childAt(handle, 0, ep); + if (count === 1) return action(handle, c0); + + ep = nextEdgePos(reader, c0); + const c1 = reader.childAt(handle, 1, ep); + if (count === 2) return action(handle, c0, c1); + + ep = nextEdgePos(reader, c1); + const c2 = reader.childAt(handle, 2, ep); + if (count === 3) return action(handle, c0, c1, c2); + + ep = nextEdgePos(reader, c2); + const c3 = reader.childAt(handle, 3, ep); + if (count === 4) return action(handle, c0, c1, c2, c3); + + ep = nextEdgePos(reader, c3); + const c4 = reader.childAt(handle, 4, ep); + if (count === 5) return action(handle, c0, c1, c2, c3, c4); + + ep = nextEdgePos(reader, c4); + const c5 = reader.childAt(handle, 5, ep); + if (count === 6) return action(handle, c0, c1, c2, c3, c4, c5); + + ep = nextEdgePos(reader, c5); + const c6 = reader.childAt(handle, 6, ep); + return action(handle, c0, c1, c2, c3, c4, c5, c6); + } + + // Fallback for >=8 children. + const children: number[] = []; + for (let i = 0; i < count; i++) { + const child = reader.childAt(handle, i, ep); + children.push(child); + ep = nextEdgePos(reader, child); + } + return action(handle, ...children); +} + +type ActionFn = (handle: number, ...children: number[]) => R; + +// Sentinel values used in the dispatch table for fallback actions. +const NO_ACTION = 0; +const USE_NONTERMINAL = 1; +const USE_DEFAULT = 2; + +export function createReaderOperation( + name: string, + actions: ReaderActionDict +): ReaderOperation { + // Lazily-built dispatch table: actionTable[ruleId] is either an action + // function or a sentinel (NO_ACTION / USE_NONTERMINAL / USE_DEFAULT). + let actionTable: (ActionFn | number)[] | undefined; + let cachedRuleNames: readonly string[] | undefined; + + function buildTable(ruleNames: readonly string[]): (ActionFn | number)[] { + const table: (ActionFn | number)[] = new Array(ruleNames.length); + for (let i = 0; i < ruleNames.length; i++) { + const ctorName = ruleNames[i].split('<')[0]; + const action = actions[ctorName]; + if (action) { + table[i] = action; + } else if (actions._nonterminal) { + table[i] = USE_NONTERMINAL; + } else if (actions._default) { + table[i] = USE_DEFAULT; + } else { + table[i] = NO_ACTION; + } + } + return table; + } + + function getTable(reader: CstReader): (ActionFn | number)[] { + const ruleNames = reader.ruleNames; + if (actionTable && cachedRuleNames === ruleNames) return actionTable; + cachedRuleNames = ruleNames; + actionTable = buildTable(ruleNames); + return actionTable; + } + + return (reader: CstReader, handle: number): R => { + const nodeType = reader.type(handle); + + // Terminal — no children, no table lookup needed. + if (nodeType === CstNodeType.TERMINAL) { + if (actions._terminal) return actions._terminal(handle); + if (actions._default) return actions._default(handle); + throw new Error(`missing semantic action for '_terminal'`); + } + + // List or Opt — use _default. + if (nodeType === CstNodeType.LIST || nodeType === CstNodeType.OPT) { + if (actions._default) return actions._default(handle); + throw new Error(`missing semantic action for '${reader.ctorName(handle)}'`); + } + + // Nonterminal — use dispatch table indexed by ruleId. + const table = getTable(reader); + const ruleId = reader.details(handle); + const entry = table[ruleId]; + + if (typeof entry === 'function') { + return callWithChildren(reader, handle, entry); + } + if (entry === USE_NONTERMINAL) { + return actions._nonterminal!(handle); + } + if (entry === USE_DEFAULT) { + return actions._default!(handle); + } + throw new Error(`missing semantic action for '${reader.ctorName(handle)}'`); + }; +} + +function getChildren(reader: CstReader, handle: number): number[] { + const count = reader.childCount(handle); + const children: number[] = []; + let ep = reader.startIdx(handle); + for (let i = 0; i < count; i++) { + const child = reader.childAt(handle, i, ep); + children.push(child); + ep = nextEdgePos(reader, child); + } + return children; +} + +export function collect( + reader: CstReader, + handle: number, + cb: (...items: number[]) => R +): R[] { + const arity = reader.details(handle); + const children = getChildren(reader, handle); + + const results: R[] = []; + if (arity <= 1) { + for (const child of children) { + results.push(cb(child)); + } + } else { + for (let i = 0; i < children.length; i += arity) { + results.push(cb(...children.slice(i, i + arity))); + } + } + return results; +} + +export function ifPresent( + reader: CstReader, + handle: number, + consume: (...children: number[]) => R, + orElse?: () => R +): R | undefined { + const count = reader.childCount(handle); + if (count === 0) { + return orElse ? orElse() : undefined; + } + const children = getChildren(reader, handle); + return consume(...children); +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8c0d4c13..128f1555 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -356,6 +356,9 @@ importers: ava: specifier: ^6.0.0 version: 6.2.0(rollup@4.43.0) + tinybench: + specifier: ^6.0.0 + version: 6.0.0 packages/to-ast-compat: dependencies: From 19a9172d6429b837ee0fb95fd2ed74385436f8e0 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 21:22:22 +0200 Subject: [PATCH 02/11] style cleanups --- packages/compiler/test/test-cstReader.js | 38 +++++++ packages/runtime/src/cstReader.ts | 119 ++++++++++++++++++++ packages/semantics/bench.ts | 4 +- packages/semantics/src/reader.test.ts | 11 +- packages/semantics/src/reader.ts | 136 +++++++---------------- 5 files changed, 205 insertions(+), 103 deletions(-) diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstReader.js index 56803d9e..a5a44699 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstReader.js @@ -136,6 +136,44 @@ test('optional node: absent', async t => { }); }); +test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { + const g = await compileAndLoad('G { start = ("a" "b"?)* }'); + g.match('abab').use(mr => { + const reader = createReader(mr); + let list; + reader.forEachChild(reader.root, child => { + list = child; + }); + + t.is(reader.tupleArity(list), 2); + + const tuples = []; + reader.forEachTuple(list, (a, b) => { + tuples.push( + reader.sourceString(a) + + reader.withChildren(b, (_handle, child) => (reader.isPresent(b) ? reader.sourceString(child) : '')) + ); + }); + t.deepEqual(tuples, ['ab', 'ab']); + + let emptyOpt; + g.match('a').use(mr2 => { + const reader2 = createReader(mr2); + reader2.forEachChild(reader2.root, child => { + list = child; + }); + reader2.forEachTuple(list, (_a, b) => { + emptyOpt = b; + }); + t.false(reader2.isPresent(emptyOpt)); + t.is( + reader2.withChildren(emptyOpt, (_handle, child) => (child === undefined ? 'missing' : 'present')), + 'missing' + ); + }); + }); +}); + // --- unparse via walk --- test('unparse: simple terminals', async t => { diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstReader.ts index 9a23b920..eb915edc 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstReader.ts @@ -32,6 +32,10 @@ function unpackStartIdx(handle: number): number { return (handle - raw) / SHIFT; } +function nextEdgePos(reader: CstReader, child: number): number { + return reader.startIdx(child) + reader.matchLength(child); +} + /** Extract the raw CST pointer from a packed handle. */ export function rawHandle(handle: number): number { return handle & MASK; @@ -150,6 +154,21 @@ export class CstReader { return this._ctx.view.getInt32(raw + CST_TYPE_AND_DETAILS_OFFSET, true) >>> 2; } + /** Rule ID for a nonterminal node. */ + ruleId(handle: number): number { + return this.details(handle); + } + + /** Children per tuple for a list node. */ + tupleArity(handle: number): number { + return this.details(handle); + } + + /** Whether an optional node has a child. */ + isPresent(handle: number): boolean { + return this.childCount(handle) > 0; + } + /** Source string for a node (startIdx is extracted from the handle). */ sourceString(handle: number): string { const si = unpackStartIdx(handle); @@ -239,6 +258,106 @@ export class CstReader { return createHandle(rawChild, edgeStartIdx + leadingSpacesLen); } + /** + * Call `fn` with the node handle followed by its children. + * Avoids allocation for nodes with up to 7 children. + */ + withChildren( + handle: number, + fn: (handle: number, ...children: number[]) => R + ): R { + const count = this.childCount(handle); + let edgeStartIdx = this.startIdx(handle); + + if (count < 8) { + if (count === 0) return fn(handle); + + const c0 = this.childAt(handle, 0, edgeStartIdx); + if (count === 1) return fn(handle, c0); + + edgeStartIdx = nextEdgePos(this, c0); + const c1 = this.childAt(handle, 1, edgeStartIdx); + if (count === 2) return fn(handle, c0, c1); + + edgeStartIdx = nextEdgePos(this, c1); + const c2 = this.childAt(handle, 2, edgeStartIdx); + if (count === 3) return fn(handle, c0, c1, c2); + + edgeStartIdx = nextEdgePos(this, c2); + const c3 = this.childAt(handle, 3, edgeStartIdx); + if (count === 4) return fn(handle, c0, c1, c2, c3); + + edgeStartIdx = nextEdgePos(this, c3); + const c4 = this.childAt(handle, 4, edgeStartIdx); + if (count === 5) return fn(handle, c0, c1, c2, c3, c4); + + edgeStartIdx = nextEdgePos(this, c4); + const c5 = this.childAt(handle, 5, edgeStartIdx); + if (count === 6) return fn(handle, c0, c1, c2, c3, c4, c5); + + edgeStartIdx = nextEdgePos(this, c5); + const c6 = this.childAt(handle, 6, edgeStartIdx); + return fn(handle, c0, c1, c2, c3, c4, c5, c6); + } + + const children: number[] = []; + for (let i = 0; i < count; i++) { + const child = this.childAt(handle, i, edgeStartIdx); + children.push(child); + edgeStartIdx = nextEdgePos(this, child); + } + return fn(handle, ...children); + } + + /** + * Iterate over a list node in tuple-sized groups. + * Avoids allocation for arities up to 3. + */ + forEachTuple(handle: number, fn: (...children: number[]) => void): void { + const arity = this.tupleArity(handle); + if (arity <= 1) { + this.forEachChild(handle, child => fn(child)); + return; + } + + const count = this.childCount(handle); + let edgeStartIdx = this.startIdx(handle); + + if (arity === 2) { + for (let i = 0; i < count; i += 2) { + const c0 = this.childAt(handle, i, edgeStartIdx); + edgeStartIdx = nextEdgePos(this, c0); + const c1 = this.childAt(handle, i + 1, edgeStartIdx); + edgeStartIdx = nextEdgePos(this, c1); + fn(c0, c1); + } + return; + } + + if (arity === 3) { + for (let i = 0; i < count; i += 3) { + const c0 = this.childAt(handle, i, edgeStartIdx); + edgeStartIdx = nextEdgePos(this, c0); + const c1 = this.childAt(handle, i + 1, edgeStartIdx); + edgeStartIdx = nextEdgePos(this, c1); + const c2 = this.childAt(handle, i + 2, edgeStartIdx); + edgeStartIdx = nextEdgePos(this, c2); + fn(c0, c1, c2); + } + return; + } + + const tuple = new Array(arity); + for (let i = 0; i < count; ) { + for (let j = 0; j < arity; j++, i++) { + const child = this.childAt(handle, i, edgeStartIdx); + tuple[j] = child; + edgeStartIdx = nextEdgePos(this, child); + } + fn(...tuple); + } + } + /** * Whether the nonterminal at `handle` is a syntactic rule. * Uses cached metadata (ruleIsSyntactic), not string formatting. diff --git a/packages/semantics/bench.ts b/packages/semantics/bench.ts index 226d4fe7..8770a907 100644 --- a/packages/semantics/bench.ts +++ b/packages/semantics/bench.ts @@ -14,7 +14,9 @@ const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); const es5Source = readFileSync(scriptRel('../../examples/ecmascript/src/es5.ohm'), 'utf8'); const g = ohm.grammars(es5Source).ES5; -const input = smallSize ? 'var x = 1 + 2;' : readFileSync(scriptRel('../compiler/test/data/_underscore-1.8.3.js'), 'utf8'); +const input = smallSize + ? 'var x = 1 + 2;' + : readFileSync(scriptRel('../compiler/test/data/_underscore-1.8.3.js'), 'utf8'); // --- CstNode-based (createOperation) --- diff --git a/packages/semantics/src/reader.test.ts b/packages/semantics/src/reader.test.ts index 19223f61..ef559125 100644 --- a/packages/semantics/src/reader.test.ts +++ b/packages/semantics/src/reader.test.ts @@ -35,13 +35,6 @@ test('reader-based: arithmetic', t => { number(h, _) { return parseInt(rd.sourceString(h), 10); }, - _default(h) { - let result = 0; - rd.forEachChild(h, child => { - result = evalIt(rd, child); - }); - return result; - }, }); t.is(evalIt(rd, rd.root), 7); }); @@ -67,7 +60,7 @@ test('reader-based: list and opt', t => { opt, p => reversed(rd, p), () => '' - )! + + ) + collect(rd, list, (b, optLetter) => { return ( ifPresent( @@ -75,7 +68,7 @@ test('reader-based: list and opt', t => { optLetter, l => reversed(rd, l), () => '' - )! + collect(rd, b, b => reversed(rd, b)).join('') + ) + collect(rd, b, b => reversed(rd, b)).join('') ); }) .reverse() diff --git a/packages/semantics/src/reader.ts b/packages/semantics/src/reader.ts index a8b62c46..793979ce 100644 --- a/packages/semantics/src/reader.ts +++ b/packages/semantics/src/reader.ts @@ -10,59 +10,6 @@ export type ReaderActionDict = { export type ReaderOperation = (reader: CstReader, handle: number) => R; -function nextEdgePos(reader: CstReader, child: number): number { - return reader.startIdx(child) + reader.matchLength(child); -} - -function callWithChildren( - reader: CstReader, - handle: number, - action: (handle: number, ...children: number[]) => R -): R { - const count = reader.childCount(handle); - let ep = reader.startIdx(handle); - - if (count < 8) { - if (count === 0) return action(handle); - - const c0 = reader.childAt(handle, 0, ep); - if (count === 1) return action(handle, c0); - - ep = nextEdgePos(reader, c0); - const c1 = reader.childAt(handle, 1, ep); - if (count === 2) return action(handle, c0, c1); - - ep = nextEdgePos(reader, c1); - const c2 = reader.childAt(handle, 2, ep); - if (count === 3) return action(handle, c0, c1, c2); - - ep = nextEdgePos(reader, c2); - const c3 = reader.childAt(handle, 3, ep); - if (count === 4) return action(handle, c0, c1, c2, c3); - - ep = nextEdgePos(reader, c3); - const c4 = reader.childAt(handle, 4, ep); - if (count === 5) return action(handle, c0, c1, c2, c3, c4); - - ep = nextEdgePos(reader, c4); - const c5 = reader.childAt(handle, 5, ep); - if (count === 6) return action(handle, c0, c1, c2, c3, c4, c5); - - ep = nextEdgePos(reader, c5); - const c6 = reader.childAt(handle, 6, ep); - return action(handle, c0, c1, c2, c3, c4, c5, c6); - } - - // Fallback for >=8 children. - const children: number[] = []; - for (let i = 0; i < count; i++) { - const child = reader.childAt(handle, i, ep); - children.push(child); - ep = nextEdgePos(reader, child); - } - return action(handle, ...children); -} - type ActionFn = (handle: number, ...children: number[]) => R; // Sentinel values used in the dispatch table for fallback actions. @@ -78,6 +25,13 @@ export function createReaderOperation( // function or a sentinel (NO_ACTION / USE_NONTERMINAL / USE_DEFAULT). let actionTable: (ActionFn | number)[] | undefined; let cachedRuleNames: readonly string[] | undefined; + const terminalAction = actions._terminal; + const nonterminalAction = actions._nonterminal; + const defaultAction = actions._default; + + function fail(reader: CstReader, handle: number): never { + throw new Error(`missing semantic action for '${reader.ctorName(handle)}' in '${name}'`); + } function buildTable(ruleNames: readonly string[]): (ActionFn | number)[] { const table: (ActionFn | number)[] = new Array(ruleNames.length); @@ -86,9 +40,9 @@ export function createReaderOperation( const action = actions[ctorName]; if (action) { table[i] = action; - } else if (actions._nonterminal) { + } else if (nonterminalAction) { table[i] = USE_NONTERMINAL; - } else if (actions._default) { + } else if (defaultAction) { table[i] = USE_DEFAULT; } else { table[i] = NO_ACTION; @@ -105,50 +59,44 @@ export function createReaderOperation( return actionTable; } - return (reader: CstReader, handle: number): R => { + const doIt: ReaderOperation = (reader: CstReader, handle: number): R => { const nodeType = reader.type(handle); // Terminal — no children, no table lookup needed. if (nodeType === CstNodeType.TERMINAL) { - if (actions._terminal) return actions._terminal(handle); - if (actions._default) return actions._default(handle); - throw new Error(`missing semantic action for '_terminal'`); + if (terminalAction) return terminalAction(handle); + if (defaultAction) return defaultAction(handle); + return fail(reader, handle); } // List or Opt — use _default. if (nodeType === CstNodeType.LIST || nodeType === CstNodeType.OPT) { - if (actions._default) return actions._default(handle); - throw new Error(`missing semantic action for '${reader.ctorName(handle)}'`); + if (defaultAction) return defaultAction(handle); + return fail(reader, handle); } // Nonterminal — use dispatch table indexed by ruleId. const table = getTable(reader); - const ruleId = reader.details(handle); + const ruleId = reader.ruleId(handle); const entry = table[ruleId]; if (typeof entry === 'function') { - return callWithChildren(reader, handle, entry); + return reader.withChildren(handle, entry); } if (entry === USE_NONTERMINAL) { - return actions._nonterminal!(handle); + return nonterminalAction!(handle); } if (entry === USE_DEFAULT) { - return actions._default!(handle); + return defaultAction!(handle); + } + if (reader.childCount(handle) === 1) { + const child = reader.childAt(handle, 0, reader.startIdx(handle)); + return doIt(reader, child); } - throw new Error(`missing semantic action for '${reader.ctorName(handle)}'`); + return fail(reader, handle); }; -} -function getChildren(reader: CstReader, handle: number): number[] { - const count = reader.childCount(handle); - const children: number[] = []; - let ep = reader.startIdx(handle); - for (let i = 0; i < count; i++) { - const child = reader.childAt(handle, i, ep); - children.push(child); - ep = nextEdgePos(reader, child); - } - return children; + return doIt; } export function collect( @@ -156,32 +104,34 @@ export function collect( handle: number, cb: (...items: number[]) => R ): R[] { - const arity = reader.details(handle); - const children = getChildren(reader, handle); - const results: R[] = []; - if (arity <= 1) { - for (const child of children) { - results.push(cb(child)); - } - } else { - for (let i = 0; i < children.length; i += arity) { - results.push(cb(...children.slice(i, i + arity))); - } - } + reader.forEachTuple(handle, (...items) => { + results.push(cb(...items)); + }); return results; } +export function ifPresent( + reader: CstReader, + handle: number, + consume: (...children: number[]) => R +): R | undefined; + +export function ifPresent( + reader: CstReader, + handle: number, + consume: (...children: number[]) => R, + orElse: () => R +): R; + export function ifPresent( reader: CstReader, handle: number, consume: (...children: number[]) => R, orElse?: () => R ): R | undefined { - const count = reader.childCount(handle); - if (count === 0) { + if (!reader.isPresent(handle)) { return orElse ? orElse() : undefined; } - const children = getChildren(reader, handle); - return consume(...children); + return reader.withChildren(handle, (_handle, ...children) => consume(...children)); } From 04d004ed6e24f252fd1ff4bf49ccb0fc5669cde6 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 21:22:36 +0200 Subject: [PATCH 03/11] formatting --- packages/compiler/test/test-cstReader.js | 8 ++++++-- packages/runtime/src/cstReader.ts | 5 +---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstReader.js index a5a44699..b6d2a26e 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstReader.js @@ -151,7 +151,9 @@ test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { reader.forEachTuple(list, (a, b) => { tuples.push( reader.sourceString(a) + - reader.withChildren(b, (_handle, child) => (reader.isPresent(b) ? reader.sourceString(child) : '')) + reader.withChildren(b, (_handle, child) => + reader.isPresent(b) ? reader.sourceString(child) : '' + ) ); }); t.deepEqual(tuples, ['ab', 'ab']); @@ -167,7 +169,9 @@ test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { }); t.false(reader2.isPresent(emptyOpt)); t.is( - reader2.withChildren(emptyOpt, (_handle, child) => (child === undefined ? 'missing' : 'present')), + reader2.withChildren(emptyOpt, (_handle, child) => + child === undefined ? 'missing' : 'present' + ), 'missing' ); }); diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstReader.ts index eb915edc..f41eba36 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstReader.ts @@ -262,10 +262,7 @@ export class CstReader { * Call `fn` with the node handle followed by its children. * Avoids allocation for nodes with up to 7 children. */ - withChildren( - handle: number, - fn: (handle: number, ...children: number[]) => R - ): R { + withChildren(handle: number, fn: (handle: number, ...children: number[]) => R): R { const count = this.childCount(handle); let edgeStartIdx = this.startIdx(handle); From 4e224676925d9739c3e82736652e94272034eb50 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 21:32:35 +0200 Subject: [PATCH 04/11] add assertions --- packages/compiler/test/test-cstReader.js | 23 +++++++++++++++++++++++ packages/runtime/src/cstReader.ts | 4 ++++ 2 files changed, 27 insertions(+) diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstReader.js index b6d2a26e..bcd2d116 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstReader.js @@ -178,6 +178,29 @@ test('withChildren, tupleArity, forEachTuple, and isPresent', async t => { }); }); +test('type-specific helpers assert on the wrong handle kind', async t => { + const g = await compileAndLoad('G { Start = ("a" "b"?)* }'); + g.match('ab').use(mr => { + const reader = createReader(mr); + let list; + reader.forEachChild(reader.root, child => { + list = child; + }); + + let terminal; + let opt; + reader.forEachTuple(list, (a, b) => { + terminal = a; + opt = b; + }); + + t.throws(() => reader.ruleId(list), {message: 'Not a nonterminal'}); + t.throws(() => reader.tupleArity(reader.root), {message: 'Not a list'}); + t.throws(() => reader.isPresent(terminal), {message: 'Not an opt'}); + t.true(reader.isPresent(opt)); + }); +}); + // --- unparse via walk --- test('unparse: simple terminals', async t => { diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstReader.ts index f41eba36..9d7ab409 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstReader.ts @@ -8,6 +8,7 @@ import { MatchRecordType, rawMatchRecordType, } from './miniohm.ts'; +import {assert} from './assert.ts'; import type {MatchContext, SucceededMatchResult} from './miniohm.ts'; @@ -156,16 +157,19 @@ export class CstReader { /** Rule ID for a nonterminal node. */ ruleId(handle: number): number { + assert(this.type(handle) === CstNodeType.NONTERMINAL, 'Not a nonterminal'); return this.details(handle); } /** Children per tuple for a list node. */ tupleArity(handle: number): number { + assert(this.type(handle) === CstNodeType.LIST, 'Not a list'); return this.details(handle); } /** Whether an optional node has a child. */ isPresent(handle: number): boolean { + assert(this.type(handle) === CstNodeType.OPT, 'Not an opt'); return this.childCount(handle) > 0; } From 7721654efdd489c2cd5049ed9dbaa06c9ba724f5 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 22:08:09 +0200 Subject: [PATCH 05/11] amp cleanups --- packages/compiler/test/test-cstReader.js | 32 ++--- packages/runtime/src/cstReader.ts | 143 +++++------------------ packages/runtime/src/cstReaderFactory.ts | 26 +++++ packages/runtime/src/cstReaderShared.ts | 47 ++++++++ packages/runtime/src/miniohm.ts | 13 ++- packages/runtime/src/unstableDebug.ts | 4 +- packages/semantics/src/reader.test.ts | 51 ++++---- packages/semantics/src/reader.ts | 54 +++------ 8 files changed, 172 insertions(+), 198 deletions(-) create mode 100644 packages/runtime/src/cstReaderFactory.ts create mode 100644 packages/runtime/src/cstReaderShared.ts diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstReader.js index bcd2d116..75066878 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstReader.js @@ -1,9 +1,10 @@ import test from 'ava'; -import {createHandle, createReader, CstNodeType} from '../../runtime/src/cstReader.ts'; +import {createReader, CstNodeType} from '../../runtime/src/cstReader.ts'; +import {createHandle} from '../../runtime/src/cstReaderShared.ts'; import {compileAndLoad, matchWithInput} from './_helpers.js'; -const childrenOf = (reader, handle, i) => { +const childrenOf = (reader, handle) => { const arr = []; reader.forEachChild(handle, c => arr.push(c)); return arr; @@ -30,8 +31,8 @@ test('terminal children', async t => { g.match('abcd').use(mr => { const reader = createReader(mr); const children = []; - reader.forEachChild(reader.root, (child, leadingSpaces, startIdx, index) => { - children.push({child, leadingSpaces, startIdx, index}); + reader.forEachChild(reader.root, (child, leadingSpaces, index) => { + children.push({child, leadingSpaces, startIdx: reader.startIdx(child), index}); }); t.is(children.length, 2); @@ -56,8 +57,8 @@ test('nonterminal children', async t => { g.match('xy').use(mr => { const reader = createReader(mr); const children = []; - reader.forEachChild(reader.root, (child, ls, startIdx, i) => { - children.push({child, ls, startIdx, i}); + reader.forEachChild(reader.root, (child, ls, i) => { + children.push({child, ls, startIdx: reader.startIdx(child), i}); }); t.is(children.length, 2); t.is(reader.ctorName(children[0].child), 'a'); @@ -279,7 +280,7 @@ test('rootLeadingSpacesLen: present', async t => { g.match(' x').use(mr => { const reader = createReader(mr); t.is(reader.rootLeadingSpacesLen, 2); - t.is(reader.sourceSlice(0, reader.rootLeadingSpacesLen), ' '); + t.is(reader.input.slice(0, reader.rootLeadingSpacesLen), ' '); t.is(reader.startIdx(reader.root), 2); }); }); @@ -297,14 +298,15 @@ test('child leadingSpaces in syntactic rule', async t => { g.match('a b').use(mr => { const reader = createReader(mr); const spacesInfo = []; - reader.forEachChild(reader.root, (child, leadingSpacesLen, childStartIdx, index) => { + reader.forEachChild(reader.root, (child, leadingSpacesLen, index) => { + const childStartIdx = reader.startIdx(child); spacesInfo.push({ index, hasSpaces: leadingSpacesLen > 0, spacesLen: leadingSpacesLen, spacesStr: leadingSpacesLen > 0 - ? reader.sourceSlice(childStartIdx - leadingSpacesLen, leadingSpacesLen) + ? reader.input.slice(childStartIdx - leadingSpacesLen, childStartIdx) : '', }); }); @@ -336,8 +338,8 @@ const spaceMemoIgnored = test.macro(async (t, twoBody, input = '> xx') => { const reader = createReader(mr); const [two] = childrenOf(reader, reader.root); const children = []; - reader.forEachChild(two, (child, leadingSpacesLen, childStartIdx) => { - children.push({child, leadingSpacesLen, childStartIdx}); + reader.forEachChild(two, (child, leadingSpacesLen) => { + children.push({child, leadingSpacesLen, childStartIdx: reader.startIdx(child)}); }); t.deepEqual( children.map(({leadingSpacesLen}) => leadingSpacesLen), @@ -369,15 +371,13 @@ test( '> x' ); -// --- details --- +// --- rule metadata --- -test('details returns ruleId for nonterminals', async t => { +test('ruleId returns a stable rule index for nonterminals', async t => { const g = await compileAndLoad('G { start = a\na = "x" }'); g.match('x').use(mr => { const reader = createReader(mr); - // Root is 'start', details should be its ruleId (>= 0). - const d = reader.details(reader.root); - t.true(d >= 0); + t.true(reader.ruleId(reader.root) >= 0); }); }); diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstReader.ts index 9d7ab409..b4d54bb8 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstReader.ts @@ -2,6 +2,7 @@ import { CST_CHILD_COUNT_OFFSET, CST_CHILDREN_OFFSET, CST_MATCH_LENGTH_OFFSET, + CST_NO_LEADING_SPACES_FLAG, CST_TYPE_AND_DETAILS_OFFSET, CstNodeType, isTaggedTerminal, @@ -9,78 +10,35 @@ import { rawMatchRecordType, } from './miniohm.ts'; import {assert} from './assert.ts'; +import {createReaderFromCtx} from './cstReaderFactory.ts'; +import {createHandle, rawHandle, unpackStartIdx} from './cstReaderShared.ts'; import type {MatchContext, SucceededMatchResult} from './miniohm.ts'; export {CstNodeType}; -const HANDLE_BITS = 27; -const SHIFT = 2 ** HANDLE_BITS; // 134217728 -const MASK = SHIFT - 1; // 0x7FFFFFF - -/** - * Pack a raw CST handle and startIdx into a single Number handle. - * Uses 53 of the available integer-precision bits in an IEEE 754 double - * (27 bits for the pointer, 26 bits for startIdx). Accessor methods - * (isTerminal, matchLength, etc.) extract the low 27 bits via `& MASK`. - */ -function pack(rawHandle: number, startIdx: number): number { - return startIdx * SHIFT + rawHandle; -} - -function unpackStartIdx(handle: number): number { - const raw = handle & MASK; - return (handle - raw) / SHIFT; -} - function nextEdgePos(reader: CstReader, child: number): number { return reader.startIdx(child) + reader.matchLength(child); } -/** Extract the raw CST pointer from a packed handle. */ -export function rawHandle(handle: number): number { - return handle & MASK; -} - -/** - * Create a packed handle from a raw pointer and startIdx. - * Validates that both values fit in the packed representation. - */ -export function createHandle(rawPtr: number, startIdx: number): number { - if (rawPtr >= SHIFT) { - throw new Error( - `Raw CST pointer ${rawPtr} exceeds ${HANDLE_BITS}-bit limit (max ${SHIFT - 1})` - ); - } - const startIdxLimit = 2 ** (53 - HANDLE_BITS); - if (startIdx >= startIdxLimit) { - throw new Error( - `startIdx ${startIdx} exceeds ${53 - HANDLE_BITS}-bit limit (max ${startIdxLimit - 1})` - ); - } - return pack(rawPtr, startIdx); -} - /** * Zero-allocation access to the CST stored in Wasm linear memory. * - * Handles have startIdx packed in the upper bits. Accessor methods - * (isTerminal, matchLength, childCount, ctorName, details) extract - * the raw pointer via `& MASK`. + * Handles have startIdx packed in the upper bits. * * forEachChild(handle, fn) iterates visible children. The callback receives - * (childHandle, leadingSpacesLen, childStartIdx, index). + * (childHandle, leadingSpacesLen, index). * * Leading spaces are edge data (they belong to the parent→child relationship), * not node data. For each child edge: - * - childStartIdx === startIdx(childHandle) + * - startIdx(childHandle) is the child's start position * - leadingSpacesLen >= 0 - * - leading spaces span: start = childStartIdx - leadingSpacesLen, length = leadingSpacesLen - * - child source span: start = childStartIdx, length = matchLength(childHandle) + * - leading spaces span: start = startIdx(childHandle) - leadingSpacesLen, length = leadingSpacesLen + * - child source span: start = startIdx(childHandle), length = matchLength(childHandle) * * For root: * - startIdx(root) === rootLeadingSpacesLen - * - leading spaces before root are sourceSlice(0, rootLeadingSpacesLen) + * - leading spaces before root are input.slice(0, rootLeadingSpacesLen) */ export class CstReader { /** @internal */ @@ -105,7 +63,7 @@ export class CstReader { /** Node type: NONTERMINAL, TERMINAL, LIST, or OPT. */ type(handle: number): CstNodeType { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return CstNodeType.TERMINAL; const mrType = rawMatchRecordType(this._ctx.view, raw); if (mrType === MatchRecordType.NONTERMINAL) return CstNodeType.NONTERMINAL; @@ -116,14 +74,14 @@ export class CstReader { /** Number of raw children stored in this match record. */ childCount(handle: number): number { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return 0; return this._ctx.view.getUint32(raw + CST_CHILD_COUNT_OFFSET, true); } /** Length of matched input (in UTF-16 code units). */ matchLength(handle: number): number { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return raw >>> 2; return this._ctx.view.getUint32(raw + CST_MATCH_LENGTH_OFFSET, true); } @@ -133,7 +91,7 @@ export class CstReader { * For other types: '_terminal', '_list', '_opt'. */ ctorName(handle: number): string { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return '_terminal'; const type = rawMatchRecordType(this._ctx.view, raw); if (type === MatchRecordType.NONTERMINAL) { @@ -145,12 +103,9 @@ export class CstReader { return '_opt'; } - /** - * Upper bits of typeAndDetails. For NONTERMINAL: the ruleId. - * For ITER_FLAG: the arity (children per iteration). - */ - details(handle: number): number { - const raw = handle & MASK; + /** @internal */ + private _details(handle: number): number { + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return 0; return this._ctx.view.getInt32(raw + CST_TYPE_AND_DETAILS_OFFSET, true) >>> 2; } @@ -158,13 +113,13 @@ export class CstReader { /** Rule ID for a nonterminal node. */ ruleId(handle: number): number { assert(this.type(handle) === CstNodeType.NONTERMINAL, 'Not a nonterminal'); - return this.details(handle); + return this._details(handle); } /** Children per tuple for a list node. */ tupleArity(handle: number): number { assert(this.type(handle) === CstNodeType.LIST, 'Not a list'); - return this.details(handle); + return this._details(handle); } /** Whether an optional node has a child. */ @@ -179,11 +134,6 @@ export class CstReader { return this._ctx.input.slice(si, si + this.matchLength(handle)); } - /** Extract a substring from the input. */ - sourceSlice(startIdx: number, len: number): string { - return this._ctx.input.slice(startIdx, startIdx + len); - } - /** The full input string that was parsed. */ get input(): string { return this._ctx.input; @@ -196,19 +146,19 @@ export class CstReader { /** * Iterate over children. The callback receives (childHandle, leadingSpacesLen, - * childStartIdx, index). + * index). * - * Leading spaces belong to the parent→child edge. Use sourceSlice() to - * extract the spaces text: sourceSlice(childStartIdx - leadingSpacesLen, leadingSpacesLen). + * Leading spaces belong to the parent→child edge. Use `startIdx(child)` and + * `input.slice()` to recover the spaces text. * * Only NONTERMINAL and TERMINAL children may have leading spaces; * LIST and OPT children always have leadingSpacesLen === 0. */ forEachChild( handle: number, - fn: (child: number, leadingSpacesLen: number, childStartIdx: number, index: number) => void + fn: (child: number, leadingSpacesLen: number, index: number) => void ): void { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return; const count = this._ctx.view.getUint32(raw + CST_CHILD_COUNT_OFFSET, true); @@ -218,10 +168,8 @@ export class CstReader { for (let i = 0; i < count; i++) { const slot = this._ctx.view.getUint32(raw + CST_CHILDREN_OFFSET + i * 4, true); - // Bit 1 of the child slot is the NO_LEADING_SPACES edge flag. - const suppressSpaces = (slot & 2) !== 0; - // Strip the edge flag to get the actual value. - const rawChild = slot & ~2; + const suppressSpaces = (slot & CST_NO_LEADING_SPACES_FLAG) !== 0; + const rawChild = slot & ~CST_NO_LEADING_SPACES_FLAG; const leadingSpacesLen = !suppressSpaces && getSpacesLenAt && this._hasParentSpaces(rawChild) @@ -231,7 +179,7 @@ export class CstReader { const childStartIdx = edgeStartIdx + leadingSpacesLen; const childHandle = createHandle(rawChild, childStartIdx); - fn(childHandle, leadingSpacesLen, childStartIdx, i); + fn(childHandle, leadingSpacesLen, i); const len = isTaggedTerminal(rawChild) ? rawChild >>> 2 @@ -246,12 +194,13 @@ export class CstReader { * The caller must track `edgeStartIdx`: for the first child, it's * `startIdx(parentHandle)`; for subsequent children, it's * `startIdx(prevChild) + matchLength(prevChild)`. + * @internal */ childAt(handle: number, index: number, edgeStartIdx: number): number { - const raw = handle & MASK; + const raw = rawHandle(handle); const slot = this._ctx.view.getUint32(raw + CST_CHILDREN_OFFSET + index * 4, true); - const suppressSpaces = (slot & 2) !== 0; - const rawChild = slot & ~2; + const suppressSpaces = (slot & CST_NO_LEADING_SPACES_FLAG) !== 0; + const rawChild = slot & ~CST_NO_LEADING_SPACES_FLAG; const {getSpacesLenAt} = this._ctx; const leadingSpacesLen = @@ -364,7 +313,7 @@ export class CstReader { * Uses cached metadata (ruleIsSyntactic), not string formatting. */ isSyntactic(handle: number): boolean { - const raw = handle & MASK; + const raw = rawHandle(handle); if (isTaggedTerminal(raw)) return false; const mrType = rawMatchRecordType(this._ctx.view, raw); if (mrType !== MatchRecordType.NONTERMINAL) return false; @@ -399,36 +348,6 @@ export class CstReader { } } -/** - * Create a CstReader from a MatchContext and Wasm exports. - * Validates packed-handle limits (heap size and input length). - * @internal - */ -export function createReaderFromCtx(ctx: MatchContext, exports: any): CstReader { - const heapTop = exports.__offset.value; - if (heapTop >= SHIFT) { - throw new Error( - `Wasm heap too large for CstReader: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` - ); - } - // Two constraints on input length: - // 1. startIdx must fit in (53 - HANDLE_BITS) bits when packed. - // 2. Tagged terminals encode as (matchLength << 2) | flags, so - // matchLength (≤ input.length) must fit in (HANDLE_BITS - 2) bits. - const startIdxLimit = 2 ** (53 - HANDLE_BITS); - const terminalLimit = 2 ** (HANDLE_BITS - 2); - const inputLimit = Math.min(startIdxLimit, terminalLimit); - if (ctx.input.length >= inputLimit) { - throw new Error( - `Input too long for CstReader: ${ctx.input.length} chars exceeds limit (${inputLimit} chars)` - ); - } - - const rootLeadingSpacesLen = Math.max(0, exports.getSpacesLenAt(0)); - const rootPtr = exports.bindingsAt(0); - return new CstReader(ctx, createHandle(rootPtr, rootLeadingSpacesLen), rootLeadingSpacesLen); -} - export function createReader(result: SucceededMatchResult): CstReader { const exports = (result.grammar as any)._instance.exports; return createReaderFromCtx(result._ctx, exports); diff --git a/packages/runtime/src/cstReaderFactory.ts b/packages/runtime/src/cstReaderFactory.ts new file mode 100644 index 00000000..4e05e2d0 --- /dev/null +++ b/packages/runtime/src/cstReaderFactory.ts @@ -0,0 +1,26 @@ +import type {MatchContext} from './miniohm.ts'; + +import {CstReader} from './cstReader.ts'; +import {createHandle, HANDLE_BITS, INPUT_LENGTH_LIMIT, SHIFT} from './cstReaderShared.ts'; + +/** + * Create a CstReader from a MatchContext and Wasm exports. + * Validates packed-handle limits (heap size and input length). + */ +export function createReaderFromCtx(ctx: MatchContext, exports: any): CstReader { + const heapTop = exports.__offset.value; + if (heapTop >= SHIFT) { + throw new Error( + `Wasm heap too large for CstReader: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` + ); + } + if (ctx.input.length >= INPUT_LENGTH_LIMIT) { + throw new Error( + `Input too long for CstReader: ${ctx.input.length} chars exceeds limit (${INPUT_LENGTH_LIMIT} chars)` + ); + } + + const rootLeadingSpacesLen = Math.max(0, exports.getSpacesLenAt(0)); + const rootPtr = exports.bindingsAt(0); + return new CstReader(ctx, createHandle(rootPtr, rootLeadingSpacesLen), rootLeadingSpacesLen); +} diff --git a/packages/runtime/src/cstReaderShared.ts b/packages/runtime/src/cstReaderShared.ts new file mode 100644 index 00000000..5ff82ffd --- /dev/null +++ b/packages/runtime/src/cstReaderShared.ts @@ -0,0 +1,47 @@ +const HANDLE_BITS = 27; +const SHIFT = 2 ** HANDLE_BITS; // 134217728 +const MASK = SHIFT - 1; // 0x7FFFFFF +const START_IDX_BITS = 53 - HANDLE_BITS; +const START_IDX_LIMIT = 2 ** START_IDX_BITS; +const TERMINAL_LENGTH_LIMIT = 2 ** (HANDLE_BITS - 2); +const INPUT_LENGTH_LIMIT = Math.min(START_IDX_LIMIT, TERMINAL_LENGTH_LIMIT); + +/** + * Pack a raw CST handle and startIdx into a single Number handle. + * Uses 53 of the available integer-precision bits in an IEEE 754 double + * (27 bits for the pointer, 26 bits for startIdx). + */ +function pack(rawHandle: number, startIdx: number): number { + return startIdx * SHIFT + rawHandle; +} + +/** Extract the raw CST pointer from a packed handle. */ +export function rawHandle(handle: number): number { + return handle & MASK; +} + +/** Extract the startIdx from a packed handle. */ +export function unpackStartIdx(handle: number): number { + const raw = rawHandle(handle); + return (handle - raw) / SHIFT; +} + +/** + * Create a packed handle from a raw pointer and startIdx. + * Validates that both values fit in the packed representation. + */ +export function createHandle(rawPtr: number, startIdx: number): number { + if (rawPtr >= SHIFT) { + throw new Error( + `Raw CST pointer ${rawPtr} exceeds ${HANDLE_BITS}-bit limit (max ${SHIFT - 1})` + ); + } + if (startIdx >= START_IDX_LIMIT) { + throw new Error( + `startIdx ${startIdx} exceeds ${START_IDX_BITS}-bit limit (max ${START_IDX_LIMIT - 1})` + ); + } + return pack(rawPtr, startIdx); +} + +export {HANDLE_BITS, INPUT_LENGTH_LIMIT, SHIFT}; diff --git a/packages/runtime/src/miniohm.ts b/packages/runtime/src/miniohm.ts index 5d8acdc1..91b13be8 100644 --- a/packages/runtime/src/miniohm.ts +++ b/packages/runtime/src/miniohm.ts @@ -1,5 +1,7 @@ import {assert, checkNotNull} from './assert.ts'; -import {CstReader, createHandle, createReaderFromCtx, rawHandle} from './cstReader.ts'; +import {CstReader} from './cstReader.ts'; +import {createReaderFromCtx} from './cstReaderFactory.ts'; +import {createHandle, rawHandle} from './cstReaderShared.ts'; import {getLineAndColumn, getLineAndColumnMessage} from './extras.ts'; export const MATCH_RECORD_TYPE_MASK = 0b11; @@ -10,6 +12,9 @@ export const CST_TYPE_AND_DETAILS_OFFSET = 4; export const CST_CHILD_COUNT_OFFSET = 8; export const CST_CHILDREN_OFFSET = 16; +/** Bit 1 of a child slot is the NO_LEADING_SPACES edge flag. */ +export const CST_NO_LEADING_SPACES_FLAG = 2; + // Tagged terminal: (matchLength << 2) | 1. Bit 0 distinguishes from real pointers. // Bit 1 is the NO_LEADING_SPACES edge flag (set on child slots, not on root handles). export function isTaggedTerminal(handle: number): boolean { @@ -630,7 +635,7 @@ class CstNodeImpl implements CstNodeBase { : new SeqNodeImpl(n.children, n.source, n.sourceString); return new OptNodeImpl(child, n.source, n.sourceString); } else if (type === CstNodeType.LIST) { - const arity = n._reader.details(n._handle); + const arity = n._reader.tupleArity(n._handle); if (arity <= 1) { return new ListNodeImpl(n.children, n.source, n.sourceString); } @@ -640,7 +645,7 @@ class CstNodeImpl implements CstNodeBase { // FIXME: We don't need any of this nonsense if we actually build the SeqNodes at parse time. const seqChildren = n.children.slice(i, i + arity); const endIdx = checkNotNull(seqChildren.at(-1)).source.endIdx; - const sourceString = n._reader.sourceSlice(startIdx, endIdx - startIdx); + const sourceString = n._reader.input.slice(startIdx, endIdx); arr.push(new SeqNodeImpl(seqChildren, {startIdx, endIdx}, sourceString)); startIdx = endIdx; } @@ -708,7 +713,7 @@ class LazySpacesNode implements NonterminalNode { get sourceString(): string { if (this._sourceString === undefined) { - this._sourceString = this._reader.sourceSlice(this._startIdx, this._matchLength); + this._sourceString = this._reader.input.slice(this._startIdx, this._startIdx + this._matchLength); } return this._sourceString; } diff --git a/packages/runtime/src/unstableDebug.ts b/packages/runtime/src/unstableDebug.ts index 38817d6f..b1049187 100644 --- a/packages/runtime/src/unstableDebug.ts +++ b/packages/runtime/src/unstableDebug.ts @@ -1,3 +1,4 @@ +import {CST_NO_LEADING_SPACES_FLAG} from './miniohm.ts'; import type {SucceededMatchResult} from './miniohm.ts'; const MATCH_RECORD_TYPE_MASK = 0b11; @@ -108,8 +109,7 @@ function walkRecordTree( // Tagged terminal (bit 0 = 1). Bit 1 may be the edge flag — not a heap object either way. stats.countByType.terminal++; } else { - // Heap pointer — strip bit 1 (NO_LEADING_SPACES edge flag). - const childPtr = slot & ~2; + const childPtr = slot & ~CST_NO_LEADING_SPACES_FLAG; if (!visited.has(childPtr)) { visited.add(childPtr); stack.push(childPtr); diff --git a/packages/semantics/src/reader.test.ts b/packages/semantics/src/reader.test.ts index ef559125..34c28b89 100644 --- a/packages/semantics/src/reader.test.ts +++ b/packages/semantics/src/reader.test.ts @@ -6,7 +6,7 @@ import test from 'ava'; import {readFileSync} from 'node:fs'; import type {ReaderOperation} from './reader.ts'; -import {collect, createReaderOperation, ifPresent} from './reader.ts'; +import {createReaderOperation} from './reader.ts'; const scriptRel = (relPath: string) => new URL(relPath, import.meta.url); @@ -54,30 +54,35 @@ test('reader-based: list and opt', t => { const reversed: ReaderOperation = createReaderOperation('reversed', { Start(h, a, list, opt) { - return ( - ifPresent( - rd, - opt, - p => reversed(rd, p), - () => '' - ) + - collect(rd, list, (b, optLetter) => { - return ( - ifPresent( - rd, - optLetter, - l => reversed(rd, l), - () => '' - ) + collect(rd, b, b => reversed(rd, b)).join('') - ); - }) - .reverse() - .join('') + - reversed(rd, a) - ); + const parts: string[] = []; + rd.forEachTuple(list, (b, optLetter) => { + parts.push(reversed(rd, optLetter) + reversed(rd, b)); + }); + return reversed(rd, opt) + parts.reverse().join('') + reversed(rd, a); }, punc(h, list) { - return collect(rd, list, (c, opt) => reversed(rd, c)).join(''); + return reversed(rd, list); + }, + _list(h) { + const parts: string[] = []; + rd.forEachTuple(h, (...children) => { + let text = ''; + for (const child of children) { + text += reversed(rd, child); + } + parts.push(text); + }); + return parts.join(''); + }, + _opt(h) { + if (!rd.isPresent(h)) return ''; + return rd.withChildren(h, (_handle, ...children) => { + let text = ''; + for (const child of children) { + text += reversed(rd, child); + } + return text; + }); }, _terminal(h) { return rd.sourceString(h); diff --git a/packages/semantics/src/reader.ts b/packages/semantics/src/reader.ts index 793979ce..ee7aae50 100644 --- a/packages/semantics/src/reader.ts +++ b/packages/semantics/src/reader.ts @@ -2,7 +2,9 @@ import type {CstReader} from 'ohm-js/cstReader'; import {CstNodeType} from 'ohm-js/cstReader'; export type ReaderActionDict = { + _list?: (handle: number) => R; _nonterminal?: (handle: number) => R; + _opt?: (handle: number) => R; _terminal?: (handle: number) => R; _default?: (handle: number) => R; [ruleName: string]: ((handle: number, ...children: number[]) => R) | undefined; @@ -25,8 +27,10 @@ export function createReaderOperation( // function or a sentinel (NO_ACTION / USE_NONTERMINAL / USE_DEFAULT). let actionTable: (ActionFn | number)[] | undefined; let cachedRuleNames: readonly string[] | undefined; + const listAction = actions._list; const terminalAction = actions._terminal; const nonterminalAction = actions._nonterminal; + const optAction = actions._opt; const defaultAction = actions._default; function fail(reader: CstReader, handle: number): never { @@ -69,8 +73,14 @@ export function createReaderOperation( return fail(reader, handle); } - // List or Opt — use _default. - if (nodeType === CstNodeType.LIST || nodeType === CstNodeType.OPT) { + if (nodeType === CstNodeType.LIST) { + if (listAction) return listAction(handle); + if (defaultAction) return defaultAction(handle); + return fail(reader, handle); + } + + if (nodeType === CstNodeType.OPT) { + if (optAction) return optAction(handle); if (defaultAction) return defaultAction(handle); return fail(reader, handle); } @@ -90,48 +100,10 @@ export function createReaderOperation( return defaultAction!(handle); } if (reader.childCount(handle) === 1) { - const child = reader.childAt(handle, 0, reader.startIdx(handle)); - return doIt(reader, child); + return reader.withChildren(handle, (_handle, child) => doIt(reader, child)); } return fail(reader, handle); }; return doIt; } - -export function collect( - reader: CstReader, - handle: number, - cb: (...items: number[]) => R -): R[] { - const results: R[] = []; - reader.forEachTuple(handle, (...items) => { - results.push(cb(...items)); - }); - return results; -} - -export function ifPresent( - reader: CstReader, - handle: number, - consume: (...children: number[]) => R -): R | undefined; - -export function ifPresent( - reader: CstReader, - handle: number, - consume: (...children: number[]) => R, - orElse: () => R -): R; - -export function ifPresent( - reader: CstReader, - handle: number, - consume: (...children: number[]) => R, - orElse?: () => R -): R | undefined { - if (!reader.isPresent(handle)) { - return orElse ? orElse() : undefined; - } - return reader.withChildren(handle, (_handle, ...children) => consume(...children)); -} From df491bae2da09e762c2657620625ba1d46fed383 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Fri, 3 Apr 2026 22:11:44 +0200 Subject: [PATCH 06/11] formatting --- packages/runtime/src/miniohm.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/runtime/src/miniohm.ts b/packages/runtime/src/miniohm.ts index 646f7ac7..ef2ff0b0 100644 --- a/packages/runtime/src/miniohm.ts +++ b/packages/runtime/src/miniohm.ts @@ -713,7 +713,10 @@ class LazySpacesNode implements NonterminalNode { get sourceString(): string { if (this._sourceString === undefined) { - this._sourceString = this._reader.input.slice(this._startIdx, this._startIdx + this._matchLength); + this._sourceString = this._reader.input.slice( + this._startIdx, + this._startIdx + this._matchLength + ); } return this._sourceString; } From 7aae18dfe053f5e85c1620c3cad6f6a055e1a1d0 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Sat, 4 Apr 2026 08:53:07 +0200 Subject: [PATCH 07/11] cleanups after merge, fix bug with failures on previous result --- packages/compiler/test/test-cstReader.js | 7 +++-- packages/compiler/test/test-wasm.js | 40 ++++++++++++++++++++++++ packages/lang-python/convertToOhm.ts | 4 +-- packages/runtime/src/miniohm.ts | 18 +++++++---- packages/semantics/src/index.ts | 1 + 5 files changed, 59 insertions(+), 11 deletions(-) diff --git a/packages/compiler/test/test-cstReader.js b/packages/compiler/test/test-cstReader.js index b02f0014..0b97acdf 100644 --- a/packages/compiler/test/test-cstReader.js +++ b/packages/compiler/test/test-cstReader.js @@ -498,7 +498,8 @@ function checkInvariants(reader, handle, isLexicalParent) { let cursor = start; let reconstructed = ''; - reader.forEachChild(handle, (child, leadingSpacesLen, childStartIdx, index) => { + reader.forEachChild(handle, (child, leadingSpacesLen, index) => { + const childStartIdx = reader.startIdx(child); indices.push(index); callbackCount++; @@ -532,7 +533,7 @@ function checkInvariants(reader, handle, isLexicalParent) { // Round-trip reconstruction: interleave spaces + child text. if (leadingSpacesLen > 0) { - reconstructed += reader.sourceSlice(childStartIdx - leadingSpacesLen, leadingSpacesLen); + reconstructed += reader.input.slice(childStartIdx - leadingSpacesLen, childStartIdx); } reconstructed += reader.sourceString(child); @@ -593,7 +594,7 @@ function checkMatch(reader) { } // -- Root round-trip: leadingSpaces + render(root) === input -- - const rootSpaces = reader.sourceSlice(0, rootLeadingSpacesLen); + const rootSpaces = input.slice(0, rootLeadingSpacesLen); const rootText = reader.sourceString(root); if (rootSpaces + rootText !== input) { errors.push( diff --git a/packages/compiler/test/test-wasm.js b/packages/compiler/test/test-wasm.js index 744bf0d5..0a735229 100644 --- a/packages/compiler/test/test-wasm.js +++ b/packages/compiler/test/test-wasm.js @@ -2127,3 +2127,43 @@ test('edge flag: tagged terminal decoding with HAS_LEADING_SPACES bit', async t t.is(letter.sourceString, 'c'); t.falsy(letter.leadingSpaces); }); + +// Regression: MatchResult.input must reflect the input from *its* match, +// not the most recent match on the same grammar. +test('MatchResult.input is stable after a subsequent match', async t => { + const g = await compileAndLoad('G { start = letter+ }'); + using r1 = g.match('abc'); + using r2 = g.match('xy'); + t.is(r1.input, 'abc'); + t.is(r2.input, 'xy'); +}); + +// Regression: getRightmostFailures() must not silently return wrong data +// when wasm state has been overwritten by a subsequent match(). +test('FailedMatchResult.getRightmostFailures throws if not the most recent match', async t => { + const g = await compileAndLoad('G { start = "ok" end }'); + + using r1 = g.match('bad'); + t.true(r1.failed()); + + // A subsequent match overwrites the wasm state. + using r2 = g.match('ok'); + t.true(r2.succeeded()); + + // Accessing failures on the stale result should throw. + t.throws(() => r1.getRightmostFailures(), { + message: /not the most recent match/, + }); +}); + +// getRightmostFailures() works when called on the most recent match. +test('FailedMatchResult.getRightmostFailures works on most recent match', async t => { + const g = await compileAndLoad('G { start = "ok" end }'); + + using r1 = g.match('bad'); + t.true(r1.failed()); + + const failures = r1.getRightmostFailures(); + t.true(failures.length > 0); + t.is(r1.getRightmostFailurePosition(), 0); +}); diff --git a/packages/lang-python/convertToOhm.ts b/packages/lang-python/convertToOhm.ts index 3e124cb7..4bdfe3cb 100644 --- a/packages/lang-python/convertToOhm.ts +++ b/packages/lang-python/convertToOhm.ts @@ -1,8 +1,8 @@ import assert from 'node:assert'; import {grammar} from '@ohm-js/compiler/compat'; -import type {Operation} from '@ohm-js/semantics/src/types.ts'; -import {createOperation} from '@ohm-js/semantics/src/index.ts'; +import {createOperation} from '@ohm-js/semantics'; +import type {Operation} from '@ohm-js/semantics'; const hasOwn = (obj: object, prop: string) => Object.hasOwnProperty.call(obj, prop); diff --git a/packages/runtime/src/miniohm.ts b/packages/runtime/src/miniohm.ts index ef2ff0b0..9e34e4ab 100644 --- a/packages/runtime/src/miniohm.ts +++ b/packages/runtime/src/miniohm.ts @@ -922,7 +922,7 @@ export abstract class MatchResult { } get input(): string { - return (this.grammar as any)._input; + return this._ctx.input; } // `using` accesses [Symbol.dispose] at declaration time to get the @@ -1017,14 +1017,20 @@ export class FailedMatchResult extends MatchResult { this._rightmostFailurePosition = rightmostFailurePosition; } - /** @internal */ - private _assertAttached(property: string) { + private _assertMostRecent(method: string) { if (!this._attached) { throw new Error( - `Cannot access '${property}' after MatchResult has been disposed. ` + + `Cannot access '${method}' after MatchResult has been disposed. ` + `Access failure information before calling dispose(), or use result.use().` ); } + const stack = (this.grammar as any)._resultStack; + if (stack.at(-1) !== this) { + throw new Error( + `Cannot call ${method} on a FailedMatchResult that is not the most recent match. ` + + `Failure information is only available before a subsequent match() call.` + ); + } } getRightmostFailurePosition(): number { @@ -1033,11 +1039,11 @@ export class FailedMatchResult extends MatchResult { getRightmostFailures(): Failure[] { if (this._rightmostFailures === null) { - this._assertAttached('getRightmostFailures()'); + this._assertMostRecent('getRightmostFailures()'); const {exports} = (this.grammar as any)._instance; const ruleIds = (this.grammar as any)._ruleIds; const ruleNames = (this.grammar as any)._ruleNames; - const inputLength = (this.grammar as any)._input.length; + const inputLength = this._ctx.input.length; exports.recordFailures(inputLength, ruleIds.get(ruleNames[0])); // Use a Map to deduplicate by description while preserving fluffy status. diff --git a/packages/semantics/src/index.ts b/packages/semantics/src/index.ts index 9d89c3f7..206a9def 100644 --- a/packages/semantics/src/index.ts +++ b/packages/semantics/src/index.ts @@ -1,5 +1,6 @@ import type {CstNode, NonterminalNode} from 'ohm-js'; import type {ActionDict, Operation, VisitorCtx} from './types.ts'; +export type {Operation} from './types.ts'; const globalActionStack: [string, string, string][] = []; From 2b2683735f663f1829bb65ee910ad8079890fa40 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Sat, 4 Apr 2026 09:08:43 +0200 Subject: [PATCH 08/11] fix error with action stack --- packages/semantics/src/index.test.ts | 27 +++++++++++++++++++++++++++ packages/semantics/src/index.ts | 3 ++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index b8fc8eb6..17f7a5d0 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -102,3 +102,30 @@ test('it handles v17 CSTs', t => { t.fail('parse failed'); } }); + +// Regression: a missing-action error in a nested call should not corrupt +// the global action stack (the finally block should only pop if this frame pushed). +test('missing action does not corrupt the action stack', t => { + // 'start' has an action that catches the missing-action error for 'broken' + // (which has 2 children, so no default action applies) and then visits + // 'alsoBroken' (also 2 children, no action). + // With the bug: broken's finally pops start's frame, then start's finally + // pops from empty. When alsoBroken throws, the stack trace is empty — + // it should still show start. + const twoChildG = ohm.grammar( + 'G { start = broken alsoBroken broken = "a" "b" alsoBroken = "c" "d" }' + ); + const op: Operation = createOperation('op', { + start(ctx, broken, alsoBroken) { + try { op(broken); } catch {} + return op(alsoBroken); + }, + _terminal(ctx) { + return (ctx.thisNode as any).sourceString; + }, + }); + const cst = twoChildG.match('abcd').getCstRoot(); + const err = t.throws(() => op(cst), {message: /missing semantic action: alsoBroken/}); + // The error trace should show that we're inside 'start'. + t.regex(err!.message, /op > start/); +}); diff --git a/packages/semantics/src/index.ts b/packages/semantics/src/index.ts index 206a9def..8b7776aa 100644 --- a/packages/semantics/src/index.ts +++ b/packages/semantics/src/index.ts @@ -22,6 +22,7 @@ export function createOperation>( }; // Ported from Operation.execute in ohm-js/src/Semantics.js + const stackLen = globalActionStack.length; try { // Look for a semantic action whose name matches the node's constructor name, which is either // the name of a rule in the grammar, or '_terminal' (for a terminal node), or '_iter' (for an @@ -67,7 +68,7 @@ export function createOperation>( throw new Error(`missing semantic action: ${ctorName}` + getActionStackTrace()); // End inlined logic } finally { - globalActionStack.pop(); + globalActionStack.length = stackLen; } }; return doIt; From 086473aac2369a2810f528dc652b9125c9848d2f Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Sat, 4 Apr 2026 09:12:04 +0200 Subject: [PATCH 09/11] fix example --- packages/semantics/src/index.test.ts | 4 +++- packages/semantics/src/reader.test.ts | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index 17f7a5d0..97b645b0 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -117,7 +117,9 @@ test('missing action does not corrupt the action stack', t => { ); const op: Operation = createOperation('op', { start(ctx, broken, alsoBroken) { - try { op(broken); } catch {} + try { + op(broken); + } catch {} return op(alsoBroken); }, _terminal(ctx) { diff --git a/packages/semantics/src/reader.test.ts b/packages/semantics/src/reader.test.ts index 34c28b89..fa7621e2 100644 --- a/packages/semantics/src/reader.test.ts +++ b/packages/semantics/src/reader.test.ts @@ -90,7 +90,7 @@ test('reader-based: list and opt', t => { _default(h) { let result = ''; rd.forEachChild(h, child => { - result = reversed(rd, child); + result += reversed(rd, child); }); return result; }, From c81309eebc308a052745b4702f06d55627f51444 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Sat, 4 Apr 2026 09:19:24 +0200 Subject: [PATCH 10/11] fix CI --- packages/semantics/src/index.test.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index 97b645b0..80f96862 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -126,8 +126,9 @@ test('missing action does not corrupt the action stack', t => { return (ctx.thisNode as any).sourceString; }, }); - const cst = twoChildG.match('abcd').getCstRoot(); - const err = t.throws(() => op(cst), {message: /missing semantic action: alsoBroken/}); + const r = twoChildG.match('abcd'); + assert(r.succeeded(), 'match should succeed'); + const err = t.throws(() => op(r.getCstRoot()), {message: /missing semantic action: alsoBroken/}); // The error trace should show that we're inside 'start'. t.regex(err!.message, /op > start/); }); From 6abdc4e434143657748fc4a541136a782f56ed35 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Sat, 4 Apr 2026 10:16:01 +0200 Subject: [PATCH 11/11] fix CI for Node 22 --- packages/compiler/test/test-wasm.js | 39 ++++++++++++++++------------ packages/semantics/src/index.test.ts | 4 ++- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/packages/compiler/test/test-wasm.js b/packages/compiler/test/test-wasm.js index 0a735229..471164f8 100644 --- a/packages/compiler/test/test-wasm.js +++ b/packages/compiler/test/test-wasm.js @@ -2132,10 +2132,12 @@ test('edge flag: tagged terminal decoding with HAS_LEADING_SPACES bit', async t // not the most recent match on the same grammar. test('MatchResult.input is stable after a subsequent match', async t => { const g = await compileAndLoad('G { start = letter+ }'); - using r1 = g.match('abc'); - using r2 = g.match('xy'); - t.is(r1.input, 'abc'); - t.is(r2.input, 'xy'); + g.match('abc').use(r1 => { + g.match('xy').use(r2 => { + t.is(r1.input, 'abc'); + t.is(r2.input, 'xy'); + }); + }); }); // Regression: getRightmostFailures() must not silently return wrong data @@ -2143,16 +2145,18 @@ test('MatchResult.input is stable after a subsequent match', async t => { test('FailedMatchResult.getRightmostFailures throws if not the most recent match', async t => { const g = await compileAndLoad('G { start = "ok" end }'); - using r1 = g.match('bad'); - t.true(r1.failed()); + g.match('bad').use(r1 => { + t.true(r1.failed()); - // A subsequent match overwrites the wasm state. - using r2 = g.match('ok'); - t.true(r2.succeeded()); + // A subsequent match overwrites the wasm state. + g.match('ok').use(r2 => { + t.true(r2.succeeded()); - // Accessing failures on the stale result should throw. - t.throws(() => r1.getRightmostFailures(), { - message: /not the most recent match/, + // Accessing failures on the stale result should throw. + t.throws(() => r1.getRightmostFailures(), { + message: /not the most recent match/, + }); + }); }); }); @@ -2160,10 +2164,11 @@ test('FailedMatchResult.getRightmostFailures throws if not the most recent match test('FailedMatchResult.getRightmostFailures works on most recent match', async t => { const g = await compileAndLoad('G { start = "ok" end }'); - using r1 = g.match('bad'); - t.true(r1.failed()); + g.match('bad').use(r1 => { + t.true(r1.failed()); - const failures = r1.getRightmostFailures(); - t.true(failures.length > 0); - t.is(r1.getRightmostFailurePosition(), 0); + const failures = r1.getRightmostFailures(); + t.true(failures.length > 0); + t.is(r1.getRightmostFailurePosition(), 0); + }); }); diff --git a/packages/semantics/src/index.test.ts b/packages/semantics/src/index.test.ts index 80f96862..f49bc635 100644 --- a/packages/semantics/src/index.test.ts +++ b/packages/semantics/src/index.test.ts @@ -128,7 +128,9 @@ test('missing action does not corrupt the action stack', t => { }); const r = twoChildG.match('abcd'); assert(r.succeeded(), 'match should succeed'); - const err = t.throws(() => op(r.getCstRoot()), {message: /missing semantic action: alsoBroken/}); + const err = t.throws(() => op(r.getCstRoot()), { + message: /missing semantic action: alsoBroken/, + }); // The error trace should show that we're inside 'start'. t.regex(err!.message, /op > start/); });