From ee7c58f6999c562b82146a2d7d285aca421865b6 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 14:33:51 -0700 Subject: [PATCH 1/6] dedup_key: add tableShareKey for post-embed smallTable identity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tableShareKey, a stable identifier for the share group of slice-headers inside a smallTable. After smallTable is embedded into faState by value, two states that hold copies of one source smallTable still share the underlying steps/ceilings/epsilons backing arrays — and a key built from SliceData(steps) + len(steps) captures that equivalence. This replaces *smallTable-pointer-identity as the dedup key in the next commit. No behavioral change yet; this commit only adds the helper and unit tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- dedup_key.go | 26 +++++++++++++++++++++ dedup_key_test.go | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 dedup_key.go create mode 100644 dedup_key_test.go diff --git a/dedup_key.go b/dedup_key.go new file mode 100644 index 0000000..aaa830e --- /dev/null +++ b/dedup_key.go @@ -0,0 +1,26 @@ +package quamina + +import "unsafe" + +// tableShareKey returns a stable identifier for a smallTable's "share group". +// Two states whose smallTables hold slice-headers pointing at the same `steps` +// backing array (which is what happens when one smallTable struct value is +// copied into multiple faStates during construction) will produce equal +// keys. This replaces *smallTable-pointer identity as the dedup key in +// epsilon-closure computation after smallTable is embedded into faState +// by value. +// +// A zero key (nil pointer, len 0) means "no share group" — used for tables +// with no byte transitions. Callers that want to dedup such tables should +// skip the zero key. 
+type tableShareKey struct { + stepsData unsafe.Pointer + stepsLen int +} + +func newTableShareKey(t *smallTable) tableShareKey { + return tableShareKey{ + stepsData: unsafe.Pointer(unsafe.SliceData(t.steps)), + stepsLen: len(t.steps), + } +} diff --git a/dedup_key_test.go b/dedup_key_test.go new file mode 100644 index 0000000..7ba95b5 --- /dev/null +++ b/dedup_key_test.go @@ -0,0 +1,57 @@ +package quamina + +import ( + "testing" +) + +func TestTableShareKey_SharedBackings(t *testing.T) { + // Construct one smallTable, value-copy it (simulating post-embed share). + src := smallTable{ + ceilings: []byte{'a', 'b', byte(byteCeiling)}, + steps: []*faState{nil, nil, nil}, + } + copy1 := src + copy2 := src + if newTableShareKey(&copy1) != newTableShareKey(&copy2) { + t.Errorf("value-copied tables should share key; got %v vs %v", + newTableShareKey(&copy1), newTableShareKey(&copy2)) + } +} + +func TestTableShareKey_DistinctBackings(t *testing.T) { + t1 := smallTable{ + ceilings: []byte{'a', byte(byteCeiling)}, + steps: []*faState{nil, nil}, + } + t2 := smallTable{ + ceilings: []byte{'a', byte(byteCeiling)}, + steps: []*faState{nil, nil}, + } + if newTableShareKey(&t1) == newTableShareKey(&t2) { + t.Errorf("independently-built tables should not share key") + } +} + +// TestTableShareKey_AppendBreaksShare verifies that when a value-copy +// is mutated via append in a way that reallocates the backing array, +// the keys diverge. We force reallocation by starting at cap=1 and +// appending many entries. +func TestTableShareKey_AppendBreaksShare(t *testing.T) { + src := smallTable{ + ceilings: make([]byte, 0, 1), + steps: make([]*faState, 0, 1), + } + src.ceilings = append(src.ceilings, byte(byteCeiling)) + src.steps = append(src.steps, nil) + copy1 := src + // Appending 8 entries to a slice with cap=1 guarantees at least one + // realloc of the steps backing. 
+ for i := 0; i < 8; i++ { + copy1.steps = append(copy1.steps, nil) + copy1.ceilings = append(copy1.ceilings, byte(i)) + } + if newTableShareKey(&src) == newTableShareKey(&copy1) { + t.Errorf("expected keys to diverge after append-with-realloc; got equal: %v", + newTableShareKey(&src)) + } +} From c6d4ef15f453d6d77770b5f794c238020be33d56 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 12:56:50 -0700 Subject: [PATCH 2/6] epsi_closure: dedup via tableShareKey instead of *smallTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switches closureBuffers.tables from map[*smallTable]*tableMark to map[tableShareKey]*tableMark. With *smallTable pointer identity still intact (pre-embed), the new key is equivalent: two states that share a *smallTable trivially share their steps backing. No behavioral change. Sets up the next commit, which embeds smallTable in faState by value — at which point pointer-identity goes away but slice-backing identity remains. --- epsi_closure.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/epsi_closure.go b/epsi_closure.go index 7e923b3..6f1ace2 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -20,24 +20,25 @@ type closureBuffers struct { gen uint32 closureSetGen uint32 closureList []*faState - tables map[*smallTable]*tableMark + tables map[tableShareKey]*tableMark states map[*faState]uint32 } func newClosureBuffers() *closureBuffers { return &closureBuffers{ gen: 1, - tables: make(map[*smallTable]*tableMark), + tables: make(map[tableShareKey]*tableMark), states: make(map[*faState]uint32), } } // tableMarkOf returns the tableMark for t, creating one on first access. 
func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark { - m, ok := b.tables[t] + key := newTableShareKey(t) + m, ok := b.tables[key] if !ok { m = &tableMark{} - b.tables[t] = m + b.tables[key] = m } return m } From 25593150d3e6777a2392363e4ec4c4e0d5fec37b Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 13:48:46 -0700 Subject: [PATCH 3/6] nfa: embed smallTable into faState by value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit faState.table changes from *smallTable to smallTable (inline). This shrinks per-state memory: - faState 64B + smallTable 80B-class = 144B per state pair - embedded faState fits 128B size class = 128B per state - saves 16B/state on workloads without table-pointer sharing vmFields.startTable becomes startState *faState (the start state owns the start table inline). traverseNFA, epsilonClosure, and related helpers take *faState instead of *smallTable. Epsilon-closure dedup continues to work via tableShareKey (added in the previous two commits) — value-copies of one source smallTable still share their slice backings, and SliceData(steps) is the new identity. Size-assertion tests are not yet recalibrated; that follows in the next commit. 
--- anything_but.go | 8 ++-- core_matcher_test.go | 4 +- epsi_closure.go | 25 +++++++------ epsi_closure_test.go | 67 ++++++++++++++------------------- memory_cost.go | 16 ++++---- memory_cost_test.go | 6 +-- monocase.go | 24 ++++++------ nfa.go | 75 +++++++++++++++---------------------- nfa_test.go | 34 +++++++++-------- prettyprinter.go | 34 ++++++++++++++--- prettyprinter_test.go | 26 ++++++------- quantified_atom.go | 8 ++-- regexp_nfa.go | 83 +++++++++++++++++++---------------------- regexp_nfa_test.go | 26 ++++++------- regexp_validity_test.go | 8 ++-- rune_range.go | 30 +++++++-------- rune_range_test.go | 8 ++-- shell_style.go | 14 +++---- shell_style_test.go | 2 +- small_table.go | 8 ++-- small_table_test.go | 12 +++--- stats.go | 8 ++-- value_matcher.go | 79 ++++++++++++++++++++++----------------- value_matcher_test.go | 42 ++++++++++----------- wildcard.go | 14 +++---- 25 files changed, 334 insertions(+), 327 deletions(-) diff --git a/anything_but.go b/anything_but.go index 8253f5b..0faf426 100644 --- a/anything_but.go +++ b/anything_but.go @@ -70,12 +70,12 @@ func readAnythingButSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typ // Making a succession of anything-but automata for each of "a" and "b" and then merging them turns out not // to work because what the caller means is really an AND - everything that matches neither "a" nor "b". So // in principle we could intersect automata. -func makeMultiAnythingButFA(vals [][]byte) (*smallTable, *fieldMatcher) { +func makeMultiAnythingButFA(vals [][]byte) (*faState, *fieldMatcher) { nextField := newFieldMatcher() success := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}} - ret, _ := makeOneMultiAnythingButStep(vals, 0, success), nextField - return ret, nextField + startTable := makeOneMultiAnythingButStep(vals, 0, success) + return &faState{table: startTable}, nextField } // makeOneMultiAnythingButStep - spookeh. 
The idea is that there will be N smallTables in this FA, where N is @@ -84,7 +84,7 @@ func makeMultiAnythingButFA(vals [][]byte) (*smallTable, *fieldMatcher) { // yet been exhausted. We notice when we get to the end of each val and put in a valueTerminator transition // to a step with no nextField entry, i.e. failure because we've exactly matched one of the anything-but // strings. -func makeOneMultiAnythingButStep(vals [][]byte, index int, success *faState) *smallTable { +func makeOneMultiAnythingButStep(vals [][]byte, index int, success *faState) smallTable { // this will be the default transition in all the anything-but tables. var u unpackedTable for i := range u { diff --git a/core_matcher_test.go b/core_matcher_test.go index dc83979..51370f2 100644 --- a/core_matcher_test.go +++ b/core_matcher_test.go @@ -330,8 +330,8 @@ func TestSimpleaddPattern(t *testing.T) { // which means the finite automata are hidden deep inside the coreMatcher instance // and hard to get at. This helper routine fetches the value-matcher automaton // corresponding to the "path" argument -func fetchFAForPath(t *testing.T, cm *coreMatcher, path string) *smallTable { +func fetchFAForPath(t *testing.T, cm *coreMatcher, path string) *faState { t.Helper() vm := cm.fields().state.fields().transitions[path] - return vm.fields().startTable + return vm.fields().startState } diff --git a/epsi_closure.go b/epsi_closure.go index 6f1ace2..196deda 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -43,29 +43,30 @@ func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark { return m } -// epsilonClosure walks the automaton starting from the given table +// epsilonClosure walks the automaton starting from the given state // and precomputes the epsilon closure for every reachable faState. 
-func epsilonClosure(table *smallTable) { +func epsilonClosure(start *faState) { bufs := newClosureBuffers() - closureForNfa(table, bufs) + closureForState(start, bufs) + closureForNfa(start, bufs) } -func closureForNfa(table *smallTable, bufs *closureBuffers) { - mark := bufs.tableMarkOf(table) +func closureForNfa(state *faState, bufs *closureBuffers) { + mark := bufs.tableMarkOf(&state.table) if mark.lastVisitedGen == bufs.gen { return } mark.lastVisitedGen = bufs.gen - for _, state := range table.steps { - if state != nil { - closureForState(state, bufs) - closureForNfa(state.table, bufs) + for _, s := range state.table.steps { + if s != nil { + closureForState(s, bufs) + closureForNfa(s, bufs) } } - for _, eps := range table.epsilons { + for _, eps := range state.table.epsilons { closureForState(eps, bufs) - closureForNfa(eps.table, bufs) + closureForNfa(eps, bufs) } } @@ -106,7 +107,7 @@ func closureForState(state *faState, bufs *closureBuffers) { dedupGen := bufs.gen closure := make([]*faState, 0, len(bufs.closureList)) for _, s := range bufs.closureList { - mark := bufs.tableMarkOf(s.table) + mark := bufs.tableMarkOf(&s.table) if mark.closureGen == dedupGen { if sameFieldTransitions(mark.closureRep, s) { continue diff --git a/epsi_closure_test.go b/epsi_closure_test.go index 58313cf..8c0f522 100644 --- a/epsi_closure_test.go +++ b/epsi_closure_test.go @@ -6,23 +6,19 @@ import ( ) func TestEpsilonClosure(t *testing.T) { - var st *smallTable - pp := newPrettyPrinter(4589) - st = newSmallTable() - aSa := &faState{table: st} - pp.labelTable(aSa.table, "aSa") + aSa := &faState{table: newSmallTable()} + pp.labelTable(&aSa.table, "aSa") aSstar := &faState{} aSc := &faState{} - st.addByteStep('b', aSstar) - st = newSmallTable() - st.epsilons = []*faState{aSstar} - st.addByteStep('c', aSc) - aSstar.table = st - pp.labelTable(aSstar.table, "aSstar") + aSa.table.addByteStep('b', aSstar) + aSstar.table = newSmallTable() + aSstar.table.epsilons = []*faState{aSstar} + 
aSstar.table.addByteStep('c', aSc) + pp.labelTable(&aSstar.table, "aSstar") aSc.table = newSmallTable() - pp.labelTable(aSc.table, "aSc") + pp.labelTable(&aSc.table, "aSc") aFM := newFieldMatcher() aSc.fieldTransitions = []*fieldMatcher{aFM} @@ -41,31 +37,28 @@ func TestEpsilonClosure(t *testing.T) { // (b) ab|*x var bSa, bSb, bSsplice, bSstar, bSx *faState - st = newSmallTable() - bSa = &faState{table: st} + bSa = &faState{table: newSmallTable()} bFM1 := newFieldMatcher() bSb = &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{bFM1}} bSa.table.addByteStep('b', bSb) bFM2 := newFieldMatcher() bSx = &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{bFM2}} - st = newSmallTable() - bSstar = &faState{table: st} - st.epsilons = []*faState{bSstar} - st.addByteStep('x', bSx) - st.epsilons = []*faState{bSstar} + bSstar = &faState{table: newSmallTable()} + bSstar.table.epsilons = []*faState{bSstar} + bSstar.table.addByteStep('x', bSx) + bSstar.table.epsilons = []*faState{bSstar} - st = newSmallTable() - st.epsilons = []*faState{bSa, bSstar} - bSsplice = &faState{table: st} + bSsplice = &faState{table: newSmallTable()} + bSsplice.table.epsilons = []*faState{bSa, bSstar} // var bSa, bSb, bSsplice, bSstar, bSx *faState - pp.labelTable(bSa.table, "bSa") - pp.labelTable(bSb.table, "bSb") - pp.labelTable(bSstar.table, "bSstar") - pp.labelTable(bSx.table, "bSx") - pp.labelTable(bSsplice.table, "bSsplice") + pp.labelTable(&bSa.table, "bSa") + pp.labelTable(&bSb.table, "bSb") + pp.labelTable(&bSstar.table, "bSstar") + pp.labelTable(&bSx.table, "bSx") + pp.labelTable(&bSsplice.table, "bSsplice") bEcShouldBeOne := []*faState{bSa, bSb, bSx, bSstar} zNames := []string{"bSa", "bSb", "bSx", "bSstar"} @@ -102,8 +95,7 @@ func TestEpsilonClosure(t *testing.T) { names := []string{"cStart", "cSa", "cSb", "cSc", "cSz"} states := []*faState{cStart, cSa, cSb, cSc, cSz} for i, name := range names { - st = states[i].table - pp.labelTable(st, name) + 
pp.labelTable(&states[i].table, name) } closureForStateNoBufs(cStart) @@ -158,16 +150,15 @@ func TestTablePointerDedupPreservesFieldTransitions(t *testing.T) { xState := &faState{table: xTable} // quoteState transitions on 'x' to xState - quoteTable := newSmallTable() - quoteTable.addByteStep('x', xState) - quoteState := &faState{table: quoteTable} + quoteState := &faState{table: newSmallTable()} + quoteState.table.addByteStep('x', xState) // start transitions on '"' to quoteState - startTable := newSmallTable() - startTable.addByteStep('"', quoteState) + start := &faState{table: newSmallTable()} + start.table.addByteStep('"', quoteState) // Compute epsilon closures for the whole automaton - epsilonClosure(startTable) + epsilonClosure(start) // Verify: xState's closure must include both stateA and stateB if !containsState(t, xState.epsilonClosure, stateA) { @@ -181,7 +172,7 @@ func TestTablePointerDedupPreservesFieldTransitions(t *testing.T) { bufs := newNfaBuffers() tm := bufs.getTransmap() tm.push() - nfaResult := traverseNFA(startTable, []byte(`"x"`), nil, bufs) + nfaResult := traverseNFA(start, []byte(`"x"`), nil, bufs) tm.pop() if !slices.Contains(nfaResult, fmA) { @@ -192,8 +183,8 @@ func TestTablePointerDedupPreservesFieldTransitions(t *testing.T) { } // Verify via DFA conversion: both field matchers must survive - dfa := nfa2Dfa(startTable) - dfaResult := traverseDFA(dfa.table, []byte(`"x"`), nil) + dfa := nfa2Dfa(start) + dfaResult := traverseDFA(dfa, []byte(`"x"`), nil) if !slices.Contains(dfaResult, fmA) { t.Error("DFA traversal missing fmA") diff --git a/memory_cost.go b/memory_cost.go index 794d5c6..1211704 100644 --- a/memory_cost.go +++ b/memory_cost.go @@ -23,11 +23,11 @@ func cmFieldMatcherStats(fm *fieldMatcher, stats *matcherStats, pp printer) { if singleton != nil { stats.bytes += int64(len(singleton)) } - table := vm.fields().startTable - if table == nil { + start := vm.fields().startState + if start == nil { continue } - 
cmStateStats(&faState{table: table}, stats, pp) + cmStateStats(start, stats, pp) } } @@ -49,7 +49,9 @@ func cmStateStats(state *faState, stats *matcherStats, pp printer) { } } for _, eps := range state.table.epsilons { - cmStateStats(eps, stats, pp) + if eps != nil { + cmStateStats(eps, stats, pp) + } } for _, trans := range state.fieldTransitions { cmFieldMatcherStats(trans, stats, pp) @@ -66,9 +68,9 @@ func mcSmallTable(st *smallTable) int64 { func mcFaState(state *faState) int64 { cost := mcFaStateBase - if state.table != nil { - cost += mcSmallTable(state.table) - } + cost += int64(cap(state.table.ceilings)) + cost += mcPointer * int64(cap(state.table.steps)) + cost += mcPointer * int64(cap(state.table.epsilons)) cost += mcPointer * int64(cap(state.fieldTransitions)) cost += mcPointer * int64(cap(state.epsilonClosure)) return cost diff --git a/memory_cost_test.go b/memory_cost_test.go index aba59df..7a6f7f7 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -10,9 +10,9 @@ func TestMcBasicSizes(t *testing.T) { table := newSmallTable() // NewSmallTable output: base + 1 byte of ceiling + 1 pointer of steps (8b) + want := tableBase + 1 + mcPointer - tableGot := mcSmallTable(table) + tableGot := mcSmallTable(&table) if want != tableGot { - t.Errorf("Table wanted %d got %d", want, mcSmallTable(table)) + t.Errorf("Table wanted %d got %d", want, mcSmallTable(&table)) } stateBase := int64(unsafe.Sizeof(faState{})) state := faState{table: table} @@ -69,7 +69,7 @@ func TestMcNfaSizes(t *testing.T) { stats := &matcherStats{ seenStates: make(map[*faState]bool), } - cmStateStats(&faState{table: fa1}, stats, pp) + cmStateStats(fa1, stats, pp) wantedBytes := int64(1321) // laboriously hand-calculated wantedFanout := int64(5) wantedMaxFanout := int64(2) diff --git a/monocase.go b/monocase.go index c662b4a..68a41bf 100644 --- a/monocase.go +++ b/monocase.go @@ -40,11 +40,11 @@ func readMonocaseSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []typedV // 
multi-byte and in fact not even the same number of bytes. So in that case you need two paths forward that step // through the bytes of each form and then rejoin to arrive at a state. Also note // that in many cases the upper/lower case versions of a rune have leading bytes in common -func makeMonocaseFA(val []byte, pp printer) (*smallTable, *fieldMatcher) { +func makeMonocaseFA(val []byte, pp printer) (*faState, *fieldMatcher) { fm := newFieldMatcher() index := 0 - table := newSmallTable() // start state - startTable := table + startState := &faState{table: newSmallTable()} // start state + currentTable := &startState.table var nextStep *faState for index < len(val) { var orig, alt []byte @@ -56,32 +56,32 @@ func makeMonocaseFA(val []byte, pp printer) (*smallTable, *fieldMatcher) { utf8.EncodeRune(alt, altRune) } nextStep = &faState{table: newSmallTable()} - pp.labelTable(nextStep.table, fmt.Sprintf("On %d, alt=%v", val[index], alt)) + pp.labelTable(&nextStep.table, fmt.Sprintf("On %d, alt=%v", val[index], alt)) if alt == nil { // easy case, no casefolding issues. 
We should maybe try to coalesce these // no-casefolding sections and only call makeFAFragment once for all of them origFA := makeFAFragment(orig, nextStep, pp) - table.addByteStep(orig[0], origFA) + currentTable.addByteStep(orig[0], origFA) } else { // two paths to next state // but they might have a common prefix var commonPrefix int for commonPrefix = 0; orig[commonPrefix] == alt[commonPrefix]; commonPrefix++ { prefixStep := &faState{table: newSmallTable()} - table.addByteStep(orig[commonPrefix], prefixStep) - table = prefixStep.table - pp.labelTable(table, fmt.Sprintf("common prologue on %x", orig[commonPrefix])) + currentTable.addByteStep(orig[commonPrefix], prefixStep) + currentTable = &prefixStep.table + pp.labelTable(currentTable, fmt.Sprintf("common prologue on %x", orig[commonPrefix])) } // now build automata for the orig and alt versions of the char origFA := makeFAFragment(orig[commonPrefix:], nextStep, pp) altFA := makeFAFragment(alt[commonPrefix:], nextStep, pp) - table.addByteStep(orig[commonPrefix], origFA) - table.addByteStep(alt[commonPrefix], altFA) + currentTable.addByteStep(orig[commonPrefix], origFA) + currentTable.addByteStep(alt[commonPrefix], altFA) } - table = nextStep.table + currentTable = &nextStep.table index += width } lastState := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{fm}} nextStep.table.addByteStep(valueTerminator, lastState) - return startTable, fm + return startState, fm } diff --git a/nfa.go b/nfa.go index b6b9d89..989cdae 100644 --- a/nfa.go +++ b/nfa.go @@ -11,7 +11,7 @@ import ( // automaton requires a smallTable and for some of them, taking the step means you've matched a value and can // transition to a new fieldMatcher, in which case the fieldTransitions slice will be non-nil type faState struct { - table *smallTable + table smallTable fieldTransitions []*fieldMatcher epsilonClosure []*faState // precomputed epsilon closure including self isSpinner bool @@ -93,8 +93,6 @@ type nfaBuffers struct { 
resultBuf []X transmap *transmap fieldSet map[*fieldMatcher]bool - startState *faState - startClosure []*faState qNumBuf [MaxBytesInEncoding]byte } @@ -140,25 +138,13 @@ func (nb *nfaBuffers) getFieldSet() map[*fieldMatcher]bool { return nb.fieldSet } -func (nb *nfaBuffers) getStartState(table *smallTable) *faState { - if nb.startState == nil { - nb.startState = &faState{} - nb.startClosure = make([]*faState, 1) - } - nb.startState.table = table - nb.startClosure[0] = nb.startState - nb.startState.epsilonClosure = nb.startClosure - return nb.startState -} - // nfa2Dfa does what the name says, but as of 2026/01 is not used. -func nfa2Dfa(nfaTable *smallTable) *faState { +func nfa2Dfa(nfaStart *faState) *faState { // The start state always has a trivial epsilon closure (just itself) because // all Quamina automata begin by matching the opening quote (0x22). The start // table therefore has a single transition on `"` and never has epsilons. - startState := &faState{table: nfaTable} - startState.epsilonClosure = []*faState{startState} - startNfa := []*faState{startState} + nfaStart.epsilonClosure = []*faState{nfaStart} + startNfa := []*faState{nfaStart} return n2dNode(startNfa, newStateLists()) } @@ -189,14 +175,14 @@ func n2dNode(rawNStates []*faState, sList *stateLists) *faState { // to simplify, let's unpack all the ingredients nUnpacked := make([]*unpackedTable, len(ingredients)) for i, nState := range ingredients { - nUnpacked[i] = unpackTable(nState.table) + nUnpacked[i] = unpackTable(&nState.table) } // Unpack the DFA table once, set all byte transitions, then pack once — // the old code called addByteStep per byte which unpacked and repacked // for each of up to 256 values. rawStates is allocated once and reset // with [:0] each iteration to avoid per-byte-value slice allocation. 
- dfaUnpacked := unpackTable(dfaState.table) + dfaUnpacked := unpackTable(&dfaState.table) rawStates := make([]*faState, 0, len(ingredients)) for utf8byte := 0; utf8byte < byteCeiling; utf8byte++ { rawStates = rawStates[:0] @@ -238,7 +224,8 @@ func n2dNode(rawNStates []*faState, sList *stateLists) *faState { // NFA-capable data structure, we can traverse it deterministically if we know in advance that every // combination of an faState with a byte will transition to at most one other faState. -func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*fieldMatcher { +func traverseDFA(start *faState, val []byte, transitions []*fieldMatcher) []*fieldMatcher { + table := &start.table for index := 0; index <= len(val); index++ { var utf8Byte byte if index < len(val) { @@ -251,7 +238,7 @@ func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []* break } transitions = append(transitions, next.fieldTransitions...) - table = next.table + table = &next.table } return transitions } @@ -262,12 +249,12 @@ func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []* // collected in the nextStates list. The bufs structure contains three buffers, one each for // currentStates, nextStates, and the epsilon closure of one particular state. These are re-used // and should grow with use and minimize the need for memory allocation. -func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers) []*fieldMatcher { +func traverseNFA(start *faState, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers) []*fieldMatcher { currentStates := bufs.getBuf1() // The start state always has a trivial epsilon closure (just itself) because // all Quamina automata begin by matching the opening quote (0x22). The start // table therefore has a single transition on `"` and never has epsilons. 
- currentStates = append(currentStates, bufs.getStartState(table)) + currentStates = append(currentStates, start) nextStates := bufs.getBuf2() // Use flat dedup set — no stacking needed since traverseNFA is not recursive @@ -376,9 +363,9 @@ func simplifyCollect(s *faState, visited map[*faState]struct{}, targets []*faSta // minimal or even avoids being wasteful. // INVARIANT: neither argument is nil // INVARIANT: To be thread-safe, no existing table can be updated except when we're building it -func mergeFAs(table1, table2 *smallTable, pp printer) *smallTable { - state1 := &faState{table: table1} - state2 := &faState{table: table2} +func mergeFAs(table1, table2 *smallTable, pp printer) smallTable { + state1 := &faState{table: *table1} + state2 := &faState{table: *table2} s := mergeFAStates(state1, state2, make(map[faStepKey]*faState), pp) return s.table } @@ -399,7 +386,7 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, pp p // TODO: This is still creating way too many splice states and slowing down traversal. Fix that. switch { case state1.isSpinner && state2.isSpinner: - pp.labelTable(combined.table, "2Spinners") + pp.labelTable(&combined.table, "2Spinners") combined = symmetricSpinnerMerge(state1, state2, keyMemo, pp) keyMemo[mKey] = combined return combined @@ -420,7 +407,7 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, pp p // If either of the states to be merged has epsilons we have to do a splice. // To avoid deep nesting of splice states, we flatten the epsilon targets. 
if len(state1.table.epsilons) != 0 || len(state2.table.epsilons) != 0 { - pp.labelTable(combined.table, "Splice") + pp.labelTable(&combined.table, "Splice") combined.table.epsilons = simplifySplices(state1, state2) keyMemo[mKey] = combined return combined @@ -428,14 +415,14 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState, pp p combined.fieldTransitions = append(state1.fieldTransitions, state2.fieldTransitions...) - pp.labelTable(combined.table, fmt.Sprintf("%d∎%d", - pp.tableSerial(state1.table), pp.tableSerial(state2.table))) + pp.labelTable(&combined.table, fmt.Sprintf("%d∎%d", + pp.tableSerial(&state1.table), pp.tableSerial(&state2.table))) keyMemo[mKey] = combined var iter1, iter2 stIterator - iter1.table = state1.table - iter2.table = state2.table + iter1.table = &state1.table + iter2.table = &state2.table var uComb unpackedTable var merged *faState @@ -478,14 +465,14 @@ func asymmetricSpinnerMerge(spinner, nonSpinner *faState, keyMemo map[faStepKey] combined := &faState{table: newSmallTable()} combined.fieldTransitions = append(spinner.fieldTransitions, nonSpinner.fieldTransitions...) 
- pp.labelTable(combined.table, fmt.Sprintf("%d∎%d", - pp.tableSerial(spinner.table), pp.tableSerial(nonSpinner.table))) + pp.labelTable(&combined.table, fmt.Sprintf("%d∎%d", + pp.tableSerial(&spinner.table), pp.tableSerial(&nonSpinner.table))) keyMemo[mKey] = combined var iter1, iter2 stIterator - iter1.table = spinner.table - iter2.table = nonSpinner.table + iter1.table = &spinner.table + iter2.table = &nonSpinner.table var uComb unpackedTable var mergedState *faState @@ -505,7 +492,7 @@ func asymmetricSpinnerMerge(spinner, nonSpinner *faState, keyMemo map[faStepKey] // nonspinner has a branch here // if the current spinner value is a loopback, we need to make a new state whose value // is the nonspinner with the addition of the epsilon link back to the spinner - mergedTable := &smallTable{ + mergedTable := smallTable{ steps: nonSpinnernext.table.steps, ceilings: nonSpinnernext.table.ceilings, epsilons: append(nonSpinnernext.table.epsilons, spinner), @@ -545,14 +532,14 @@ func symmetricSpinnerMerge(state1, state2 *faState, keyMemo map[faStepKey]*faSta combined := &faState{table: newSmallTable()} combined.fieldTransitions = append(state1.fieldTransitions, state2.fieldTransitions...) 
- pp.labelTable(combined.table, fmt.Sprintf("%d∎%d", - pp.tableSerial(state1.table), pp.tableSerial(state2.table))) + pp.labelTable(&combined.table, fmt.Sprintf("%d∎%d", + pp.tableSerial(&state1.table), pp.tableSerial(&state2.table))) keyMemo[makeFaStepKey(state1, state2)] = combined var iter1, iter2 stIterator - iter1.table = state1.table - iter2.table = state2.table + iter1.table = &state1.table + iter2.table = &state2.table var uComb unpackedTable var mergedState *faState @@ -570,7 +557,7 @@ func symmetricSpinnerMerge(state1, state2 *faState, keyMemo map[faStepKey]*faSta case next1 == state1 && next2 != state2: // next2 is an actual branch, so we will have to install the spin pointer in the target - table := &smallTable{ + table := smallTable{ ceilings: next2.table.ceilings, steps: next2.table.steps, epsilons: append(state2.table.epsilons, combined), @@ -581,7 +568,7 @@ func symmetricSpinnerMerge(state1, state2 *faState, keyMemo map[faStepKey]*faSta } case next2 == state2 && next1 != state1: // next1 is an actual branch, so we will have to install the spin pointer in the target - table := &smallTable{ + table := smallTable{ ceilings: next1.table.ceilings, steps: next1.table.steps, epsilons: append(state1.table.epsilons, combined), diff --git a/nfa_test.go b/nfa_test.go index 056246f..95a3618 100644 --- a/nfa_test.go +++ b/nfa_test.go @@ -54,7 +54,7 @@ func TestFocusedMerge(t *testing.T) { "ab*", "*ab", } - var automata []*smallTable + var automata []*faState var matchers []*fieldMatcher for _, shellStyle := range shellStyles { @@ -72,13 +72,13 @@ func TestFocusedMerge(t *testing.T) { merged := newSmallTable() for _, automaton := range automata { - merged = mergeFAs(merged, automaton, sharedNullPrinter) + merged = mergeFAs(&merged, &automaton.table, sharedNullPrinter) s := statsAccum{ fmVisited: make(map[*fieldMatcher]bool), vmVisited: make(map[*valueMatcher]bool), stVisited: make(map[*smallTable]bool), } - faStats(merged, &s) + faStats(&merged, &s) 
fmt.Println(s.stStats()) } } @@ -158,15 +158,15 @@ func TestNfa2Dfa(t *testing.T) { } } dfa := nfa2Dfa(nfa) - // fmt.Println("DFA: " + pp.printNFA(dfa.table)) + // fmt.Println("DFA: " + pp.printNFA(&dfa.table)) for _, should := range test.shoulds { - matched := traverseDFA(dfa.table, asQuotedBytes(t, should), transitions) + matched := traverseDFA(dfa, asQuotedBytes(t, should), transitions) if len(matched) != 1 { t.Errorf("DFA %s didn't match %s ", test.pattern, should) } } for _, nope := range test.nopes { - matched := traverseDFA(dfa.table, asQuotedBytes(t, nope), transitions) + matched := traverseDFA(dfa, asQuotedBytes(t, nope), transitions) if len(matched) != 0 { t.Errorf("DFA %s matched %s", test.pattern, nope) } @@ -182,10 +182,10 @@ func asQuotedBytes(t *testing.T, s string) []byte { // testTraverseNFA wraps traverseNFA with the push/pop that tryToMatch // normally provides. Test-only convenience so direct callers don't need // to manage the transmap stack themselves. -func testTraverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers) []*fieldMatcher { +func testTraverseNFA(start *faState, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers) []*fieldMatcher { tm := bufs.getTransmap() tm.push() - result := traverseNFA(table, val, transitions, bufs) + result := traverseNFA(start, val, transitions, bufs) tm.pop() return result } @@ -432,10 +432,10 @@ func TestTransmapBufferReuse(t *testing.T) { } // collectClosureStats walks an NFA and reports epsilon closure size statistics. 
-func collectClosureStats(startTable *smallTable) (stateCount, totalEntries, maxClosure int, tableSharing int) { +func collectClosureStats(start *faState) (stateCount, totalEntries, maxClosure int, tableSharing int) { visitedTables := make(map[*smallTable]bool) visitedStates := make(map[*faState]bool) - tableCounts := make(map[*smallTable]int) + tableCounts := make(map[tableShareKey]int) var walkTable func(t *smallTable) walkTable = func(t *smallTable) { @@ -446,29 +446,31 @@ func collectClosureStats(startTable *smallTable) (stateCount, totalEntries, maxC for _, state := range t.steps { if state != nil && !visitedStates[state] { visitedStates[state] = true - tableCounts[state.table]++ + tableCounts[newTableShareKey(&state.table)]++ ec := len(state.epsilonClosure) totalEntries += ec if ec > maxClosure { maxClosure = ec } - walkTable(state.table) + walkTable(&state.table) } } for _, eps := range t.epsilons { if !visitedStates[eps] { visitedStates[eps] = true - tableCounts[eps.table]++ + tableCounts[newTableShareKey(&eps.table)]++ ec := len(eps.epsilonClosure) totalEntries += ec if ec > maxClosure { maxClosure = ec } - walkTable(eps.table) + walkTable(&eps.table) } } } - walkTable(startTable) + if start != nil { + walkTable(&start.table) + } for _, count := range tableCounts { if count > 1 { @@ -612,7 +614,7 @@ func TestTablePointerDedup(t *testing.T) { m := q.matcher.(*coreMatcher) vm := m.fields().state.fields().transitions["val"] - nfaStart := vm.fields().startTable + nfaStart := vm.fields().startState stateCount, totalEntries, maxClosure, tableSharing := collectClosureStats(nfaStart) if stateCount != wl.stateCount { diff --git a/prettyprinter.go b/prettyprinter.go index 0a724c3..f581296 100644 --- a/prettyprinter.go +++ b/prettyprinter.go @@ -106,11 +106,33 @@ func (a ppAlready) remember(state *faState, table *smallTable) { } func (pp *prettyPrinter) printNFA(t *smallTable) string { - return pp.printNFAStep(&faState{table: t}, 0, newPpAlready()) + // Use the 
caller's *smallTable pointer for label lookup. Building a + // throwaway faState{table: *t} would re-locate the smallTable in memory + // and lose the label (labels are keyed by address). + return pp.printNFAFromTable(t, newPpAlready()) +} + +// printNFAFromTable prints starting from a smallTable, using its address for +// label lookup. Used for the start node where there's no real *faState owning +// the requested address. +func (pp *prettyPrinter) printNFAFromTable(t *smallTable, already ppAlready) string { + tableCost := mcSmallTable(t) + stateCost := mcFaStateBase + tableCost - mcSmallTableBase + trailer := fmt.Sprintf("[s/t %d/%d] \n", tableCost, stateCost) + s := " " + pp.printTable(t) + trailer + for _, step := range t.steps { + if step != nil { + s += pp.printNFAStep(step, 1, already) + } + } + for _, step := range t.epsilons { + s += pp.printNFAStep(step, 1, already) + } + return s } func (pp *prettyPrinter) printNFAStep(fas *faState, indent int, already ppAlready) string { - t := fas.table + t := &fas.table if already.sawThis(fas, t) { return "" } @@ -122,7 +144,7 @@ func (pp *prettyPrinter) printNFAStep(fas *faState, indent int, already ppAlread if len(fas.fieldTransitions) != 0 { trailer += fmt.Sprintf(" [%d transition(s)]", len(fas.fieldTransitions)) } - trailer += fmt.Sprintf("[s/t %d/%d] ", mcSmallTable(fas.table), mcFaState(fas)) + trailer += fmt.Sprintf("[s/t %d/%d] ", mcSmallTable(&fas.table), mcFaState(fas)) trailer += "\n" s := " " + pp.printTable(t) + trailer for _, step := range t.steps { @@ -207,11 +229,11 @@ func (pp *prettyPrinter) printTable(t *smallTable) string { } func (pp *prettyPrinter) nextString(n *faState) string { - label := pp.tableLabel(n.table) + label := pp.tableLabel(&n.table) if len(label) == 0 { - label = shortTableAddress(n.table) + label = shortTableAddress(&n.table) } - return fmt.Sprintf("%d[%s]", pp.tableSerial(n.table), label) + return fmt.Sprintf("%d[%s]", pp.tableSerial(&n.table), label) } func branchChar(b byte) 
string { diff --git a/prettyprinter_test.go b/prettyprinter_test.go index cc62011..75a989e 100644 --- a/prettyprinter_test.go +++ b/prettyprinter_test.go @@ -6,21 +6,21 @@ import ( func TestPP(t *testing.T) { pp := newPrettyPrinter(1) - table, _ := makeShellStyleFA([]byte(`"x*9"`), pp) - pp.labelTable(table, "START HERE") - wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280] - 914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280] - 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280] - 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288] - 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280] - 301[last step at 5] [1 transition(s)][s/t 81/153] + state, _ := makeShellStyleFA([]byte(`"x*9"`), pp) + pp.labelTable(&state.table, "START HERE") + wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280] + 914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280] + 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280] + 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288] + 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280] + 301[last step at 5] [1 transition(s)][s/t 81/153] ` - s := pp.printNFA(table) + s := pp.printNFA(&state.table) if s != wanted { t.Errorf("LONG: wanted\n<%s>\ngot\n<%s>\n", wanted, s) } - if pp.shortPrintNFA(table) != "884[START HERE]" { - t.Errorf("SHORT: wanted <%s> got <%s>\n", "758[START HERE]", pp.shortPrintNFA(table)) + if pp.shortPrintNFA(&state.table) != "884[START HERE]" { + t.Errorf("SHORT: wanted <%s> got <%s>\n", "758[START HERE]", pp.shortPrintNFA(&state.table)) } } @@ -28,8 +28,8 @@ func TestNullPP(t *testing.T) { np := &nullPrinter{} table := newSmallTable() table.addByteStep(3, &faState{}) - np.labelTable(table, "foo") - if np.printNFA(table) != noPP || np.shortPrintNFA(table) != noPP { + np.labelTable(&table, "foo") + if np.printNFA(&table) != noPP || np.shortPrintNFA(&table) != noPP 
{ t.Error("didn't get noPP") } } diff --git a/quantified_atom.go b/quantified_atom.go index 0cd326d..6afd4b2 100644 --- a/quantified_atom.go +++ b/quantified_atom.go @@ -49,12 +49,12 @@ func (qa *quantifiedAtom) isMinimumOnly() bool { return qa.quantMax == regexpMinimumOnly } -func (qa *quantifiedAtom) makeFA(nextStep *faState, pp printer) *smallTable { - var table *smallTable +func (qa *quantifiedAtom) makeFA(nextStep *faState, pp printer) smallTable { + var table smallTable switch { case qa.isDot(): table = makeDotFA(nextStep) - pp.labelTable(table, "Dot") + pp.labelTable(&table, "Dot") case qa.getSubtree() != nil: table = makeNFAFromBranches(qa.getSubtree(), nextStep, false, pp) case qa.runeRangeCache() != "": @@ -64,7 +64,7 @@ func (qa *quantifiedAtom) makeFA(nextStep *faState, pp printer) *smallTable { default: // if it's none of these other things, it has to boil down to a rune range table = makeRuneRangeNFA(qa.runes, nextStep, pp) - pp.labelTable(table, fmt.Sprintf("RR %x/%x, %d-%d", qa.runes[0].Lo, qa.runes[0].Hi, qa.quantMin, qa.quantMax)) + pp.labelTable(&table, fmt.Sprintf("RR %x/%x, %d-%d", qa.runes[0].Lo, qa.runes[0].Hi, qa.quantMin, qa.quantMax)) } return table } diff --git a/regexp_nfa.go b/regexp_nfa.go index 27b83f2..0275fa6 100644 --- a/regexp_nfa.go +++ b/regexp_nfa.go @@ -20,46 +20,48 @@ type regexpRoot []regexpBranch // makeRegexpNFA traverses the parsed regexp tree and generates a finite automaton // that matches it. The FA has states that match " at the beginning and end because // all Quamina field values are enclosed in quotes. 
-func makeRegexpNFA(root regexpRoot, pp printer) (*smallTable, *fieldMatcher) { +func makeRegexpNFA(root regexpRoot, pp printer) (*faState, *fieldMatcher) { nextField := newFieldMatcher() nextStep := makeNFATrailer(nextField) - pp.labelTable(nextStep.table, "Trailer") + pp.labelTable(&nextStep.table, "Trailer") table := makeSmallTable(nil, []byte{'"'}, []*faState{nextStep}) - pp.labelTable(table, "") + pp.labelTable(&table, "") nextStep = &faState{table: table} - fa := makeNFAFromBranches(root, nextStep, true, pp) - return fa, nextField + startTable := makeNFAFromBranches(root, nextStep, true, pp) + return &faState{table: startTable}, nextField } -func makeNFAFromBranches(root regexpRoot, nextStep *faState, addQuoteTransition bool, pp printer) *smallTable { +func makeNFAFromBranches(root regexpRoot, nextStep *faState, addQuoteTransition bool, pp printer) smallTable { // completely empty regexp if len(root) == 0 { return makeSmallTable(nil, []byte{'"'}, []*faState{nextStep}) } - var fa *smallTable + var fa smallTable + first := true for _, branch := range root { - var nextBranch *smallTable + var nextBranch smallTable if len(branch) == 0 { nextBranch = makeSmallTable(nil, []byte{'"'}, []*faState{nextStep}) - pp.labelTable(nextBranch, "next on len 0") + pp.labelTable(&nextBranch, "next on len 0") } else { nextBranch = faFromBranch(branch, nextStep, addQuoteTransition, pp) } - if fa != nil { - fa = mergeFAs(fa, nextBranch, pp) + if !first { + fa = mergeFAs(&fa, &nextBranch, pp) } else { fa = nextBranch + first = false } } return fa } -func faFromBranch(branch regexpBranch, nextStep *faState, addQuoteTransition bool, pp printer) *smallTable { +func faFromBranch(branch regexpBranch, nextStep *faState, addQuoteTransition bool, pp printer) smallTable { state := faFromQuantifiedAtom(branch, 0, nextStep, pp) table := state.table if addQuoteTransition { firstState := &faState{table: table} table = makeSmallTable(nil, []byte{'"'}, []*faState{firstState}) - 
pp.labelTable(table, "") + pp.labelTable(&table, "") } return table } @@ -79,7 +81,7 @@ func faFromQuantifiedAtom(branch regexpBranch, index int, finalStep *faState, pp case atom.isPlus(): // the + construction requires a loopback state in front of the state table plusLoopback := &faState{table: newSmallTable()} - pp.labelTable(plusLoopback.table, "PlusLoopback") + pp.labelTable(&plusLoopback.table, "PlusLoopback") state = &faState{table: atom.makeFA(plusLoopback, pp)} // for the + case, need to loop back to the newly created state @@ -99,8 +101,8 @@ func faFromQuantifiedAtom(branch regexpBranch, index int, finalStep *faState, pp nextMinMaxStep := nextState for counter := atom.quantMax; counter > 0; counter-- { - stepTable := faFromShell(shellTable, PlaceholderState, nextMinMaxStep) - pp.labelTable(stepTable, fmt.Sprintf("minmax at %d", counter)) + stepTable := faFromShell(&shellTable, PlaceholderState, nextMinMaxStep) + pp.labelTable(&stepTable, fmt.Sprintf("minmax at %d", counter)) // if it's between quantMin & max, we're in optional territory // so it needs an epsilon to allow jumping out @@ -127,8 +129,8 @@ func faFromQuantifiedAtom(branch regexpBranch, index int, finalStep *faState, pp var lastState *faState for counter := atom.quantMin; counter > 0; counter-- { - stepTable := faFromShell(shellTable, PlaceholderState, nextMinMaxStep) - pp.labelTable(stepTable, fmt.Sprintf("minmax at %d", counter)) + stepTable := faFromShell(&shellTable, PlaceholderState, nextMinMaxStep) + pp.labelTable(&stepTable, fmt.Sprintf("minmax at %d", counter)) state = &faState{table: stepTable} // there's a chain of the minimum-count steps, but the last one has to @@ -160,57 +162,50 @@ func makeNFATrailer(nextField *fieldMatcher) *faState { return &faState{table: table} } -func makeByteDotFA(dest *faState, pp printer) *smallTable { +func makeByteDotFA(dest *faState, pp printer) smallTable { ceilings := []byte{0xC0, 0xC2, 0xF5, 0xF6} steps := []*faState{dest, nil, dest, nil} - t := 
&smallTable{ceilings: ceilings, steps: steps} - pp.labelTable(t, " · ") + t := smallTable{ceilings: ceilings, steps: steps} + pp.labelTable(&t, " · ") return t } -func makeDotFA(dest *faState) *smallTable { - sLast := &smallTable{ +func makeDotFA(dest *faState) smallTable { + targetLast := &faState{table: smallTable{ ceilings: []byte{0x80, 0xc0, byte(byteCeiling)}, steps: []*faState{nil, dest, nil}, - } - targetLast := &faState{table: sLast} - sLastInter := &smallTable{ + }} + targetLastInter := &faState{table: smallTable{ ceilings: []byte{0x80, 0xc0, byte(byteCeiling)}, steps: []*faState{nil, targetLast, nil}, - } - targetLastInter := &faState{table: sLastInter} - sFirstInter := &smallTable{ + }} + targetFirstInter := &faState{table: smallTable{ ceilings: []byte{0x80, 0xc0, byte(byteCeiling)}, steps: []*faState{nil, targetLastInter, nil}, - } - targetFirstInter := &faState{table: sFirstInter} + }} - sE0 := &smallTable{ + targetE0 := &faState{table: smallTable{ ceilings: []byte{0xa0, 0xc0, byte(byteCeiling)}, steps: []*faState{nil, targetLast, nil}, - } - targetE0 := &faState{table: sE0} + }} - sED := &smallTable{ + targetED := &faState{table: smallTable{ ceilings: []byte{0x80, 0xA0, byte(byteCeiling)}, steps: []*faState{nil, targetLast, nil}, - } - targetED := &faState{table: sED} + }} - sF0 := &smallTable{ + targetF0 := &faState{table: smallTable{ ceilings: []byte{0x90, 0xC0, byte(byteCeiling)}, steps: []*faState{nil, targetLastInter, nil}, - } - targetF0 := &faState{table: sF0} + }} - sF4 := &smallTable{ + targetF4 := &faState{table: smallTable{ ceilings: []byte{0x80, 0x90, byte(byteCeiling)}, steps: []*faState{nil, targetLastInter, nil}, - } - targetF4 := &faState{table: sF4} + }} // for reference, see https://www.tbray.org/ongoing/When/202x/2024/12/29/Matching-Dot-Redux - return &smallTable{ + return smallTable{ ceilings: []byte{ 0x80, // 0 0xC2, // 1 diff --git a/regexp_nfa_test.go b/regexp_nfa_test.go index e16e1df..08b1101 100644 --- a/regexp_nfa_test.go 
+++ b/regexp_nfa_test.go @@ -66,14 +66,14 @@ func applyAndRunRegexp(t *testing.T, regexp string, match string, pp printer) in t.Helper() qm := []byte(`"` + match + `"`) fa := faFromRegexp(t, regexp, pp) - fmt.Println("N:\n" + pp.printNFA(fa)) + fmt.Println("N:\n" + pp.printNFA(&fa.table)) var transitions []*fieldMatcher bufs := newNfaBuffers() matches := testTraverseNFA(fa, qm, transitions, bufs) return len(matches) } -func faFromRegexp(t *testing.T, r string, pp printer) *smallTable { +func faFromRegexp(t *testing.T, r string, pp printer) *faState { t.Helper() parse, err := readRegexp(r) if err != nil { @@ -95,7 +95,7 @@ func TestRegexpPlus(t *testing.T) { "[123]+|[abc]+", } pp := newPrettyPrinter(4623) - var fa *smallTable + var fa *faState for _, re := range res { fa = faFromRegexp(t, re, pp) epsilonClosure(fa) @@ -147,11 +147,11 @@ func TestExploreUTF8Form(t *testing.T) { wantFM := &fieldMatcher{} targetState := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{wantFM}} - table := makeDotFA(targetState) + startState := &faState{table: makeDotFA(targetState)} var matchers []*fieldMatcher var got []*fieldMatcher for i, bad := range bads { - got = traverseDFA(table, bad, matchers) + got = traverseDFA(startState, bad, matchers) if len(got) != 0 { t.Errorf("accepted index %d", i) } @@ -161,7 +161,7 @@ func TestExploreUTF8Form(t *testing.T) { func TestDotSemantics(t *testing.T) { wantFM := &fieldMatcher{} targetState := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{wantFM}} - table := makeDotFA(targetState) + startState := &faState{table: makeDotFA(targetState)} var matchers []*fieldMatcher var got []*fieldMatcher var r rune @@ -170,7 +170,7 @@ func TestDotSemantics(t *testing.T) { if r >= 0xD800 && r <= 0xDFFF { continue } - got = traverseDFA(table, []byte(string([]rune{r})), matchers) + got = traverseDFA(startState, []byte(string([]rune{r})), matchers) if len(got) != 1 || got[0] != wantFM { t.Errorf("failed on %x", r) } @@ 
-188,14 +188,14 @@ func TestDotSemantics(t *testing.T) { } for _, good := range goodUTF8 { - got = traverseDFA(table, good, matchers) + got = traverseDFA(startState, good, matchers) if len(got) != 1 || got[0] != wantFM { t.Errorf("failed on non-surrogate %04x", r) } matchers = matchers[:0] } for _, bad := range badUTF8 { - got = traverseDFA(table, bad, matchers) + got = traverseDFA(startState, bad, matchers) if len(got) != 0 { t.Errorf("accepted surrogate %04x", r) } @@ -341,17 +341,17 @@ func TestMultiLengthRR(t *testing.T) { wantFM := &fieldMatcher{} dest := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{wantFM}} - st := makeRuneRangeNFA(rr, dest, sharedNullPrinter) + startState := &faState{table: makeRuneRangeNFA(rr, dest, sharedNullPrinter)} matchers := []*fieldMatcher{} var got []*fieldMatcher for _, rp := range multiLengthTest { - got = traverseDFA(st, []byte(string([]rune{rp.Lo})), matchers) + got = traverseDFA(startState, []byte(string([]rune{rp.Lo})), matchers) if len(got) != 1 || got[0] != wantFM { t.Errorf("failed on %x", rp.Lo) } } - nfaSize(t, st) + nfaSize(t, &startState.table) } } @@ -384,7 +384,7 @@ func nfaSizeStep(t *testing.T, st *smallTable, s *statsAccum, depth int) { } for _, step := range st.steps { if step != nil { - nfaSizeStep(t, step.table, s, depth+1) + nfaSizeStep(t, &step.table, s, depth+1) } } } diff --git a/regexp_validity_test.go b/regexp_validity_test.go index f56b503..0d4adc0 100644 --- a/regexp_validity_test.go +++ b/regexp_validity_test.go @@ -54,7 +54,7 @@ func TestEmptyRegexp(t *testing.T) { } func TestToxicStack(t *testing.T) { - var table *smallTable + var start *faState pp := newPrettyPrinter(34897) re3 := "(([~.~~~?~*~+~{~}~[~]~(~)~|]?)*)+" @@ -66,12 +66,12 @@ func TestToxicStack(t *testing.T) { if err != nil { t.Error("OOPS: " + err.Error()) } - table, _ = makeRegexpNFA(parse.tree, pp) - epsilonClosure(table) + start, _ = makeRegexpNFA(parse.tree, pp) + epsilonClosure(start) var transitions 
[]*fieldMatcher bufs := newNfaBuffers() - trans := testTraverseNFA(table, []byte(str), transitions, bufs) + trans := testTraverseNFA(start, []byte(str), transitions, bufs) if len(trans) != 1 { t.Error("Toxic stack failure") } diff --git a/rune_range.go b/rune_range.go index 3f3ddf1..f85c5e3 100644 --- a/rune_range.go +++ b/rune_range.go @@ -15,17 +15,17 @@ import ( // can safely build and update the cachedRRFaShells var PlaceholderState *faState = &faState{table: newSmallTable()} -var cachedFaShells = make(map[string]*smallTable) +var cachedFaShells = make(map[string]smallTable) -func faFromShell(shell *smallTable, oldNext *faState, newNext *faState) *smallTable { - return copyShellNode(&faState{table: shell}, oldNext, newNext, make(map[*faState]*faState)).table +func faFromShell(shell *smallTable, oldNext *faState, newNext *faState) smallTable { + return copyShellNode(&faState{table: *shell}, oldNext, newNext, make(map[*faState]*faState)).table } func copyShellNode(shell *faState, oldNext *faState, newNext *faState, mem map[*faState]*faState) *faState { already, ok := mem[shell] if ok { return already } - table := &smallTable{ + table := smallTable{ ceilings: slices.Clone(shell.table.ceilings), steps: make([]*faState, len(shell.table.steps)), epsilons: make([]*faState, len(shell.table.epsilons)), @@ -37,9 +37,9 @@ func copyShellNode(shell *faState, oldNext *faState, newNext *faState, mem map[* case nil: // no-op case oldNext: - table.steps[i] = newNext + state.table.steps[i] = newNext default: - table.steps[i] = copyShellNode(step, oldNext, newNext, mem) + state.table.steps[i] = copyShellNode(step, oldNext, newNext, mem) } } for i, epsilon := range shell.table.epsilons { @@ -47,9 +47,9 @@ func copyShellNode(shell *faState, oldNext *faState, newNext *faState, mem map[* case nil: // no-op case oldNext: - table.epsilons[i] = newNext + state.table.epsilons[i] = newNext default: - table.epsilons[i] = copyShellNode(epsilon, oldNext, newNext, mem) + 
state.table.epsilons[i] = copyShellNode(epsilon, oldNext, newNext, mem) } } return state @@ -97,17 +97,17 @@ func newRuneRangeIterator(rr RuneRange) (*runeRangeIterator, error) { // here's the problem: A construct like [~p{L}~p[Nd}~p{Zs}] is going to be brutally expensive, because // it'll have to build the FA to match the combination of all those huge rune-ranges. -func makeAndCacheRuneRangeFA(rr RuneRange, next *faState, name string, pp printer) *smallTable { +func makeAndCacheRuneRangeFA(rr RuneRange, next *faState, name string, pp printer) smallTable { if name != "" { fa, ok := cachedFaShells[name] if !ok { fa = makeAndCacheRuneRangeFA(rr, PlaceholderState, "", pp) cachedFaShells[name] = fa } - return faFromShell(fa, PlaceholderState, next) + return faFromShell(&fa, PlaceholderState, next) } - pp.labelTable(next.table, "Next") + pp.labelTable(&next.table, "Next") // turn the slice of hi/lo inclusive endpoints into a slice of utf8 encodings ri, err := newRuneRangeIterator(rr) @@ -125,7 +125,7 @@ func makeAndCacheRuneRangeFA(rr RuneRange, next *faState, name string, pp printe return nfaFromSkinnyRuneTree(root, pp) } -func makeRuneRangeNFA(rr RuneRange, next *faState, pp printer) *smallTable { +func makeRuneRangeNFA(rr RuneRange, next *faState, pp printer) smallTable { return makeAndCacheRuneRangeFA(rr, next, "", pp) } @@ -188,10 +188,10 @@ func addSkinnyRuneTreeEntry(root *skinnyRuneTreeNode, r rune, dest *faState) { node = nextEntry.node } } -func nfaFromSkinnyRuneTree(root *skinnyRuneTreeNode, pp printer) *smallTable { +func nfaFromSkinnyRuneTree(root *skinnyRuneTreeNode, pp printer) smallTable { return tableFromSkinnyRuneTreeNode(root, pp) } -func tableFromSkinnyRuneTreeNode(node *skinnyRuneTreeNode, pp printer) *smallTable { +func tableFromSkinnyRuneTreeNode(node *skinnyRuneTreeNode, pp printer) smallTable { var unpacked unpackedTable for index, byteVal := range node.byteVals { entry := node.entries[index] @@ -199,7 +199,7 @@ func 
tableFromSkinnyRuneTreeNode(node *skinnyRuneTreeNode, pp printer) *smallTab unpacked[byteVal] = entry.next } else { table := tableFromSkinnyRuneTreeNode(entry.node, pp) - pp.labelTable(table, fmt.Sprintf("on %x", byteVal)) + pp.labelTable(&table, fmt.Sprintf("on %x", byteVal)) unpacked[byteVal] = &faState{table: table} } } diff --git a/rune_range_test.go b/rune_range_test.go index 36e7b49..27febc3 100644 --- a/rune_range_test.go +++ b/rune_range_test.go @@ -12,15 +12,15 @@ func TestSkinnyRuneTree(t *testing.T) { srt := &skinnyRuneTreeNode{} pp := newPrettyPrinter(246758) tt := newSmallTable() - pp.labelTable(tt, "Next") + pp.labelTable(&tt, "Next") dest := &faState{table: tt, fieldTransitions: []*fieldMatcher{{}}} addSkinnyRuneTreeEntry(srt, r, dest) addSkinnyRuneTreeEntry(srt, r+1, dest) addSkinnyRuneTreeEntry(srt, r+3, dest) - fa := nfaFromSkinnyRuneTree(srt, pp) - fmt.Println("FA:\n" + pp.printNFA(fa)) + startState := &faState{table: nfaFromSkinnyRuneTree(srt, pp)} + fmt.Println("FA:\n" + pp.printNFA(&startState.table)) trans := []*fieldMatcher{} - matches := traverseDFA(fa, utf8, trans) + matches := traverseDFA(startState, utf8, trans) if len(matches) != 1 { t.Error("MISSED") } diff --git a/shell_style.go b/shell_style.go index da50454..b737744 100644 --- a/shell_style.go +++ b/shell_style.go @@ -44,10 +44,10 @@ func readShellStyleSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []type // makeShellStyleFA does what it says. It is precisely equivalent to a regex with the only operator // being a single ".*". 
Once we've implemented regular expressions we can use that to more or less eliminate this -func makeShellStyleFA(val []byte, pp printer) (start *smallTable, nextField *fieldMatcher) { +func makeShellStyleFA(val []byte, pp printer) (start *faState, nextField *fieldMatcher) { state := &faState{table: newSmallTable()} - start = state.table - pp.labelTable(start, "SHELLSTYLE") + start = state + pp.labelTable(&start.table, "SHELLSTYLE") nextField = newFieldMatcher() // for each byte in the pattern @@ -63,19 +63,19 @@ func makeShellStyleFA(val []byte, pp printer) (start *smallTable, nextField *fie spinEscape.table.epsilons = []*faState{spinner} spinner.table = makeByteDotFA(spinner, pp) spinner.table.addByteStep(val[valIndex], spinEscape) - pp.labelTable(spinner.table, "*-Spinner") - pp.labelTable(spinEscape.table, fmt.Sprintf("spinEscape on %c at %d", val[valIndex], valIndex)) + pp.labelTable(&spinner.table, "*-Spinner") + pp.labelTable(&spinEscape.table, fmt.Sprintf("spinEscape on %c at %d", val[valIndex], valIndex)) state = spinEscape } else { nextStep := &faState{table: newSmallTable()} - pp.labelTable(nextStep.table, fmt.Sprintf("on %c at %d", val[valIndex], valIndex)) + pp.labelTable(&nextStep.table, fmt.Sprintf("on %c at %d", val[valIndex], valIndex)) state.table.addByteStep(ch, nextStep) state = nextStep } valIndex++ } lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}} - pp.labelTable(lastStep.table, fmt.Sprintf("last step at %d", valIndex)) + pp.labelTable(&lastStep.table, fmt.Sprintf("last step at %d", valIndex)) state.table.addByteStep(valueTerminator, lastStep) return } diff --git a/shell_style_test.go b/shell_style_test.go index 6f9d573..aa905b8 100644 --- a/shell_style_test.go +++ b/shell_style_test.go @@ -62,7 +62,7 @@ func TestMakeShellStyleFA(t *testing.T) { a, wanted := makeShellStyleFA([]byte(pattern), sharedNullPrinter) epsilonClosure(a) vm := newValueMatcher() - vmf := vmFields{startTable: a} + vmf := 
vmFields{startState: a} vm.update(&vmf) bufs := newNfaBuffers() for _, should := range shouldsForPatterns[i] { diff --git a/small_table.go b/small_table.go index 5b80da0..ec7fb1e 100644 --- a/small_table.go +++ b/small_table.go @@ -45,8 +45,8 @@ type smallTable struct { // newSmallTable mostly exists to enforce the constraint that every smallTable has a byteCeiling entry at // the end, which smallTable.step totally depends on. -func newSmallTable() *smallTable { - return &smallTable{ +func newSmallTable() smallTable { + return smallTable{ ceilings: []byte{byte(byteCeiling)}, steps: []*faState{nil}, } @@ -105,7 +105,7 @@ func (t *smallTable) dStep(utf8Byte byte) *faState { // value, and then a few other values with their indexes and values specified in the other two arguments. The // goal is to reduce memory churn // constraint: positions must be provided in order -func makeSmallTable(defaultStep *faState, indices []byte, steps []*faState) *smallTable { +func makeSmallTable(defaultStep *faState, indices []byte, steps []*faState) smallTable { t := smallTable{ ceilings: make([]byte, 0, len(indices)+2), steps: make([]*faState, 0, len(indices)+2), @@ -125,7 +125,7 @@ func makeSmallTable(defaultStep *faState, indices []byte, steps []*faState) *sma t.ceilings = append(t.ceilings, byte(byteCeiling)) t.steps = append(t.steps, defaultStep) } - return &t + return t } // For manipulating larger-scale machines, the performance starts to be dominated by diff --git a/small_table_test.go b/small_table_test.go index 16db156..4cab429 100644 --- a/small_table_test.go +++ b/small_table_test.go @@ -81,7 +81,7 @@ func TestSmallTableIterator(t *testing.T) { for i, byteval := range bytevals { wanted[byteval] = steps[i] } - iter := newSTIterator(st, nil) + iter := newSTIterator(&st, nil) for iter.hasNext() { utf8byte, step := iter.next() if wanted[utf8byte] != step { @@ -96,8 +96,8 @@ func TestSmallTableIterator(t *testing.T) { t.Errorf("at u=%x wanted %p got %p", i, wanted[i], state) } } 
- unpacked := unpackTable(st) - iter = newSTIterator(st, &iter) + unpacked := unpackTable(&st) + iter = newSTIterator(&st, &iter) for iter.hasNext() { utf8byte, step := iter.next() if unpacked[utf8byte] != step { @@ -115,15 +115,15 @@ func TestSmallTableIterator(t *testing.T) { for i, byteval := range bytevals { wanted[byteval] = steps[i] } - iter = newSTIterator(st, &iter) + iter = newSTIterator(&st, &iter) for iter.hasNext() { utf8byte, step := iter.next() if wanted[utf8byte] != step { t.Errorf("at u=%x wanted %p got %p", utf8byte, wanted[utf8byte], step) } } - unpacked = unpackTable(st) - iter = newSTIterator(st, &iter) + unpacked = unpackTable(&st) + iter = newSTIterator(&st, &iter) for iter.hasNext() { utf8byte, step := iter.next() if unpacked[utf8byte] != step { diff --git a/stats.go b/stats.go index ae5017e..cbaf20a 100644 --- a/stats.go +++ b/stats.go @@ -152,8 +152,8 @@ func vmStats(m *valueMatcher, s *statsAccum) { s.siCount++ fmStats(state.singletonTransition, s) } - if state.startTable != nil { - faStats(state.startTable, s) + if state.startState != nil { + faStats(&state.startState.table, s) } } @@ -180,10 +180,10 @@ func faStats(t *smallTable, s *statsAccum) { } for _, next := range t.steps { if next != nil { - faStats(next.table, s) + faStats(&next.table, s) } } for _, epsilon := range t.epsilons { - faStats(epsilon.table, s) + faStats(&epsilon.table, s) } } diff --git a/value_matcher.go b/value_matcher.go index 96e45e6..1010e15 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -23,7 +23,7 @@ type valueMatcher struct { updateable atomic.Pointer[vmFields] } type vmFields struct { - startTable *smallTable + startState *faState singletonMatch []byte singletonTransition *fieldMatcher hasNumbers bool @@ -66,23 +66,23 @@ func (m *valueMatcher) transitionOn(eventField *Field, bufs *nfaBuffers) []*fiel } return transitions - case vmFields.startTable != nil: + case vmFields.startState != nil: // if there is a potential for a numeric match, try making a 
Q number from the event if vmFields.hasNumbers && eventField.IsNumber { qNum, err := qNumFromBytesBuf(val, &bufs.qNumBuf) if err == nil { if vmFields.isNondeterministic { - return traverseNFA(vmFields.startTable, qNum, transitions, bufs) + return traverseNFA(vmFields.startState, qNum, transitions, bufs) } - return traverseDFA(vmFields.startTable, qNum, transitions) + return traverseDFA(vmFields.startState, qNum, transitions) } } // if it doesn't work as a Q number for some reason, go ahead and compare the string values if vmFields.isNondeterministic { - return traverseNFA(vmFields.startTable, val, transitions, bufs) + return traverseNFA(vmFields.startState, val, transitions, bufs) } - return traverseDFA(vmFields.startTable, val, transitions) + return traverseDFA(vmFields.startState, val, transitions) default: // no FA, no singleton, nothing to do, this probably can't happen because a flattener @@ -97,7 +97,7 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche var err error // special case - virgin state and this is a string match - if fields.startTable == nil && fields.singletonMatch == nil && (val.vType == stringType || val.vType == literalType) { + if fields.startState == nil && fields.singletonMatch == nil && (val.vType == stringType || val.vType == literalType) { fields.singletonMatch = valBytes fields.singletonTransition = newFieldMatcher() m.update(fields) @@ -114,38 +114,46 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche // no dodges, we have to build an automaton to match this value var nextField *fieldMatcher - var newFA *smallTable + // newFAState holds the newly-built automaton. makeStringFA and makePrefixFA + // return a smallTable value, which is wrapped in an faState here; the other + // builders return *faState directly. After this switch, newFAState is the + // start faState for the new automaton. 
+ var newFAState *faState switch val.vType { case stringType, literalType: - newFA, nextField = makeStringFA(valBytes, nil, false) + t, fm := makeStringFA(valBytes, nil, false) + newFAState, nextField = &faState{table: t}, fm case numberType: - newFA, nextField = makeStringFA(valBytes, nil, true) + t, fm := makeStringFA(valBytes, nil, true) + newFAState, nextField = &faState{table: t}, fm fields.hasNumbers = true case anythingButType: - newFA, nextField = makeMultiAnythingButFA(val.list) + newFAState, nextField = makeMultiAnythingButFA(val.list) case shellStyleType: - newFA, nextField = makeShellStyleFA(valBytes, printer) + newFAState, nextField = makeShellStyleFA(valBytes, printer) fields.isNondeterministic = true case wildcardType: - newFA, nextField = makeWildCardFA(valBytes, printer) + newFAState, nextField = makeWildCardFA(valBytes, printer) fields.isNondeterministic = true case prefixType: - newFA, nextField = makePrefixFA(valBytes) + t, fm := makePrefixFA(valBytes) + newFAState, nextField = &faState{table: t}, fm case monocaseType: - newFA, nextField = makeMonocaseFA(valBytes, printer) + newFAState, nextField = makeMonocaseFA(valBytes, printer) case regexpType: - newFA, nextField = makeRegexpNFA(val.parsedRegexp, sharedNullPrinter) - if newFA.isNondeterministic() { + newFAState, nextField = makeRegexpNFA(val.parsedRegexp, sharedNullPrinter) + if newFAState.table.isNondeterministic() { fields.isNondeterministic = true } - printer.labelTable(newFA, "RX start") + printer.labelTable(&newFAState.table, "RX start") default: panic("unknown value type") } // there's already a table, thus an out-degree > 1 - if fields.startTable != nil { - fields.startTable = mergeFAs(fields.startTable, newFA, printer) + if fields.startState != nil { + mergedTable := mergeFAs(&fields.startState.table, &newFAState.table, printer) + fields.startState = &faState{table: mergedTable} if err != nil { return nil } @@ -156,7 +164,7 @@ func (m *valueMatcher) addTransition(val typedVal, 
printer printer) *fieldMatche // if (bytesAllocated() - mm.baseAlloc) > mm.headroom { if fields.isNondeterministic { - epsilonClosure(fields.startTable) + epsilonClosure(fields.startState) } m.update(fields) @@ -167,35 +175,36 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche if fields.singletonMatch != nil { // singleton is here, we don't match, so our outdegree becomes 2, so we have // to build an automaton with two values in it. - singletonAutomaton, _ := makeStringFA(fields.singletonMatch, fields.singletonTransition, false) + singletonTable, _ := makeStringFA(fields.singletonMatch, fields.singletonTransition, false) // now table is ready for use, nuke singleton to signal threads to use it - fields.startTable = mergeFAs(singletonAutomaton, newFA, sharedNullPrinter) + mergedTable := mergeFAs(&singletonTable, &newFAState.table, sharedNullPrinter) + fields.startState = &faState{table: mergedTable} if err != nil { return nil } if fields.isNondeterministic { - epsilonClosure(fields.startTable) + epsilonClosure(fields.startState) } fields.singletonMatch = nil fields.singletonTransition = nil } else { // empty valueMatcher, no special cases, just jam in the new FA - fields.startTable = newFA + fields.startState = newFAState if fields.isNondeterministic { - epsilonClosure(fields.startTable) + epsilonClosure(fields.startState) } } m.update(fields) return nextField } -func makePrefixFA(val []byte) (*smallTable, *fieldMatcher) { +func makePrefixFA(val []byte) (smallTable, *fieldMatcher) { nextField := newFieldMatcher() return makeOnePrefixFAStep(val, 0, nextField), nextField } -func makeOnePrefixFAStep(val []byte, index int, nextField *fieldMatcher) *smallTable { +func makeOnePrefixFAStep(val []byte, index int, nextField *fieldMatcher) smallTable { // have to stop one short to skip the closing " var nextState *faState if index == len(val)-2 { @@ -211,7 +220,7 @@ func makeOnePrefixFAStep(val []byte, index int, nextField *fieldMatcher) 
*smallT // is recursive because this allows the use of the makeSmallTable call, which // reduces memory churn. Converting from a straightforward implementation to // this approximately doubled the fields/second rate in addPattern -func makeStringFA(val []byte, useThisTransition *fieldMatcher, isNumber bool) (*smallTable, *fieldMatcher) { +func makeStringFA(val []byte, useThisTransition *fieldMatcher, isNumber bool) (smallTable, *fieldMatcher) { var nextField *fieldMatcher if useThisTransition != nil { nextField = useThisTransition @@ -227,7 +236,7 @@ func makeStringFA(val []byte, useThisTransition *fieldMatcher, isNumber bool) (* qNum, err := qNumFromBytes(val) if err == nil { numberFA := makeOneStringFAStep(qNum, 0, nextField) - stringFA = mergeFAs(stringFA, numberFA, sharedNullPrinter) + stringFA = mergeFAs(&stringFA, &numberFA, sharedNullPrinter) } } return stringFA, nextField @@ -249,22 +258,22 @@ func makeFAFragment(val []byte, endAt *faState, pp printer) *faState { for index := 1; index < len(val); index++ { if index == len(val)-1 { table := makeSmallTable(nil, []byte{val[index]}, []*faState{endAt}) - pp.labelTable(table, fmt.Sprintf("exiting on %v", val[index])) + pp.labelTable(&table, fmt.Sprintf("exiting on %v", val[index])) step.table = table - pp.labelTable(step.table, "Last step") + pp.labelTable(&step.table, "Last step") } else { nextState := &faState{} table := makeSmallTable(nil, []byte{val[index]}, []*faState{nextState}) - pp.labelTable(table, fmt.Sprintf("stepping on %c", val[index])) + pp.labelTable(&table, fmt.Sprintf("stepping on %c", val[index])) step.table = table - pp.labelTable(step.table, "Step") + pp.labelTable(&step.table, "Step") step = nextState } } return firstStep } -func makeOneStringFAStep(val []byte, index int, nextField *fieldMatcher) *smallTable { +func makeOneStringFAStep(val []byte, index int, nextField *fieldMatcher) smallTable { var nextStep *faState if index == len(val)-1 { lastStep := &faState{ diff --git 
a/value_matcher_test.go b/value_matcher_test.go index 6c61e06..442715d 100644 --- a/value_matcher_test.go +++ b/value_matcher_test.go @@ -348,9 +348,8 @@ func TestMakeFAFragment(t *testing.T) { pp := newPrettyPrinter(3234) for _, datum := range data { frag := makeFAFragment([]byte(datum), targetState, pp) - startTable := frag.table var transIn []*fieldMatcher - transOut := traverseDFA(startTable, []byte(datum)[1:], transIn) + transOut := traverseDFA(frag, []byte(datum)[1:], transIn) if len(transOut) != 1 || transOut[0] != targetFA { t.Error("fail on ", datum) } @@ -462,8 +461,8 @@ func TestEpsilonClosureAfterMerge(t *testing.T) { } // Walk the automaton and verify all states have epsilon closures computed - visited := make(map[*smallTable]bool) - missingClosures := checkEpsilonClosures(fields.startTable, visited) + visited := make(map[*faState]bool) + missingClosures := checkEpsilonClosures(fields.startState, visited) if len(missingClosures) > 0 { t.Errorf("found %d states with missing epsilon closures", len(missingClosures)) } @@ -489,26 +488,26 @@ func TestEpsilonClosureAfterMerge(t *testing.T) { // checkEpsilonClosures walks the automaton and returns states that have // epsilon transitions but no computed epsilon closure. -func checkEpsilonClosures(table *smallTable, visited map[*smallTable]bool) []*faState { +func checkEpsilonClosures(start *faState, visited map[*faState]bool) []*faState { var missing []*faState - if visited[table] { + if visited[start] { return missing } - visited[table] = true + visited[start] = true - for _, state := range table.steps { + for _, state := range start.table.steps { if state != nil { if len(state.table.epsilons) > 0 && state.epsilonClosure == nil { missing = append(missing, state) } - missing = append(missing, checkEpsilonClosures(state.table, visited)...) + missing = append(missing, checkEpsilonClosures(state, visited)...) 
} } - for _, eps := range table.epsilons { + for _, eps := range start.table.epsilons { if eps.epsilonClosure == nil { missing = append(missing, eps) } - missing = append(missing, checkEpsilonClosures(eps.table, visited)...) + missing = append(missing, checkEpsilonClosures(eps, visited)...) } return missing } @@ -547,7 +546,7 @@ func TestEpsilonClosureRequired(t *testing.T) { // Step 2: Clear all epsilon closures to simulate missing epsilonClosure call fields := vm.fields() - clearEpsilonClosures(fields.startTable, make(map[*smallTable]bool)) + clearEpsilonClosures(fields.startState, make(map[*faState]bool)) // Step 3: Without closures, traverseNFA fails because it iterates over // state.epsilonClosure which is now nil (empty loop = no matches) @@ -563,7 +562,7 @@ func TestEpsilonClosureRequired(t *testing.T) { } // Step 4: Restore closures and verify matching works again - epsilonClosure(fields.startTable) + epsilonClosure(fields.startState) trans = testTransitionOn(vm, []byte("abc"), bufs) if len(trans) != 1 { @@ -576,20 +575,19 @@ func TestEpsilonClosureRequired(t *testing.T) { } // clearEpsilonClosures walks the automaton and sets all epsilonClosure fields to nil -func clearEpsilonClosures(table *smallTable, visited map[*smallTable]bool) { - if visited[table] { +func clearEpsilonClosures(start *faState, visited map[*faState]bool) { + if visited[start] { return } - visited[table] = true + visited[start] = true + start.epsilonClosure = nil - for _, state := range table.steps { + for _, state := range start.table.steps { if state != nil { - state.epsilonClosure = nil - clearEpsilonClosures(state.table, visited) + clearEpsilonClosures(state, visited) } } - for _, eps := range table.epsilons { - eps.epsilonClosure = nil - clearEpsilonClosures(eps.table, visited) + for _, eps := range start.table.epsilons { + clearEpsilonClosures(eps, visited) } } diff --git a/wildcard.go b/wildcard.go index 837c903..b25015d 100644 --- a/wildcard.go +++ b/wildcard.go @@ -74,10 +74,10 
@@ func readWildcardSpecial(pb *patternBuild, valsIn []typedVal) ([]typedVal, error // makeWildcardFA is a replacement for shellstyle patterns, the only difference being that escaping is // provided for * and \. -func makeWildCardFA(val []byte, pp printer) (start *smallTable, nextField *fieldMatcher) { +func makeWildCardFA(val []byte, pp printer) (start *faState, nextField *fieldMatcher) { state := &faState{table: newSmallTable()} - start = state.table - pp.labelTable(start, "WILDCARD") + start = state + pp.labelTable(&start.table, "WILDCARD") nextField = newFieldMatcher() // for each byte in the pattern. \-escape processing is simplified because illegal constructs such as \a and \ @@ -99,19 +99,19 @@ func makeWildCardFA(val []byte, pp printer) (start *smallTable, nextField *field spinEscape.table.epsilons = []*faState{spinner} spinner.table = makeByteDotFA(spinner, pp) spinner.table.addByteStep(val[valIndex], spinEscape) - pp.labelTable(spinner.table, "*-Spinner") - pp.labelTable(spinEscape.table, fmt.Sprintf("spinEscape on %c at %d", val[valIndex], valIndex)) + pp.labelTable(&spinner.table, "*-Spinner") + pp.labelTable(&spinEscape.table, fmt.Sprintf("spinEscape on %c at %d", val[valIndex], valIndex)) state = spinEscape } else { nextStep := &faState{table: newSmallTable()} - pp.labelTable(nextStep.table, fmt.Sprintf("on %c at %d", val[valIndex], valIndex)) + pp.labelTable(&nextStep.table, fmt.Sprintf("on %c at %d", val[valIndex], valIndex)) state.table.addByteStep(ch, nextStep) state = nextStep } valIndex++ } lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}} - pp.labelTable(lastStep.table, fmt.Sprintf("last step at %d", valIndex)) + pp.labelTable(&lastStep.table, fmt.Sprintf("last step at %d", valIndex)) state.table.addByteStep(valueTerminator, lastStep) return } From b8d21493842dc9dadb52c5433cf1363c065a7f35 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 14:19:23 -0700 Subject: [PATCH 4/6] tests: 
recalibrate size assertions after embedding smallTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-state size shrunk (faState now 128B with inline smallTable). Update hand-calibrated constants in TestMcBasicSizes, TestQuaminaMemoryCost, TestMcNfaSizes. Update TestPP s/t numbers and restore trailing whitespace in wanted literal (producer still emits "[s/t X/Y] \n" with the trailing space before the newline). TestTablePointerDedup's tableSharing and totalEntries expectations also needed updating — the dedup metric now uses slice-backing identity (via tableShareKey from earlier commits) rather than *smallTable pointer identity, so value-copies of a source smallTable register as shared. Recalibrated to the new ground-truth values. Minor cleanups: stale "startTable" reference in TestQuaminaMemoryCostSingleton comment (renamed to startState); inline a dead local in copyShellNode. Co-Authored-By: Claude Sonnet 4.6 --- memory_cost_test.go | 18 ++++++++++-------- nfa_test.go | 8 ++++---- prettyprinter_test.go | 13 ++++++------- rune_range.go | 5 ++--- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/memory_cost_test.go b/memory_cost_test.go index 7a6f7f7..00dbdf3 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -16,8 +16,10 @@ func TestMcBasicSizes(t *testing.T) { } stateBase := int64(unsafe.Sizeof(faState{})) state := faState{table: table} - // want base + tableActual - want = stateBase + tableGot + // faState embeds smallTable, so stateBase already covers the smallTable struct. + // Add only the slice-backing bytes (1 ceiling byte + 1 step pointer), not tableGot + // (which includes mcSmallTableBase again and would double-count the struct overhead). 
+ want = stateBase + 1 + mcPointer stateGot := mcFaState(&state) if stateGot != want { t.Errorf("State wanted %d got %d", want, stateGot) @@ -31,7 +33,7 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes := q.GetMatcherStats()["bytes"] - if bytes != 1321 { + if bytes != 1289 { t.Error("WRONG NUMBERS") } err = q.AddPattern("x", `{"y":[{"wildcard": "*y"}]}}`) @@ -39,15 +41,15 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes = q.GetMatcherStats()["bytes"] - if bytes != 2*1321 { + if bytes != 2*1289 { t.Error("WRONG NUMBERS") } } // Regression: GetMatcherStats panicked when a valueMatcher used the -// singleton-match optimization (singletonMatch set, startTable nil), +// singleton-match optimization (singletonMatch set, startState nil), // e.g. boolean-valued patterns. cmFieldMatcherStats now skips the nil -// startTable rather than building a faState with state.table == nil. +// startState rather than building a faState with state.table == nil. func TestQuaminaMemoryCostSingleton(t *testing.T) { q, _ := New() if err := q.AddPattern("p", `{"Animated": [false]}`); err != nil { @@ -70,8 +72,8 @@ func TestMcNfaSizes(t *testing.T) { seenStates: make(map[*faState]bool), } cmStateStats(fa1, stats, pp) - wantedBytes := int64(1321) // laboriously hand-calculated - wantedFanout := int64(5) + wantedBytes := int64(1289) // recalibrated after embedding smallTable in faState + wantedFanout := int64(6) wantedMaxFanout := int64(2) if stats.bytes != wantedBytes { t.Errorf("Wanted %d bytes, got %d", wantedBytes, stats.bytes) diff --git a/nfa_test.go b/nfa_test.go index 95a3618..9138097 100644 --- a/nfa_test.go +++ b/nfa_test.go @@ -505,9 +505,9 @@ var dedupWorkloads = []dedupWorkload{ "([xyz]?)*end", "(([mno]?)*)+", "([pqr]+)*s", }, stateCount: 1101, - totalEntries: 4371, + totalEntries: 4369, maxMax: 20, - tableSharing: 11, + tableSharing: 981, matches: []int{3, 2, 7}, }, { @@ -520,7 +520,7 @@ var dedupWorkloads = []dedupWorkload{ "(([op]?)*)+", 
"([qr]+)*t", "(e*)*f", "(g*)*h", }, stateCount: 149, - totalEntries: 261, + totalEntries: 254, maxMax: 50, tableSharing: 39, matches: []int{0, 0, 0}, @@ -568,7 +568,7 @@ var dedupWorkloads = []dedupWorkload{ stateCount: 837, totalEntries: 3410, maxMax: 30, - tableSharing: 16, + tableSharing: 744, matches: []int{10, 10, 10}, }, } diff --git a/prettyprinter_test.go b/prettyprinter_test.go index 75a989e..6dc0cd8 100644 --- a/prettyprinter_test.go +++ b/prettyprinter_test.go @@ -8,13 +8,12 @@ func TestPP(t *testing.T) { pp := newPrettyPrinter(1) state, _ := makeShellStyleFA([]byte(`"x*9"`), pp) pp.labelTable(&state.table, "START HERE") - wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280] - 914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280] - 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280] - 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288] - 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280] - 301[last step at 5] [1 transition(s)][s/t 81/153] -` + wanted := " 884[START HERE] '22/\"' → (914[on \" at 0][s/t 216/272] \n" + + " 914[on \" at 0] '78/x' → (384[*-Spinner][s/t 216/272] \n" + + " 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/272] \n" + + " 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/\"' → (769[on \" at 4][s/t 224/280] \n" + + " 769[on \" at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/272] \n" + + " 301[last step at 5] [1 transition(s)][s/t 81/145] \n" s := pp.printNFA(&state.table) if s != wanted { t.Errorf("LONG: wanted\n<%s>\ngot\n<%s>\n", wanted, s) diff --git a/rune_range.go b/rune_range.go index f85c5e3..7f3d9b5 100644 --- a/rune_range.go +++ b/rune_range.go @@ -25,12 +25,11 @@ func copyShellNode(shell *faState, oldNext *faState, newNext *faState, mem map[* if ok { return already } - table := smallTable{ + state := &faState{table: smallTable{ ceilings: slices.Clone(shell.table.ceilings), steps: make([]*faState, 
len(shell.table.steps)), epsilons: make([]*faState, len(shell.table.epsilons)), - } - state := &faState{table: table} + }} mem[shell] = state for i, step := range shell.table.steps { switch step { From 69415cee78dd83b7d4d119b15dadcb73385c9753 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sun, 31 May 2026 11:12:12 -0700 Subject: [PATCH 5/6] epsi_closure: pool buffers, restore two-counter dedup Port of the build-context-extract fix (6947edf) to the embedded-smallTable design. embed-smalltable inherited the same two regressions from the shared 32dc2a9 ancestor; shellstyle build at 1000 words was ~7x slower than main. 1. closureForNfa's walk dedup was broken. The refactor collapsed two independent counters into a single bufs.gen that closureForState mutates, so the walk's visited check never matched after the first state and the heavily-shared shellstyle graph got re-traversed (O(V*E)). Restored via bufs.walkGen, a snapshot closureForState never touches. 2. Per-call allocation. epsilonClosure allocated fresh maps per call and tableMarkOf heap-allocated a *tableMark per share group. closureBuffers are now pooled (sync.Pool, GC-reclaimable so no steady-state cost), maps are reused via a monotonic generation (no clearing), and tableMark is stored by value. Unlike the *smallTable-keyed bce branch, the walk here dedups by *faState identity rather than tableShareKey. Share-key dedup is unsafe for the walk: distinct states can share a steps backing array yet have different epsilons, and the zero key collapses all no-byte tables. The faState pointer is the natural unique identity now that smallTable is embedded by value. The post-pass table-pointer dedup keeps tableShareKey (collision-safe there, as it re-checks sameFieldTransitions) and now skips the zero key explicitly. Shellstyle build at 1000 words: 1363ms -> 473ms. Full suite passes. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- epsi_closure.go | 104 +++++++++++++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/epsi_closure.go b/epsi_closure.go index 196deda..a16dc85 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -1,62 +1,77 @@ package quamina -// tableMark carries the per-smallTable scratch used only during epsilon -// closure computation (lastVisitedGen for NFA walk dedup, and closureGen / -// closureRep for table-pointer dedup). These used to live as fields on -// smallTable itself, but they are purely build-time state and their -// permanent presence on every smallTable was wasted steady-state memory. -// They now live in a per-call side table that is discarded when -// epsilonClosure returns. +import "sync" + +// tableMark carries the per-table-share-group scratch used by the closure +// post-pass that collapses states sharing a smallTable. It used to live as +// fields on smallTable itself, but that is purely build-time state whose +// permanent presence was wasted steady-state memory; it now lives in a +// pooled side table (closureBuffers.tables). +// +// tableMark is stored by value so marking a share group costs no per-entry +// heap allocation. type tableMark struct { - lastVisitedGen uint32 - closureGen uint32 - closureRep *faState + closureGen uint64 + closureRep *faState } -// closureBuffers carries per-epsilonClosure-call scratch. The two maps -// replace build-time fields that used to sit on smallTable/faState; -// they live only for the duration of the closure computation. +// closureBuffers carries the scratch for epsilon closure computation. It is +// pooled (see closureBufferPool) and reused across epsilonClosure calls, so +// the maps are allocated once and grown, not rebuilt per call. Visited +// tracking is generation-based: gen only ever increases, so stale map +// entries from a previous use are simply older than the current generation +// and need no clearing. 
type closureBuffers struct { - gen uint32 - closureSetGen uint32 - closureList []*faState - tables map[tableShareKey]*tableMark - states map[*faState]uint32 + gen uint64 // monotonic counter; bumped by closureForState's two dedup phases + walkGen uint64 // snapshot of gen for the current closureForNfa walk (NFA state dedup) + closureSetGen uint64 // snapshot of gen for the current closureForState faState dedup + closureList []*faState // reusable accumulator for the state list before the dedup post-pass + tables map[tableShareKey]tableMark // share-group scratch for the post-pass (closureGen, closureRep) + states map[*faState]uint64 // per-faState last-visited gen, used by traverseEpsilons + walkVisited map[*faState]uint64 // per-faState last-walked gen, used by closureForNfa } func newClosureBuffers() *closureBuffers { return &closureBuffers{ - gen: 1, - tables: make(map[tableShareKey]*tableMark), - states: make(map[*faState]uint32), + tables: make(map[tableShareKey]tableMark), + states: make(map[*faState]uint64), + walkVisited: make(map[*faState]uint64), } } -// tableMarkOf returns the tableMark for t, creating one on first access. -func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark { - key := newTableShareKey(t) - m, ok := b.tables[key] - if !ok { - m = &tableMark{} - b.tables[key] = m - } - return m +// closureBufferPool reuses closureBuffers (and their maps) across the many +// epsilonClosure calls a build performs, eliminating per-call map allocation. +// The pool is concurrency-safe, and sync.Pool drops its contents on GC, so +// the maps do not become permanent steady-state memory. +var closureBufferPool = sync.Pool{ + New: func() any { return newClosureBuffers() }, } // epsilonClosure walks the automaton starting from the given state // and precomputes the epsilon closure for every reachable faState. 
func epsilonClosure(start *faState) { - bufs := newClosureBuffers() + bufs := closureBufferPool.Get().(*closureBuffers) + // Take a fresh generation for this walk. closureForState bumps bufs.gen + // for its own dedup phases, but it never touches walkGen, so the state + // dedup in closureForNfa compares against a value that stays fixed for + // the whole walk. + bufs.gen++ + bufs.walkGen = bufs.gen closureForState(start, bufs) closureForNfa(start, bufs) + closureBufferPool.Put(bufs) } +// closureForNfa dedups by faState identity, not table-share key: each state +// must be walked once. (Share-key dedup is unsafe here — distinct states can +// share a steps backing array yet have different epsilons, and the zero key +// collapses all no-byte tables; the post-pass below re-checks fieldTransitions +// on collision, but the walk has no such guard.) func closureForNfa(state *faState, bufs *closureBuffers) { - mark := bufs.tableMarkOf(&state.table) - if mark.lastVisitedGen == bufs.gen { + if bufs.walkVisited[state] == bufs.walkGen { return } - mark.lastVisitedGen = bufs.gen + bufs.walkVisited[state] = bufs.walkGen for _, s := range state.table.steps { if s != nil { @@ -87,8 +102,8 @@ func closureForState(state *faState, bufs *closureBuffers) { return } - // Use generation-based visited tracking instead of a fresh map per - // traversal. bufs.states records which gen last visited each state. + // Generation-based visited tracking: bufs.states records which gen last + // visited each state, so we never clear the map between traversals. bufs.gen++ bufs.closureSetGen = bufs.gen bufs.closureList = bufs.closureList[:0] @@ -99,15 +114,21 @@ func closureForState(state *faState, bufs *closureBuffers) { traverseEpsilons(state, state.table.epsilons, bufs) // Table-pointer dedup: when multiple states in the closure share the - // same *smallTable, their byte transitions are identical, so only one - // representative is needed. 
This is done as a post-pass over the - // closure list rather than during traversal to keep traverseEpsilons - // zero-overhead. States with different fieldTransitions are preserved. + // same smallTable (steps backing array), their byte transitions are + // identical, so only one representative is needed. Done as a post-pass + // over the closure list to keep traverseEpsilons zero-overhead. The + // zero key (no byte transitions) is never deduped, and states with + // different fieldTransitions are preserved. bufs.gen++ dedupGen := bufs.gen closure := make([]*faState, 0, len(bufs.closureList)) for _, s := range bufs.closureList { - mark := bufs.tableMarkOf(&s.table) + key := newTableShareKey(&s.table) + if (key == tableShareKey{}) { + closure = append(closure, s) + continue + } + mark := bufs.tables[key] if mark.closureGen == dedupGen { if sameFieldTransitions(mark.closureRep, s) { continue @@ -115,6 +136,7 @@ func closureForState(state *faState, bufs *closureBuffers) { } else { mark.closureGen = dedupGen mark.closureRep = s + bufs.tables[key] = mark } closure = append(closure, s) } From 324284121451d36b133e7069f19be15febec70cd Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sun, 31 May 2026 12:08:07 -0700 Subject: [PATCH 6/6] state_lists: dedup intern() via sort+compact, drop the seen map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfa2Dfa/intern is slated to go live. Profiling BenchmarkNfa2Dfa showed it ~64% slower than main, entirely inside intern()'s dedup: clear(sl.seen) plus a per-state map assign into a map[*faState]struct{} cost ~600ms where main's faState.closureSetGen generation-counter compare cost ~50ms. That field was removed to shrink steady-state memory, so the map was the fallback. intern already sorts the state set by pointer to build a canonical key, so duplicates are adjacent after the sort. 
Replacing the map-based dedup with slices.Compact over the sorted buffer removes the map (and its clear()) with no per-faState field and no extra sort — sorting was already happening. Nfa2Dfa vs main (geomean, n=6): time +63.8% -> +2.65%, B/op -1.05% -> -1.65%, allocs/op -7.8% -> -9.6%. Full suite passes. Co-Authored-By: Claude Opus 4.8 (1M context) --- state_lists.go | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/state_lists.go b/state_lists.go index 36f36bb..a4a5521 100644 --- a/state_lists.go +++ b/state_lists.go @@ -20,15 +20,13 @@ type internEntry struct { type stateLists struct { entries map[string]internEntry // Scratch space reused across intern() calls - sortBuf []*faState // reusable sorted buffer - keyBuf []byte // reusable key bytes buffer - seen map[*faState]struct{} // reusable dedup set, cleared per call + sortBuf []*faState // reusable sorted buffer + keyBuf []byte // reusable key bytes buffer } func newStateLists() *stateLists { return &stateLists{ entries: make(map[string]internEntry), - seen: make(map[*faState]struct{}), } } @@ -38,23 +36,16 @@ func newStateLists() *stateLists { // which either has already been computed for the set or is created and empty, and // a boolean indicating whether the DFA state has already been computed or not. func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) { - // Dedup within this call using a reused map. Previously this rode on - // a generation counter stored inline on each faState; that field has - // been removed to shrink steady-state memory. - clear(sl.seen) - sl.sortBuf = sl.sortBuf[:0] - for _, state := range list { - if _, ok := sl.seen[state]; ok { - continue - } - sl.seen[state] = struct{}{} - sl.sortBuf = append(sl.sortBuf, state) - } - - // compute a key representing the set + // Dedup by sorting then compacting adjacent duplicates. 
The set key is + // built from sorted pointers anyway, so sorting is not extra work; once + // sorted, duplicates are adjacent and Compact removes them in one linear + // pass. This avoids both a per-call dedup map and a per-faState + // generation field (the latter was removed to shrink steady-state memory). + sl.sortBuf = append(sl.sortBuf[:0], list...) slices.SortFunc(sl.sortBuf, func(a, b *faState) int { return cmp.Compare(uintptr(unsafe.Pointer(a)), uintptr(unsafe.Pointer(b))) }) + sl.sortBuf = slices.Compact(sl.sortBuf) // Pre-size the key buffer and write pointers with PutUint64 instead of // appending byte-by-byte, avoiding 8 append calls and bounds checks per state.