From 32dc2a9d75e900badf4fef7a8403672b62ee4e5b Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Thu, 16 Apr 2026 16:57:00 -0700 Subject: [PATCH 1/9] nfa: move build-time gen state off smallTable/faState to side tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smallTable.lastVisitedGen, smallTable.closureGen, smallTable.closureRep and faState.closureSetGen were all only read/written during pattern-add time (epsilon closure computation, merge simplification, and the test-only nfa2Dfa intern path). They sat on every table and state permanently, bloating steady-state memory by 24 B/table and 8 B/state. Move that state to a per-call side table held on closureBuffers (maps keyed by *smallTable / *faState). A fresh closureBuffers is created at the top of each epsilonClosure call and dropped when the call returns. simplifySplices and stateLists.intern each get their own local visited set using the same pattern. The global closureGeneration counter is no longer needed. Struct sizes (measured via unsafe.Sizeof): smallTable 96 B -> 72 B -24 faState 72 B -> 64 B -8 ------- per pair -32 B Bench (Apple M1 Ultra, n=6): ShellStyleBuildTime-20 56.10m -> 48.77m -13.07% 958 B -> 797 B -16.81% 1 allocs -> 0 allocs Match-time benchmarks are neutral or marginally faster (CityLots, NumberMatching, 8259Example each show ~-1% — likely the cache-footprint win from the shrunken structs). Passes -race. Co-Authored-By: Claude Opus 4.7 (1M context) --- epsi_closure.go | 86 ++++++++++++++++++++++++++++++++----------------- nfa.go | 21 ++++++------ small_table.go | 17 ++-------- state_lists.go | 22 +++++++------ 4 files changed, 83 insertions(+), 63 deletions(-) diff --git a/epsi_closure.go b/epsi_closure.go index 1aff56e..7e923b3 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -1,35 +1,60 @@ package quamina -// closureGeneration is a global counter used for generation-based visited -// tracking. 
It is incremented by epsilonClosure (for NFA walk dedup via -// lastVisitedGen) and by closureForState (for table-pointer dedup -// via closureGen). Each smallTable stores the generation it was last -// visited in, avoiding the need for a visited map. This works because -// epsilonClosure snapshots the counter into bufs.generation before the -// walk begins, so subsequent increments by the dedup pass don't interfere. -var closureGeneration uint64 +// tableMark carries the per-smallTable scratch used only during epsilon +// closure computation (lastVisitedGen for NFA walk dedup, and closureGen / +// closureRep for table-pointer dedup). These used to live as fields on +// smallTable itself, but they are purely build-time state and their +// permanent presence on every smallTable was wasted steady-state memory. +// They now live in a per-call side table that is discarded when +// epsilonClosure returns. +type tableMark struct { + lastVisitedGen uint32 + closureGen uint32 + closureRep *faState +} +// closureBuffers carries per-epsilonClosure-call scratch. The two maps +// replace build-time fields that used to sit on smallTable/faState; +// they live only for the duration of the closure computation. type closureBuffers struct { - generation uint64 // used by closureForNfa to avoid revisiting smallTables - closureSetGen uint64 // used by traverseEpsilons to avoid revisiting faStates - closureList []*faState // accumulated closure members, reused across calls + gen uint32 + closureSetGen uint32 + closureList []*faState + tables map[*smallTable]*tableMark + states map[*faState]uint32 +} + +func newClosureBuffers() *closureBuffers { + return &closureBuffers{ + gen: 1, + tables: make(map[*smallTable]*tableMark), + states: make(map[*faState]uint32), + } +} + +// tableMarkOf returns the tableMark for t, creating one on first access. 
+func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark { + m, ok := b.tables[t] + if !ok { + m = &tableMark{} + b.tables[t] = m + } + return m } // epsilonClosure walks the automaton starting from the given table // and precomputes the epsilon closure for every reachable faState. func epsilonClosure(table *smallTable) { - closureGeneration++ - bufs := &closureBuffers{ - generation: closureGeneration, - } + bufs := newClosureBuffers() closureForNfa(table, bufs) } func closureForNfa(table *smallTable, bufs *closureBuffers) { - if table.lastVisitedGen == bufs.generation { + mark := bufs.tableMarkOf(table) + if mark.lastVisitedGen == bufs.gen { return } - table.lastVisitedGen = bufs.generation + mark.lastVisitedGen = bufs.gen for _, state := range table.steps { if state != nil { @@ -46,7 +71,7 @@ func closureForNfa(table *smallTable, bufs *closureBuffers) { // closureForStateNoBufs computes the epsilon closure for a single state. // Used directly in tests; production code uses closureForState. func closureForStateNoBufs(state *faState) { - bufs := &closureBuffers{} + bufs := newClosureBuffers() closureForState(state, bufs) } @@ -60,12 +85,13 @@ func closureForState(state *faState, bufs *closureBuffers) { return } - // Use generation-based visited tracking instead of a map - closureGeneration++ - bufs.closureSetGen = closureGeneration + // Use generation-based visited tracking instead of a fresh map per + // traversal. bufs.states records which gen last visited each state. + bufs.gen++ + bufs.closureSetGen = bufs.gen bufs.closureList = bufs.closureList[:0] if !state.table.isEpsilonOnly() { - state.closureSetGen = bufs.closureSetGen + bufs.states[state] = bufs.closureSetGen bufs.closureList = append(bufs.closureList, state) } traverseEpsilons(state, state.table.epsilons, bufs) @@ -75,16 +101,18 @@ func closureForState(state *faState, bufs *closureBuffers) { // representative is needed. 
This is done as a post-pass over the // closure list rather than during traversal to keep traverseEpsilons // zero-overhead. States with different fieldTransitions are preserved. - closureGeneration++ + bufs.gen++ + dedupGen := bufs.gen closure := make([]*faState, 0, len(bufs.closureList)) for _, s := range bufs.closureList { - if s.table.closureGen == closureGeneration { - if sameFieldTransitions(s.table.closureRep, s) { + mark := bufs.tableMarkOf(s.table) + if mark.closureGen == dedupGen { + if sameFieldTransitions(mark.closureRep, s) { continue } } else { - s.table.closureGen = closureGeneration - s.table.closureRep = s + mark.closureGen = dedupGen + mark.closureRep = s } closure = append(closure, s) } @@ -95,10 +123,10 @@ func closureForState(state *faState, bufs *closureBuffers) { // via epsilon transitions into bufs.closureList. func traverseEpsilons(start *faState, epsilons []*faState, bufs *closureBuffers) { for _, eps := range epsilons { - if eps == start || eps.closureSetGen == bufs.closureSetGen { + if eps == start || bufs.states[eps] == bufs.closureSetGen { continue } - eps.closureSetGen = bufs.closureSetGen + bufs.states[eps] = bufs.closureSetGen if !eps.table.isEpsilonOnly() { bufs.closureList = append(bufs.closureList, eps) } diff --git a/nfa.go b/nfa.go index 023bbff..b6b9d89 100644 --- a/nfa.go +++ b/nfa.go @@ -13,9 +13,8 @@ import ( type faState struct { table *smallTable fieldTransitions []*fieldMatcher - isSpinner bool epsilonClosure []*faState // precomputed epsilon closure including self - closureSetGen uint64 // generation for closure set visited tracking + isSpinner bool } /* @@ -344,23 +343,25 @@ func makeFaStepKey(s1, s2 *faState) faStepKey { // epsilon transitions from state1 and state2. This prevents deep nesting of // splice states that would otherwise accumulate during repeated merges. 
func simplifySplices(state1, state2 *faState) []*faState { - closureGeneration++ - gen := closureGeneration + // A freshly-allocated visited map is used as a side table; the old + // approach stored a generation counter on faState itself, which bloated + // every state permanently for build-only state. + visited := make(map[*faState]struct{}) targets := make([]*faState, 0, 4) - targets = simplifyCollect(state1, gen, targets) - targets = simplifyCollect(state2, gen, targets) + targets = simplifyCollect(state1, visited, targets) + targets = simplifyCollect(state2, visited, targets) return targets } -func simplifyCollect(s *faState, gen uint64, targets []*faState) []*faState { - if s.closureSetGen == gen { +func simplifyCollect(s *faState, visited map[*faState]struct{}, targets []*faState) []*faState { + if _, seen := visited[s]; seen { return targets } - s.closureSetGen = gen + visited[s] = struct{}{} if s.table.isEpsilonOnly() { for _, eps := range s.table.epsilons { - targets = simplifyCollect(eps, gen, targets) + targets = simplifyCollect(eps, visited, targets) } } else { targets = append(targets, s) diff --git a/small_table.go b/small_table.go index 1a8f3a5..5b80da0 100644 --- a/small_table.go +++ b/small_table.go @@ -38,20 +38,9 @@ const valueTerminator byte = 0xf5 // by branching on 'b' to a state that has no byte transitions but two epsilons, one each for s1 and s2. type smallTable struct { - ceilings []byte - steps []*faState - epsilons []*faState - lastVisitedGen uint64 // generation counter for epsilon closure traversal - // closureGen records which closureGeneration this table's - // representative was set in. If it equals the current global - // closureGeneration, then closureRep is valid; otherwise, the - // table has not yet been seen in this dedup pass. - closureGen uint64 - // closureRep is the representative faState for this table in the - // current closure dedup pass. 
When multiple states share the same - // smallTable and have identical fieldTransitions, only this - // representative is kept in the closure. - closureRep *faState + ceilings []byte + steps []*faState + epsilons []*faState } // newSmallTable mostly exists to enforce the constraint that every smallTable has a byteCeiling entry at diff --git a/state_lists.go b/state_lists.go index 54c664e..36f36bb 100644 --- a/state_lists.go +++ b/state_lists.go @@ -20,13 +20,15 @@ type internEntry struct { type stateLists struct { entries map[string]internEntry // Scratch space reused across intern() calls - sortBuf []*faState // reusable sorted buffer - keyBuf []byte // reusable key bytes buffer + sortBuf []*faState // reusable sorted buffer + keyBuf []byte // reusable key bytes buffer + seen map[*faState]struct{} // reusable dedup set, cleared per call } func newStateLists() *stateLists { return &stateLists{ entries: make(map[string]internEntry), + seen: make(map[*faState]struct{}), } } @@ -36,17 +38,17 @@ func newStateLists() *stateLists { // which either has already been computed for the set or is created and empty, and // a boolean indicating whether the DFA state has already been computed or not. func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) { - // Dedupe using the global generation counter and faState.closureSetGen - // instead of allocating a map per call. Safe to reuse closureSetGen - // because nfa2Dfa runs after epsilon closure computation is complete. - closureGeneration++ - gen := closureGeneration + // Dedup within this call using a reused map. Previously this rode on + // a generation counter stored inline on each faState; that field has + // been removed to shrink steady-state memory. 
+ clear(sl.seen) sl.sortBuf = sl.sortBuf[:0] for _, state := range list { - if state.closureSetGen != gen { - state.closureSetGen = gen - sl.sortBuf = append(sl.sortBuf, state) + if _, ok := sl.seen[state]; ok { + continue } + sl.seen[state] = struct{}{} + sl.sortBuf = append(sl.sortBuf, state) } // compute a key representing the set From 2326bc04553963565cdac7cfad3c034fc1afee93 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 10:57:10 -0700 Subject: [PATCH 2/9] tests: recalibrate size assertions after gen-state side-tables move Moving build-time generation state off faState/smallTable (commit 192aaa0, cherry-picked from dec9a56) shrinks both structs. The hardcoded size constants in TestQuaminaMemoryCost, TestMcNfaSizes, and TestPP were calibrated against the pre-move sizes on main: faState/smallTable bytes per state: 1481 -> 1321 (-160 / -10.8%) prettyprinter s/t per state: 240/312 -> 216/280 (-24 / -32) TestMcBasicSizes computes its expectation from unsafe.Sizeof so it needs no update; only the hand-calibrated values do. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- memory_cost_test.go | 6 +++--- prettyprinter_test.go | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/memory_cost_test.go b/memory_cost_test.go index 4569039..21e49e5 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -31,7 +31,7 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes := q.GetMatcherStats()["bytes"] - if bytes != 1481 { + if bytes != 1321 { t.Error("WRONG NUMBERS") } err = q.AddPattern("x", `{"y":[{"wildcard": "*y"}]}}`) @@ -39,7 +39,7 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes = q.GetMatcherStats()["bytes"] - if bytes != 2*1481 { + if bytes != 2*1321 { t.Error("WRONG NUMBERS") } } @@ -55,7 +55,7 @@ func TestMcNfaSizes(t *testing.T) { seenStates: make(map[*faState]bool), } cmStateStats(&faState{table: fa1}, stats, pp) - wantedBytes := int64(1481) // laboriously hand-calculated + wantedBytes := int64(1321) // laboriously hand-calculated wantedFanout := int64(5) wantedMaxFanout := int64(2) if stats.bytes != wantedBytes { diff --git a/prettyprinter_test.go b/prettyprinter_test.go index 4968f56..cc62011 100644 --- a/prettyprinter_test.go +++ b/prettyprinter_test.go @@ -8,12 +8,12 @@ func TestPP(t *testing.T) { pp := newPrettyPrinter(1) table, _ := makeShellStyleFA([]byte(`"x*9"`), pp) pp.labelTable(table, "START HERE") - wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 240/312] - 914[on " at 0] '78/x' → (384[*-Spinner][s/t 240/312] - 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 240/312] - 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 248/320] - 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 240/312] - 301[last step at 5] [1 transition(s)][s/t 105/185] + wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280] + 914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280] + 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280] + 
322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288] + 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280] + 301[last step at 5] [1 transition(s)][s/t 81/153] ` s := pp.printNFA(table) if s != wanted { From 6bddf824df77d174037d0052285940d48fc9e70d Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 11:56:47 -0700 Subject: [PATCH 3/9] memory_cost: fix nil-deref in GetMatcherStats for singleton valueMatchers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a valueMatcher uses the singleton-match optimization (singletonMatch holds the raw bytes, startTable is nil — e.g. boolean-valued patterns like `{"Animated": [false]}`), cmFieldMatcherStats was constructing &faState{table: nil} and passing it to cmStateStats, which then crashed on state.table.steps. The singleton byte count is already added a few lines above; there is no NFA to walk, so just skip the nil-table case. Add TestQuaminaMemoryCostSingleton as a regression test. Minimal repro is q.AddPattern("p", `{"Animated": [false]}`) followed by q.GetMatcherStats() — without this fix it panics. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- memory_cost.go | 3 +++ memory_cost_test.go | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/memory_cost.go b/memory_cost.go index 198c5e1..794d5c6 100644 --- a/memory_cost.go +++ b/memory_cost.go @@ -24,6 +24,9 @@ func cmFieldMatcherStats(fm *fieldMatcher, stats *matcherStats, pp printer) { stats.bytes += int64(len(singleton)) } table := vm.fields().startTable + if table == nil { + continue + } cmStateStats(&faState{table: table}, stats, pp) } } diff --git a/memory_cost_test.go b/memory_cost_test.go index 21e49e5..aba59df 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -44,6 +44,21 @@ func TestQuaminaMemoryCost(t *testing.T) { } } +// Regression: GetMatcherStats panicked when a valueMatcher used the +// singleton-match optimization (singletonMatch set, startTable nil), +// e.g. boolean-valued patterns. cmFieldMatcherStats now skips the nil +// startTable rather than building a faState with state.table == nil. +func TestQuaminaMemoryCostSingleton(t *testing.T) { + q, _ := New() + if err := q.AddPattern("p", `{"Animated": [false]}`); err != nil { + t.Fatal(err) + } + s := q.GetMatcherStats() + if s["bytes"] == 0 { + t.Errorf("expected bytes > 0 for singleton matcher, got %v", s["bytes"]) + } +} + func TestMcNfaSizes(t *testing.T) { pp := newPrettyPrinter(2355) wc1 := `"*z"` From 5225540409b6d037dc4cc0d2236f10956fc26d85 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:04:08 -0700 Subject: [PATCH 4/9] memory_cost_test: clarify singleton-optimization comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tim flagged that the singleton-match optimization isn't specific to booleans — it applies to any field with a single string or literal value, where the matcher can use bytes.Compare instead of building an FA. 
Reword the regression-test comment accordingly; keep the boolean pattern as the minimal repro rather than the defining case. Co-Authored-By: Claude Opus 4.7 (1M context) --- memory_cost_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/memory_cost_test.go b/memory_cost_test.go index aba59df..ee31785 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -45,9 +45,11 @@ func TestQuaminaMemoryCost(t *testing.T) { } // Regression: GetMatcherStats panicked when a valueMatcher used the -// singleton-match optimization (singletonMatch set, startTable nil), -// e.g. boolean-valued patterns. cmFieldMatcherStats now skips the nil -// startTable rather than building a faState with state.table == nil. +// singleton-match optimization (singletonMatch set, startTable nil). +// That optimization fires for any field with a single string or literal +// value — the matcher uses bytes.Compare instead of building an FA. +// Minimal repro: {"Animated": [false]}. cmFieldMatcherStats now skips +// the nil startTable rather than building a faState with state.table == nil. func TestQuaminaMemoryCostSingleton(t *testing.T) { q, _ := New() if err := q.AddPattern("p", `{"Animated": [false]}`); err != nil { From 0cc74342b9eff89561bc75b1e12e0b0f03ec83d1 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:11:29 -0700 Subject: [PATCH 5/9] epsi_closure: restore field-level comments on closureBuffers The side-tables refactor (32dc2a9) replaced field-level comments with a block comment about the struct's purpose. The block comment explains why two maps exist but stopped documenting what each field is for. Put the field-level docs back; matches the pre-refactor style for generation/closureSetGen/closureList and adds documentation for the two new map fields. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- epsi_closure.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/epsi_closure.go b/epsi_closure.go index 7e923b3..35d9853 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -17,11 +17,11 @@ type tableMark struct { // replace build-time fields that used to sit on smallTable/faState; // they live only for the duration of the closure computation. type closureBuffers struct { - gen uint32 - closureSetGen uint32 - closureList []*faState - tables map[*smallTable]*tableMark - states map[*faState]uint32 + gen uint32 // compared by closureForNfa (NFA walk dedup); bumped by closureForState before its epsilon traversal and again before its post-pass (table-pointer dedup) + closureSetGen uint32 // snapshot of gen used by traverseEpsilons to dedup faState visits within one closure + closureList []*faState // reusable accumulator for the state list before the dedup post-pass + tables map[*smallTable]*tableMark // per-call side-table for smallTable scratch (lastVisitedGen, closureGen, closureRep) + states map[*faState]uint32 // per-faState last-visited generation, used by traverseEpsilons } func newClosureBuffers() *closureBuffers { From 9e71c62b8104fb38f348aee2c4f94c99d90167fd Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:31:11 -0700 Subject: [PATCH 6/9] nfa: document map[K]struct{} set idiom in simplifyCollect Tim flagged that `visited[s] = struct{}{}` isn't a familiar idiom. Add a comment above the insert line explaining map[K]struct{} is Go's keys-only set type (struct{}{} is zero bytes, so the value half is free), with the insert and membership idioms.
Co-Authored-By: Claude Opus 4.7 (1M context) --- nfa.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nfa.go b/nfa.go index b6b9d89..a3a652c 100644 --- a/nfa.go +++ b/nfa.go @@ -357,6 +357,9 @@ func simplifyCollect(s *faState, visited map[*faState]struct{}, targets []*faSta if _, seen := visited[s]; seen { return targets } + // map[K]struct{} is the idiomatic Go set: struct{}{} is a zero-byte + // value, so the map's value half costs nothing. Insert with + // `m[k] = struct{}{}`, test with `_, ok := m[k]`. visited[s] = struct{}{} if s.table.isEpsilonOnly() { From f5b75ba77e9e2ff8b0948809c9208436a56753a7 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:37:57 -0700 Subject: [PATCH 7/9] memory_cost: use cap(singleton) not len(singleton) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Tim — the byte cost reported by GetMatcherStats should reflect the backing array footprint, matching how mcSmallTable / mcFaState already count their slice fields (cap, not len). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- memory_cost.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memory_cost.go b/memory_cost.go index 794d5c6..c1863ff 100644 --- a/memory_cost.go +++ b/memory_cost.go @@ -21,7 +21,7 @@ func cmFieldMatcherStats(fm *fieldMatcher, stats *matcherStats, pp printer) { for _, vm := range fmTrans { singleton := vm.fields().singletonMatch if singleton != nil { - stats.bytes += int64(len(singleton)) + stats.bytes += int64(cap(singleton)) } table := vm.fields().startTable if table == nil { From 44069ae52cabc6614ae6be414369dcc87ac3eee1 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:44:42 -0700 Subject: [PATCH 8/9] state_lists, nfa: use map[K]bool for sets, matching quamina convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Tim — the rest of the codebase (match_set, stats, matcher, memory_cost, anything_but) uses map[K]bool for sets. The two newer sites that used map[K]struct{} (stateLists.seen and simplifyCollect's visited) were stylistic outliers; switch to bool for consistency. Also drops the comment I just added explaining the struct{}{} idiom — no longer needed since the code is now plain map[K]bool with `if m[k]` / `m[k] = true`. Co-Authored-By: Claude Opus 4.7 (1M context) --- nfa.go | 11 ++++------- state_lists.go | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/nfa.go b/nfa.go index a3a652c..a9341db 100644 --- a/nfa.go +++ b/nfa.go @@ -346,21 +346,18 @@ func simplifySplices(state1, state2 *faState) []*faState { // A freshly-allocated visited map is used as a side table; the old // approach stored a generation counter on faState itself, which bloated // every state permanently for build-only state. 
- visited := make(map[*faState]struct{}) + visited := make(map[*faState]bool) targets := make([]*faState, 0, 4) targets = simplifyCollect(state1, visited, targets) targets = simplifyCollect(state2, visited, targets) return targets } -func simplifyCollect(s *faState, visited map[*faState]struct{}, targets []*faState) []*faState { - if _, seen := visited[s]; seen { +func simplifyCollect(s *faState, visited map[*faState]bool, targets []*faState) []*faState { + if visited[s] { return targets } - // map[K]struct{} is the idiomatic Go set: struct{}{} is a zero-byte - // value, so the map's value half costs nothing. Insert with - // `m[k] = struct{}{}`, test with `_, ok := m[k]`. - visited[s] = struct{}{} + visited[s] = true if s.table.isEpsilonOnly() { for _, eps := range s.table.epsilons { diff --git a/state_lists.go b/state_lists.go index 36f36bb..1b1bd2d 100644 --- a/state_lists.go +++ b/state_lists.go @@ -22,13 +22,13 @@ type stateLists struct { // Scratch space reused across intern() calls sortBuf []*faState // reusable sorted buffer keyBuf []byte // reusable key bytes buffer - seen map[*faState]struct{} // reusable dedup set, cleared per call + seen map[*faState]bool // reusable dedup set, cleared per call } func newStateLists() *stateLists { return &stateLists{ entries: make(map[string]internEntry), - seen: make(map[*faState]struct{}), + seen: make(map[*faState]bool), } } @@ -44,10 +44,10 @@ func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) { clear(sl.seen) sl.sortBuf = sl.sortBuf[:0] for _, state := range list { - if _, ok := sl.seen[state]; ok { + if sl.seen[state] { continue } - sl.seen[state] = struct{}{} + sl.seen[state] = true sl.sortBuf = append(sl.sortBuf, state) } From 408dc57603262d7fe6f77f9a9a48dc7a93f5d1d3 Mon Sep 17 00:00:00 2001 From: Robert Sayre Date: Sat, 30 May 2026 16:46:00 -0700 Subject: [PATCH 9/9] state_lists: gofmt comment alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit The seen-field type swap (struct{} → bool) shortened the field's type, so gofmt re-aligns the adjacent field comments. Apply the formatter. Co-Authored-By: Claude Opus 4.7 (1M context) --- state_lists.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/state_lists.go b/state_lists.go index 1b1bd2d..8040ed5 100644 --- a/state_lists.go +++ b/state_lists.go @@ -20,8 +20,8 @@ type internEntry struct { type stateLists struct { entries map[string]internEntry // Scratch space reused across intern() calls - sortBuf []*faState // reusable sorted buffer - keyBuf []byte // reusable key bytes buffer + sortBuf []*faState // reusable sorted buffer + keyBuf []byte // reusable key bytes buffer seen map[*faState]bool // reusable dedup set, cleared per call }