diff --git a/epsi_closure.go b/epsi_closure.go index 1aff56e..35d9853 100644 --- a/epsi_closure.go +++ b/epsi_closure.go @@ -1,35 +1,60 @@ package quamina -// closureGeneration is a global counter used for generation-based visited -// tracking. It is incremented by epsilonClosure (for NFA walk dedup via -// lastVisitedGen) and by closureForState (for table-pointer dedup -// via closureGen). Each smallTable stores the generation it was last -// visited in, avoiding the need for a visited map. This works because -// epsilonClosure snapshots the counter into bufs.generation before the -// walk begins, so subsequent increments by the dedup pass don't interfere. -var closureGeneration uint64 +// tableMark carries the per-smallTable scratch used only during epsilon +// closure computation (lastVisitedGen for NFA walk dedup, and closureGen / +// closureRep for table-pointer dedup). These used to live as fields on +// smallTable itself, but they are purely build-time state and their +// permanent presence on every smallTable was wasted steady-state memory. +// They now live in a per-call side table that is discarded when +// epsilonClosure returns. +type tableMark struct { + lastVisitedGen uint32 + closureGen uint32 + closureRep *faState +} +// closureBuffers carries per-epsilonClosure-call scratch. The two maps +// replace build-time fields that used to sit on smallTable/faState; +// they live only for the duration of the closure computation. type closureBuffers struct { - generation uint64 // used by closureForNfa to avoid revisiting smallTables - closureSetGen uint64 // used by traverseEpsilons to avoid revisiting faStates - closureList []*faState // accumulated closure members, reused across calls + gen uint32 // bumped by closureForNfa (NFA walk dedup) and the closureForState post-pass (table-pointer dedup) + closureSetGen uint32 // snapshot of gen used by traverseEpsilons to dedup faState visits within one closure + closureList []*faState // reusable accumulator for the state list before the dedup post-pass + tables map[*smallTable]*tableMark // per-call side-table for smallTable scratch (lastVisitedGen, closureRep) + states map[*faState]uint32 // per-faState last-visited generation, used by traverseEpsilons +} + +func newClosureBuffers() *closureBuffers { + return &closureBuffers{ + gen: 1, + tables: make(map[*smallTable]*tableMark), + states: make(map[*faState]uint32), + } +} + +// tableMarkOf returns the tableMark for t, creating one on first access. +func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark { + m, ok := b.tables[t] + if !ok { + m = &tableMark{} + b.tables[t] = m + } + return m } // epsilonClosure walks the automaton starting from the given table // and precomputes the epsilon closure for every reachable faState. func epsilonClosure(table *smallTable) { - closureGeneration++ - bufs := &closureBuffers{ - generation: closureGeneration, - } + bufs := newClosureBuffers() closureForNfa(table, bufs) } func closureForNfa(table *smallTable, bufs *closureBuffers) { - if table.lastVisitedGen == bufs.generation { + mark := bufs.tableMarkOf(table) + if mark.lastVisitedGen == bufs.gen { return } - table.lastVisitedGen = bufs.generation + mark.lastVisitedGen = bufs.gen for _, state := range table.steps { if state != nil { @@ -46,7 +71,7 @@ func closureForNfa(table *smallTable, bufs *closureBuffers) { // closureForStateNoBufs computes the epsilon closure for a single state. // Used directly in tests; production code uses closureForState. func closureForStateNoBufs(state *faState) { - bufs := &closureBuffers{} + bufs := newClosureBuffers() closureForState(state, bufs) } @@ -60,12 +85,13 @@ func closureForState(state *faState, bufs *closureBuffers) { return } - // Use generation-based visited tracking instead of a map - closureGeneration++ - bufs.closureSetGen = closureGeneration + // Use generation-based visited tracking instead of a fresh map per + // traversal. bufs.states records which gen last visited each state. + bufs.gen++ + bufs.closureSetGen = bufs.gen bufs.closureList = bufs.closureList[:0] if !state.table.isEpsilonOnly() { - state.closureSetGen = bufs.closureSetGen + bufs.states[state] = bufs.closureSetGen bufs.closureList = append(bufs.closureList, state) } traverseEpsilons(state, state.table.epsilons, bufs) @@ -75,16 +101,18 @@ func closureForState(state *faState, bufs *closureBuffers) { // representative is needed. This is done as a post-pass over the // closure list rather than during traversal to keep traverseEpsilons // zero-overhead. States with different fieldTransitions are preserved. - closureGeneration++ + bufs.gen++ + dedupGen := bufs.gen closure := make([]*faState, 0, len(bufs.closureList)) for _, s := range bufs.closureList { - if s.table.closureGen == closureGeneration { - if sameFieldTransitions(s.table.closureRep, s) { + mark := bufs.tableMarkOf(s.table) + if mark.closureGen == dedupGen { + if sameFieldTransitions(mark.closureRep, s) { continue } } else { - s.table.closureGen = closureGeneration - s.table.closureRep = s + mark.closureGen = dedupGen + mark.closureRep = s } closure = append(closure, s) } @@ -95,10 +123,10 @@ func closureForState(state *faState, bufs *closureBuffers) { // via epsilon transitions into bufs.closureList. func traverseEpsilons(start *faState, epsilons []*faState, bufs *closureBuffers) { for _, eps := range epsilons { - if eps == start || eps.closureSetGen == bufs.closureSetGen { + if eps == start || bufs.states[eps] == bufs.closureSetGen { continue } - eps.closureSetGen = bufs.closureSetGen + bufs.states[eps] = bufs.closureSetGen if !eps.table.isEpsilonOnly() { bufs.closureList = append(bufs.closureList, eps) } diff --git a/memory_cost.go b/memory_cost.go index 198c5e1..c1863ff 100644 --- a/memory_cost.go +++ b/memory_cost.go @@ -21,9 +21,12 @@ func cmFieldMatcherStats(fm *fieldMatcher, stats *matcherStats, pp printer) { for _, vm := range fmTrans { singleton := vm.fields().singletonMatch if singleton != nil { - stats.bytes += int64(len(singleton)) + stats.bytes += int64(cap(singleton)) } table := vm.fields().startTable + if table == nil { + continue + } cmStateStats(&faState{table: table}, stats, pp) } } diff --git a/memory_cost_test.go b/memory_cost_test.go index 4569039..ee31785 100644 --- a/memory_cost_test.go +++ b/memory_cost_test.go @@ -31,7 +31,7 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes := q.GetMatcherStats()["bytes"] - if bytes != 1481 { + if bytes != 1321 { t.Error("WRONG NUMBERS") } err = q.AddPattern("x", `{"y":[{"wildcard": "*y"}]}}`) @@ -39,11 +39,28 @@ func TestQuaminaMemoryCost(t *testing.T) { t.Error(err) } bytes = q.GetMatcherStats()["bytes"] - if bytes != 2*1481 { + if bytes != 2*1321 { t.Error("WRONG NUMBERS") } } +// Regression: GetMatcherStats panicked when a valueMatcher used the +// singleton-match optimization (singletonMatch set, startTable nil). +// That optimization fires for any field with a single string or literal +// value — the matcher uses bytes.Compare instead of building an FA. +// Minimal repro: {"Animated": [false]}. cmFieldMatcherStats now skips +// the nil startTable rather than building a faState with state.table == nil. +func TestQuaminaMemoryCostSingleton(t *testing.T) { + q, _ := New() + if err := q.AddPattern("p", `{"Animated": [false]}`); err != nil { + t.Fatal(err) + } + s := q.GetMatcherStats() + if s["bytes"] == 0 { + t.Errorf("expected bytes > 0 for singleton matcher, got %v", s["bytes"]) + } +} + func TestMcNfaSizes(t *testing.T) { pp := newPrettyPrinter(2355) wc1 := `"*z"` @@ -55,7 +72,7 @@ func TestMcNfaSizes(t *testing.T) { seenStates: make(map[*faState]bool), } cmStateStats(&faState{table: fa1}, stats, pp) - wantedBytes := int64(1481) // laboriously hand-calculated + wantedBytes := int64(1321) // laboriously hand-calculated wantedFanout := int64(5) wantedMaxFanout := int64(2) if stats.bytes != wantedBytes { diff --git a/nfa.go b/nfa.go index 023bbff..a9341db 100644 --- a/nfa.go +++ b/nfa.go @@ -13,9 +13,8 @@ import ( type faState struct { table *smallTable fieldTransitions []*fieldMatcher - isSpinner bool epsilonClosure []*faState // precomputed epsilon closure including self - closureSetGen uint64 // generation for closure set visited tracking + isSpinner bool } /* @@ -344,23 +343,25 @@ func makeFaStepKey(s1, s2 *faState) faStepKey { // epsilon transitions from state1 and state2. This prevents deep nesting of // splice states that would otherwise accumulate during repeated merges. func simplifySplices(state1, state2 *faState) []*faState { - closureGeneration++ - gen := closureGeneration + // A freshly-allocated visited map is used as a side table; the old + // approach stored a generation counter on faState itself, which bloated + // every state permanently for build-only state. + visited := make(map[*faState]bool) targets := make([]*faState, 0, 4) - targets = simplifyCollect(state1, gen, targets) - targets = simplifyCollect(state2, gen, targets) + targets = simplifyCollect(state1, visited, targets) + targets = simplifyCollect(state2, visited, targets) return targets } -func simplifyCollect(s *faState, gen uint64, targets []*faState) []*faState { - if s.closureSetGen == gen { +func simplifyCollect(s *faState, visited map[*faState]bool, targets []*faState) []*faState { + if visited[s] { return targets } - s.closureSetGen = gen + visited[s] = true if s.table.isEpsilonOnly() { for _, eps := range s.table.epsilons { - targets = simplifyCollect(eps, gen, targets) + targets = simplifyCollect(eps, visited, targets) } } else { targets = append(targets, s) diff --git a/prettyprinter_test.go b/prettyprinter_test.go index 4968f56..cc62011 100644 --- a/prettyprinter_test.go +++ b/prettyprinter_test.go @@ -8,12 +8,12 @@ func TestPP(t *testing.T) { pp := newPrettyPrinter(1) table, _ := makeShellStyleFA([]byte(`"x*9"`), pp) pp.labelTable(table, "START HERE") - wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 240/312] - 914[on " at 0] '78/x' → (384[*-Spinner][s/t 240/312] - 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 240/312] - 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 248/320] - 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 240/312] - 301[last step at 5] [1 transition(s)][s/t 105/185] + wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280] + 914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280] + 384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280] + 322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288] + 769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280] + 301[last step at 5] [1 transition(s)][s/t 81/153] ` s := pp.printNFA(table) if s != wanted { diff --git a/small_table.go b/small_table.go index 1a8f3a5..5b80da0 100644 --- a/small_table.go +++ b/small_table.go @@ -38,20 +38,9 @@ const valueTerminator byte = 0xf5 // by branching on 'b' to a state that has no byte transitions but two epsilons, one each for s1 and s2. type smallTable struct { - ceilings []byte - steps []*faState - epsilons []*faState - lastVisitedGen uint64 // generation counter for epsilon closure traversal - // closureGen records which closureGeneration this table's - // representative was set in. If it equals the current global - // closureGeneration, then closureRep is valid; otherwise, the - // table has not yet been seen in this dedup pass. - closureGen uint64 - // closureRep is the representative faState for this table in the - // current closure dedup pass. When multiple states share the same - // smallTable and have identical fieldTransitions, only this - // representative is kept in the closure. - closureRep *faState + ceilings []byte + steps []*faState + epsilons []*faState } // newSmallTable mostly exists to enforce the constraint that every smallTable has a byteCeiling entry at diff --git a/state_lists.go b/state_lists.go index 54c664e..8040ed5 100644 --- a/state_lists.go +++ b/state_lists.go @@ -20,13 +20,15 @@ type internEntry struct { type stateLists struct { entries map[string]internEntry // Scratch space reused across intern() calls - sortBuf []*faState // reusable sorted buffer - keyBuf []byte // reusable key bytes buffer + sortBuf []*faState // reusable sorted buffer + keyBuf []byte // reusable key bytes buffer + seen map[*faState]bool // reusable dedup set, cleared per call } func newStateLists() *stateLists { return &stateLists{ entries: make(map[string]internEntry), + seen: make(map[*faState]bool), } } @@ -36,17 +38,17 @@ func newStateLists() *stateLists { // which either has already been computed for the set or is created and empty, and // a boolean indicating whether the DFA state has already been computed or not. func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) { - // Dedupe using the global generation counter and faState.closureSetGen - // instead of allocating a map per call. Safe to reuse closureSetGen - // because nfa2Dfa runs after epsilon closure computation is complete. - closureGeneration++ - gen := closureGeneration + // Dedup within this call using a reused map. Previously this rode on + // a generation counter stored inline on each faState; that field has + // been removed to shrink steady-state memory. + clear(sl.seen) sl.sortBuf = sl.sortBuf[:0] for _, state := range list { - if state.closureSetGen != gen { - state.closureSetGen = gen - sl.sortBuf = append(sl.sortBuf, state) + if sl.seen[state] { + continue } + sl.seen[state] = true + sl.sortBuf = append(sl.sortBuf, state) } // compute a key representing the set