Skip to content
Merged
86 changes: 57 additions & 29 deletions epsi_closure.go
Original file line number Diff line number Diff line change
@@ -1,35 +1,60 @@
package quamina

// closureGeneration is a global counter used for generation-based visited
// tracking. It is incremented by epsilonClosure (for NFA walk dedup via
// lastVisitedGen) and by closureForState (for table-pointer dedup
// via closureGen). Each smallTable stores the generation it was last
// visited in, avoiding the need for a visited map. This works because
// epsilonClosure snapshots the counter into bufs.generation before the
// walk begins, so subsequent increments by the dedup pass don't interfere.
var closureGeneration uint64
// tableMark carries the per-smallTable scratch used only during epsilon
// closure computation (lastVisitedGen for NFA walk dedup, and closureGen /
// closureRep for table-pointer dedup). These used to live as fields on
// smallTable itself, but they are purely build-time state and their
// permanent presence on every smallTable was wasted steady-state memory.
// They now live in a per-call side table that is discarded when
// epsilonClosure returns.
type tableMark struct {
// lastVisitedGen is the closureBuffers.gen value stamped on this table by
// closureForNfa; a table whose stamp equals the current gen is not walked
// again.
lastVisitedGen uint32
// closureGen is the dedup-pass generation in which closureRep was recorded
// by closureForState. closureRep is only consulted when closureGen equals
// the generation of the dedup pass currently running.
closureGen uint32
// closureRep is the first faState seen for this table in that dedup pass.
// A later state sharing the table is left out of the closure when
// sameFieldTransitions(closureRep, state) reports true.
closureRep *faState
}

// closureBuffers carries per-epsilonClosure-call scratch. The two maps
// replace build-time fields that used to sit on smallTable/faState;
// they live only for the duration of the closure computation.
type closureBuffers struct {
generation uint64 // used by closureForNfa to avoid revisiting smallTables
closureSetGen uint64 // used by traverseEpsilons to avoid revisiting faStates
closureList []*faState // accumulated closure members, reused across calls
gen uint32 // bumped by closureForNfa (NFA walk dedup) and the closureForState post-pass (table-pointer dedup)
closureSetGen uint32 // snapshot of gen used by traverseEpsilons to dedup faState visits within one closure
closureList []*faState // reusable accumulator for the state list before the dedup post-pass
tables map[*smallTable]*tableMark // per-call side-table for smallTable scratch (lastVisitedGen, closureRep)
states map[*faState]uint32 // per-faState last-visited generation, used by traverseEpsilons
}

// newClosureBuffers allocates the scratch state for one closure computation:
// two empty side-table maps and a generation counter.
func newClosureBuffers() *closureBuffers {
	b := new(closureBuffers)
	// The counter starts at 1 so that a freshly created, all-zero tableMark
	// never compares equal to the current generation.
	b.gen = 1
	b.tables = make(map[*smallTable]*tableMark)
	b.states = make(map[*faState]uint32)
	return b
}

// tableMarkOf returns the tableMark for t, creating one on first access.
// tableMarkOf looks up the scratch record kept for t in b.tables. When t has
// no record yet, a zero-valued tableMark is stored under t and returned, so
// callers always get a non-nil pointer they can mutate in place.
func (b *closureBuffers) tableMarkOf(t *smallTable) *tableMark {
	if existing, found := b.tables[t]; found {
		return existing
	}
	fresh := new(tableMark)
	b.tables[t] = fresh
	return fresh
}

// epsilonClosure walks the automaton starting from the given table
// and precomputes the epsilon closure for every reachable faState.
func epsilonClosure(table *smallTable) {
closureGeneration++
Comment thread
sayrer marked this conversation as resolved.
bufs := &closureBuffers{
generation: closureGeneration,
}
bufs := newClosureBuffers()
closureForNfa(table, bufs)
}

func closureForNfa(table *smallTable, bufs *closureBuffers) {
if table.lastVisitedGen == bufs.generation {
mark := bufs.tableMarkOf(table)
if mark.lastVisitedGen == bufs.gen {
return
}
table.lastVisitedGen = bufs.generation
mark.lastVisitedGen = bufs.gen

for _, state := range table.steps {
if state != nil {
Expand All @@ -46,7 +71,7 @@ func closureForNfa(table *smallTable, bufs *closureBuffers) {
// closureForStateNoBufs computes the epsilon closure for a single state.
// Used directly in tests; production code uses closureForState.
func closureForStateNoBufs(state *faState) {
bufs := &closureBuffers{}
bufs := newClosureBuffers()
closureForState(state, bufs)
}

Expand All @@ -60,12 +85,13 @@ func closureForState(state *faState, bufs *closureBuffers) {
return
}

// Use generation-based visited tracking instead of a map
closureGeneration++
bufs.closureSetGen = closureGeneration
// Use generation-based visited tracking instead of a fresh map per
// traversal. bufs.states records which gen last visited each state.
bufs.gen++
bufs.closureSetGen = bufs.gen
bufs.closureList = bufs.closureList[:0]
if !state.table.isEpsilonOnly() {
state.closureSetGen = bufs.closureSetGen
bufs.states[state] = bufs.closureSetGen
bufs.closureList = append(bufs.closureList, state)
}
traverseEpsilons(state, state.table.epsilons, bufs)
Expand All @@ -75,16 +101,18 @@ func closureForState(state *faState, bufs *closureBuffers) {
// representative is needed. This is done as a post-pass over the
// closure list rather than during traversal to keep traverseEpsilons
// zero-overhead. States with different fieldTransitions are preserved.
closureGeneration++
bufs.gen++
dedupGen := bufs.gen
closure := make([]*faState, 0, len(bufs.closureList))
for _, s := range bufs.closureList {
if s.table.closureGen == closureGeneration {
if sameFieldTransitions(s.table.closureRep, s) {
mark := bufs.tableMarkOf(s.table)
if mark.closureGen == dedupGen {
if sameFieldTransitions(mark.closureRep, s) {
continue
}
} else {
s.table.closureGen = closureGeneration
s.table.closureRep = s
mark.closureGen = dedupGen
mark.closureRep = s
}
closure = append(closure, s)
}
Expand All @@ -95,10 +123,10 @@ func closureForState(state *faState, bufs *closureBuffers) {
// via epsilon transitions into bufs.closureList.
func traverseEpsilons(start *faState, epsilons []*faState, bufs *closureBuffers) {
for _, eps := range epsilons {
if eps == start || eps.closureSetGen == bufs.closureSetGen {
if eps == start || bufs.states[eps] == bufs.closureSetGen {
continue
}
eps.closureSetGen = bufs.closureSetGen
bufs.states[eps] = bufs.closureSetGen
if !eps.table.isEpsilonOnly() {
bufs.closureList = append(bufs.closureList, eps)
}
Expand Down
5 changes: 4 additions & 1 deletion memory_cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ func cmFieldMatcherStats(fm *fieldMatcher, stats *matcherStats, pp printer) {
for _, vm := range fmTrans {
singleton := vm.fields().singletonMatch
if singleton != nil {
stats.bytes += int64(len(singleton))
stats.bytes += int64(cap(singleton))
}
table := vm.fields().startTable
if table == nil {
continue
}
cmStateStats(&faState{table: table}, stats, pp)
}
}
Expand Down
23 changes: 20 additions & 3 deletions memory_cost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,36 @@ func TestQuaminaMemoryCost(t *testing.T) {
t.Error(err)
}
bytes := q.GetMatcherStats()["bytes"]
if bytes != 1481 {
if bytes != 1321 {
t.Error("WRONG NUMBERS")
}
err = q.AddPattern("x", `{"y":[{"wildcard": "*y"}]}}`)
if err != nil {
t.Error(err)
}
bytes = q.GetMatcherStats()["bytes"]
if bytes != 2*1481 {
if bytes != 2*1321 {
t.Error("WRONG NUMBERS")
}
}

// Regression: GetMatcherStats panicked when a valueMatcher used the
// singleton-match optimization (singletonMatch set, startTable nil).
// That optimization fires for any field with a single string or literal
// value — the matcher uses bytes.Compare instead of building an FA.
// Minimal repro: {"Animated": [false]}. cmFieldMatcherStats now skips
// the nil startTable rather than building a faState with state.table == nil.
func TestQuaminaMemoryCostSingleton(t *testing.T) {
	// Check the constructor error instead of discarding it: if New fails,
	// stop here with its message rather than failing later inside AddPattern.
	q, err := New()
	if err != nil {
		t.Fatal(err)
	}
	if err = q.AddPattern("p", `{"Animated": [false]}`); err != nil {
		t.Fatal(err)
	}
	// The stats call itself is the regression check; the byte count must
	// also be non-zero once a pattern has been added.
	s := q.GetMatcherStats()
	if s["bytes"] == 0 {
		t.Errorf("expected bytes > 0 for singleton matcher, got %v", s["bytes"])
	}
}

func TestMcNfaSizes(t *testing.T) {
pp := newPrettyPrinter(2355)
wc1 := `"*z"`
Expand All @@ -55,7 +72,7 @@ func TestMcNfaSizes(t *testing.T) {
seenStates: make(map[*faState]bool),
}
cmStateStats(&faState{table: fa1}, stats, pp)
wantedBytes := int64(1481) // laboriously hand-calculated
wantedBytes := int64(1321) // laboriously hand-calculated
wantedFanout := int64(5)
wantedMaxFanout := int64(2)
if stats.bytes != wantedBytes {
Expand Down
21 changes: 11 additions & 10 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ import (
type faState struct {
table *smallTable
fieldTransitions []*fieldMatcher
isSpinner bool
Comment thread
timbray marked this conversation as resolved.
epsilonClosure []*faState // precomputed epsilon closure including self
closureSetGen uint64 // generation for closure set visited tracking
isSpinner bool
}

/*
Expand Down Expand Up @@ -344,23 +343,25 @@ func makeFaStepKey(s1, s2 *faState) faStepKey {
// epsilon transitions from state1 and state2. This prevents deep nesting of
// splice states that would otherwise accumulate during repeated merges.
func simplifySplices(state1, state2 *faState) []*faState {
closureGeneration++
gen := closureGeneration
// A freshly-allocated visited map is used as a side table; the old
// approach stored a generation counter on faState itself, which bloated
// every state permanently for build-only state.
visited := make(map[*faState]bool)
targets := make([]*faState, 0, 4)
targets = simplifyCollect(state1, gen, targets)
targets = simplifyCollect(state2, gen, targets)
targets = simplifyCollect(state1, visited, targets)
targets = simplifyCollect(state2, visited, targets)
return targets
}

func simplifyCollect(s *faState, gen uint64, targets []*faState) []*faState {
if s.closureSetGen == gen {
func simplifyCollect(s *faState, visited map[*faState]bool, targets []*faState) []*faState {
if visited[s] {
return targets
}
s.closureSetGen = gen
visited[s] = true

if s.table.isEpsilonOnly() {
for _, eps := range s.table.epsilons {
targets = simplifyCollect(eps, gen, targets)
targets = simplifyCollect(eps, visited, targets)
}
} else {
targets = append(targets, s)
Expand Down
12 changes: 6 additions & 6 deletions prettyprinter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ func TestPP(t *testing.T) {
pp := newPrettyPrinter(1)
table, _ := makeShellStyleFA([]byte(`"x*9"`), pp)
pp.labelTable(table, "START HERE")
wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 240/312]
914[on " at 0] '78/x' → (384[*-Spinner][s/t 240/312]
384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 240/312]
322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 248/320]
769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 240/312]
301[last step at 5] [1 transition(s)][s/t 105/185]
wanted := ` 884[START HERE] '22/"' → (914[on " at 0][s/t 216/280]
914[on " at 0] '78/x' → (384[*-Spinner][s/t 216/280]
384[*-Spinner] '39/9' → (322[spinEscape on 9 at 3] / ★ → 384[*-Spinner][s/t 216/280]
322[spinEscape on 9 at 3] ε → 384[*-Spinner] / '22/"' → (769[on " at 4][s/t 224/288]
769[on " at 4] 'f5/ℵ' → (301[last step at 5][s/t 216/280]
301[last step at 5] [1 transition(s)][s/t 81/153]
`
s := pp.printNFA(table)
if s != wanted {
Expand Down
17 changes: 3 additions & 14 deletions small_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,9 @@ const valueTerminator byte = 0xf5
// by branching on 'b' to a state that has no byte transitions but two epsilons, one each for s1 and s2.

type smallTable struct {
ceilings []byte
steps []*faState
epsilons []*faState
lastVisitedGen uint64 // generation counter for epsilon closure traversal
// closureGen records which closureGeneration this table's
// representative was set in. If it equals the current global
// closureGeneration, then closureRep is valid; otherwise, the
// table has not yet been seen in this dedup pass.
closureGen uint64
// closureRep is the representative faState for this table in the
// current closure dedup pass. When multiple states share the same
// smallTable and have identical fieldTransitions, only this
// representative is kept in the closure.
closureRep *faState
ceilings []byte
Comment thread
sayrer marked this conversation as resolved.
steps []*faState
epsilons []*faState
}

// newSmallTable mostly exists to enforce the constraint that every smallTable has a byteCeiling entry at
Expand Down
22 changes: 12 additions & 10 deletions state_lists.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ type internEntry struct {
type stateLists struct {
entries map[string]internEntry
// Scratch space reused across intern() calls
sortBuf []*faState // reusable sorted buffer
keyBuf []byte // reusable key bytes buffer
sortBuf []*faState // reusable sorted buffer
keyBuf []byte // reusable key bytes buffer
seen map[*faState]bool // reusable dedup set, cleared per call
}

// newStateLists returns a stateLists whose entries and seen maps are
// allocated and empty. The sortBuf and keyBuf scratch slices are left nil.
func newStateLists() *stateLists {
	sl := new(stateLists)
	sl.entries = make(map[string]internEntry)
	sl.seen = make(map[*faState]bool)
	return sl
}

Expand All @@ -36,17 +38,17 @@ func newStateLists() *stateLists {
// which either has already been computed for the set or is created and empty, and
// a boolean indicating whether the DFA state has already been computed or not.
func (sl *stateLists) intern(list []*faState) ([]*faState, *faState, bool) {
// Dedupe using the global generation counter and faState.closureSetGen
// instead of allocating a map per call. Safe to reuse closureSetGen
// because nfa2Dfa runs after epsilon closure computation is complete.
closureGeneration++
gen := closureGeneration
// Dedup within this call using a reused map. Previously this rode on
// a generation counter stored inline on each faState; that field has
// been removed to shrink steady-state memory.
clear(sl.seen)
sl.sortBuf = sl.sortBuf[:0]
for _, state := range list {
if state.closureSetGen != gen {
state.closureSetGen = gen
sl.sortBuf = append(sl.sortBuf, state)
if sl.seen[state] {
continue
}
sl.seen[state] = true
sl.sortBuf = append(sl.sortBuf, state)
}

// compute a key representing the set
Expand Down
Loading