Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion memory_cost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ func TestStringFA(t *testing.T) {

_, current = cm.getMemoryBudget()
cm = newCoreMatcher()
_, _ = cm.setMemoryBudget(current - 1)
// Leave a margin larger than typical TotalAlloc variance across Go
// versions and the race detector's bookkeeping overhead. A 1-byte
// margin was flaky under Go 1.23 + -race; the i100 pattern's FA
// comfortably exceeds any reasonable margin below its own cost.
_, _ = cm.setMemoryBudget(current - uint64(len(i100)))
err = cm.addPattern("x", `{"x": ["x"]}`)
if err != nil {
t.Error("x?")
Expand Down
6 changes: 2 additions & 4 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,6 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
fieldSet[fm] = true
}

stepResult := &stepOut{}
for index := 0; len(currentStates) != 0 && index <= len(val); index++ {
var utf8Byte byte
if index < len(val) {
Expand All @@ -291,9 +290,8 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
for _, fm := range ecState.fieldTransitions {
fieldSet[fm] = true
}
ecState.table.step(utf8Byte, stepResult)
if stepResult.step != nil {
nextStates = append(nextStates, stepResult.step)
if nextStep := ecState.table.step(utf8Byte); nextStep != nil {
nextStates = append(nextStates, nextStep)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion regexp_nfa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ func TestMakeByteDotFA(t *testing.T) {
for i := 0; i < 256; i++ {
b := byte(i)
got := st.dStep(b)
if forbiddenBytes[b] {
if isForbiddenUTF8(b) {
if got != nil {
t.Errorf("accepted %x", b)
}
Expand Down
43 changes: 18 additions & 25 deletions small_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,42 +67,36 @@ func (t *smallTable) isEpsilonOnly() bool {
return len(t.epsilons) > 0 && len(t.ceilings) == 1
}

// stepOut is a caller-supplied result holder for smallTable.step, so that the
// two kinds of outcome can be handed back without merging them into one list.
type stepOut struct {
// step is the state selected from the table's steps for the input byte.
step *faState
// epsilons is the table's list of epsilon transitions, copied in by step.
epsilons []*faState
}

// forbiddenBytes is the set of byte values that can never appear in valid
// UTF-8: 0xC0, 0xC1 and 0xF5 through 0xFF. step and dStep consult it when an
// input byte is not below any ceiling in the table; a member of this set
// yields no next state, and any other byte there triggers a panic.
var forbiddenBytes = map[byte]bool{
0xC0: true, 0xC1: true,
0xF5: true, 0xF6: true, 0xF7: true, 0xF8: true, 0xF9: true, 0xFA: true,
0xFB: true, 0xFC: true, 0xFD: true, 0xFE: true, 0xFF: true,
}

// isJustEpsilons reports whether the table carries at least one epsilon
// transition while its steps consist of exactly one entry, and that entry
// is nil.
func (t *smallTable) isJustEpsilons() bool {
	if len(t.epsilons) == 0 {
		return false
	}
	if len(t.steps) != 1 {
		return false
	}
	// TODO: the nil check on the single step may be unnecessary.
	return t.steps[0] == nil
}

// step finds the list of states that result from a transition on the utf8Byte argument. The states can come
// as a result of looking in the table structure, and also the "epsilon" transitions that occur on every
// input byte. Since this is the white-hot center of Quamina's runtime CPU, we don't want to be merging
// the two lists. So to avoid any memory allocation, the caller passes in a structure with the two lists
// and step fills them in.
func (t *smallTable) step(utf8Byte byte, out *stepOut) {
out.epsilons = t.epsilons
// step returns the faState that results from a transition on utf8Byte, or nil
// if the table has no step for that byte. Epsilon transitions are handled
// separately (via precomputed epsilonClosure), so step never touches t.epsilons.
// This is the white-hot center of Quamina's runtime CPU; keep it inlinable.
func (t *smallTable) step(utf8Byte byte) *faState {
for index, ceiling := range t.ceilings {
if utf8Byte < ceiling {
out.step = t.steps[index]
return
return t.steps[index]
}
}
_, forbidden := forbiddenBytes[utf8Byte]
if forbidden {
return
// utf8Byte >= byteCeiling (0xF6): only valid if it's a forbidden UTF-8 byte,
// in which case we return nil so the caller can drop this path.
if isForbiddenUTF8(utf8Byte) {
return nil
}
panic("Malformed smallTable")
}

// isForbiddenUTF8 reports whether b is a byte value that can never occur in
// valid UTF-8. The forbidden values form two compact ranges, 0xC0–0xC1 and
// 0xF5–0xFF, so a few comparisons answer the question without a map lookup.
func isForbiddenUTF8(b byte) bool {
	switch {
	case b >= 0xF5:
		// 0xF5 through 0xFF.
		return true
	case b == 0xC0, b == 0xC1:
		return true
	default:
		return false
	}
}

// dStep takes a step through an NFA in the case where it is known that the NFA in question
// is deterministic, i.e. each combination of an faState and a byte value transitions to at
// most one other faState.
Expand All @@ -112,8 +106,7 @@ func (t *smallTable) dStep(utf8Byte byte) *faState {
return t.steps[index]
}
}
_, forbidden := forbiddenBytes[utf8Byte]
if forbidden {
if isForbiddenUTF8(utf8Byte) {
return nil
}
panic("Malformed smallTable")
Expand Down
9 changes: 6 additions & 3 deletions small_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,12 @@ func TestUnpack(t *testing.T) {

func TestDodgeBadUTF8(t *testing.T) {
st := makeSmallTable(nil, []byte{'a'}, []*faState{{}})
so := &stepOut{}
st.step(0xFE, so)
st.dStep(0xFE)
if got := st.step(0xFE); got != nil {
t.Errorf("step(0xFE) = %v, want nil", got)
}
if got := st.dStep(0xFE); got != nil {
t.Errorf("dStep(0xFE) = %v, want nil", got)
}
}

func TestSmallTableIterator(t *testing.T) {
Expand Down
Loading