Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 38 additions & 30 deletions epsi_closure.go
Original file line number Diff line number Diff line change
@@ -1,54 +1,62 @@
package quamina

type epsilonClosure struct {
closures map[*faState][]*faState
// epsilonClosure walks the automaton starting from the given table
// and precomputes the epsilon closure for every reachable faState.
func epsilonClosure(table *smallTable) {
closureForNfa(table, make(map[*smallTable]bool))
}

func newEpsilonClosure() *epsilonClosure {
return &epsilonClosure{make(map[*faState][]*faState)}
}
func closureForNfa(table *smallTable, visited map[*smallTable]bool) {
if visited[table] {
return
}
visited[table] = true

func (ec *epsilonClosure) getClosure(state *faState) []*faState {
var closure []*faState
var ok bool
if ec.closures != nil {
closure, ok = ec.closures[state]
if ok {
return closure
// Process each faState reachable via byte transitions
for _, state := range table.steps {
if state != nil {
closureForState(state)
closureForNfa(state.table, visited)
}
}
// Process each faState reachable via epsilon transitions
for _, eps := range table.epsilons {
closureForState(eps)
closureForNfa(eps.table, visited)
}
}

func closureForState(state *faState) {
if state.epsilonClosure != nil {
return // already computed
}

// not already known
if len(state.table.epsilons) == 0 {
justMe := []*faState{state}
if ec.closures != nil {
ec.closures[state] = justMe
}
return justMe
state.epsilonClosure = []*faState{state}
return
}

var closureStates = make(map[*faState]bool)
closureSet := make(map[*faState]bool)
if !state.table.isEpsilonOnly() {
closureStates[state] = true
closureSet[state] = true
}
traverseEpsilons(state, state.table.epsilons, closureStates)
for s := range closureStates {
traverseEpsilons(state, state.table.epsilons, closureSet)

closure := make([]*faState, 0, len(closureSet))
for s := range closureSet {
closure = append(closure, s)
}
if ec.closures != nil {
ec.closures[state] = closure
}
return closure
state.epsilonClosure = closure
}

func traverseEpsilons(start *faState, epsilons []*faState, closureStates map[*faState]bool) {
func traverseEpsilons(start *faState, epsilons []*faState, closureSet map[*faState]bool) {
for _, eps := range epsilons {
if eps == start || closureStates[eps] {
if eps == start || closureSet[eps] {
continue
}
if !eps.table.isEpsilonOnly() {
closureStates[eps] = true
closureSet[eps] = true
}
traverseEpsilons(start, eps.table.epsilons, closureStates)
traverseEpsilons(start, eps.table.epsilons, closureSet)
}
}
42 changes: 20 additions & 22 deletions epsi_closure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (

func TestEpsilonClosure(t *testing.T) {
var st *smallTable
var ec []*faState

pp := newPrettyPrinter(4589)

Expand All @@ -25,17 +24,17 @@ func TestEpsilonClosure(t *testing.T) {
pp.labelTable(aSc.table, "aSc")
aFM := newFieldMatcher()
aSc.fieldTransitions = []*fieldMatcher{aFM}
aEC := newEpsilonClosure()
ec = aEC.getClosure(aSa)
if len(ec) != 1 || !containsState(t, ec, aSa) {
t.Errorf("len(ec) = %d; want 0", len(ec))

closureForState(aSa)
if len(aSa.epsilonClosure) != 1 || !containsState(t, aSa.epsilonClosure, aSa) {
t.Errorf("len(ec) = %d; want 1", len(aSa.epsilonClosure))
}
ec = aEC.getClosure(aSstar)
if len(ec) != 1 || !containsState(t, ec, aSstar) {
closureForState(aSstar)
if len(aSstar.epsilonClosure) != 1 || !containsState(t, aSstar.epsilonClosure, aSstar) {
t.Error("aSstar")
}
ec = aEC.getClosure(aSc)
if len(ec) != 1 || !containsState(t, ec, aSc) {
closureForState(aSc)
if len(aSc.epsilonClosure) != 1 || !containsState(t, aSc.epsilonClosure, aSc) {
t.Error("aSc")
}

Expand Down Expand Up @@ -66,19 +65,18 @@ func TestEpsilonClosure(t *testing.T) {
pp.labelTable(bSstar.table, "bSstar")
pp.labelTable(bSx.table, "bSx")
pp.labelTable(bSsplice.table, "bSsplice")
//fmt.Println("B machine: " + pp.printNFA(bSsplice.table))

bEcShouldBeZero := []*faState{bSa, bSb, bSx, bSstar}
bEcShouldBeOne := []*faState{bSa, bSb, bSx, bSstar}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did a double-take here. I think these names are what it's supposed to be.

zNames := []string{"bSa", "bSb", "bSx", "bSstar"}
for i, shouldBeZero := range bEcShouldBeZero {
ec = aEC.getClosure(shouldBeZero)
if len(ec) != 1 || !containsState(t, ec, shouldBeZero) {
t.Errorf("should be Zero for %s, isn't", zNames[i])
for i, state := range bEcShouldBeOne {
closureForState(state)
if len(state.epsilonClosure) != 1 || !containsState(t, state.epsilonClosure, state) {
t.Errorf("should be 1 for %s, isn't", zNames[i])
}
}

ec = aEC.getClosure(bSsplice)
if len(ec) != 2 || !containsState(t, ec, bSa) || !containsState(t, ec, bSstar) {
closureForState(bSsplice)
if len(bSsplice.epsilonClosure) != 2 || !containsState(t, bSsplice.epsilonClosure, bSa) || !containsState(t, bSsplice.epsilonClosure, bSstar) {
t.Error("wrong EC for b")
}

Expand Down Expand Up @@ -106,14 +104,14 @@ func TestEpsilonClosure(t *testing.T) {
st = states[i].table
pp.labelTable(st, name)
}
// fmt.Println("C machine: " + pp.printNFA(cStart.table))

closureForState(cStart)
cWantInEC := []*faState{cStart, cSa, cSb, cSc, cSz}
ec = aEC.getClosure(cStart)
if len(ec) != 5 {
t.Errorf("len B ec %d wanted 5", len(ec))
if len(cStart.epsilonClosure) != 5 {
t.Errorf("len B ec %d wanted 5", len(cStart.epsilonClosure))
}
for i, want := range cWantInEC {
if !containsState(t, ec, want) {
if !containsState(t, cStart.epsilonClosure, want) {
t.Errorf("C missed %s", names[i])
}
}
Expand Down
33 changes: 14 additions & 19 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type faState struct {
table *smallTable
fieldTransitions []*fieldMatcher
isSpinner bool
epsilonClosure []*faState // precomputed epsilon closure including self
}

/*
Expand Down Expand Up @@ -79,7 +80,6 @@ func (tm *transmap) all() []*fieldMatcher {
// allocation will be reduced to nearly zero.
type nfaBuffers struct {
buf1, buf2 []*faState
eClosure *epsilonClosure
matches *matchSet
transitionsBuf []*fieldMatcher
resultBuf []X
Expand Down Expand Up @@ -107,13 +107,6 @@ func (nb *nfaBuffers) getBuf2() []*faState {
return nb.buf2
}

func (nb *nfaBuffers) getEClosure() *epsilonClosure {
if nb.eClosure == nil {
nb.eClosure = newEpsilonClosure()
}
return nb.eClosure
}

func (nb *nfaBuffers) getMatches() *matchSet {
if nb.matches == nil {
nb.matches = newMatchSet()
Expand All @@ -129,22 +122,24 @@ func (nb *nfaBuffers) getTransmap() *transmap {
}

// nfa2Dfa does what the name says, but as of 2025/12 is not used.
// Requires that precomputeEpsilonClosures has been called on the NFA.
func nfa2Dfa(nfaTable *smallTable) *faState {
startNfa := []*faState{{table: nfaTable}}
ec := newEpsilonClosure()
return n2dNode(startNfa, newStateLists(), ec)
startState := &faState{table: nfaTable}
closureForState(startState)
startNfa := []*faState{startState}
return n2dNode(startNfa, newStateLists())
}

// n2dNode input is a list of NFA states, which are all the states that are either the
// singleton start state or the states that can be reached from a previous state on
// a byte transition.
// It returns a DFA state (i.e. no epsilons) that corresponds to this aggregation of
// NFA states.
func n2dNode(rawNStates []*faState, sList *stateLists, ec *epsilonClosure) *faState {
func n2dNode(rawNStates []*faState, sList *stateLists) *faState {
// we expand the raw list of states by adding the epsilon closure of each
nStates := make([]*faState, 0, len(rawNStates))
for _, rawNState := range rawNStates {
nStates = append(nStates, ec.getClosure(rawNState)...)
nStates = append(nStates, rawNState.epsilonClosure...)
}

// the collection of states may have duplicates and, deduplicated, considered'
Expand Down Expand Up @@ -177,7 +172,7 @@ func n2dNode(rawNStates []*faState, sList *stateLists, ec *epsilonClosure) *faSt
rawStates = append(rawStates, ingredients[ingredient].table.epsilons...)
}
if len(rawStates) > 0 {
dfaState.table.addByteStep(byte(utf8byte), n2dNode(rawStates, sList, ec))
dfaState.table.addByteStep(byte(utf8byte), n2dNode(rawStates, sList))
}
}

Expand Down Expand Up @@ -220,7 +215,9 @@ func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*
// and should grow with use and minimize the need for memory allocation.
func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers, _ printer) []*fieldMatcher {
currentStates := bufs.getBuf1()
currentStates = append(currentStates, &faState{table: table})
startState := &faState{table: table}
closureForState(startState)
currentStates = append(currentStates, startState)
nextStates := bufs.getBuf2()

// a lot of the transitions stuff is going to be empty, but on the other hand
Expand All @@ -240,8 +237,7 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
utf8Byte = valueTerminator
}
for _, state := range currentStates {
closure := bufs.getEClosure().getClosure(state)
for _, ecState := range closure {
for _, ecState := range state.epsilonClosure {
newTransitions.add(ecState.fieldTransitions)
ecState.table.step(utf8Byte, stepResult)
if stepResult.step != nil {
Expand Down Expand Up @@ -278,8 +274,7 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
// we've run out of input bytes so we need to check the current states and their
// epsilon closures for matches
for _, state := range currentStates {
closure := bufs.getEClosure().getClosure(state)
for _, ecState := range closure {
for _, ecState := range state.epsilonClosure {
newTransitions.add(ecState.fieldTransitions)
}
}
Expand Down
4 changes: 3 additions & 1 deletion regexp_nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ func makeRegexpNFA(root regexpRoot, forField bool, pp printer) (*smallTable, *fi
pp.labelTable(table, "</Field>")
nextStep = &faState{table: table}
}
return makeNFAFromBranches(root, nextStep, forField, pp), nextField
fa := makeNFAFromBranches(root, nextStep, forField, pp)
epsilonClosure(fa)
return fa, nextField
}
func makeNFAFromBranches(root regexpRoot, nextStep *faState, forField bool, pp printer) *smallTable {
// completely empty regexp
Expand Down
3 changes: 1 addition & 2 deletions rune_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ func TestSkinnyRuneTree(t *testing.T) {
fa := nfaFromSkinnyRuneTree(srt, pp)
fmt.Println("FA:\n" + pp.printNFA(fa))
trans := []*fieldMatcher{}
bufs := newNfaBuffers()
matches := traverseNFA(fa, utf8, trans, bufs, pp)
matches := traverseDFA(fa, utf8, trans)
if len(matches) != 1 {
t.Error("MISSED")
}
Expand Down
1 change: 1 addition & 0 deletions shell_style.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,6 @@ func makeShellStyleFA(val []byte, pp printer) (start *smallTable, nextField *fie
lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}
pp.labelTable(lastStep.table, fmt.Sprintf("last step at %d", valIndex))
state.table.addByteStep(valueTerminator, lastStep)
epsilonClosure(start)
return
}
3 changes: 1 addition & 2 deletions v2_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ import (

// Benchmarks designed to work with Go's 1.24 testing.B.Loop(). Note: When doing this kind of benchmarking, always
// call quamina.MatchesForEvent, as opposed to working directly with the coreMatcher, because the top-level function
// is clever about re-using the nfaBuffers structure, which in particular includes the epsilonClosure cache. If you
// work directly with coreMatcher your CPU and memory profiles will be dominated by epsilonClosure.
// is clever about re-using the nfaBuffers structure.

func Benchmark8259Example(b *testing.B) {
j := `{
Expand Down
9 changes: 9 additions & 0 deletions value_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
// there's already a table, thus an out-degree > 1
if fields.startTable != nil {
fields.startTable = mergeFAs(fields.startTable, newFA, printer)
if fields.isNondeterministic {
epsilonClosure(fields.startTable)
}
m.update(fields)
return nextField
}
Expand All @@ -156,11 +159,17 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche

// now table is ready for use, nuke singleton to signal threads to use it
fields.startTable = mergeFAs(singletonAutomaton, newFA, sharedNullPrinter)
if fields.isNondeterministic {
epsilonClosure(fields.startTable)
}
fields.singletonMatch = nil
fields.singletonTransition = nil
} else {
// empty valueMatcher, no special cases, just jam in the new FA
fields.startTable = newFA
if fields.isNondeterministic {
epsilonClosure(fields.startTable)
}
}
m.update(fields)
return nextField
Expand Down
Loading
Loading