timbray · sayrer · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026
diff --git a/epsi_closure.go b/epsi_closure.go
@@ -1,54 +1,62 @@
 package quamina
 
-type epsilonClosure struct {
-	closures map[*faState][]*faState
+// epsilonClosure walks the automaton starting from the given table
+// and precomputes the epsilon closure for every reachable faState.
+func epsilonClosure(table *smallTable) {
+	closureForNfa(table, make(map[*smallTable]bool))
 }
 
-func newEpsilonClosure() *epsilonClosure {
-	return &epsilonClosure{make(map[*faState][]*faState)}
-}
+func closureForNfa(table *smallTable, visited map[*smallTable]bool) {
+	if visited[table] {
+		return
+	}
+	visited[table] = true
 
-func (ec *epsilonClosure) getClosure(state *faState) []*faState {
-	var closure []*faState
-	var ok bool
-	if ec.closures != nil {
-		closure, ok = ec.closures[state]
-		if ok {
-			return closure
+	// Process each faState reachable via byte transitions
+	for _, state := range table.steps {
+		if state != nil {
+			closureForState(state)
+			closureForNfa(state.table, visited)
 		}
 	}
+	// Process each faState reachable via epsilon transitions
+	for _, eps := range table.epsilons {
+		closureForState(eps)
+		closureForNfa(eps.table, visited)
+	}
+}
+
+func closureForState(state *faState) {
+	if state.epsilonClosure != nil {
+		return // already computed
+	}
 
-	// not already known
 	if len(state.table.epsilons) == 0 {
-		justMe := []*faState{state}
-		if ec.closures != nil {
-			ec.closures[state] = justMe
-		}
-		return justMe
+		state.epsilonClosure = []*faState{state}
+		return
 	}
 
-	var closureStates = make(map[*faState]bool)
+	closureSet := make(map[*faState]bool)
 	if !state.table.isEpsilonOnly() {
-		closureStates[state] = true
+		closureSet[state] = true
 	}
-	traverseEpsilons(state, state.table.epsilons, closureStates)
-	for s := range closureStates {
+	traverseEpsilons(state, state.table.epsilons, closureSet)
+
+	closure := make([]*faState, 0, len(closureSet))
+	for s := range closureSet {
 		closure = append(closure, s)
 	}
-	if ec.closures != nil {
-		ec.closures[state] = closure
-	}
-	return closure
+	state.epsilonClosure = closure
 }
 
-func traverseEpsilons(start *faState, epsilons []*faState, closureStates map[*faState]bool) {
+func traverseEpsilons(start *faState, epsilons []*faState, closureSet map[*faState]bool) {
 	for _, eps := range epsilons {
-		if eps == start || closureStates[eps] {
+		if eps == start || closureSet[eps] {
 			continue
 		}
 		if !eps.table.isEpsilonOnly() {
-			closureStates[eps] = true
+			closureSet[eps] = true
 		}
-		traverseEpsilons(start, eps.table.epsilons, closureStates)
+		traverseEpsilons(start, eps.table.epsilons, closureSet)
 	}
 }
diff --git a/epsi_closure_test.go b/epsi_closure_test.go
@@ -6,7 +6,6 @@ import (
 
 func TestEpsilonClosure(t *testing.T) {
 	var st *smallTable
-	var ec []*faState
 
 	pp := newPrettyPrinter(4589)
 
@@ -25,17 +24,17 @@ func TestEpsilonClosure(t *testing.T) {
 	pp.labelTable(aSc.table, "aSc")
 	aFM := newFieldMatcher()
 	aSc.fieldTransitions = []*fieldMatcher{aFM}
-	aEC := newEpsilonClosure()
-	ec = aEC.getClosure(aSa)
-	if len(ec) != 1 || !containsState(t, ec, aSa) {
-		t.Errorf("len(ec) = %d; want 0", len(ec))
+
+	closureForState(aSa)
+	if len(aSa.epsilonClosure) != 1 || !containsState(t, aSa.epsilonClosure, aSa) {
+		t.Errorf("len(ec) = %d; want 1", len(aSa.epsilonClosure))
 	}
-	ec = aEC.getClosure(aSstar)
-	if len(ec) != 1 || !containsState(t, ec, aSstar) {
+	closureForState(aSstar)
+	if len(aSstar.epsilonClosure) != 1 || !containsState(t, aSstar.epsilonClosure, aSstar) {
 		t.Error("aSstar")
 	}
-	ec = aEC.getClosure(aSc)
-	if len(ec) != 1 || !containsState(t, ec, aSc) {
+	closureForState(aSc)
+	if len(aSc.epsilonClosure) != 1 || !containsState(t, aSc.epsilonClosure, aSc) {
 		t.Error("aSc")
 	}
 
@@ -66,19 +65,18 @@ func TestEpsilonClosure(t *testing.T) {
 	pp.labelTable(bSstar.table, "bSstar")
 	pp.labelTable(bSx.table, "bSx")
 	pp.labelTable(bSsplice.table, "bSsplice")
-	//fmt.Println("B machine: " + pp.printNFA(bSsplice.table))
 
-	bEcShouldBeZero := []*faState{bSa, bSb, bSx, bSstar}
+	bEcShouldBeOne := []*faState{bSa, bSb, bSx, bSstar}
 	zNames := []string{"bSa", "bSb", "bSx", "bSstar"}
-	for i, shouldBeZero := range bEcShouldBeZero {
-		ec = aEC.getClosure(shouldBeZero)
-		if len(ec) != 1 || !containsState(t, ec, shouldBeZero) {
-			t.Errorf("should be Zero for %s, isn't", zNames[i])
+	for i, state := range bEcShouldBeOne {
+		closureForState(state)
+		if len(state.epsilonClosure) != 1 || !containsState(t, state.epsilonClosure, state) {
+			t.Errorf("should be 1 for %s, isn't", zNames[i])
 		}
 	}
 
-	ec = aEC.getClosure(bSsplice)
-	if len(ec) != 2 || !containsState(t, ec, bSa) || !containsState(t, ec, bSstar) {
+	closureForState(bSsplice)
+	if len(bSsplice.epsilonClosure) != 2 || !containsState(t, bSsplice.epsilonClosure, bSa) || !containsState(t, bSsplice.epsilonClosure, bSstar) {
 		t.Error("wrong EC for b")
 	}
 
@@ -106,14 +104,14 @@ func TestEpsilonClosure(t *testing.T) {
 		st = states[i].table
 		pp.labelTable(st, name)
 	}
-	// fmt.Println("C machine: " + pp.printNFA(cStart.table))
+
+	closureForState(cStart)
 	cWantInEC := []*faState{cStart, cSa, cSb, cSc, cSz}
-	ec = aEC.getClosure(cStart)
-	if len(ec) != 5 {
-		t.Errorf("len B ec %d wanted 5", len(ec))
+	if len(cStart.epsilonClosure) != 5 {
+		t.Errorf("len B ec %d wanted 5", len(cStart.epsilonClosure))
 	}
 	for i, want := range cWantInEC {
-		if !containsState(t, ec, want) {
+		if !containsState(t, cStart.epsilonClosure, want) {
 			t.Errorf("C missed %s", names[i])
 		}
 	}

diff --git a/nfa.go b/nfa.go
@@ -16,6 +16,7 @@ type faState struct {
 	table            *smallTable
 	fieldTransitions []*fieldMatcher
 	isSpinner        bool
+	epsilonClosure   []*faState // precomputed by closureForState; omits self when self's table is epsilon-only
 }
 
 /*
@@ -79,7 +80,6 @@ func (tm *transmap) all() []*fieldMatcher {
 // allocation will be reduced to nearly zero.
 type nfaBuffers struct {
 	buf1, buf2     []*faState
-	eClosure       *epsilonClosure
 	matches        *matchSet
 	transitionsBuf []*fieldMatcher
 	resultBuf      []X
@@ -107,13 +107,6 @@ func (nb *nfaBuffers) getBuf2() []*faState {
 	return nb.buf2
 }
 
-func (nb *nfaBuffers) getEClosure() *epsilonClosure {
-	if nb.eClosure == nil {
-		nb.eClosure = newEpsilonClosure()
-	}
-	return nb.eClosure
-}
-
 func (nb *nfaBuffers) getMatches() *matchSet {
 	if nb.matches == nil {
 		nb.matches = newMatchSet()
@@ -129,22 +122,24 @@ func (nb *nfaBuffers) getTransmap() *transmap {
 }
 
 // nfa2Dfa does what the name says, but as of 2025/12 is not used.
+// Requires that epsilonClosure has been called on nfaTable so every reachable state has its closure precomputed.
 func nfa2Dfa(nfaTable *smallTable) *faState {
-	startNfa := []*faState{{table: nfaTable}}
-	ec := newEpsilonClosure()
-	return n2dNode(startNfa, newStateLists(), ec)
+	startState := &faState{table: nfaTable}
+	closureForState(startState)
+	startNfa := []*faState{startState}
+	return n2dNode(startNfa, newStateLists())
 }
 
 // n2dNode input is a list of NFA states, which are all the states that are either the
 // singleton start state or the states that can be reached from a previous state on
 // a byte transition.
 // It returns a DFA state (i.e. no epsilons) that corresponds to this aggregation of
 // NFA states.
-func n2dNode(rawNStates []*faState, sList *stateLists, ec *epsilonClosure) *faState {
+func n2dNode(rawNStates []*faState, sList *stateLists) *faState {
 	// we expand the raw list of states by adding the epsilon closure of each
 	nStates := make([]*faState, 0, len(rawNStates))
 	for _, rawNState := range rawNStates {
-		nStates = append(nStates, ec.getClosure(rawNState)...)
+		nStates = append(nStates, rawNState.epsilonClosure...)
 	}
 
 	// the collection of states may have duplicates and, deduplicated, considered'
@@ -177,7 +172,7 @@ func n2dNode(rawNStates []*faState, sList *stateLists, ec *epsilonClosure) *faSt
 			rawStates = append(rawStates, ingredients[ingredient].table.epsilons...)
 		}
 		if len(rawStates) > 0 {
-			dfaState.table.addByteStep(byte(utf8byte), n2dNode(rawStates, sList, ec))
+			dfaState.table.addByteStep(byte(utf8byte), n2dNode(rawStates, sList))
 		}
 	}
 
@@ -220,7 +215,9 @@ func traverseDFA(table *smallTable, val []byte, transitions []*fieldMatcher) []*
 // and should grow with use and minimize the need for memory allocation.
 func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, bufs *nfaBuffers, _ printer) []*fieldMatcher {
 	currentStates := bufs.getBuf1()
-	currentStates = append(currentStates, &faState{table: table})
+	startState := &faState{table: table}
+	closureForState(startState)
+	currentStates = append(currentStates, startState)
 	nextStates := bufs.getBuf2()
 
 	// a lot of the transitions stuff is going to be empty, but on the other hand
@@ -240,8 +237,7 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
 			utf8Byte = valueTerminator
 		}
 		for _, state := range currentStates {
-			closure := bufs.getEClosure().getClosure(state)
-			for _, ecState := range closure {
+			for _, ecState := range state.epsilonClosure {
 				newTransitions.add(ecState.fieldTransitions)
 				ecState.table.step(utf8Byte, stepResult)
 				if stepResult.step != nil {
@@ -278,8 +274,7 @@ func traverseNFA(table *smallTable, val []byte, transitions []*fieldMatcher, buf
 	// we've run out of input bytes so we need to check the current states and their
 	// epsilon closures for matches
 	for _, state := range currentStates {
-		closure := bufs.getEClosure().getClosure(state)
-		for _, ecState := range closure {
+		for _, ecState := range state.epsilonClosure {
 			newTransitions.add(ecState.fieldTransitions)
 		}
 	}

diff --git a/regexp_nfa.go b/regexp_nfa.go
@@ -29,7 +29,9 @@ func makeRegexpNFA(root regexpRoot, forField bool, pp printer) (*smallTable, *fi
 		pp.labelTable(table, "</Field>")
 		nextStep = &faState{table: table}
 	}
-	return makeNFAFromBranches(root, nextStep, forField, pp), nextField
+	fa := makeNFAFromBranches(root, nextStep, forField, pp)
+	epsilonClosure(fa)
+	return fa, nextField
 }
 func makeNFAFromBranches(root regexpRoot, nextStep *faState, forField bool, pp printer) *smallTable {
 	// completely empty regexp

diff --git a/rune_range_test.go b/rune_range_test.go
@@ -20,8 +20,7 @@ func TestSkinnyRuneTree(t *testing.T) {
 	fa := nfaFromSkinnyRuneTree(srt, pp)
 	fmt.Println("FA:\n" + pp.printNFA(fa))
 	trans := []*fieldMatcher{}
-	bufs := newNfaBuffers()
-	matches := traverseNFA(fa, utf8, trans, bufs, pp)
+	matches := traverseDFA(fa, utf8, trans)
 	if len(matches) != 1 {
 		t.Error("MISSED")
 	}

diff --git a/shell_style.go b/shell_style.go
@@ -77,5 +77,6 @@ func makeShellStyleFA(val []byte, pp printer) (start *smallTable, nextField *fie
 	lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}
 	pp.labelTable(lastStep.table, fmt.Sprintf("last step at %d", valIndex))
 	state.table.addByteStep(valueTerminator, lastStep)
+	epsilonClosure(start)
 	return
 }
diff --git a/v2_bench_test.go b/v2_bench_test.go
@@ -9,8 +9,7 @@ import (
 
 // Benchmarks designed to work with Go's 1.24 testing.B.Loop().  Note: When doing this kind of benchmarking, always
 // call quamina.MatchesForEvent, as opposed to working directly with the coreMatcher, because the top-level function
-// is clever about re-using the nfaBuffers structure, which in particular includes the epsilonClosure cache. If you
-// work directly with coreMatcher your CPU and memory profiles will be dominated by epsilonClosure.
+// is clever about re-using the nfaBuffers structure.
 
 func Benchmark8259Example(b *testing.B) {
 	j := `{

diff --git a/value_matcher.go b/value_matcher.go
@@ -144,6 +144,9 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
 	// there's already a table, thus an out-degree > 1
 	if fields.startTable != nil {
 		fields.startTable = mergeFAs(fields.startTable, newFA, printer)
+		if fields.isNondeterministic {
+			epsilonClosure(fields.startTable)
+		}
 		m.update(fields)
 		return nextField
 	}
@@ -156,11 +159,17 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche
 
 		// now table is ready for use, nuke singleton to signal threads to use it
 		fields.startTable = mergeFAs(singletonAutomaton, newFA, sharedNullPrinter)
+		if fields.isNondeterministic {
+			epsilonClosure(fields.startTable)
+		}
 		fields.singletonMatch = nil
 		fields.singletonTransition = nil
 	} else {
 		// empty valueMatcher, no special cases, just jam in the new FA
 		fields.startTable = newFA
+		if fields.isNondeterministic {
+			epsilonClosure(fields.startTable)
+		}
 	}
 	m.update(fields)
 	return nextField