From 69013bae65400eb04de65d50ffd8c50f66290dcd Mon Sep 17 00:00:00 2001 From: Benny Powers Date: Thu, 29 May 2025 14:36:33 +0300 Subject: [PATCH 1/3] feat: QueryCursor iterators Closes #30 --- query.go | 41 +++++++++++++++++++++++++++++++++++++++++ query_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/query.go b/query.go index 8e41ccf..2977244 100644 --- a/query.go +++ b/query.go @@ -10,6 +10,7 @@ import "C" import ( "bytes" + "iter" "fmt" "math" "regexp" @@ -767,6 +768,27 @@ func (qc *QueryCursor) Matches(query *Query, node *Node, text []byte) QueryMatch return qm } +// Iterator yielding all of the matches in the order that they were found. +// +// Each match contains the index of the pattern that matched, and a list of +// captures. Because multiple patterns can match the same set of nodes, +// one match may contain captures that appear *before* some of the +// captures from a previous match. +func (qc *QueryCursor) IterMatches(query *Query, node *Node, text []byte) iter.Seq[*QueryMatch] { + qm := qc.Matches(query, node, text) + return func(yield func(*QueryMatch) bool) { + for { + c := qm.Next() + if c == nil { + break + } + if !yield(c) { + return + } + } + } +} + // This C function is passed to Tree-sitter as the progress callback. // //export queryProgressCallback @@ -824,6 +846,25 @@ func (qc *QueryCursor) Captures(query *Query, node *Node, text []byte) QueryCapt } } +// Iterator yielding all of the individual captures in the order that they +// appear. +// +// This is useful if you don't care about which pattern matched, and just +// want a single, ordered sequence of captures. 
+func (qc *QueryCursor) IterCaptures(query *Query, node *Node, text []byte) iter.Seq2[*QueryMatch, uint] { + qm := qc.Captures(query, node, text) + return func(yield func(*QueryMatch, uint) bool) { + for { + c, i := qm.Next() + if c == nil { + break + } + if !yield(c, i) { + return + } + } + } +} // Set the range of bytes in which the query will be executed. // // The query cursor will return matches that intersect with the given point range. diff --git a/query_test.go b/query_test.go index 602f8b2..5e5a2ba 100644 --- a/query_test.go +++ b/query_test.go @@ -3881,6 +3881,50 @@ func TestQueryCapturesAndMatchesIteratorsAreFused(t *testing.T) { assert.Nil(t, matches.Next()) } +func TestQueryCapturesAndMatchesIter(t *testing.T) { + language := getLanguage("javascript") + query, err := NewQuery( + language, + ` + (comment) @comment + `, + ) + assert.Nil(t, err) + defer query.Close() + + source := ` + // one + // two + // three + /* unfinished + ` + + parser := NewParser() + defer parser.Close() + parser.SetLanguage(language) + + tree := parser.Parse([]byte(source), nil) + defer tree.Close() + + cursor := NewQueryCursor() + defer cursor.Close() + + captureMap := make(map[uint]*QueryMatch) + for capture, i := range cursor.IterCaptures(query, tree.RootNode(), []byte(source)) { + captureMap[i] = capture + } + + assert.Len(t, captureMap, 3) + + matchesSlice := make([]*QueryMatch, 0) + + for match := range cursor.IterMatches(query, tree.RootNode(), []byte(source)) { + matchesSlice = append(matchesSlice, match) + } + + assert.Len(t, matchesSlice, 3) +} + func TestQueryStartEndByteForPattern(t *testing.T) { language := getLanguage("javascript") From 19569d40c3feadfd1b070a2d5e49d36fa34a5492 Mon Sep 17 00:00:00 2001 From: Benny Powers Date: Thu, 29 May 2025 14:44:04 +0300 Subject: [PATCH 2/3] test: improved querycursor iterator test --- query_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/query_test.go b/query_test.go index 5e5a2ba..15f912e 
100644 --- a/query_test.go +++ b/query_test.go @@ -3909,12 +3909,15 @@ func TestQueryCapturesAndMatchesIter(t *testing.T) { cursor := NewQueryCursor() defer cursor.Close() - captureMap := make(map[uint]*QueryMatch) + captureSlice := make([]*QueryMatch, 0) + indexSlice := make([]uint, 0) for capture, i := range cursor.IterCaptures(query, tree.RootNode(), []byte(source)) { - captureMap[i] = capture + captureSlice = append(captureSlice, capture) + indexSlice = append(indexSlice, i) } - assert.Len(t, captureMap, 3) + assert.Len(t, captureSlice, 3) + assert.EqualValues(t, indexSlice, []uint{0,0,0}) matchesSlice := make([]*QueryMatch, 0) From e16aa6d6d177e8d174d64c692553e7bd8bd227eb Mon Sep 17 00:00:00 2001 From: Benny Powers Date: Thu, 29 May 2025 15:09:56 +0300 Subject: [PATCH 3/3] refactor: docs and names --- query.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/query.go b/query.go index 2977244..3a7f2f1 100644 --- a/query.go +++ b/query.go @@ -774,7 +774,10 @@ func (qc *QueryCursor) Matches(query *Query, node *Node, text []byte) QueryMatch // captures. Because multiple patterns can match the same set of nodes, // one match may contain captures that appear *before* some of the // captures from a previous match. -func (qc *QueryCursor) IterMatches(query *Query, node *Node, text []byte) iter.Seq[*QueryMatch] { +// +// Each match yielded by the iterator will overwrite the memory at the same location as prior matches, since the memory is reused. You can think of this as a stateful iterator. 
+// If you need to keep the data of a prior match without it being overwritten, you should copy what you need before moving on to the next match. +func (qc *QueryCursor) AllMatches(query *Query, node *Node, text []byte) iter.Seq[*QueryMatch] { qm := qc.Matches(query, node, text) return func(yield func(*QueryMatch) bool) { for { @@ -851,7 +854,10 @@ func (qc *QueryCursor) Captures(query *Query, node *Node, text []byte) QueryCapt // // This is useful if you don't care about which pattern matched, and just // want a single, ordered sequence of captures. -func (qc *QueryCursor) IterCaptures(query *Query, node *Node, text []byte) iter.Seq2[*QueryMatch, uint] { +// +// Each capture yielded by the iterator will overwrite the memory at the same location as prior captures, since the memory is reused. You can think of this as a stateful iterator. +// If you need to keep the data of a prior capture without it being overwritten, you should copy what you need before moving on to the next capture. +func (qc *QueryCursor) AllCaptures(query *Query, node *Node, text []byte) iter.Seq2[*QueryMatch, uint] { qm := qc.Captures(query, node, text) return func(yield func(*QueryMatch, uint) bool) { for { @@ -865,6 +871,7 @@ func (qc *QueryCursor) IterCaptures(query *Query, node *Node, text []byte) iter. } } } + // Set the range of bytes in which the query will be executed. // // The query cursor will return matches that intersect with the given point range.