-
-
Notifications
You must be signed in to change notification settings - Fork 446
Improve parser and lexer #811
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f835edd
10e4040
89f608f
436b8ae
152f67d
2f4437c
a13ee0e
af94f4b
e2111b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,36 @@ | ||
package file | ||
|
||
import ( | ||
"strings" | ||
"unicode/utf8" | ||
) | ||
import "strings" | ||
|
||
// Source wraps the raw text handed to NewSource. The text is stored as-is
// (no conversion to runes), so all offsets used internally are byte offsets.
type Source struct {
	raw string
}

// NewSource returns a Source holding contents unchanged.
func NewSource(contents string) Source {
	return Source{
		raw: contents,
	}
}

// String returns the original text given to NewSource.
func (s Source) String() string {
	return s.raw
}

// Snippet returns the text of the given line, without its terminating '\n'.
// Lines are 1-based and separated by '\n' only.
//
// The second result is false when the source is empty, when line is less
// than 1, or when the source has fewer than line lines.
//
// Snippet is retained for the benefit of existing callers.
func (s Source) Snippet(line int) (string, bool) {
	// Line numbers start at 1; anything lower cannot name a line. Without
	// this guard the skip loop below would not run and the first line would
	// be returned for zero and negative line numbers.
	if line < 1 || s.raw == "" {
		return "", false
	}

	// Drop the first line-1 lines. Running out of newlines means the
	// requested line does not exist.
	rest := s.raw
	for i := 1; i < line; i++ {
		pos := strings.IndexByte(rest, '\n')
		if pos < 0 {
			return "", false
		}
		rest = rest[pos+1:]
	}

	// Cut at the next newline; the last line of the source may have none.
	if end := strings.IndexByte(rest, '\n'); end >= 0 {
		rest = rest[:end]
	}
	return rest, true
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package ring | ||
|
||
// Ring is a very simple FIFO ring buffer implementation that uses a slice.
// The internal slice will only grow, never shrink. When it grows, it grows in
// chunks of "chunkSize" (given as argument in the [New] function). Pointer and
// reference types can be safely used because vacated slots are overwritten
// with the zero value.
//
// The zero value of Ring is usable: it behaves like a ring created with a
// chunkSize of 1.
type Ring[T any] struct {
	data      []T // backing storage; len(data) is the current capacity
	back      int // index in data of the oldest item
	len       int // number of items currently stored
	chunkSize int // number of slots added each time the ring grows
}

// New returns an empty ring that grows by chunkSize slots at a time. No
// storage is allocated until the first Enqueue. It panics if chunkSize is
// less than 1.
func New[T any](chunkSize int) *Ring[T] {
	if chunkSize < 1 {
		panic("chunkSize must be greater than zero")
	}
	return &Ring[T]{
		chunkSize: chunkSize,
	}
}

// Len returns the number of items currently stored in the ring.
func (r *Ring[T]) Len() int {
	return r.len
}

// Cap returns the number of items the ring can hold before it has to grow.
func (r *Ring[T]) Cap() int {
	return len(r.data)
}

// Reset removes every item from the ring. The backing slice is kept, so Cap
// is unchanged, and all of its slots are set to the zero value.
func (r *Ring[T]) Reset() {
	var zero T
	for i := range r.data {
		r.data[i] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
	}
	r.back = 0
	r.len = 0
}

// Nth returns the n-th oldest value (zero-based) in the ring without making
// any change. ok is false, and v is the zero value, when n is out of range.
func (r *Ring[T]) Nth(n int) (v T, ok bool) {
	// An empty backing slice implies r.len == 0, so this single range check
	// also protects the modulo below from a zero divisor.
	if n < 0 || n >= r.len {
		return v, false
	}
	return r.data[(r.back+n)%len(r.data)], true
}

// Dequeue removes and returns the oldest value. ok is false, and v is the
// zero value, when the ring is empty.
func (r *Ring[T]) Dequeue() (v T, ok bool) {
	if r.len == 0 {
		return v, false
	}
	// v is still the zero value here, so the swap both reads the item and
	// clears its slot.
	v, r.data[r.back] = r.data[r.back], v
	r.len--
	r.back = (r.back + 1) % len(r.data)
	return v, true
}

// Enqueue adds an item to the ring, growing the backing slice first if it is
// full.
func (r *Ring[T]) Enqueue(v T) {
	if r.len == len(r.data) {
		r.grow()
	}
	writePos := (r.back + r.len) % len(r.data)
	r.data[writePos] = v
	r.len++
}

// grow replaces the backing slice with one that is chunkSize slots larger and
// moves the stored items to its start, oldest first.
func (r *Ring[T]) grow() {
	chunk := r.chunkSize
	if chunk < 1 {
		// Zero-value Ring (New was not called). Growing by zero slots would
		// leave the slice empty and make Enqueue divide by zero.
		chunk = 1
	}
	s := make([]T, len(r.data)+chunk)
	if r.len > 0 {
		// First segment: from the oldest item up to the end of the slice.
		chunk1 := r.back + r.len
		if chunk1 > len(r.data) {
			chunk1 = len(r.data)
		}
		copied := copy(s, r.data[r.back:chunk1])

		if copied < r.len { // wrapped slice
			// Second segment: the items that wrapped around to the start.
			chunk2 := r.len - copied
			copy(s[copied:], r.data[:chunk2])
		}
	}
	r.back = 0
	r.data = s
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
package ring | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
) | ||
|
||
func TestRing(t *testing.T) { | ||
type op = ringOp[int] | ||
testRing(t, New[int](3), | ||
// noops on empty ring | ||
op{cap: 0, opType: opRst, value: 0, items: []int{}}, | ||
op{cap: 0, opType: opDeq, value: 0, items: []int{}}, | ||
|
||
// basic | ||
op{cap: 3, opType: opEnq, value: 1, items: []int{1}}, | ||
op{cap: 3, opType: opDeq, value: 1, items: []int{}}, | ||
|
||
// wrapping | ||
op{cap: 3, opType: opEnq, value: 2, items: []int{2}}, | ||
op{cap: 3, opType: opEnq, value: 3, items: []int{2, 3}}, | ||
op{cap: 3, opType: opEnq, value: 4, items: []int{2, 3, 4}}, | ||
op{cap: 3, opType: opDeq, value: 2, items: []int{3, 4}}, | ||
op{cap: 3, opType: opDeq, value: 3, items: []int{4}}, | ||
op{cap: 3, opType: opDeq, value: 4, items: []int{}}, | ||
|
||
// resetting | ||
op{cap: 3, opType: opEnq, value: 2, items: []int{2}}, | ||
op{cap: 3, opType: opRst, value: 0, items: []int{}}, | ||
op{cap: 3, opType: opDeq, value: 0, items: []int{}}, | ||
|
||
// growing without wrapping | ||
op{cap: 3, opType: opEnq, value: 5, items: []int{5}}, | ||
op{cap: 3, opType: opEnq, value: 6, items: []int{5, 6}}, | ||
op{cap: 3, opType: opEnq, value: 7, items: []int{5, 6, 7}}, | ||
op{cap: 6, opType: opEnq, value: 8, items: []int{5, 6, 7, 8}}, | ||
op{cap: 6, opType: opRst, value: 0, items: []int{}}, | ||
op{cap: 6, opType: opDeq, value: 0, items: []int{}}, | ||
|
||
// growing and wrapping | ||
op{cap: 6, opType: opEnq, value: 9, items: []int{9}}, | ||
op{cap: 6, opType: opEnq, value: 10, items: []int{9, 10}}, | ||
op{cap: 6, opType: opEnq, value: 11, items: []int{9, 10, 11}}, | ||
op{cap: 6, opType: opEnq, value: 12, items: []int{9, 10, 11, 12}}, | ||
op{cap: 6, opType: opEnq, value: 13, items: []int{9, 10, 11, 12, 13}}, | ||
op{cap: 6, opType: opEnq, value: 14, items: []int{9, 10, 11, 12, 13, 14}}, | ||
op{cap: 6, opType: opDeq, value: 9, items: []int{10, 11, 12, 13, 14}}, | ||
op{cap: 6, opType: opDeq, value: 10, items: []int{11, 12, 13, 14}}, | ||
op{cap: 6, opType: opEnq, value: 15, items: []int{11, 12, 13, 14, 15}}, | ||
op{cap: 6, opType: opEnq, value: 16, items: []int{11, 12, 13, 14, 15, 16}}, | ||
op{cap: 9, opType: opEnq, value: 17, items: []int{11, 12, 13, 14, 15, 16, 17}}, // grows wrapped | ||
op{cap: 9, opType: opDeq, value: 11, items: []int{12, 13, 14, 15, 16, 17}}, | ||
op{cap: 9, opType: opDeq, value: 12, items: []int{13, 14, 15, 16, 17}}, | ||
op{cap: 9, opType: opDeq, value: 13, items: []int{14, 15, 16, 17}}, | ||
op{cap: 9, opType: opDeq, value: 14, items: []int{15, 16, 17}}, | ||
op{cap: 9, opType: opDeq, value: 15, items: []int{16, 17}}, | ||
op{cap: 9, opType: opDeq, value: 16, items: []int{17}}, | ||
op{cap: 9, opType: opDeq, value: 17, items: []int{}}, | ||
op{cap: 9, opType: opDeq, value: 0, items: []int{}}, | ||
) | ||
|
||
t.Run("should panic on invalid chunkSize", func(t *testing.T) { | ||
defer func() { | ||
if r := recover(); r == nil { | ||
t.Fatalf("should have panicked") | ||
} | ||
}() | ||
New[int](0) | ||
}) | ||
} | ||
|
||
const ( | ||
opEnq = iota // enqueue an item | ||
opDeq // dequeue an item and an item was available | ||
opRst // reset | ||
) | ||
|
||
type ringOp[T comparable] struct { | ||
cap int // expected values | ||
opType int // opEnq or opDeq | ||
value T // value to enqueue or value expected for dequeue; ignored for opRst | ||
items []T // items left | ||
} | ||
|
||
func testRing[T comparable](t *testing.T, r *Ring[T], ops ...ringOp[T]) { | ||
for i, op := range ops { | ||
testOK := t.Run(fmt.Sprintf("opIndex=%v", i), func(t *testing.T) { | ||
testRingOp(t, r, op) | ||
}) | ||
if !testOK { | ||
return | ||
} | ||
} | ||
} | ||
|
||
func testRingOp[T comparable](t *testing.T, r *Ring[T], op ringOp[T]) { | ||
var zero T | ||
switch op.opType { | ||
case opEnq: | ||
r.Enqueue(op.value) | ||
case opDeq: | ||
shouldSucceed := r.Len() > 0 | ||
v, ok := r.Dequeue() | ||
switch { | ||
case ok != shouldSucceed: | ||
t.Fatalf("should have succeeded: %v", shouldSucceed) | ||
case ok && v != op.value: | ||
t.Fatalf("expected value: %v; got: %v", op.value, v) | ||
case !ok && v != zero: | ||
t.Fatalf("expected zero value; got: %v", v) | ||
} | ||
case opRst: | ||
r.Reset() | ||
} | ||
if c := r.Cap(); c != op.cap { | ||
t.Fatalf("expected cap: %v; got: %v", op.cap, c) | ||
} | ||
if l := r.Len(); l != len(op.items) { | ||
t.Errorf("expected Len(): %v; got: %v", len(op.items), l) | ||
} | ||
var got []T | ||
for i := 0; ; i++ { | ||
v, ok := r.Nth(i) | ||
if !ok { | ||
break | ||
} | ||
got = append(got, v) | ||
} | ||
if l := len(got); l != len(op.items) { | ||
t.Errorf("expected items: %v\ngot items: %v", op.items, got) | ||
} | ||
for i := range op.items { | ||
if op.items[i] != got[i] { | ||
t.Fatalf("expected items: %v\ngot items: %v", op.items, got) | ||
} | ||
} | ||
if v, ok := r.Nth(len(op.items)); ok || v != zero { | ||
t.Fatalf("expected no more items, got: v=%v; ok=%v", v, ok) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package parser | ||
|
||
import "testing" | ||
|
||
func BenchmarkParser(b *testing.B) { | ||
const source = ` | ||
/* | ||
Showing worst case scenario | ||
*/ | ||
let value = trim("contains escapes \n\"\\ \U0001F600 and non ASCII ñ"); // inline comment | ||
len(value) == 0x2A | ||
// let's introduce an error too | ||
whatever | ||
` | ||
b.ReportAllocs() | ||
p := new(Parser) | ||
for i := 0; i < b.N; i++ { | ||
p.Parse(source, nil) | ||
} | ||
Comment on lines
+16
to
+19
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As the previous code did not have a reusable parser, the code that I ran to benchmark the old code was: for i := 0; i < b.N; i++ {
Parse(source)
} |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe just abandon Source altogether? And simply use string?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree, I can do that really quick or in a separate PR if you prefer for easier reviewing.