From 8e4afa3a82cad54d85d0a0cdfbfc1b47957a1b21 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Mon, 26 Mar 2018 13:28:11 -0700 Subject: [PATCH 1/9] broke up lexer.go into multiple files Signed-off-by: Tim Henderson --- lexer.go | 145 ----------------------------------------------------- scanner.go | 105 ++++++++++++++++++++++++++++++++++++++ token.go | 52 +++++++++++++++++++ 3 files changed, 157 insertions(+), 145 deletions(-) create mode 100644 scanner.go create mode 100644 token.go diff --git a/lexer.go b/lexer.go index ab6b219..3009966 100644 --- a/lexer.go +++ b/lexer.go @@ -1,7 +1,6 @@ package lexmachine import ( - "bytes" "fmt" ) @@ -12,52 +11,6 @@ import ( "github.com/timtadh/lexmachine/machines" ) -// Token is an optional token representation you could use to represent the -// tokens produced by a lexer built with lexmachine. -// -// Here is an example for constructing a lexer Action which turns a -// machines.Match struct into a token using the scanners Token helper function. -// -// func token(name string, tokenIds map[string]int) lex.Action { -// return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { -// return s.Token(tokenIds[name], string(m.Bytes), m), nil -// } -// } -// -type Token struct { - Type int - Value interface{} - Lexeme []byte - TC int - StartLine int - StartColumn int - EndLine int - EndColumn int -} - -// Equals checks the equality of two tokens ignoring the Value field. -func (t *Token) Equals(other *Token) bool { - if t == nil && other == nil { - return true - } else if t == nil { - return false - } else if other == nil { - return false - } - return t.TC == other.TC && - t.StartLine == other.StartLine && - t.StartColumn == other.StartColumn && - t.EndLine == other.EndLine && - t.EndColumn == other.EndColumn && - bytes.Equal(t.Lexeme, other.Lexeme) && - t.Type == other.Type -} - -// String formats the token in a human readable form. -func (t *Token) String() string { - return fmt.Sprintf("%d %q %d (%d, %d)-(%d, %d)", t.Type, t.Value, t.TC, t.StartLine, t.StartColumn, t.EndLine, t.EndColumn) -} - // An Action is a function which get called when the Scanner finds a match // during the lexing process. They turn a low level machines.Match struct into // a token for the users program. As different compilers/interpretters/parsers @@ -84,104 +37,6 @@ type Lexer struct { dfa *dfapkg.DFA } -// Scanner tokenizes a byte string based on the patterns provided to the lexer -// object which constructed the scanner. This object works as functional -// iterator using the Next method. -// -// Example -// -// lexer, err := CreateLexer() -// if err != nil { -// return err -// } -// scanner, err := lexer.Scanner(someBytes) -// if err != nil { -// return err -// } -// for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() { -// if err != nil { -// return err -// } -// fmt.Println(tok) -// } -// -type Scanner struct { - lexer *Lexer - matches map[int]int - scan machines.Scanner - Text []byte - TC int - pTC int - sLine int - sColumn int - eLine int - eColumn int -} - -// Next iterates through the string being scanned returning one token at a time -// until either an error is encountered or the end of the string is reached. -// The token is returned by the tok value. An error is indicated by err. -// Finally, eos (a bool) indicates the End Of String when it returns as true. -// -// Example -// -// for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() { -// if err != nil { -// // handle the error and exit the loop. For example: -// return err -// } -// // do some processing on tok or store it somewhere. eg. -// fmt.Println(tok) -// } -// -// One useful error type which could be returned by Next() is a -// match.UnconsumedInput which provides the position information for where in -// the text the scanning failed. -// -// For more information on functional iterators see: -// http://hackthology.com/functional-iteration-in-go.html -func (s *Scanner) Next() (tok interface{}, err error, eos bool) { - var token interface{} - for token == nil { - tc, match, err, scan := s.scan(s.TC) - if scan == nil { - return nil, nil, true - } else if err != nil { - return nil, err, false - } else if match == nil { - return nil, fmt.Errorf("No match but no error"), false - } - s.scan = scan - s.pTC = s.TC - s.TC = tc - s.sLine = match.StartLine - s.sColumn = match.StartColumn - s.eLine = match.EndLine - s.eColumn = match.EndColumn - - pattern := s.lexer.patterns[s.matches[match.PC]] - token, err = pattern.action(s, match) - if err != nil { - return nil, err, false - } - } - return token, nil, false -} - -// Token is a helper function for constructing a Token type inside of a Action. -func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token { - return &Token{ - Type: typ, - Value: value, - Lexeme: m.Bytes, - TC: m.TC, - StartLine: m.StartLine, - StartColumn: m.StartColumn, - EndLine: m.EndLine, - EndColumn: m.EndColumn, - } -} - // NewLexer constructs a new lexer object. func NewLexer() *Lexer { return &Lexer{} diff --git a/scanner.go b/scanner.go new file mode 100644 index 0000000..e8f72dd --- /dev/null +++ b/scanner.go @@ -0,0 +1,105 @@ +package lexmachine + +import ( + "fmt" + + "github.com/timtadh/lexmachine/machines" +) + +// Scanner tokenizes a byte string based on the patterns provided to the lexer +// object which constructed the scanner. This object works as functional +// iterator using the Next method. +// +// Example +// +// lexer, err := CreateLexer() +// if err != nil { +// return err +// } +// scanner, err := lexer.Scanner(someBytes) +// if err != nil { +// return err +// } +// for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() { +// if err != nil { +// return err +// } +// fmt.Println(tok) +// } +// +type Scanner struct { + lexer *Lexer + matches map[int]int + scan machines.Scanner + Text []byte + TC int + pTC int + sLine int + sColumn int + eLine int + eColumn int +} + +// Next iterates through the string being scanned returning one token at a time +// until either an error is encountered or the end of the string is reached. +// The token is returned by the tok value. An error is indicated by err. +// Finally, eos (a bool) indicates the End Of String when it returns as true. +// +// Example +// +// for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() { +// if err != nil { +// // handle the error and exit the loop. For example: +// return err +// } +// // do some processing on tok or store it somewhere. eg. +// fmt.Println(tok) +// } +// +// One useful error type which could be returned by Next() is a +// match.UnconsumedInput which provides the position information for where in +// the text the scanning failed. +// +// For more information on functional iterators see: +// http://hackthology.com/functional-iteration-in-go.html +func (s *Scanner) Next() (tok interface{}, err error, eos bool) { + var token interface{} + for token == nil { + tc, match, err, scan := s.scan(s.TC) + if scan == nil { + return nil, nil, true + } else if err != nil { + return nil, err, false + } else if match == nil { + return nil, fmt.Errorf("No match but no error"), false + } + s.scan = scan + s.pTC = s.TC + s.TC = tc + s.sLine = match.StartLine + s.sColumn = match.StartColumn + s.eLine = match.EndLine + s.eColumn = match.EndColumn + + pattern := s.lexer.patterns[s.matches[match.PC]] + token, err = pattern.action(s, match) + if err != nil { + return nil, err, false + } + } + return token, nil, false +} + +// Token is a helper function for constructing a Token type inside of a Action. +func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token { + return &Token{ + Type: typ, + Value: value, + Lexeme: m.Bytes, + TC: m.TC, + StartLine: m.StartLine, + StartColumn: m.StartColumn, + EndLine: m.EndLine, + EndColumn: m.EndColumn, + } +} diff --git a/token.go b/token.go new file mode 100644 index 0000000..d596b24 --- /dev/null +++ b/token.go @@ -0,0 +1,52 @@ +package lexmachine + +import ( + "bytes" + "fmt" +) + +// Token is an optional token representation you could use to represent the +// tokens produced by a lexer built with lexmachine. +// +// Here is an example for constructing a lexer Action which turns a +// machines.Match struct into a token using the scanners Token helper function. +// +// func token(name string, tokenIds map[string]int) lex.Action { +// return func(s *lex.Scanner, m *machines.Match) (interface{}, error) { +// return s.Token(tokenIds[name], string(m.Bytes), m), nil +// } +// } +// +type Token struct { + Type int + Value interface{} + Lexeme []byte + TC int + StartLine int + StartColumn int + EndLine int + EndColumn int +} + +// Equals checks the equality of two tokens ignoring the Value field. +func (t *Token) Equals(other *Token) bool { + if t == nil && other == nil { + return true + } else if t == nil { + return false + } else if other == nil { + return false + } + return t.TC == other.TC && + t.StartLine == other.StartLine && + t.StartColumn == other.StartColumn && + t.EndLine == other.EndLine && + t.EndColumn == other.EndColumn && + bytes.Equal(t.Lexeme, other.Lexeme) && + t.Type == other.Type +} + +// String formats the token in a human readable form. +func (t *Token) String() string { + return fmt.Sprintf("%d %q %d (%d, %d)-(%d, %d)", t.Type, t.Value, t.TC, t.StartLine, t.StartColumn, t.EndLine, t.EndColumn) +} From 9c9840feaeb10288ddfe6ae568837ee01ff43981 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Mon, 26 Mar 2018 13:28:35 -0700 Subject: [PATCH 2/9] add stream/ to represent byte streams Signed-off-by: Tim Henderson --- stream.go | 1 + stream/buffered.go | 189 +++++++++++++++++++++++++++ stream/stream.go | 54 ++++++++ stream/stream_test.go | 290 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 534 insertions(+) create mode 100644 stream.go create mode 100644 stream/buffered.go create mode 100644 stream/stream.go create mode 100644 stream/stream_test.go diff --git a/stream.go b/stream.go new file mode 100644 index 0000000..753de52 --- /dev/null +++ b/stream.go @@ -0,0 +1 @@ +package lexmachine diff --git a/stream/buffered.go b/stream/buffered.go new file mode 100644 index 0000000..a69f8ce --- /dev/null +++ b/stream/buffered.go @@ -0,0 +1,189 @@ +package stream + +import ( + "bufio" + "fmt" + "io" + "sync" +) + +type bufferedStream struct { + lock sync.Mutex + r *bufio.Reader + tc int + line int + column int + started bool + eos bool + buf []byte + err error +} + +func BufferedStream(r io.Reader) Stream { + b := &bufferedStream{ + r: bufio.NewReader(r), + tc: -1, + line: 1, + column: 0, + } + return b +} + +func (b *bufferedStream) Byte() byte { + b.lock.Lock() + defer b.lock.Unlock() + if !b.started { + panic(fmt.Errorf("Call to Byte() before first call to Advance")) + } else if b.eos { + panic(fmt.Errorf("Call to Byte() after first call to Advance returned false")) + } + return b.buf[0] +} + +func (b *bufferedStream) Position() (tc, line, column int) { + b.lock.Lock() + defer b.lock.Unlock() + if !b.started { + panic(fmt.Errorf("Call to Position() before first call to Advance")) + } else if b.eos { + panic(fmt.Errorf("Call to Position() after first call to Advance returned false")) + } + return b.tc, b.line, b.column +} + +func (b *bufferedStream) Peek(i int) (char byte, has bool) { + b.lock.Lock() + defer b.lock.Unlock() + if b.eos { + panic(fmt.Errorf("Call to Byte() after first call to Advance returned false")) + } + if i <= 0 { + panic(fmt.Errorf("Peek() must be called with positive lookahead got %d", i)) + } + // the "cursor" technically starts at -1, this does that adjustment + if !b.started { + i-- + } + if len(b.buf) >= i+1 { + return b.buf[i], true + } + if !b.read(i) { + return 0, false + } + return b.buf[i], true +} + +func (b *bufferedStream) Started() bool { + b.lock.Lock() + defer b.lock.Unlock() + return b.eos +} + +func (b *bufferedStream) EOS() bool { + b.lock.Lock() + defer b.lock.Unlock() + return b.eos +} + +func (b *bufferedStream) Err() error { + b.lock.Lock() + defer b.lock.Unlock() + if !b.started { + panic(fmt.Errorf("Call to Err() before first call to Advance")) + } else if !b.eos { + panic(fmt.Errorf("Call to Err() before call to Advance returned false")) + } + return b.err +} + +func (b *bufferedStream) Advance(i int) bool { + b.lock.Lock() + defer b.lock.Unlock() + return b.advance(i) +} + +func (b *bufferedStream) advance(i int) bool { + if i <= 0 { + panic(fmt.Errorf("Advance() must be called with positive move got %d", i)) + } + // the "cursor" technically starts at -1, this does that adjustment + if !b.started { + b.started = true + i-- + // ensures a read happens even if i==0 when the buf is empty + if len(b.buf) <= 0 && !b.read(1) { + b.eos = true + return false + } + } + i = i - b.trimBuffer(i) + if len(b.buf) <= i { + if !b.read(i) { + b.eos = true + return false + } + } + if i > 0 { + i = i - b.trimBuffer(i) + if i != 0 { + panic(fmt.Errorf("i != 0 (i = %d)", i)) + } + } + b.trackPos(b.buf[0]) + return true +} + +// trims the buffer by up i bytes and returns the number of +// bytes trimmed. +func (b *bufferedStream) trimBuffer(i int) int { + for j := 1; j < i && j < len(b.buf); j++ { + b.trackPos(b.buf[j]) + } + if len(b.buf) > i { + // we already recorded the position + // of b.buf[0]. we need to track all the chars + // we are dropping by the skip + copy(b.buf[:len(b.buf)-i], b.buf[i:]) + b.buf = b.buf[:len(b.buf)-i] + return i + } else { + trimmed := len(b.buf) + b.buf = b.buf[:0] + return trimmed + } + return 0 +} + +// updates the position information for the given character. +// only call once per character in the stream. +func (b *bufferedStream) trackPos(char byte) { + b.tc++ + if char == '\n' { + b.line++ + b.column = 0 + } else { + b.column++ + } +} + +func (b *bufferedStream) read(i int) bool { + if b.eos { + return false + } + buf := make([]byte, 4096) + for { + n, err := b.r.Read(buf) + if err != nil { + if err != io.EOF { + // only set err if it is an unexpected error. + b.err = err + } + return false + } + b.buf = append(b.buf, buf[:n]...) + if len(b.buf) >= i+1 { + break + } + } + return true +} diff --git a/stream/stream.go b/stream/stream.go new file mode 100644 index 0000000..9759cce --- /dev/null +++ b/stream/stream.go @@ -0,0 +1,54 @@ +package stream + +// Stream represents a stream of bytes. Its interface is analogous to +// bufio.Scanner. Here is an example for how to read all the bytes in a stream +// (and print them one by one): +// +// s := BufferedStream(reader) +// for s.Advance(1) { +// fmt.Println(s.Byte()) +// } +// if s.Err() != nil { +// return s.Err() +// } +// +type Stream interface { + + // Byte returns the current byte in the stream. This method will panic if + // Advance has not been called before this method or Advance has returned + // false. + Byte() byte + + // Position returns the position of the current byte: text counter, line, + // and column. This method will panic if Advance has not been called before + // this method or Advance has returned false. + Position() (tc, line, column int) + + // Peek returns byte at the current cursor + the lookahead in the stream if + // one exists. If lookahead == 0, Peek will panic, if lookahead == 1, it + // returns the next byte, and so on. Peek does not advance the cursor. If + // there are no further bytes in the stream (or lookahead causes a read + // past the end of the stream) Peek returns has == false. You may call this + // method before Advance. + Peek(lookahead int) (char byte, has bool) + + // Advance moves the cursor i bytes forward in the stream. If there is a + // byte to read it returns true. If it reaches the end of the stream (EOS) + // it returns false. Advance with i > than number of bytes remaining moves + // the cursor to the end of stream (may be less than i) and returns false + // (as you cannot read past the end of the stream). Advance must be called + // with positive movement otherwise it will panic. + Advance(i int) bool + + // Started returns true if at least 1 call to Advance has been made. + Started() bool + + // EOS returns true if the stream has reached the end of the stream. + EOS() bool + + // Err returns an error if there was an error reading from the underlying + // source of the bytes. Panics if called before Advance returns false. + // Err() will never return io.EOF (it will be nil in this case -- following + // the behavior of ioutil.ReadAll) + Err() error +} diff --git a/stream/stream_test.go b/stream/stream_test.go new file mode 100644 index 0000000..19a13cd --- /dev/null +++ b/stream/stream_test.go @@ -0,0 +1,290 @@ +package stream + +import ( + "bytes" + "testing" +) + +func TestReadFullStream(t *testing.T) { + text := "hello world" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for s.Advance(1) { + if err := buf.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != text { + t.Fatalf("expect %q got %q", text, buf.String()) + } +} + +func TestReadEveryOther(t *testing.T) { + text := "hello world" + expected := "el ol" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for s.Advance(2) { + if err := buf.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != expected { + t.Fatalf("expect %q got %q", expected, buf.String()) + } +} + +func TestReadEvery3(t *testing.T) { + text := "hello world" + expected := "l r" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for s.Advance(3) { + if err := buf.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != expected { + t.Fatalf("expect %q got %q", expected, buf.String()) + } +} + +func TestPeekTillW(t *testing.T) { + text := "hello world" + expected := "world" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if b == 'w' { + s.Advance(i) + break + } + } + if s.Byte() != 'w' { + t.Fatalf("expected w got %v", s.Byte()) + } + for { + if err := buf.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + if !s.Advance(1) { + break + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != expected { + t.Fatalf("expect %q got %q", expected, buf.String()) + } +} + +func TestPeekTillWThenL(t *testing.T) { + text := "hello world" + expected := "ld" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if b == 'w' { + s.Advance(i) + break + } + } + if s.Byte() != 'w' { + t.Fatalf("expected w got %v", s.Byte()) + } + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if b == 'l' { + s.Advance(i) + break + } + } + if s.Byte() != 'l' { + t.Fatalf("expected l got %v", s.Byte()) + } + for { + if err := buf.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + if !s.Advance(1) { + break + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != expected { + t.Fatalf("expect %q got %q", expected, buf.String()) + } +} + +func TestPeekTillWThenLThenEnd(t *testing.T) { + text := "hello world" + expected := "" + var buf bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if b == 'w' { + s.Advance(i) + break + } + } + if s.Byte() != 'w' { + t.Fatalf("expected w got %v", s.Byte()) + } + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if b == 'l' { + s.Advance(i) + break + } + } + if s.Byte() != 'l' { + t.Fatalf("expected l got %v", s.Byte()) + } + for i := 1; ; i++ { + _, has := s.Peek(i) + if !has { + s.Advance(i) + break + } + } + if !s.EOS() { + t.Fatalf("expected EOS") + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if buf.String() != expected { + t.Fatalf("expect %q got %q", expected, buf.String()) + } +} + +func TestPeekThenReadFullStream(t *testing.T) { + text := "hello world" + var peek bytes.Buffer + var read bytes.Buffer + s := BufferedStream(bytes.NewBufferString(text)) + for i := 1; ; i++ { + b, has := s.Peek(i) + if !has { + break + } + if err := peek.WriteByte(b); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + } + for s.Advance(1) { + if err := read.WriteByte(s.Byte()); err != nil { + if err != nil { + t.Fatalf("err writing %v", err) + } + } + } + if s.Err() != nil { + t.Fatalf("stream err %v", s.Err()) + } + if peek.String() != text { + t.Fatalf("expect %q got %q", text, peek.String()) + } + if read.String() != text { + t.Fatalf("expect %q got %q", text, read.String()) + } +} + +func TestLineColumns(t *testing.T) { + text := `b + this + is + wizard +` + var expected = []struct { + tc, line, column int + char byte + }{ + {0, 1, 1, 'b'}, + {1, 2, 0, '\n'}, + {2, 2, 1, '\t'}, + {3, 2, 2, 't'}, + {4, 2, 3, 'h'}, + {5, 2, 4, 'i'}, + {6, 2, 5, 's'}, + {7, 3, 0, '\n'}, + {8, 3, 1, '\t'}, + {9, 3, 2, 'i'}, + {10, 3, 3, 's'}, + {11, 4, 0, '\n'}, + {12, 4, 1, '\t'}, + {13, 4, 2, 'w'}, + {14, 4, 3, 'i'}, + {15, 4, 4, 'z'}, + {16, 4, 5, 'a'}, + {17, 4, 6, 'r'}, + {18, 4, 7, 'd'}, + {19, 5, 0, '\n'}, + } + s := BufferedStream(bytes.NewBufferString(text)) + // pre-peek everything just to futz with the interior state + for i := 1; ; i++ { + _, has := s.Peek(i) + if !has { + break + } + } + for i := 0; s.Advance(1); i++ { + tc, line, column := s.Position() + char := s.Byte() + if char != expected[i].char { + t.Fatalf("got %v expected %v", char, expected[i].char) + } + if tc != expected[i].tc { + t.Fatalf("got %v expected %v", tc, expected[i].tc) + } + if line != expected[i].line { + t.Fatalf("got %v expected %v", line, expected[i].line) + } + if column != expected[i].column { + t.Fatalf("got %v expected %v", column, expected[i].column) + } + } +} From 121765db69dabee2d68b864368fd09ffeee684b5 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Tue, 27 Mar 2018 07:22:04 -0700 Subject: [PATCH 3/9] simplified the scanner Signed-off-by: Tim Henderson --- scanner.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scanner.go b/scanner.go index e8f72dd..9db5e11 100644 --- a/scanner.go +++ b/scanner.go @@ -33,11 +33,6 @@ type Scanner struct { scan machines.Scanner Text []byte TC int - pTC int - sLine int - sColumn int - eLine int - eColumn int } // Next iterates through the string being scanned returning one token at a time @@ -74,12 +69,7 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) { return nil, fmt.Errorf("No match but no error"), false } s.scan = scan - s.pTC = s.TC s.TC = tc - s.sLine = match.StartLine - s.sColumn = match.StartColumn - s.eLine = match.EndLine - s.eColumn = match.EndColumn pattern := s.lexer.patterns[s.matches[match.PC]] token, err = pattern.action(s, match) From 83f7e7df7e3832e1d702cd8611980065acd6734c Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Tue, 27 Mar 2018 07:22:32 -0700 Subject: [PATCH 4/9] stream now keeps a character buffer Signed-off-by: Tim Henderson --- stream/buffered.go | 57 +++++++++++++++++++++++++++++++------------ stream/stream.go | 22 ++++++++++++++++- stream/stream_test.go | 51 +++++++++++++++++++++++++++++++++----- 3 files changed, 107 insertions(+), 23 deletions(-) diff --git a/stream/buffered.go b/stream/buffered.go index a69f8ce..607c2ef 100644 --- a/stream/buffered.go +++ b/stream/buffered.go @@ -1,7 +1,6 @@ package stream import ( - "bufio" "fmt" "io" "sync" @@ -9,19 +8,20 @@ import ( type bufferedStream struct { lock sync.Mutex - r *bufio.Reader + r io.Reader tc int line int column int started bool eos bool - buf []byte + buf []Character err error } +// BufferedStream makes a Stream which is backed by an expandable buffer. func BufferedStream(r io.Reader) Stream { b := &bufferedStream{ - r: bufio.NewReader(r), + r: r, tc: -1, line: 1, column: 0, @@ -29,7 +29,20 @@ func BufferedStream(r io.Reader) Stream { return b } +// Byte returns the byte at the cursor func (b *bufferedStream) Byte() byte { + b.lock.Lock() + defer b.lock.Unlock() + if !b.started { + panic(fmt.Errorf("Call to Byte() before first call to Advance")) + } else if b.eos { + panic(fmt.Errorf("Call to Byte() after first call to Advance returned false")) + } + return b.buf[0].Byte +} + +// Character returns the character at the cursor +func (b *bufferedStream) Character() Character { b.lock.Lock() defer b.lock.Unlock() if !b.started { @@ -40,6 +53,7 @@ func (b *bufferedStream) Byte() byte { return b.buf[0] } +// Position gives the current position of the cursor func (b *bufferedStream) Position() (tc, line, column int) { b.lock.Lock() defer b.lock.Unlock() @@ -48,10 +62,11 @@ func (b *bufferedStream) Position() (tc, line, column int) { } else if b.eos { panic(fmt.Errorf("Call to Position() after first call to Advance returned false")) } - return b.tc, b.line, b.column + return b.buf[0].TC, b.buf[0].Line, b.buf[0].Column } -func (b *bufferedStream) Peek(i int) (char byte, has bool) { +// Peek gets the character at lookahead i +func (b *bufferedStream) Peek(i int) (char Character, has bool) { b.lock.Lock() defer b.lock.Unlock() if b.eos { @@ -68,23 +83,27 @@ func (b *bufferedStream) Peek(i int) (char byte, has bool) { return b.buf[i], true } if !b.read(i) { - return 0, false + return Character{}, false } return b.buf[i], true } +// Started indicates if Advance has been called at least once. func (b *bufferedStream) Started() bool { b.lock.Lock() defer b.lock.Unlock() return b.eos } +// EOS indicates whether the stream has reached End Of Stream func (b *bufferedStream) EOS() bool { b.lock.Lock() defer b.lock.Unlock() return b.eos } +// Err returns the error from the underlying io.Reader if io.Read() returned +// a non-EOF error. func (b *bufferedStream) Err() error { b.lock.Lock() defer b.lock.Unlock() @@ -96,12 +115,14 @@ func (b *bufferedStream) Err() error { return b.err } +// Advance moves the cursor forward by i func (b *bufferedStream) Advance(i int) bool { b.lock.Lock() defer b.lock.Unlock() return b.advance(i) } +// advance moves the cursor forward by i func (b *bufferedStream) advance(i int) bool { if i <= 0 { panic(fmt.Errorf("Advance() must be called with positive move got %d", i)) @@ -129,16 +150,11 @@ func (b *bufferedStream) advance(i int) bool { panic(fmt.Errorf("i != 0 (i = %d)", i)) } } - b.trackPos(b.buf[0]) return true } -// trims the buffer by up i bytes and returns the number of -// bytes trimmed. +// trims the buffer by up i bytes and returns the number of bytes trimmed. func (b *bufferedStream) trimBuffer(i int) int { - for j := 1; j < i && j < len(b.buf); j++ { - b.trackPos(b.buf[j]) - } if len(b.buf) > i { // we already recorded the position // of b.buf[0]. we need to track all the chars @@ -154,8 +170,8 @@ func (b *bufferedStream) trimBuffer(i int) int { return 0 } -// updates the position information for the given character. -// only call once per character in the stream. +// updates the position information for the given character. only call once +// per character in the stream. func (b *bufferedStream) trackPos(char byte) { b.tc++ if char == '\n' { @@ -166,6 +182,7 @@ func (b *bufferedStream) trackPos(char byte) { } } +// reads at least i bytes from the underlying reader into the buffer. func (b *bufferedStream) read(i int) bool { if b.eos { return false @@ -180,7 +197,15 @@ func (b *bufferedStream) read(i int) bool { } return false } - b.buf = append(b.buf, buf[:n]...) + for _, c := range buf[:n] { + b.trackPos(c) + b.buf = append(b.buf, Character{ + Byte: c, + TC: b.tc, + Line: b.line, + Column: b.column, + }) + } if len(b.buf) >= i+1 { break } diff --git a/stream/stream.go b/stream/stream.go index 9759cce..12a2f02 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -1,5 +1,7 @@ package stream +import "fmt" + // Stream represents a stream of bytes. Its interface is analogous to // bufio.Scanner. Here is an example for how to read all the bytes in a stream // (and print them one by one): @@ -19,6 +21,11 @@ type Stream interface { // false. Byte() byte + // Character returns the current byte in the stream. This method will panic + // if Advance has not been called before this method or Advance has + // returned false. + Character() Character + // Position returns the position of the current byte: text counter, line, // and column. This method will panic if Advance has not been called before // this method or Advance has returned false. @@ -30,7 +37,7 @@ type Stream interface { // there are no further bytes in the stream (or lookahead causes a read // past the end of the stream) Peek returns has == false. You may call this // method before Advance. - Peek(lookahead int) (char byte, has bool) + Peek(lookahead int) (char Character, has bool) // Advance moves the cursor i bytes forward in the stream. If there is a // byte to read it returns true. If it reaches the end of the stream (EOS) @@ -52,3 +59,16 @@ type Stream interface { // the behavior of ioutil.ReadAll) Err() error } + +// Character represents one byte in a stream with position information. +type Character struct { + Byte byte + TC int + Line int + Column int +} + +// String humanizes the character +func (c Character) String() string { + return fmt.Sprintf("<%q tc:%d @ %d:%d>", c.Byte, c.TC, c.Line, c.Column) +} diff --git a/stream/stream_test.go b/stream/stream_test.go index 19a13cd..bc4b004 100644 --- a/stream/stream_test.go +++ b/stream/stream_test.go @@ -74,7 +74,7 @@ func TestPeekTillW(t *testing.T) { if !has { break } - if b == 'w' { + if b.Byte == 'w' { s.Advance(i) break } @@ -110,7 +110,7 @@ func TestPeekTillWThenL(t *testing.T) { if !has { break } - if b == 'w' { + if b.Byte == 'w' { s.Advance(i) break } @@ -123,7 +123,7 @@ func TestPeekTillWThenL(t *testing.T) { if !has { break } - if b == 'l' { + if b.Byte == 'l' { s.Advance(i) break } @@ -159,7 +159,7 @@ func TestPeekTillWThenLThenEnd(t *testing.T) { if !has { break } - if b == 'w' { + if b.Byte == 'w' { s.Advance(i) break } @@ -172,7 +172,7 @@ func TestPeekTillWThenLThenEnd(t *testing.T) { if !has { break } - if b == 'l' { + if b.Byte == 'l' { s.Advance(i) break } @@ -208,7 +208,7 @@ func TestPeekThenReadFullStream(t *testing.T) { if !has { break } - if err := peek.WriteByte(b); err != nil { + if err := peek.WriteByte(b.Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -288,3 +288,42 @@ func TestLineColumns(t *testing.T) { } } } + +func TestEveryOtherLineColumns(t *testing.T) { + text := `b + this + is + wizard +` + var expected = []struct { + tc, line, column int + char byte + }{ + {1, 2, 0, '\n'}, + {3, 2, 2, 't'}, + {5, 2, 4, 'i'}, + {7, 3, 0, '\n'}, + {9, 3, 2, 'i'}, + {11, 4, 0, '\n'}, + {13, 4, 2, 'w'}, + {15, 4, 4, 'z'}, + {17, 4, 6, 'r'}, + {19, 5, 0, '\n'}, + } + s := BufferedStream(bytes.NewBufferString(text)) + for i := 0; s.Advance(2); i++ { + c := s.Character() + if c.Byte != expected[i].char { + t.Fatalf("got %v expected %v", c.Byte, expected[i].char) + } + if c.TC != expected[i].tc { + t.Fatalf("got %v expected %v", c.TC, expected[i].tc) + } + if c.Line != expected[i].line { + t.Fatalf("got %v expected %v", c.Line, expected[i].line) + } + if c.Column != expected[i].column { + t.Fatalf("got %v expected %v", c.Column, expected[i].column) + } + } +} From cce23c42cd51be5956b05ce242c92ca47a43aeae Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Tue, 27 Mar 2018 07:22:55 -0700 Subject: [PATCH 5/9] initial cut at DFA stream machine Signed-off-by: Tim Henderson --- stream_machines/dfa_machine.go | 97 ++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 stream_machines/dfa_machine.go diff --git a/stream_machines/dfa_machine.go b/stream_machines/dfa_machine.go new file mode 100644 index 0000000..33e22f6 --- /dev/null +++ b/stream_machines/dfa_machine.go @@ -0,0 +1,97 @@ +package stream_machines + +import ( + "github.com/timtadh/lexmachine/machines" + "github.com/timtadh/lexmachine/stream" +) + +type Scanner func() (*machines.Match, error, Scanner) + +// DFALexerEngine does the actual tokenization of the byte slice text using the +// DFA state machine. If the lexing process fails the Scanner will return +// an UnconsumedInput error. +func DFALexerEngine(startState, errorState int, trans machines.DFATrans, accepting machines.DFAAccepting, text stream.Stream) Scanner { + var scan Scanner + scan = func() (*machines.Match, error, Scanner) { + if text.EOS() { + return nil, nil, nil + } + buf := make([]stream.Character, 0, 10) + matchID := -1 + matchLH := -1 + state := startState + if match, has := accepting[state]; has { + matchID = match + matchLH = -1 + } + if !text.Started() { + if !text.Advance(1) { + return nil, nil, nil + } + } + startChar := text.Character() + buf = append(buf, startChar) + state = trans[state][startChar.Byte] + if match, has := accepting[state]; has { + matchID = match + matchLH = 0 + } + for lh := 1; state != errorState; lh++ { + c, has := text.Peek(lh) + if !has { + break + } + buf = append(buf, c) + state = trans[state][c.Byte] + if match, has := accepting[state]; has { + matchID = match + matchLH = lh + } + } + if match, has := accepting[state]; has { + matchID = match + matchLH = len(buf) - 1 + } + if matchLH == -1 && matchID > -1 { + err := &machines.EmptyMatchError{ + MatchID: matchID, + TC: buf[0].TC, + Line: buf[0].Line, + Column: buf[0].Column, + } + return nil, err, scan + } else if matchID > -1 && matchLH >= 0 { + lexeme := make([]byte, 0, matchLH+1) + for _, c := range buf[:matchLH+1] { + lexeme = append(lexeme, c.Byte) + } + match := &machines.Match{ + PC: matchID, + TC: buf[0].TC, + StartLine: buf[0].Line, + StartColumn: buf[0].Column, + EndLine: buf[matchLH].Line, + EndColumn: buf[matchLH].Column, + Bytes: lexeme, + } + text.Advance(matchLH + 1) + return match, nil, scan + } else { + lexeme := make([]byte, 0, len(buf)) + for _, c := range buf { + lexeme = append(lexeme, c.Byte) + } + err := &machines.UnconsumedInput{ + StartTC: buf[0].TC, + FailTC: buf[len(buf)-1].TC, + StartLine: buf[0].Line, + StartColumn: buf[0].Column, + FailLine: buf[len(buf)-1].Line, + FailColumn: buf[len(buf)-1].Column, + Text: lexeme, + } + return nil, err, scan + } + } + return scan +} From 05cde35d8e43ad011173d6137ec2428ab817c983 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Tue, 27 Mar 2018 08:07:31 -0700 Subject: [PATCH 6/9] prototype changes for support stream lexing Signed-off-by: Tim Henderson --- lexer.go | 51 +++++++++++++++++++------- lexer_test.go | 91 +++++++++++++++++++++++++++++++--------------- scanner.go | 11 ++++-- stream.go | 79 ++++++++++++++++++++++++++++++++++++++++ stream/buffered.go | 2 +- 5 files changed, 187 insertions(+), 47 deletions(-) diff --git a/lexer.go b/lexer.go index 3009966..9dedc43 100644 --- a/lexer.go +++ b/lexer.go @@ -2,27 +2,27 @@ package lexmachine import ( "fmt" -) -import ( dfapkg "github.com/timtadh/lexmachine/dfa" "github.com/timtadh/lexmachine/frontend" "github.com/timtadh/lexmachine/inst" "github.com/timtadh/lexmachine/machines" + "github.com/timtadh/lexmachine/stream" + "github.com/timtadh/lexmachine/stream_machines" ) +type pattern struct { + regex []byte + action Action +} + // An Action is a function which get called when the Scanner finds a match // during the lexing process. They turn a low level machines.Match struct into // a token for the users program. As different compilers/interpretters/parsers // have different needs Actions merely return an interface{}. This allows you // to represent a token in anyway you wish. An example Token struct is provided // above. -type Action func(scan *Scanner, match *machines.Match) (interface{}, error) - -type pattern struct { - regex []byte - action Action -} +type Action func(scan Scanner, match *machines.Match) (interface{}, error) // Lexer is a "builder" object which lets you construct a Scanner type which // does the actual work of tokenizing (splitting up and categorizing) a byte @@ -42,8 +42,8 @@ func NewLexer() *Lexer { return &Lexer{} } -// Scanner creates a scanner for a particular byte string from the lexer. -func (l *Lexer) Scanner(text []byte) (*Scanner, error) { +// TextScanner creates a scanner for a particular byte string from the lexer. +func (l *Lexer) TextScanner(text []byte) (*TextScanner, error) { if l.program == nil && l.dfa == nil { err := l.Compile() if err != nil { @@ -55,9 +55,9 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) { textCopy := make([]byte, len(text)) copy(textCopy, text) - var s *Scanner + var s *TextScanner if l.dfa != nil { - s = &Scanner{ + s = &TextScanner{ lexer: l, matches: l.dfaMatches, scan: machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, textCopy), @@ -65,7 +65,7 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) { TC: 0, } } else { - s = &Scanner{ + s = &TextScanner{ lexer: l, matches: l.nfaMatches, scan: machines.LexerEngine(l.program, textCopy), @@ -76,6 +76,29 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) { return s, nil } +// StreamScanner creates a scanner for a particular stream from the lexer. +func (l *Lexer) StreamScanner(text stream.Stream) (*StreamScanner, error) { + if l.program == nil && l.dfa == nil { + err := l.Compile() + if err != nil { + return nil, err + } + } + + var s *StreamScanner + if l.dfa != nil { + s = &StreamScanner{ + lexer: l, + matches: l.dfaMatches, + scan: stream_machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, text), + Text: text, + } + } else { + panic("not implemented") + } + return s, nil +} + // Add pattern to match on. When a match occurs during scanning the action // function will be called by the Scanner to turn the low level machines.Match // struct into a token. @@ -180,7 +203,7 @@ func (l *Lexer) CompileDFA() error { } func (l *Lexer) matchesEmptyString() (bool, error) { - s, err := l.Scanner([]byte("")) + s, err := l.TextScanner([]byte("")) if err != nil { return false, err } diff --git a/lexer_test.go b/lexer_test.go index 76749ae..f3f20ac 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -1,6 +1,7 @@ package lexmachine import ( + "bytes" "fmt" "strconv" "strings" @@ -8,6 +9,7 @@ import ( "github.com/timtadh/data-structures/test" "github.com/timtadh/lexmachine/machines" + "github.com/timtadh/lexmachine/stream" ) func TestSimple(x *testing.T) { @@ -22,25 +24,25 @@ func TestSimple(x *testing.T) { lexer.Add( []byte("print"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { return scan.Token(PRINT, nil, match), nil }, ) lexer.Add( []byte("([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { return scan.Token(NAME, string(match.Bytes), match), nil }, ) lexer.Add( []byte("="), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { return scan.Token(EQUALS, nil, match), nil }, ) lexer.Add( []byte("[0-9]+"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { i, err := strconv.Atoi(string(match.Bytes)) if err != nil { return nil, err @@ -50,29 +52,60 @@ func TestSimple(x *testing.T) { ) lexer.Add( []byte("( |\t|\n)"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { // skip white space return nil, nil }, ) lexer.Add( []byte("//[^\n]*\n"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { + func(scan Scanner, match *machines.Match) (interface{}, error) { // skip white space return nil, nil }, ) lexer.Add( []byte("/\\*"), - func(scan *Scanner, match *machines.Match) (interface{}, error) { - for tc := scan.TC; tc < len(scan.Text); tc++ { - if scan.Text[tc] == '\\' { + //func(s Scanner, match *machines.Match) (interface{}, error) { + // scan := s.(*TextScanner) + // for tc := scan.TC; tc < len(scan.Text); tc++ { + // if scan.Text[tc] == '\\' { + // // the next character is skipped + // tc++ + // } else if scan.Text[tc] == '*' && tc+1 < len(scan.Text) { + // if scan.Text[tc+1] == '/' { + // scan.TC = tc + 2 + // return nil, nil + // } + // } + // } + // return nil, + // fmt.Errorf("unclosed comment starting at %d, (%d, %d)", + // match.TC, match.StartLine, match.StartColumn) + //}, + func(s Scanner, match *machines.Match) (interface{}, error) { + scan := s.(*StreamScanner) + if scan.Text.EOS() { + return nil, + fmt.Errorf("unclosed comment starting at %d, (%d, %d)", + match.TC, match.StartLine, match.StartColumn) + } + buf := make([]stream.Character, 0, 10) + buf = append(buf, scan.Text.Character()) + for lh := 1; ; lh++ { + c, has := scan.Text.Peek(lh) + if !has { + break + } + if c.Byte == '\\' { // the next character is skipped - tc++ - } else if scan.Text[tc] == '*' && tc+1 < len(scan.Text) { - if scan.Text[tc+1] == '/' { - scan.TC = tc + 2 - return nil, nil + lh++ + } else if c.Byte == '*' { + if n, has := scan.Text.Peek(lh + 1); has { + if n.Byte == '/' { + scan.Text.Advance(lh + 2) + return nil, nil + } } } } @@ -113,7 +146,7 @@ func TestSimple(x *testing.T) { } scan := func(lexer *Lexer) { - scanner, err := lexer.Scanner(text) + scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text))) if err != nil { t.Error(err) t.Log(lexer.program.Serialize()) @@ -132,9 +165,9 @@ func TestSimple(x *testing.T) { } } - // first do the test with the NFA - t.AssertNil(lexer.CompileNFA()) - scan(lexer) + // // first do the test with the NFA + // t.AssertNil(lexer.CompileNFA()) + // scan(lexer) // then do the test with the DFA lexer.program = nil @@ -216,7 +249,7 @@ func TestPartialLexer(x *testing.T) { } getToken := func(tokenType int) Action { - return func(s *Scanner, m *machines.Match) (interface{}, error) { + return func(s Scanner, m *machines.Match) (interface{}, error) { return s.Token(tokenType, string(m.Bytes), m), nil } } @@ -229,7 +262,7 @@ func TestPartialLexer(x *testing.T) { lexer.Add([]byte("[A-Za-z$][A-Za-z0-9$]+"), getToken(tokmap["IDENT"])) lexer.Add([]byte(">=|<=|=|>|<|\\|\\||&&"), getToken(tokmap["OP"])) scan := func(lexer *Lexer) { - scanner, err := lexer.Scanner([]byte(text)) + scanner, err := lexer.TextScanner([]byte(text)) t.AssertNil(err) i := 0 for tk, err, eof := scanner.Next(); !eof; tk, err, eof = scanner.Next() { @@ -256,7 +289,7 @@ func TestPartialLexer(x *testing.T) { func TestRegression(t *testing.T) { token := func(name string) Action { - return func(s *Scanner, m *machines.Match) (interface{}, error) { + return func(s Scanner, m *machines.Match) (interface{}, error) { return fmt.Sprintf("%v:%q", name, string(m.Bytes)), nil } } @@ -278,7 +311,7 @@ func TestRegression(t *testing.T) { runTest := func(lexer *Lexer) { for _, test := range tests { - scanner, err := lexer.Scanner([]byte(test.text)) + scanner, err := lexer.TextScanner([]byte(test.text)) if err != nil { t.Fatal(err) } @@ -356,11 +389,11 @@ ddns-update-style none; newLexer := func() *Lexer { lex := NewLexer() - skip := func(*Scanner, *machines.Match) (interface{}, error) { + skip := func(Scanner, *machines.Match) (interface{}, error) { return nil, nil } token := func(name string) Action { - return func(s *Scanner, m *machines.Match) (interface{}, error) { + return func(s Scanner, m *machines.Match) (interface{}, error) { return s.Token(tokenIds[name], string(m.Bytes), m), nil } } @@ -376,7 +409,7 @@ ddns-update-style none; } runTest := func(lexer *Lexer) { - scanner, err := lexer.Scanner([]byte(text)) + scanner, err := lexer.TextScanner([]byte(text)) if err != nil { return } @@ -425,11 +458,11 @@ func TestPythonStrings(t *testing.T) { for i, tok := range tokens { tokenIds[tok] = i } - skip := func(*Scanner, *machines.Match) (interface{}, error) { + skip := func(Scanner, *machines.Match) (interface{}, error) { return nil, nil } token := func(name string) Action { - return func(s *Scanner, m *machines.Match) (interface{}, error) { + return func(s Scanner, m *machines.Match) (interface{}, error) { return s.Token(tokenIds[name], string(m.Bytes), m), nil } } @@ -468,7 +501,7 @@ func TestPythonStrings(t *testing.T) { runTest := func(lexer *Lexer) { for _, test := range tests { fmt.Printf("test %q\n", test.text) - scanner, err := lexer.Scanner([]byte(test.text)) + scanner, err := lexer.TextScanner([]byte(test.text)) if err != nil { t.Fatal(err) } @@ -516,7 +549,7 @@ func TestPythonStrings(t *testing.T) { } func TestNoEmptyStrings(t *testing.T) { - skip := func(*Scanner, *machines.Match) (interface{}, error) { + skip := func(Scanner, *machines.Match) (interface{}, error) { return nil, nil } lexer := NewLexer() diff --git a/scanner.go b/scanner.go index 9db5e11..4c35fd9 100644 --- a/scanner.go +++ b/scanner.go @@ -6,6 +6,11 @@ import ( "github.com/timtadh/lexmachine/machines" ) +type Scanner interface { + Next() (tok interface{}, err error, eos bool) + Token(typ int, value interface{}, m *machines.Match) *Token +} + // Scanner tokenizes a byte string based on the patterns provided to the lexer // object which constructed the scanner. This object works as functional // iterator using the Next method. @@ -27,7 +32,7 @@ import ( // fmt.Println(tok) // } // -type Scanner struct { +type TextScanner struct { lexer *Lexer matches map[int]int scan machines.Scanner @@ -57,7 +62,7 @@ type Scanner struct { // // For more information on functional iterators see: // http://hackthology.com/functional-iteration-in-go.html -func (s *Scanner) Next() (tok interface{}, err error, eos bool) { +func (s *TextScanner) Next() (tok interface{}, err error, eos bool) { var token interface{} for token == nil { tc, match, err, scan := s.scan(s.TC) @@ -81,7 +86,7 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) { } // Token is a helper function for constructing a Token type inside of a Action. -func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token { +func (s *TextScanner) Token(typ int, value interface{}, m *machines.Match) *Token { return &Token{ Type: typ, Value: value, diff --git a/stream.go b/stream.go index 753de52..1617cd0 100644 --- a/stream.go +++ b/stream.go @@ -1 +1,80 @@ package lexmachine + +import ( + "fmt" + + "github.com/timtadh/lexmachine/machines" + "github.com/timtadh/lexmachine/stream" + "github.com/timtadh/lexmachine/stream_machines" +) + +// StreamScanner tokenizes a stream of bytes (see stream.Stream) which can be +// constructed from an io.Reader. This object work analogously to the regular +// Scanner. Note: if the stream you are scanning fits in memory using the +// regular Scanner is likely more efficient. Finally, stream.Stream objects can +// only advance the text forwards so an Action cannot move the text counter +// backwards (as is possible with Scanner). +type StreamScanner struct { + lexer *Lexer + matches map[int]int + scan stream_machines.Scanner + Text stream.Stream +} + +// Next iterates through the string being scanned returning one token at a time +// until either an error is encountered or the end of the string is reached. +// The token is returned by the tok value. An error is indicated by err. +// Finally, eos (a bool) indicates the End Of String when it returns as true. +// +// Example +// +// for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() { +// if err != nil { +// // handle the error and exit the loop. For example: +// return err +// } +// // do some processing on tok or store it somewhere. eg. +// fmt.Println(tok) +// } +// +// One useful error type which could be returned by Next() is a +// match.UnconsumedInput which provides the position information for where in +// the text the scanning failed. +// +// For more information on functional iterators see: +// http://hackthology.com/functional-iteration-in-go.html +func (s *StreamScanner) Next() (tok interface{}, err error, eos bool) { + var token interface{} + for token == nil { + match, err, scan := s.scan() + if scan == nil { + return nil, nil, true + } else if err != nil { + return nil, err, false + } else if match == nil { + return nil, fmt.Errorf("No match but no error"), false + } + s.scan = scan + + pattern := s.lexer.patterns[s.matches[match.PC]] + token, err = pattern.action(s, match) + if err != nil { + return nil, err, false + } + } + return token, nil, false +} + +// Token is a helper function for constructing a Token type inside of a Action. +func (s *StreamScanner) Token(typ int, value interface{}, m *machines.Match) *Token { + return &Token{ + Type: typ, + Value: value, + Lexeme: m.Bytes, + TC: m.TC, + StartLine: m.StartLine, + StartColumn: m.StartColumn, + EndLine: m.EndLine, + EndColumn: m.EndColumn, + } +} diff --git a/stream/buffered.go b/stream/buffered.go index 607c2ef..2397f02 100644 --- a/stream/buffered.go +++ b/stream/buffered.go @@ -92,7 +92,7 @@ func (b *bufferedStream) Peek(i int) (char Character, has bool) { func (b *bufferedStream) Started() bool { b.lock.Lock() defer b.lock.Unlock() - return b.eos + return b.started } // EOS indicates whether the stream has reached End Of Stream From b762b7bd420496ba85cefbacf0aa30b8e30f2421 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Wed, 28 Mar 2018 09:08:35 -0700 Subject: [PATCH 7/9] Updated stream API to be easier to use. Not being able to call Peek(0) was a real pain and made using streams much more difficult. This changes the behavior of streams to make Peek(0) be ok but Peek() can only be called after Advance(>=1). Signed-off-by: Tim Henderson --- stream/buffered.go | 21 +++++++++++---------- stream/stream.go | 17 ++++++++++------- stream/stream_test.go | 31 ++++++++++++++++++++++++------- stream_machines/dfa_machine.go | 9 +-------- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/stream/buffered.go b/stream/buffered.go index 2397f02..8156dae 100644 --- a/stream/buffered.go +++ b/stream/buffered.go @@ -69,15 +69,13 @@ func (b *bufferedStream) Position() (tc, line, column int) { func (b *bufferedStream) Peek(i int) (char Character, has bool) { b.lock.Lock() defer b.lock.Unlock() - if b.eos { - panic(fmt.Errorf("Call to Byte() after first call to Advance returned false")) - } - if i <= 0 { - panic(fmt.Errorf("Peek() must be called with positive lookahead got %d", i)) - } - // the "cursor" technically starts at -1, this does that adjustment if !b.started { - i-- + panic(fmt.Errorf("Call to Peek() before first call to Advance")) + } else if b.eos { + panic(fmt.Errorf("Call to Peek() after first call to Advance returned false")) + } + if i < 0 { + panic(fmt.Errorf("Peek() must be called with lookahead >= 0 got %d", i)) } if len(b.buf) >= i+1 { return b.buf[i], true @@ -124,8 +122,11 @@ func (b *bufferedStream) Advance(i int) bool { // advance moves the cursor forward by i func (b *bufferedStream) advance(i int) bool { - if i <= 0 { - panic(fmt.Errorf("Advance() must be called with positive move got %d", i)) + if i == 0 { + return true + } + if i < 0 { + panic(fmt.Errorf("Advance() must be called with move >= 0 got %d", i)) } // the "cursor" technically starts at -1, this does that adjustment if !b.started { diff --git a/stream/stream.go b/stream/stream.go index 12a2f02..6c58bcb 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -32,11 +32,12 @@ type Stream interface { Position() (tc, line, column int) // Peek returns byte at the current cursor + the lookahead in the stream if - // one exists. If lookahead == 0, Peek will panic, if lookahead == 1, it - // returns the next byte, and so on. Peek does not advance the cursor. If - // there are no further bytes in the stream (or lookahead causes a read - // past the end of the stream) Peek returns has == false. You may call this - // method before Advance. + // one exists. If lookahead == 0, it returns the same character Character() + // returns. If lookahead == 1, it returns the next byte, and so on. Peek + // does not advance the cursor. If there are no further bytes in the stream + // (or lookahead causes a read past the end of the stream) Peek returns has + // == false. If you call Peek() before Advance() has been called it will + // panic. Peek(lookahead int) (char Character, has bool) // Advance moves the cursor i bytes forward in the stream. If there is a @@ -44,10 +45,12 @@ type Stream interface { // it returns false. Advance with i > than number of bytes remaining moves // the cursor to the end of stream (may be less than i) and returns false // (as you cannot read past the end of the stream). Advance must be called - // with positive movement otherwise it will panic. + // with movement >= 0 otherwise it will panic. If Advance is called with + // i == 0 it does nothing (including setting the stream to started). Advance(i int) bool - // Started returns true if at least 1 call to Advance has been made. + // Started returns true the stream has been started (eg. a call to Advance + // has been made with a positive movement). Started() bool // EOS returns true if the stream has reached the end of the stream. diff --git a/stream/stream_test.go b/stream/stream_test.go index bc4b004..93d8337 100644 --- a/stream/stream_test.go +++ b/stream/stream_test.go @@ -69,7 +69,10 @@ func TestPeekTillW(t *testing.T) { expected := "world" var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) - for i := 1; ; i++ { + if !s.Started() { + s.Advance(1) + } + for i := 0; ; i++ { b, has := s.Peek(i) if !has { break @@ -105,7 +108,10 @@ func TestPeekTillWThenL(t *testing.T) { expected := "ld" var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) - for i := 1; ; i++ { + if !s.Started() { + s.Advance(1) + } + for i := 0; ; i++ { b, has := s.Peek(i) if !has { break @@ -154,7 +160,10 @@ func TestPeekTillWThenLThenEnd(t *testing.T) { expected := "" var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) - for i := 1; ; i++ { + if !s.Started() { + s.Advance(1) + } + for i := 0; ; i++ { b, has := s.Peek(i) if !has { break @@ -203,7 +212,10 @@ func TestPeekThenReadFullStream(t *testing.T) { var peek bytes.Buffer var read bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) - for i := 1; ; i++ { + if !s.Started() { + s.Advance(1) + } + for i := 0; ; i++ { b, has := s.Peek(i) if !has { break @@ -214,12 +226,13 @@ func TestPeekThenReadFullStream(t *testing.T) { } } } - for s.Advance(1) { + for !s.EOS() { if err := read.WriteByte(s.Byte()); err != nil { if err != nil { t.Fatalf("err writing %v", err) } } + s.Advance(1) } if s.Err() != nil { t.Fatalf("stream err %v", s.Err()) @@ -265,13 +278,16 @@ func TestLineColumns(t *testing.T) { } s := BufferedStream(bytes.NewBufferString(text)) // pre-peek everything just to futz with the interior state - for i := 1; ; i++ { + if !s.Started() { + s.Advance(1) + } + for i := 0; ; i++ { _, has := s.Peek(i) if !has { break } } - for i := 0; s.Advance(1); i++ { + for i := 0; !s.EOS(); i++ { tc, line, column := s.Position() char := s.Byte() if char != expected[i].char { @@ -286,6 +302,7 @@ func TestLineColumns(t *testing.T) { if column != expected[i].column { t.Fatalf("got %v expected %v", column, expected[i].column) } + s.Advance(1) } } diff --git a/stream_machines/dfa_machine.go b/stream_machines/dfa_machine.go index 33e22f6..2c845b6 100644 --- a/stream_machines/dfa_machine.go +++ b/stream_machines/dfa_machine.go @@ -29,14 +29,7 @@ func DFALexerEngine(startState, errorState int, trans machines.DFATrans, accepti return nil, nil, nil } } - startChar := text.Character() - buf = append(buf, startChar) - state = trans[state][startChar.Byte] - if match, has := accepting[state]; has { - matchID = match - matchLH = 0 - } - for lh := 1; state != errorState; lh++ { + for lh := 0; state != errorState; lh++ { c, has := text.Peek(lh) if !has { break From 8c242ca51392dece6d88f10a46915cf19eccba01 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Wed, 28 Mar 2018 09:47:42 -0700 Subject: [PATCH 8/9] attempt to harmonize Stream/Text scanning Signed-off-by: Tim Henderson --- buffer.go | 84 +++++++++++++++++++++++++++++++++++++++ lexer_test.go | 107 ++++++++++++++++++++++++++++++++------------------ scanner.go | 12 ++++++ stream.go | 11 ++++++ 4 files changed, 175 insertions(+), 39 deletions(-) create mode 100644 buffer.go diff --git a/buffer.go b/buffer.go new file mode 100644 index 0000000..609d876 --- /dev/null +++ b/buffer.go @@ -0,0 +1,84 @@ +package lexmachine + +import ( + "fmt" + + "github.com/timtadh/lexmachine/stream" +) + +// Buffer is a abstracts to implementations of "text". The first is a []byte with a +type Buffer interface { + Byte(i int) byte + HasByte(i int) bool + TC() int + SetTC(i int) +} + +type SliceBuffer struct { + Text []byte + TextCounter int +} + +func sliceBuffer(text []byte, tc int) *SliceBuffer { + return &SliceBuffer{ + Text: text, + TextCounter: tc, + } +} + +func (s *SliceBuffer) Byte(i int) byte { + return s.Text[i] +} + +func (s *SliceBuffer) HasByte(i int) bool { + return i >= 0 && i < len(s.Text) +} + +func (s *SliceBuffer) TC() int { + return s.TextCounter +} + +func (s *SliceBuffer) SetTC(tc int) { + s.TextCounter = tc +} + +func (s *SliceBuffer) finalize() int { + return s.TextCounter +} + +type StreamBuffer struct { + Text stream.Stream + Lookahead int +} + +func streamBuffer(text stream.Stream) *StreamBuffer { + return &StreamBuffer{ + Text: text, + Lookahead: 0, + } +} + +func (s *StreamBuffer) Byte(i int) byte { + c, has := s.Text.Peek(i) + if !has { + panic(fmt.Errorf("read past the end of the buffer")) + } + return c.Byte +} + +func (s *StreamBuffer) HasByte(i int) bool { + _, has := s.Text.Peek(i) + return has +} + +func (s *StreamBuffer) TC() int { + return s.Lookahead +} + +func (s *StreamBuffer) SetTC(tc int) { + s.Lookahead = tc +} + +func (s *StreamBuffer) finalize() { + s.Text.Advance(s.Lookahead) +} diff --git a/lexer_test.go b/lexer_test.go index f3f20ac..7f3ab5f 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -84,34 +84,50 @@ func TestSimple(x *testing.T) { // match.TC, match.StartLine, match.StartColumn) //}, func(s Scanner, match *machines.Match) (interface{}, error) { - scan := s.(*StreamScanner) - if scan.Text.EOS() { - return nil, - fmt.Errorf("unclosed comment starting at %d, (%d, %d)", - match.TC, match.StartLine, match.StartColumn) - } - buf := make([]stream.Character, 0, 10) - buf = append(buf, scan.Text.Character()) - for lh := 1; ; lh++ { - c, has := scan.Text.Peek(lh) - if !has { - break + text := s.Buffer() + buf := make([]byte, 0, 10) + buf = append(buf, match.Bytes...) + open := 1 + tc := text.TC() + for ; open > 0; tc++ { + if !text.HasByte(tc) { + return nil, + fmt.Errorf("unclosed comment starting at %d, (%d, %d) containing %q", + match.TC, match.StartLine, match.StartColumn, buf) } - if c.Byte == '\\' { + char := text.Byte(tc) + buf = append(buf, char) + if char == '\\' { // the next character is skipped - lh++ - } else if c.Byte == '*' { - if n, has := scan.Text.Peek(lh + 1); has { - if n.Byte == '/' { - scan.Text.Advance(lh + 2) - return nil, nil + tc++ + if text.HasByte(tc) { + buf = append(buf, text.Byte(tc)) + } + } else if char == '/' { + if text.HasByte(tc + 1) { + next := text.Byte(tc + 1) + if next == '*' { + buf = append(buf, next) + tc++ + open++ + continue + } + } + } else if char == '*' { + if text.HasByte(tc + 1) { + next := text.Byte(tc + 1) + if next == '/' { + buf = append(buf, next) + tc++ + open-- + continue } } } } - return nil, - fmt.Errorf("unclosed comment starting at %d, (%d, %d)", - match.TC, match.StartLine, match.StartColumn) + fmt.Printf("%q\n", buf) + text.SetTC(tc) + return nil, nil }, ) @@ -121,10 +137,10 @@ func TestSimple(x *testing.T) { print fred name =12 // asdf comment - /*awef oiwe + /**//*awef oiwe ooiwje \*/ weoi weoi*/ printname = 13 - print printname + print printname/*/**/*/ `) expected := []*Token{ @@ -138,20 +154,14 @@ func TestSimple(x *testing.T) { {NAME, "name", []byte("name"), 41, 5, 3, 5, 6}, {EQUALS, nil, []byte("="), 46, 5, 8, 5, 8}, {NUMBER, 12, []byte("12"), 47, 5, 9, 5, 10}, - {NAME, "printname", []byte("printname"), 112, 9, 11, 9, 19}, - {EQUALS, nil, []byte("="), 122, 9, 21, 9, 21}, - {NUMBER, 13, []byte("13"), 124, 9, 23, 9, 24}, - {PRINT, nil, []byte("print"), 129, 10, 3, 10, 7}, - {NAME, "printname", []byte("printname"), 135, 10, 9, 10, 17}, + {NAME, "printname", []byte("printname"), 116, 9, 11, 9, 19}, + {EQUALS, nil, []byte("="), 126, 9, 21, 9, 21}, + {NUMBER, 13, []byte("13"), 128, 9, 23, 9, 24}, + {PRINT, nil, []byte("print"), 133, 10, 3, 10, 7}, + {NAME, "printname", []byte("printname"), 139, 10, 9, 10, 17}, } - scan := func(lexer *Lexer) { - scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text))) - if err != nil { - t.Error(err) - t.Log(lexer.program.Serialize()) - } - + scan := func(scanner Scanner) { i := 0 for tk, err, eof := scanner.Next(); !eof; tk, err, eof = scanner.Next() { if err != nil { @@ -166,14 +176,33 @@ func TestSimple(x *testing.T) { } // // first do the test with the NFA - // t.AssertNil(lexer.CompileNFA()) - // scan(lexer) + t.AssertNil(lexer.CompileNFA()) + { + scanner, err := lexer.TextScanner(text) + if err != nil { + t.Fatal(err) + } + scan(scanner) + } // then do the test with the DFA lexer.program = nil lexer.nfaMatches = nil t.AssertNil(lexer.CompileDFA()) - scan(lexer) + { + scanner, err := lexer.TextScanner(text) + if err != nil { + t.Fatal(err) + } + scan(scanner) + } + { + scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text))) + if err != nil { + t.Fatal(err) + } + scan(scanner) + } } func TestPartialLexer(x *testing.T) { diff --git a/scanner.go b/scanner.go index 4c35fd9..e029ff4 100644 --- a/scanner.go +++ b/scanner.go @@ -9,6 +9,7 @@ import ( type Scanner interface { Next() (tok interface{}, err error, eos bool) Token(typ int, value interface{}, m *machines.Match) *Token + Buffer() Buffer } // Scanner tokenizes a byte string based on the patterns provided to the lexer @@ -38,6 +39,14 @@ type TextScanner struct { scan machines.Scanner Text []byte TC int + buf *SliceBuffer +} + +func (s *TextScanner) Buffer() Buffer { + if s.buf == nil { + panic(fmt.Errorf("Buffer called outside of an Action")) + } + return s.buf } // Next iterates through the string being scanned returning one token at a time @@ -76,8 +85,11 @@ func (s *TextScanner) Next() (tok interface{}, err error, eos bool) { s.scan = scan s.TC = tc + s.buf = sliceBuffer(s.Text, s.TC) pattern := s.lexer.patterns[s.matches[match.PC]] token, err = pattern.action(s, match) + s.TC = s.buf.finalize() + s.buf = nil if err != nil { return nil, err, false } diff --git a/stream.go b/stream.go index 1617cd0..f3c31ce 100644 --- a/stream.go +++ b/stream.go @@ -19,6 +19,14 @@ type StreamScanner struct { matches map[int]int scan stream_machines.Scanner Text stream.Stream + buf *StreamBuffer +} + +func (s *StreamScanner) Buffer() Buffer { + if s.buf == nil { + panic(fmt.Errorf("Buffer called outside of an Action")) + } + return s.buf } // Next iterates through the string being scanned returning one token at a time @@ -56,8 +64,11 @@ func (s *StreamScanner) Next() (tok interface{}, err error, eos bool) { } s.scan = scan + s.buf = streamBuffer(s.Text) pattern := s.lexer.patterns[s.matches[match.PC]] token, err = pattern.action(s, match) + s.buf.finalize() + s.buf = nil if err != nil { return nil, err, false } From ff2a8174f9c2fe115f6d3ee8bb6ae9774cc86d02 Mon Sep 17 00:00:00 2001 From: Tim Henderson Date: Wed, 28 Mar 2018 09:57:27 -0700 Subject: [PATCH 9/9] removed unneeded methods from stream Signed-off-by: Tim Henderson --- stream/buffered.go | 24 -------------------- stream/stream.go | 12 +--------- stream/stream_test.go | 51 +++++++++++++++++++++---------------------- 3 files changed, 26 insertions(+), 61 deletions(-) diff --git a/stream/buffered.go b/stream/buffered.go index 8156dae..5f6a37a 100644 --- a/stream/buffered.go +++ b/stream/buffered.go @@ -29,18 +29,6 @@ func BufferedStream(r io.Reader) Stream { return b } -// Byte returns the byte at the cursor -func (b *bufferedStream) Byte() byte { - b.lock.Lock() - defer b.lock.Unlock() - if !b.started { - panic(fmt.Errorf("Call to Byte() before first call to Advance")) - } else if b.eos { - panic(fmt.Errorf("Call to Byte() after first call to Advance returned false")) - } - return b.buf[0].Byte -} - // Character returns the character at the cursor func (b *bufferedStream) Character() Character { b.lock.Lock() @@ -53,18 +41,6 @@ func (b *bufferedStream) Character() Character { return b.buf[0] } -// Position gives the current position of the cursor -func (b *bufferedStream) Position() (tc, line, column int) { - b.lock.Lock() - defer b.lock.Unlock() - if !b.started { - panic(fmt.Errorf("Call to Position() before first call to Advance")) - } else if b.eos { - panic(fmt.Errorf("Call to Position() after first call to Advance returned false")) - } - return b.buf[0].TC, b.buf[0].Line, b.buf[0].Column -} - // Peek gets the character at lookahead i func (b *bufferedStream) Peek(i int) (char Character, has bool) { b.lock.Lock() diff --git a/stream/stream.go b/stream/stream.go index 6c58bcb..a27770c 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -8,7 +8,7 @@ import "fmt" // // s := BufferedStream(reader) // for s.Advance(1) { -// fmt.Println(s.Byte()) +// fmt.Println(s.Character().Byte) // } // if s.Err() != nil { // return s.Err() @@ -16,21 +16,11 @@ import "fmt" // type Stream interface { - // Byte returns the current byte in the stream. This method will panic if - // Advance has not been called before this method or Advance has returned - // false. - Byte() byte - // Character returns the current byte in the stream. This method will panic // if Advance has not been called before this method or Advance has // returned false. Character() Character - // Position returns the position of the current byte: text counter, line, - // and column. This method will panic if Advance has not been called before - // this method or Advance has returned false. - Position() (tc, line, column int) - // Peek returns byte at the current cursor + the lookahead in the stream if // one exists. If lookahead == 0, it returns the same character Character() // returns. If lookahead == 1, it returns the next byte, and so on. Peek diff --git a/stream/stream_test.go b/stream/stream_test.go index 93d8337..ba52835 100644 --- a/stream/stream_test.go +++ b/stream/stream_test.go @@ -10,7 +10,7 @@ func TestReadFullStream(t *testing.T) { var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) for s.Advance(1) { - if err := buf.WriteByte(s.Byte()); err != nil { + if err := buf.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -30,7 +30,7 @@ func TestReadEveryOther(t *testing.T) { var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) for s.Advance(2) { - if err := buf.WriteByte(s.Byte()); err != nil { + if err := buf.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -50,7 +50,7 @@ func TestReadEvery3(t *testing.T) { var buf bytes.Buffer s := BufferedStream(bytes.NewBufferString(text)) for s.Advance(3) { - if err := buf.WriteByte(s.Byte()); err != nil { + if err := buf.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -82,11 +82,11 @@ func TestPeekTillW(t *testing.T) { break } } - if s.Byte() != 'w' { - t.Fatalf("expected w got %v", s.Byte()) + if s.Character().Byte != 'w' { + t.Fatalf("expected w got %v", s.Character().Byte) } for { - if err := buf.WriteByte(s.Byte()); err != nil { + if err := buf.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -121,8 +121,8 @@ func TestPeekTillWThenL(t *testing.T) { break } } - if s.Byte() != 'w' { - t.Fatalf("expected w got %v", s.Byte()) + if s.Character().Byte != 'w' { + t.Fatalf("expected w got %v", s.Character().Byte) } for i := 1; ; i++ { b, has := s.Peek(i) @@ -134,11 +134,11 @@ func TestPeekTillWThenL(t *testing.T) { break } } - if s.Byte() != 'l' { - t.Fatalf("expected l got %v", s.Byte()) + if s.Character().Byte != 'l' { + t.Fatalf("expected l got %v", s.Character().Byte) } for { - if err := buf.WriteByte(s.Byte()); err != nil { + if err := buf.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -173,8 +173,8 @@ func TestPeekTillWThenLThenEnd(t *testing.T) { break } } - if s.Byte() != 'w' { - t.Fatalf("expected w got %v", s.Byte()) + if s.Character().Byte != 'w' { + t.Fatalf("expected w got %v", s.Character().Byte) } for i := 1; ; i++ { b, has := s.Peek(i) @@ -186,8 +186,8 @@ func TestPeekTillWThenLThenEnd(t *testing.T) { break } } - if s.Byte() != 'l' { - t.Fatalf("expected l got %v", s.Byte()) + if s.Character().Byte != 'l' { + t.Fatalf("expected l got %v", s.Character().Byte) } for i := 1; ; i++ { _, has := s.Peek(i) @@ -227,7 +227,7 @@ func TestPeekThenReadFullStream(t *testing.T) { } } for !s.EOS() { - if err := read.WriteByte(s.Byte()); err != nil { + if err := read.WriteByte(s.Character().Byte); err != nil { if err != nil { t.Fatalf("err writing %v", err) } @@ -288,19 +288,18 @@ func TestLineColumns(t *testing.T) { } } for i := 0; !s.EOS(); i++ { - tc, line, column := s.Position() - char := s.Byte() - if char != expected[i].char { - t.Fatalf("got %v expected %v", char, expected[i].char) + char := s.Character() + if char.Byte != expected[i].char { + t.Fatalf("got %v expected %v", char.Byte, expected[i].char) } - if tc != expected[i].tc { - t.Fatalf("got %v expected %v", tc, expected[i].tc) + if char.TC != expected[i].tc { + t.Fatalf("got %v expected %v", char.TC, expected[i].tc) } - if line != expected[i].line { - t.Fatalf("got %v expected %v", line, expected[i].line) + if char.Line != expected[i].line { + t.Fatalf("got %v expected %v", char.Line, expected[i].line) } - if column != expected[i].column { - t.Fatalf("got %v expected %v", column, expected[i].column) + if char.Column != expected[i].column { + t.Fatalf("got %v expected %v", char.Column, expected[i].column) } s.Advance(1) }