From 8e4afa3a82cad54d85d0a0cdfbfc1b47957a1b21 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Mon, 26 Mar 2018 13:28:11 -0700
Subject: [PATCH 1/9] broke up lexer.go into multiple files

Signed-off-by: Tim Henderson <tadh@google.com>
---
 lexer.go   | 145 -----------------------------------------------------
 scanner.go | 105 ++++++++++++++++++++++++++++++++++++++
 token.go   |  52 +++++++++++++++++++
 3 files changed, 157 insertions(+), 145 deletions(-)
 create mode 100644 scanner.go
 create mode 100644 token.go

diff --git a/lexer.go b/lexer.go
index ab6b219..3009966 100644
--- a/lexer.go
+++ b/lexer.go
@@ -1,7 +1,6 @@
 package lexmachine
 
 import (
-	"bytes"
 	"fmt"
 )
 
@@ -12,52 +11,6 @@ import (
 	"github.com/timtadh/lexmachine/machines"
 )
 
-// Token is an optional token representation you could use to represent the
-// tokens produced by a lexer built with lexmachine.
-//
-// Here is an example for constructing a lexer Action which turns a
-// machines.Match struct into a token using the scanners Token helper function.
-//
-//     func token(name string, tokenIds map[string]int) lex.Action {
-//         return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
-//             return s.Token(tokenIds[name], string(m.Bytes), m), nil
-//         }
-//     }
-//
-type Token struct {
-	Type        int
-	Value       interface{}
-	Lexeme      []byte
-	TC          int
-	StartLine   int
-	StartColumn int
-	EndLine     int
-	EndColumn   int
-}
-
-// Equals checks the equality of two tokens ignoring the Value field.
-func (t *Token) Equals(other *Token) bool {
-	if t == nil && other == nil {
-		return true
-	} else if t == nil {
-		return false
-	} else if other == nil {
-		return false
-	}
-	return t.TC == other.TC &&
-		t.StartLine == other.StartLine &&
-		t.StartColumn == other.StartColumn &&
-		t.EndLine == other.EndLine &&
-		t.EndColumn == other.EndColumn &&
-		bytes.Equal(t.Lexeme, other.Lexeme) &&
-		t.Type == other.Type
-}
-
-// String formats the token in a human readable form.
-func (t *Token) String() string {
-	return fmt.Sprintf("%d %q %d (%d, %d)-(%d, %d)", t.Type, t.Value, t.TC, t.StartLine, t.StartColumn, t.EndLine, t.EndColumn)
-}
-
 // An Action is a function which get called when the Scanner finds a match
 // during the lexing process. They turn a low level machines.Match struct into
 // a token for the users program. As different compilers/interpretters/parsers
@@ -84,104 +37,6 @@ type Lexer struct {
 	dfa        *dfapkg.DFA
 }
 
-// Scanner tokenizes a byte string based on the patterns provided to the lexer
-// object which constructed the scanner. This object works as functional
-// iterator using the Next method.
-//
-// Example
-//
-//     lexer, err := CreateLexer()
-//     if err != nil {
-//         return err
-//     }
-//     scanner, err := lexer.Scanner(someBytes)
-//     if err != nil {
-//         return err
-//     }
-//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
-//         if err != nil {
-//             return err
-//         }
-//         fmt.Println(tok)
-//     }
-//
-type Scanner struct {
-	lexer   *Lexer
-	matches map[int]int
-	scan    machines.Scanner
-	Text    []byte
-	TC      int
-	pTC     int
-	sLine   int
-	sColumn int
-	eLine   int
-	eColumn int
-}
-
-// Next iterates through the string being scanned returning one token at a time
-// until either an error is encountered or the end of the string is reached.
-// The token is returned by the tok value. An error is indicated by err.
-// Finally, eos (a bool) indicates the End Of String when it returns as true.
-//
-// Example
-//
-//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
-//         if err != nil {
-//             // handle the error and exit the loop. For example:
-//             return err
-//         }
-//         // do some processing on tok or store it somewhere. eg.
-//         fmt.Println(tok)
-//     }
-//
-// One useful error type which could be returned by Next() is a
-// match.UnconsumedInput which provides the position information for where in
-// the text the scanning failed.
-//
-// For more information on functional iterators see:
-// http://hackthology.com/functional-iteration-in-go.html
-func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
-	var token interface{}
-	for token == nil {
-		tc, match, err, scan := s.scan(s.TC)
-		if scan == nil {
-			return nil, nil, true
-		} else if err != nil {
-			return nil, err, false
-		} else if match == nil {
-			return nil, fmt.Errorf("No match but no error"), false
-		}
-		s.scan = scan
-		s.pTC = s.TC
-		s.TC = tc
-		s.sLine = match.StartLine
-		s.sColumn = match.StartColumn
-		s.eLine = match.EndLine
-		s.eColumn = match.EndColumn
-
-		pattern := s.lexer.patterns[s.matches[match.PC]]
-		token, err = pattern.action(s, match)
-		if err != nil {
-			return nil, err, false
-		}
-	}
-	return token, nil, false
-}
-
-// Token is a helper function for constructing a Token type inside of a Action.
-func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token {
-	return &Token{
-		Type:        typ,
-		Value:       value,
-		Lexeme:      m.Bytes,
-		TC:          m.TC,
-		StartLine:   m.StartLine,
-		StartColumn: m.StartColumn,
-		EndLine:     m.EndLine,
-		EndColumn:   m.EndColumn,
-	}
-}
-
 // NewLexer constructs a new lexer object.
 func NewLexer() *Lexer {
 	return &Lexer{}
diff --git a/scanner.go b/scanner.go
new file mode 100644
index 0000000..e8f72dd
--- /dev/null
+++ b/scanner.go
@@ -0,0 +1,105 @@
+package lexmachine
+
+import (
+	"fmt"
+
+	"github.com/timtadh/lexmachine/machines"
+)
+
+// Scanner tokenizes a byte string based on the patterns provided to the lexer
+// object which constructed the scanner. This object works as functional
+// iterator using the Next method.
+//
+// Example
+//
+//     lexer, err := CreateLexer()
+//     if err != nil {
+//         return err
+//     }
+//     scanner, err := lexer.Scanner(someBytes)
+//     if err != nil {
+//         return err
+//     }
+//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
+//         if err != nil {
+//             return err
+//         }
+//         fmt.Println(tok)
+//     }
+//
+type Scanner struct {
+	lexer   *Lexer
+	matches map[int]int
+	scan    machines.Scanner
+	Text    []byte
+	TC      int
+	pTC     int
+	sLine   int
+	sColumn int
+	eLine   int
+	eColumn int
+}
+
+// Next iterates through the string being scanned returning one token at a time
+// until either an error is encountered or the end of the string is reached.
+// The token is returned by the tok value. An error is indicated by err.
+// Finally, eos (a bool) indicates the End Of String when it returns as true.
+//
+// Example
+//
+//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
+//         if err != nil {
+//             // handle the error and exit the loop. For example:
+//             return err
+//         }
+//         // do some processing on tok or store it somewhere. eg.
+//         fmt.Println(tok)
+//     }
+//
+// One useful error type which could be returned by Next() is a
+// match.UnconsumedInput which provides the position information for where in
+// the text the scanning failed.
+//
+// For more information on functional iterators see:
+// http://hackthology.com/functional-iteration-in-go.html
+func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
+	var token interface{}
+	for token == nil {
+		tc, match, err, scan := s.scan(s.TC)
+		if scan == nil {
+			return nil, nil, true
+		} else if err != nil {
+			return nil, err, false
+		} else if match == nil {
+			return nil, fmt.Errorf("No match but no error"), false
+		}
+		s.scan = scan
+		s.pTC = s.TC
+		s.TC = tc
+		s.sLine = match.StartLine
+		s.sColumn = match.StartColumn
+		s.eLine = match.EndLine
+		s.eColumn = match.EndColumn
+
+		pattern := s.lexer.patterns[s.matches[match.PC]]
+		token, err = pattern.action(s, match)
+		if err != nil {
+			return nil, err, false
+		}
+	}
+	return token, nil, false
+}
+
+// Token is a helper for building a *Token from a Match inside of an Action.
+func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token {
+	return &Token{
+		Type:        typ,
+		Value:       value,
+		Lexeme:      m.Bytes,
+		TC:          m.TC,
+		StartLine:   m.StartLine,
+		StartColumn: m.StartColumn,
+		EndLine:     m.EndLine,
+		EndColumn:   m.EndColumn,
+	}
+}
diff --git a/token.go b/token.go
new file mode 100644
index 0000000..d596b24
--- /dev/null
+++ b/token.go
@@ -0,0 +1,52 @@
+package lexmachine
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// Token is an optional token representation you could use to represent the
+// tokens produced by a lexer built with lexmachine.
+//
+// Here is an example for constructing a lexer Action which turns a
+// machines.Match struct into a token using the scanner's Token helper function.
+//
+//     func token(name string, tokenIds map[string]int) lex.Action {
+//         return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
+//             return s.Token(tokenIds[name], string(m.Bytes), m), nil
+//         }
+//     }
+//
+type Token struct {
+	Type        int
+	Value       interface{}
+	Lexeme      []byte
+	TC          int
+	StartLine   int
+	StartColumn int
+	EndLine     int
+	EndColumn   int
+}
+
+// Equals checks the equality of two tokens ignoring the Value field.
+func (t *Token) Equals(other *Token) bool {
+	if t == nil && other == nil {
+		return true
+	} else if t == nil {
+		return false
+	} else if other == nil {
+		return false
+	}
+	return t.TC == other.TC &&
+		t.StartLine == other.StartLine &&
+		t.StartColumn == other.StartColumn &&
+		t.EndLine == other.EndLine &&
+		t.EndColumn == other.EndColumn &&
+		bytes.Equal(t.Lexeme, other.Lexeme) &&
+		t.Type == other.Type
+}
+
+// String formats the token in a human readable form.
+func (t *Token) String() string {
+	return fmt.Sprintf("%d %q %d (%d, %d)-(%d, %d)", t.Type, t.Value, t.TC, t.StartLine, t.StartColumn, t.EndLine, t.EndColumn)
+}

From 9c9840feaeb10288ddfe6ae568837ee01ff43981 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Mon, 26 Mar 2018 13:28:35 -0700
Subject: [PATCH 2/9] add stream/ to represent byte streams

Signed-off-by: Tim Henderson <tadh@google.com>
---
 stream.go             |   1 +
 stream/buffered.go    | 189 +++++++++++++++++++++++++++
 stream/stream.go      |  54 ++++++++
 stream/stream_test.go | 290 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 534 insertions(+)
 create mode 100644 stream.go
 create mode 100644 stream/buffered.go
 create mode 100644 stream/stream.go
 create mode 100644 stream/stream_test.go

diff --git a/stream.go b/stream.go
new file mode 100644
index 0000000..753de52
--- /dev/null
+++ b/stream.go
@@ -0,0 +1 @@
+package lexmachine
diff --git a/stream/buffered.go b/stream/buffered.go
new file mode 100644
index 0000000..a69f8ce
--- /dev/null
+++ b/stream/buffered.go
@@ -0,0 +1,189 @@
+package stream
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"sync"
+)
+
+type bufferedStream struct {
+	lock    sync.Mutex
+	r       *bufio.Reader
+	tc      int
+	line    int
+	column  int
+	started bool
+	eos     bool
+	buf     []byte
+	err     error
+}
+
+func BufferedStream(r io.Reader) Stream {
+	b := &bufferedStream{
+		r:      bufio.NewReader(r),
+		tc:     -1,
+		line:   1,
+		column: 0,
+	}
+	return b
+}
+
+func (b *bufferedStream) Byte() byte {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	if !b.started {
+		panic(fmt.Errorf("Call to Byte() before first call to Advance"))
+	} else if b.eos {
+		panic(fmt.Errorf("Call to Byte() after first call to Advance returned false"))
+	}
+	return b.buf[0]
+}
+
+func (b *bufferedStream) Position() (tc, line, column int) {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	if !b.started {
+		panic(fmt.Errorf("Call to Position() before first call to Advance"))
+	} else if b.eos {
+		panic(fmt.Errorf("Call to Position() after first call to Advance returned false"))
+	}
+	return b.tc, b.line, b.column
+}
+
+func (b *bufferedStream) Peek(i int) (char byte, has bool) {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	if b.eos {
+		panic(fmt.Errorf("Call to Peek() after first call to Advance returned false"))
+	}
+	if i <= 0 {
+		panic(fmt.Errorf("Peek() must be called with positive lookahead got %d", i))
+	}
+	// before the first Advance the cursor sits at -1, so lookahead 1 is buf[0]
+	if !b.started {
+		i--
+	}
+	if len(b.buf) >= i+1 {
+		return b.buf[i], true
+	}
+	if !b.read(i) {
+		return 0, false
+	}
+	return b.buf[i], true
+}
+
+func (b *bufferedStream) Started() bool {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	return b.eos
+}
+
+func (b *bufferedStream) EOS() bool {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	return b.eos
+}
+
+func (b *bufferedStream) Err() error {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	if !b.started {
+		panic(fmt.Errorf("Call to Err() before first call to Advance"))
+	} else if !b.eos {
+		panic(fmt.Errorf("Call to Err() before call to Advance returned false"))
+	}
+	return b.err
+}
+
+func (b *bufferedStream) Advance(i int) bool {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	return b.advance(i)
+}
+
+func (b *bufferedStream) advance(i int) bool {
+	if i <= 0 {
+		panic(fmt.Errorf("Advance() must be called with positive move got %d", i))
+	}
+	// the "cursor" technically starts at -1, this does that adjustment
+	if !b.started {
+		b.started = true
+		i--
+		// ensures a read happens even if i==0 when the buf is empty
+		if len(b.buf) <= 0 && !b.read(1) {
+			b.eos = true
+			return false
+		}
+	}
+	i = i - b.trimBuffer(i)
+	if len(b.buf) <= i {
+		if !b.read(i) {
+			b.eos = true
+			return false
+		}
+	}
+	if i > 0 {
+		i = i - b.trimBuffer(i)
+		if i != 0 {
+			panic(fmt.Errorf("i != 0 (i = %d)", i))
+		}
+	}
+	b.trackPos(b.buf[0])
+	return true
+}
+
+// trims the buffer by up i bytes and returns the number of
+// bytes trimmed.
+func (b *bufferedStream) trimBuffer(i int) int {
+	for j := 1; j < i && j < len(b.buf); j++ {
+		b.trackPos(b.buf[j])
+	}
+	if len(b.buf) > i {
+		// we already recorded the position
+		// of b.buf[0]. we need to track all the chars
+		// we are dropping by the skip
+		copy(b.buf[:len(b.buf)-i], b.buf[i:])
+		b.buf = b.buf[:len(b.buf)-i]
+		return i
+	} else {
+		trimmed := len(b.buf)
+		b.buf = b.buf[:0]
+		return trimmed
+	}
+	return 0
+}
+
+// updates the position information for the given character.
+// only call once per character in the stream.
+func (b *bufferedStream) trackPos(char byte) {
+	b.tc++
+	if char == '\n' {
+		b.line++
+		b.column = 0
+	} else {
+		b.column++
+	}
+}
+
+func (b *bufferedStream) read(i int) bool {
+	if b.eos {
+		return false
+	}
+	buf := make([]byte, 4096)
+	for {
+		n, err := b.r.Read(buf)
+		if err != nil {
+			if err != io.EOF {
+				// only set err if it is an unexpected error.
+				b.err = err
+			}
+			return false
+		}
+		b.buf = append(b.buf, buf[:n]...)
+		if len(b.buf) >= i+1 {
+			break
+		}
+	}
+	return true
+}
diff --git a/stream/stream.go b/stream/stream.go
new file mode 100644
index 0000000..9759cce
--- /dev/null
+++ b/stream/stream.go
@@ -0,0 +1,54 @@
+package stream
+
+// Stream represents a stream of bytes. Its interface is analogous to
+// bufio.Scanner. Here is an example for how to read all the bytes in a stream
+// (and print them one by one):
+//
+//     s := BufferedStream(reader)
+//     for s.Advance(1) {
+//         fmt.Println(s.Byte())
+//     }
+//     if s.Err() != nil {
+//         return s.Err()
+//     }
+//
+type Stream interface {
+
+	// Byte returns the current byte in the stream. This method will panic if
+	// Advance has not been called before this method or Advance has returned
+	// false.
+	Byte() byte
+
+	// Position returns the position of the current byte: text counter, line,
+	// and column. This method will panic if Advance has not been called before
+	// this method or Advance has returned false.
+	Position() (tc, line, column int)
+
+	// Peek returns the byte at the current cursor + lookahead in the stream if
+	// one exists. If lookahead <= 0, Peek will panic; if lookahead == 1, it
+	// returns the next byte, and so on. Peek does not advance the cursor. If
+	// there are no further bytes in the stream (or lookahead causes a read
+	// past the end of the stream) Peek returns has == false. You may call this
+	// method before Advance.
+	Peek(lookahead int) (char byte, has bool)
+
+	// Advance moves the cursor i bytes forward in the stream. If there is a
+	// byte to read it returns true. If it reaches the end of the stream (EOS)
+	// it returns false. Advance with i > the number of bytes remaining moves
+	// the cursor to the end of stream (may be less than i) and returns false
+	// (as you cannot read past the end of the stream). Advance must be called
+	// with positive movement otherwise it will panic.
+	Advance(i int) bool
+
+	// Started returns true if at least 1 call to Advance has been made.
+	Started() bool
+
+	// EOS returns true if the stream has reached the end of the stream.
+	EOS() bool
+
+	// Err returns an error if there was an error reading from the underlying
+	// source of the bytes. Panics if called before Advance returns false.
+	// Err() will never return io.EOF (it will be nil in this case -- following
+	// the behavior of ioutil.ReadAll)
+	Err() error
+}
diff --git a/stream/stream_test.go b/stream/stream_test.go
new file mode 100644
index 0000000..19a13cd
--- /dev/null
+++ b/stream/stream_test.go
@@ -0,0 +1,290 @@
+package stream
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestReadFullStream(t *testing.T) {
+	text := "hello world"
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for s.Advance(1) {
+		if err := buf.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != text {
+		t.Fatalf("expect %q got %q", text, buf.String())
+	}
+}
+
+func TestReadEveryOther(t *testing.T) {
+	text := "hello world"
+	expected := "el ol"
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for s.Advance(2) {
+		if err := buf.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != expected {
+		t.Fatalf("expect %q got %q", expected, buf.String())
+	}
+}
+
+func TestReadEvery3(t *testing.T) {
+	text := "hello world"
+	expected := "l r"
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for s.Advance(3) {
+		if err := buf.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != expected {
+		t.Fatalf("expect %q got %q", expected, buf.String())
+	}
+}
+
+func TestPeekTillW(t *testing.T) {
+	text := "hello world"
+	expected := "world"
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if b == 'w' {
+			s.Advance(i)
+			break
+		}
+	}
+	if s.Byte() != 'w' {
+		t.Fatalf("expected w got %v", s.Byte())
+	}
+	for {
+		if err := buf.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+		if !s.Advance(1) {
+			break
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != expected {
+		t.Fatalf("expect %q got %q", expected, buf.String())
+	}
+}
+
+func TestPeekTillWThenL(t *testing.T) {
+	text := "hello world"
+	expected := "ld"
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if b == 'w' {
+			s.Advance(i)
+			break
+		}
+	}
+	if s.Byte() != 'w' {
+		t.Fatalf("expected w got %v", s.Byte())
+	}
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if b == 'l' {
+			s.Advance(i)
+			break
+		}
+	}
+	if s.Byte() != 'l' {
+		t.Fatalf("expected l got %v", s.Byte())
+	}
+	for {
+		if err := buf.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+		if !s.Advance(1) {
+			break
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != expected {
+		t.Fatalf("expect %q got %q", expected, buf.String())
+	}
+}
+
+func TestPeekTillWThenLThenEnd(t *testing.T) {
+	text := "hello world"
+	expected := ""
+	var buf bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if b == 'w' {
+			s.Advance(i)
+			break
+		}
+	}
+	if s.Byte() != 'w' {
+		t.Fatalf("expected w got %v", s.Byte())
+	}
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if b == 'l' {
+			s.Advance(i)
+			break
+		}
+	}
+	if s.Byte() != 'l' {
+		t.Fatalf("expected l got %v", s.Byte())
+	}
+	for i := 1; ; i++ {
+		_, has := s.Peek(i)
+		if !has {
+			s.Advance(i)
+			break
+		}
+	}
+	if !s.EOS() {
+		t.Fatalf("expected EOS")
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if buf.String() != expected {
+		t.Fatalf("expect %q got %q", expected, buf.String())
+	}
+}
+
+func TestPeekThenReadFullStream(t *testing.T) {
+	text := "hello world"
+	var peek bytes.Buffer
+	var read bytes.Buffer
+	s := BufferedStream(bytes.NewBufferString(text))
+	for i := 1; ; i++ {
+		b, has := s.Peek(i)
+		if !has {
+			break
+		}
+		if err := peek.WriteByte(b); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+	}
+	for s.Advance(1) {
+		if err := read.WriteByte(s.Byte()); err != nil {
+			if err != nil {
+				t.Fatalf("err writing %v", err)
+			}
+		}
+	}
+	if s.Err() != nil {
+		t.Fatalf("stream err %v", s.Err())
+	}
+	if peek.String() != text {
+		t.Fatalf("expect %q got %q", text, peek.String())
+	}
+	if read.String() != text {
+		t.Fatalf("expect %q got %q", text, read.String())
+	}
+}
+
+func TestLineColumns(t *testing.T) {
+	text := `b
+	this
+	is
+	wizard
+`
+	var expected = []struct {
+		tc, line, column int
+		char             byte
+	}{
+		{0, 1, 1, 'b'},
+		{1, 2, 0, '\n'},
+		{2, 2, 1, '\t'},
+		{3, 2, 2, 't'},
+		{4, 2, 3, 'h'},
+		{5, 2, 4, 'i'},
+		{6, 2, 5, 's'},
+		{7, 3, 0, '\n'},
+		{8, 3, 1, '\t'},
+		{9, 3, 2, 'i'},
+		{10, 3, 3, 's'},
+		{11, 4, 0, '\n'},
+		{12, 4, 1, '\t'},
+		{13, 4, 2, 'w'},
+		{14, 4, 3, 'i'},
+		{15, 4, 4, 'z'},
+		{16, 4, 5, 'a'},
+		{17, 4, 6, 'r'},
+		{18, 4, 7, 'd'},
+		{19, 5, 0, '\n'},
+	}
+	s := BufferedStream(bytes.NewBufferString(text))
+	// pre-peek everything just to futz with the interior state
+	for i := 1; ; i++ {
+		_, has := s.Peek(i)
+		if !has {
+			break
+		}
+	}
+	for i := 0; s.Advance(1); i++ {
+		tc, line, column := s.Position()
+		char := s.Byte()
+		if char != expected[i].char {
+			t.Fatalf("got %v expected %v", char, expected[i].char)
+		}
+		if tc != expected[i].tc {
+			t.Fatalf("got %v expected %v", tc, expected[i].tc)
+		}
+		if line != expected[i].line {
+			t.Fatalf("got %v expected %v", line, expected[i].line)
+		}
+		if column != expected[i].column {
+			t.Fatalf("got %v expected %v", column, expected[i].column)
+		}
+	}
+}

From 121765db69dabee2d68b864368fd09ffeee684b5 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Tue, 27 Mar 2018 07:22:04 -0700
Subject: [PATCH 3/9] simplified the scanner

Signed-off-by: Tim Henderson <tadh@google.com>
---
 scanner.go | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/scanner.go b/scanner.go
index e8f72dd..9db5e11 100644
--- a/scanner.go
+++ b/scanner.go
@@ -33,11 +33,6 @@ type Scanner struct {
 	scan    machines.Scanner
 	Text    []byte
 	TC      int
-	pTC     int
-	sLine   int
-	sColumn int
-	eLine   int
-	eColumn int
 }
 
 // Next iterates through the string being scanned returning one token at a time
@@ -74,12 +69,7 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
 			return nil, fmt.Errorf("No match but no error"), false
 		}
 		s.scan = scan
-		s.pTC = s.TC
 		s.TC = tc
-		s.sLine = match.StartLine
-		s.sColumn = match.StartColumn
-		s.eLine = match.EndLine
-		s.eColumn = match.EndColumn
 
 		pattern := s.lexer.patterns[s.matches[match.PC]]
 		token, err = pattern.action(s, match)

From 83f7e7df7e3832e1d702cd8611980065acd6734c Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Tue, 27 Mar 2018 07:22:32 -0700
Subject: [PATCH 4/9] stream now keeps a character buffer

Signed-off-by: Tim Henderson <tadh@google.com>
---
 stream/buffered.go    | 57 +++++++++++++++++++++++++++++++------------
 stream/stream.go      | 22 ++++++++++++++++-
 stream/stream_test.go | 51 +++++++++++++++++++++++++++++++++-----
 3 files changed, 107 insertions(+), 23 deletions(-)

diff --git a/stream/buffered.go b/stream/buffered.go
index a69f8ce..607c2ef 100644
--- a/stream/buffered.go
+++ b/stream/buffered.go
@@ -1,7 +1,6 @@
 package stream
 
 import (
-	"bufio"
 	"fmt"
 	"io"
 	"sync"
@@ -9,19 +8,20 @@ import (
 
 type bufferedStream struct {
 	lock    sync.Mutex
-	r       *bufio.Reader
+	r       io.Reader
 	tc      int
 	line    int
 	column  int
 	started bool
 	eos     bool
-	buf     []byte
+	buf     []Character
 	err     error
 }
 
+// BufferedStream makes a Stream which is backed by an expandable buffer.
 func BufferedStream(r io.Reader) Stream {
 	b := &bufferedStream{
-		r:      bufio.NewReader(r),
+		r:      r,
 		tc:     -1,
 		line:   1,
 		column: 0,
@@ -29,7 +29,20 @@ func BufferedStream(r io.Reader) Stream {
 	return b
 }
 
+// Byte returns the byte at the cursor
 func (b *bufferedStream) Byte() byte {
+	b.lock.Lock()
+	defer b.lock.Unlock()
+	if !b.started {
+		panic(fmt.Errorf("Call to Byte() before first call to Advance"))
+	} else if b.eos {
+		panic(fmt.Errorf("Call to Byte() after first call to Advance returned false"))
+	}
+	return b.buf[0].Byte
+}
+
+// Character returns the character at the cursor
+func (b *bufferedStream) Character() Character {
 	b.lock.Lock()
 	defer b.lock.Unlock()
 	if !b.started {
@@ -40,6 +53,7 @@ func (b *bufferedStream) Byte() byte {
 	return b.buf[0]
 }
 
+// Position gives the current position of the cursor
 func (b *bufferedStream) Position() (tc, line, column int) {
 	b.lock.Lock()
 	defer b.lock.Unlock()
@@ -48,10 +62,11 @@ func (b *bufferedStream) Position() (tc, line, column int) {
 	} else if b.eos {
 		panic(fmt.Errorf("Call to Position() after first call to Advance returned false"))
 	}
-	return b.tc, b.line, b.column
+	return b.buf[0].TC, b.buf[0].Line, b.buf[0].Column
 }
 
-func (b *bufferedStream) Peek(i int) (char byte, has bool) {
+// Peek gets the character at lookahead i
+func (b *bufferedStream) Peek(i int) (char Character, has bool) {
 	b.lock.Lock()
 	defer b.lock.Unlock()
 	if b.eos {
@@ -68,23 +83,27 @@ func (b *bufferedStream) Peek(i int) (char byte, has bool) {
 		return b.buf[i], true
 	}
 	if !b.read(i) {
-		return 0, false
+		return Character{}, false
 	}
 	return b.buf[i], true
 }
 
+// Started indicates if Advance has been called at least once.
 func (b *bufferedStream) Started() bool {
 	b.lock.Lock()
 	defer b.lock.Unlock()
-	return b.eos
+	return b.started
 }
 
+// EOS indicates whether the stream has reached End Of Stream
 func (b *bufferedStream) EOS() bool {
 	b.lock.Lock()
 	defer b.lock.Unlock()
 	return b.eos
 }
 
+// Err returns the error from the underlying io.Reader if io.Read() returned
+// a non-EOF error.
 func (b *bufferedStream) Err() error {
 	b.lock.Lock()
 	defer b.lock.Unlock()
@@ -96,12 +115,14 @@ func (b *bufferedStream) Err() error {
 	return b.err
 }
 
+// Advance moves the cursor forward by i
 func (b *bufferedStream) Advance(i int) bool {
 	b.lock.Lock()
 	defer b.lock.Unlock()
 	return b.advance(i)
 }
 
+// advance moves the cursor forward by i
 func (b *bufferedStream) advance(i int) bool {
 	if i <= 0 {
 		panic(fmt.Errorf("Advance() must be called with positive move got %d", i))
@@ -129,16 +150,11 @@ func (b *bufferedStream) advance(i int) bool {
 			panic(fmt.Errorf("i != 0 (i = %d)", i))
 		}
 	}
-	b.trackPos(b.buf[0])
 	return true
 }
 
-// trims the buffer by up i bytes and returns the number of
-// bytes trimmed.
+// trims the buffer by up i bytes and returns the number of bytes trimmed.
 func (b *bufferedStream) trimBuffer(i int) int {
-	for j := 1; j < i && j < len(b.buf); j++ {
-		b.trackPos(b.buf[j])
-	}
 	if len(b.buf) > i {
 		// we already recorded the position
 		// of b.buf[0]. we need to track all the chars
@@ -154,8 +170,8 @@ func (b *bufferedStream) trimBuffer(i int) int {
 	return 0
 }
 
-// updates the position information for the given character.
-// only call once per character in the stream.
+// updates the position information for the given character.  only call once
+// per character in the stream.
 func (b *bufferedStream) trackPos(char byte) {
 	b.tc++
 	if char == '\n' {
@@ -166,6 +182,7 @@ func (b *bufferedStream) trackPos(char byte) {
 	}
 }
 
+// fills the buffer from the reader until it holds at least i+1 characters.
 func (b *bufferedStream) read(i int) bool {
 	if b.eos {
 		return false
@@ -180,7 +197,15 @@ func (b *bufferedStream) read(i int) bool {
 			}
 			return false
 		}
-		b.buf = append(b.buf, buf[:n]...)
+		for _, c := range buf[:n] {
+			b.trackPos(c)
+			b.buf = append(b.buf, Character{
+				Byte:   c,
+				TC:     b.tc,
+				Line:   b.line,
+				Column: b.column,
+			})
+		}
 		if len(b.buf) >= i+1 {
 			break
 		}
diff --git a/stream/stream.go b/stream/stream.go
index 9759cce..12a2f02 100644
--- a/stream/stream.go
+++ b/stream/stream.go
@@ -1,5 +1,7 @@
 package stream
 
+import "fmt"
+
 // Stream represents a stream of bytes. Its interface is analogous to
 // bufio.Scanner. Here is an example for how to read all the bytes in a stream
 // (and print them one by one):
@@ -19,6 +21,11 @@ type Stream interface {
 	// false.
 	Byte() byte
 
+	// Character returns the current byte in the stream together with its
+	// position. This method will panic if Advance has not been called before
+	// this method or Advance has returned false.
+	Character() Character
+
 	// Position returns the position of the current byte: text counter, line,
 	// and column. This method will panic if Advance has not been called before
 	// this method or Advance has returned false.
@@ -30,7 +37,7 @@ type Stream interface {
 	// there are no further bytes in the stream (or lookahead causes a read
 	// past the end of the stream) Peek returns has == false. You may call this
 	// method before Advance.
-	Peek(lookahead int) (char byte, has bool)
+	Peek(lookahead int) (char Character, has bool)
 
 	// Advance moves the cursor i bytes forward in the stream. If there is a
 	// byte to read it returns true. If it reaches the end of the stream (EOS)
@@ -52,3 +59,16 @@ type Stream interface {
 	// the behavior of ioutil.ReadAll)
 	Err() error
 }
+
+// Character represents one byte in a stream with position information.
+type Character struct {
+	Byte   byte
+	TC     int
+	Line   int
+	Column int
+}
+
+// String humanizes the character
+func (c Character) String() string {
+	return fmt.Sprintf("<%q tc:%d @ %d:%d>", c.Byte, c.TC, c.Line, c.Column)
+}
diff --git a/stream/stream_test.go b/stream/stream_test.go
index 19a13cd..bc4b004 100644
--- a/stream/stream_test.go
+++ b/stream/stream_test.go
@@ -74,7 +74,7 @@ func TestPeekTillW(t *testing.T) {
 		if !has {
 			break
 		}
-		if b == 'w' {
+		if b.Byte == 'w' {
 			s.Advance(i)
 			break
 		}
@@ -110,7 +110,7 @@ func TestPeekTillWThenL(t *testing.T) {
 		if !has {
 			break
 		}
-		if b == 'w' {
+		if b.Byte == 'w' {
 			s.Advance(i)
 			break
 		}
@@ -123,7 +123,7 @@ func TestPeekTillWThenL(t *testing.T) {
 		if !has {
 			break
 		}
-		if b == 'l' {
+		if b.Byte == 'l' {
 			s.Advance(i)
 			break
 		}
@@ -159,7 +159,7 @@ func TestPeekTillWThenLThenEnd(t *testing.T) {
 		if !has {
 			break
 		}
-		if b == 'w' {
+		if b.Byte == 'w' {
 			s.Advance(i)
 			break
 		}
@@ -172,7 +172,7 @@ func TestPeekTillWThenLThenEnd(t *testing.T) {
 		if !has {
 			break
 		}
-		if b == 'l' {
+		if b.Byte == 'l' {
 			s.Advance(i)
 			break
 		}
@@ -208,7 +208,7 @@ func TestPeekThenReadFullStream(t *testing.T) {
 		if !has {
 			break
 		}
-		if err := peek.WriteByte(b); err != nil {
+		if err := peek.WriteByte(b.Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -288,3 +288,42 @@ func TestLineColumns(t *testing.T) {
 		}
 	}
 }
+
+func TestEveryOtherLineColumns(t *testing.T) {
+	text := `b
+	this
+	is
+	wizard
+`
+	var expected = []struct {
+		tc, line, column int
+		char             byte
+	}{
+		{1, 2, 0, '\n'},
+		{3, 2, 2, 't'},
+		{5, 2, 4, 'i'},
+		{7, 3, 0, '\n'},
+		{9, 3, 2, 'i'},
+		{11, 4, 0, '\n'},
+		{13, 4, 2, 'w'},
+		{15, 4, 4, 'z'},
+		{17, 4, 6, 'r'},
+		{19, 5, 0, '\n'},
+	}
+	s := BufferedStream(bytes.NewBufferString(text))
+	for i := 0; s.Advance(2); i++ {
+		c := s.Character()
+		if c.Byte != expected[i].char {
+			t.Fatalf("got %v expected %v", c.Byte, expected[i].char)
+		}
+		if c.TC != expected[i].tc {
+			t.Fatalf("got %v expected %v", c.TC, expected[i].tc)
+		}
+		if c.Line != expected[i].line {
+			t.Fatalf("got %v expected %v", c.Line, expected[i].line)
+		}
+		if c.Column != expected[i].column {
+			t.Fatalf("got %v expected %v", c.Column, expected[i].column)
+		}
+	}
+}

From cce23c42cd51be5956b05ce242c92ca47a43aeae Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Tue, 27 Mar 2018 07:22:55 -0700
Subject: [PATCH 5/9] initial cut at DFA stream machine

Signed-off-by: Tim Henderson <tadh@google.com>
---
 stream_machines/dfa_machine.go | 97 ++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 stream_machines/dfa_machine.go

diff --git a/stream_machines/dfa_machine.go b/stream_machines/dfa_machine.go
new file mode 100644
index 0000000..33e22f6
--- /dev/null
+++ b/stream_machines/dfa_machine.go
@@ -0,0 +1,97 @@
+package stream_machines
+
+import (
+	"github.com/timtadh/lexmachine/machines"
+	"github.com/timtadh/lexmachine/stream"
+)
+
+type Scanner func() (*machines.Match, error, Scanner)
+
+// DFALexerEngine does the actual tokenization of the byte slice text using the
+// DFA state machine. If the lexing process fails the Scanner will return
+// an UnconsumedInput error.
+func DFALexerEngine(startState, errorState int, trans machines.DFATrans, accepting machines.DFAAccepting, text stream.Stream) Scanner {
+	var scan Scanner
+	scan = func() (*machines.Match, error, Scanner) {
+		if text.EOS() {
+			return nil, nil, nil
+		}
+		buf := make([]stream.Character, 0, 10)
+		matchID := -1
+		matchLH := -1
+		state := startState
+		if match, has := accepting[state]; has {
+			matchID = match
+			matchLH = -1
+		}
+		if !text.Started() {
+			if !text.Advance(1) {
+				return nil, nil, nil
+			}
+		}
+		startChar := text.Character()
+		buf = append(buf, startChar)
+		state = trans[state][startChar.Byte]
+		if match, has := accepting[state]; has {
+			matchID = match
+			matchLH = 0
+		}
+		for lh := 1; state != errorState; lh++ {
+			c, has := text.Peek(lh)
+			if !has {
+				break
+			}
+			buf = append(buf, c)
+			state = trans[state][c.Byte]
+			if match, has := accepting[state]; has {
+				matchID = match
+				matchLH = lh
+			}
+		}
+		if match, has := accepting[state]; has {
+			matchID = match
+			matchLH = len(buf) - 1
+		}
+		if matchLH == -1 && matchID > -1 {
+			err := &machines.EmptyMatchError{
+				MatchID: matchID,
+				TC:      buf[0].TC,
+				Line:    buf[0].Line,
+				Column:  buf[0].Column,
+			}
+			return nil, err, scan
+		} else if matchID > -1 && matchLH >= 0 {
+			lexeme := make([]byte, 0, matchLH+1)
+			for _, c := range buf[:matchLH+1] {
+				lexeme = append(lexeme, c.Byte)
+			}
+			match := &machines.Match{
+				PC:          matchID,
+				TC:          buf[0].TC,
+				StartLine:   buf[0].Line,
+				StartColumn: buf[0].Column,
+				EndLine:     buf[matchLH].Line,
+				EndColumn:   buf[matchLH].Column,
+				Bytes:       lexeme,
+			}
+			text.Advance(matchLH + 1)
+			return match, nil, scan
+		} else {
+			lexeme := make([]byte, 0, len(buf))
+			for _, c := range buf {
+				lexeme = append(lexeme, c.Byte)
+			}
+			err := &machines.UnconsumedInput{
+				StartTC:     buf[0].TC,
+				FailTC:      buf[len(buf)-1].TC,
+				StartLine:   buf[0].Line,
+				StartColumn: buf[0].Column,
+				FailLine:    buf[len(buf)-1].Line,
+				FailColumn:  buf[len(buf)-1].Column,
+				Text:        lexeme,
+			}
+			return nil, err, scan
+		}
+	}
+	return scan
+}

From 05cde35d8e43ad011173d6137ec2428ab817c983 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Tue, 27 Mar 2018 08:07:31 -0700
Subject: [PATCH 6/9] prototype changes for support stream lexing

Signed-off-by: Tim Henderson <tadh@google.com>
---
 lexer.go           | 51 +++++++++++++++++++-------
 lexer_test.go      | 91 +++++++++++++++++++++++++++++++---------------
 scanner.go         | 11 ++++--
 stream.go          | 79 ++++++++++++++++++++++++++++++++++++++++
 stream/buffered.go |  2 +-
 5 files changed, 187 insertions(+), 47 deletions(-)

diff --git a/lexer.go b/lexer.go
index 3009966..9dedc43 100644
--- a/lexer.go
+++ b/lexer.go
@@ -2,27 +2,27 @@ package lexmachine
 
 import (
 	"fmt"
-)
 
-import (
 	dfapkg "github.com/timtadh/lexmachine/dfa"
 	"github.com/timtadh/lexmachine/frontend"
 	"github.com/timtadh/lexmachine/inst"
 	"github.com/timtadh/lexmachine/machines"
+	"github.com/timtadh/lexmachine/stream"
+	"github.com/timtadh/lexmachine/stream_machines"
 )
 
+type pattern struct {
+	regex  []byte
+	action Action
+}
+
 // An Action is a function which get called when the Scanner finds a match
 // during the lexing process. They turn a low level machines.Match struct into
 // a token for the users program. As different compilers/interpretters/parsers
 // have different needs Actions merely return an interface{}. This allows you
 // to represent a token in anyway you wish. An example Token struct is provided
 // above.
-type Action func(scan *Scanner, match *machines.Match) (interface{}, error)
-
-type pattern struct {
-	regex  []byte
-	action Action
-}
+type Action func(scan Scanner, match *machines.Match) (interface{}, error)
 
 // Lexer is a "builder" object which lets you construct a Scanner type which
 // does the actual work of tokenizing (splitting up and categorizing) a byte
@@ -42,8 +42,8 @@ func NewLexer() *Lexer {
 	return &Lexer{}
 }
 
-// Scanner creates a scanner for a particular byte string from the lexer.
-func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
+// TextScanner creates a scanner for a particular byte string from the lexer.
+func (l *Lexer) TextScanner(text []byte) (*TextScanner, error) {
 	if l.program == nil && l.dfa == nil {
 		err := l.Compile()
 		if err != nil {
@@ -55,9 +55,9 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
 	textCopy := make([]byte, len(text))
 	copy(textCopy, text)
 
-	var s *Scanner
+	var s *TextScanner
 	if l.dfa != nil {
-		s = &Scanner{
+		s = &TextScanner{
 			lexer:   l,
 			matches: l.dfaMatches,
 			scan:    machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, textCopy),
@@ -65,7 +65,7 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
 			TC:      0,
 		}
 	} else {
-		s = &Scanner{
+		s = &TextScanner{
 			lexer:   l,
 			matches: l.nfaMatches,
 			scan:    machines.LexerEngine(l.program, textCopy),
@@ -76,6 +76,29 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
 	return s, nil
 }
 
+// StreamScanner creates a scanner for a particular stream from the lexer.
+func (l *Lexer) StreamScanner(text stream.Stream) (*StreamScanner, error) {
+	if l.program == nil && l.dfa == nil {
+		err := l.Compile()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	var s *StreamScanner
+	if l.dfa != nil {
+		s = &StreamScanner{
+			lexer:   l,
+			matches: l.dfaMatches,
+			scan:    stream_machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, text),
+			Text:    text,
+		}
+	} else {
+		panic("not implemented")
+	}
+	return s, nil
+}
+
 // Add pattern to match on. When a match occurs during scanning the action
 // function will be called by the Scanner to turn the low level machines.Match
 // struct into a token.
@@ -180,7 +203,7 @@ func (l *Lexer) CompileDFA() error {
 }
 
 func (l *Lexer) matchesEmptyString() (bool, error) {
-	s, err := l.Scanner([]byte(""))
+	s, err := l.TextScanner([]byte(""))
 	if err != nil {
 		return false, err
 	}
diff --git a/lexer_test.go b/lexer_test.go
index 76749ae..f3f20ac 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -1,6 +1,7 @@
 package lexmachine
 
 import (
+	"bytes"
 	"fmt"
 	"strconv"
 	"strings"
@@ -8,6 +9,7 @@ import (
 
 	"github.com/timtadh/data-structures/test"
 	"github.com/timtadh/lexmachine/machines"
+	"github.com/timtadh/lexmachine/stream"
 )
 
 func TestSimple(x *testing.T) {
@@ -22,25 +24,25 @@ func TestSimple(x *testing.T) {
 
 	lexer.Add(
 		[]byte("print"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			return scan.Token(PRINT, nil, match), nil
 		},
 	)
 	lexer.Add(
 		[]byte("([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			return scan.Token(NAME, string(match.Bytes), match), nil
 		},
 	)
 	lexer.Add(
 		[]byte("="),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			return scan.Token(EQUALS, nil, match), nil
 		},
 	)
 	lexer.Add(
 		[]byte("[0-9]+"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			i, err := strconv.Atoi(string(match.Bytes))
 			if err != nil {
 				return nil, err
@@ -50,29 +52,60 @@ func TestSimple(x *testing.T) {
 	)
 	lexer.Add(
 		[]byte("( |\t|\n)"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			// skip white space
 			return nil, nil
 		},
 	)
 	lexer.Add(
 		[]byte("//[^\n]*\n"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
+		func(scan Scanner, match *machines.Match) (interface{}, error) {
 			// skip white space
 			return nil, nil
 		},
 	)
 	lexer.Add(
 		[]byte("/\\*"),
-		func(scan *Scanner, match *machines.Match) (interface{}, error) {
-			for tc := scan.TC; tc < len(scan.Text); tc++ {
-				if scan.Text[tc] == '\\' {
+		//func(s Scanner, match *machines.Match) (interface{}, error) {
+		//	scan := s.(*TextScanner)
+		//	for tc := scan.TC; tc < len(scan.Text); tc++ {
+		//		if scan.Text[tc] == '\\' {
+		//			// the next character is skipped
+		//			tc++
+		//		} else if scan.Text[tc] == '*' && tc+1 < len(scan.Text) {
+		//			if scan.Text[tc+1] == '/' {
+		//				scan.TC = tc + 2
+		//				return nil, nil
+		//			}
+		//		}
+		//	}
+		//	return nil,
+		//		fmt.Errorf("unclosed comment starting at %d, (%d, %d)",
+		//			match.TC, match.StartLine, match.StartColumn)
+		//},
+		func(s Scanner, match *machines.Match) (interface{}, error) {
+			scan := s.(*StreamScanner)
+			if scan.Text.EOS() {
+				return nil,
+					fmt.Errorf("unclosed comment starting at %d, (%d, %d)",
+						match.TC, match.StartLine, match.StartColumn)
+			}
+			buf := make([]stream.Character, 0, 10)
+			buf = append(buf, scan.Text.Character())
+			for lh := 1; ; lh++ {
+				c, has := scan.Text.Peek(lh)
+				if !has {
+					break
+				}
+				if c.Byte == '\\' {
 					// the next character is skipped
-					tc++
-				} else if scan.Text[tc] == '*' && tc+1 < len(scan.Text) {
-					if scan.Text[tc+1] == '/' {
-						scan.TC = tc + 2
-						return nil, nil
+					lh++
+				} else if c.Byte == '*' {
+					if n, has := scan.Text.Peek(lh + 1); has {
+						if n.Byte == '/' {
+							scan.Text.Advance(lh + 2)
+							return nil, nil
+						}
 					}
 				}
 			}
@@ -113,7 +146,7 @@ func TestSimple(x *testing.T) {
 	}
 
 	scan := func(lexer *Lexer) {
-		scanner, err := lexer.Scanner(text)
+		scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text)))
 		if err != nil {
 			t.Error(err)
 			t.Log(lexer.program.Serialize())
@@ -132,9 +165,9 @@ func TestSimple(x *testing.T) {
 		}
 	}
 
-	// first do the test with the NFA
-	t.AssertNil(lexer.CompileNFA())
-	scan(lexer)
+	// // first do the test with the NFA
+	// t.AssertNil(lexer.CompileNFA())
+	// scan(lexer)
 
 	// then do the test with the DFA
 	lexer.program = nil
@@ -216,7 +249,7 @@ func TestPartialLexer(x *testing.T) {
 	}
 
 	getToken := func(tokenType int) Action {
-		return func(s *Scanner, m *machines.Match) (interface{}, error) {
+		return func(s Scanner, m *machines.Match) (interface{}, error) {
 			return s.Token(tokenType, string(m.Bytes), m), nil
 		}
 	}
@@ -229,7 +262,7 @@ func TestPartialLexer(x *testing.T) {
 	lexer.Add([]byte("[A-Za-z$][A-Za-z0-9$]+"), getToken(tokmap["IDENT"]))
 	lexer.Add([]byte(">=|<=|=|>|<|\\|\\||&&"), getToken(tokmap["OP"]))
 	scan := func(lexer *Lexer) {
-		scanner, err := lexer.Scanner([]byte(text))
+		scanner, err := lexer.TextScanner([]byte(text))
 		t.AssertNil(err)
 		i := 0
 		for tk, err, eof := scanner.Next(); !eof; tk, err, eof = scanner.Next() {
@@ -256,7 +289,7 @@ func TestPartialLexer(x *testing.T) {
 
 func TestRegression(t *testing.T) {
 	token := func(name string) Action {
-		return func(s *Scanner, m *machines.Match) (interface{}, error) {
+		return func(s Scanner, m *machines.Match) (interface{}, error) {
 			return fmt.Sprintf("%v:%q", name, string(m.Bytes)), nil
 		}
 	}
@@ -278,7 +311,7 @@ func TestRegression(t *testing.T) {
 
 	runTest := func(lexer *Lexer) {
 		for _, test := range tests {
-			scanner, err := lexer.Scanner([]byte(test.text))
+			scanner, err := lexer.TextScanner([]byte(test.text))
 			if err != nil {
 				t.Fatal(err)
 			}
@@ -356,11 +389,11 @@ ddns-update-style none;
 	newLexer := func() *Lexer {
 		lex := NewLexer()
 
-		skip := func(*Scanner, *machines.Match) (interface{}, error) {
+		skip := func(Scanner, *machines.Match) (interface{}, error) {
 			return nil, nil
 		}
 		token := func(name string) Action {
-			return func(s *Scanner, m *machines.Match) (interface{}, error) {
+			return func(s Scanner, m *machines.Match) (interface{}, error) {
 				return s.Token(tokenIds[name], string(m.Bytes), m), nil
 			}
 		}
@@ -376,7 +409,7 @@ ddns-update-style none;
 	}
 
 	runTest := func(lexer *Lexer) {
-		scanner, err := lexer.Scanner([]byte(text))
+		scanner, err := lexer.TextScanner([]byte(text))
 		if err != nil {
 			return
 		}
@@ -425,11 +458,11 @@ func TestPythonStrings(t *testing.T) {
 	for i, tok := range tokens {
 		tokenIds[tok] = i
 	}
-	skip := func(*Scanner, *machines.Match) (interface{}, error) {
+	skip := func(Scanner, *machines.Match) (interface{}, error) {
 		return nil, nil
 	}
 	token := func(name string) Action {
-		return func(s *Scanner, m *machines.Match) (interface{}, error) {
+		return func(s Scanner, m *machines.Match) (interface{}, error) {
 			return s.Token(tokenIds[name], string(m.Bytes), m), nil
 		}
 	}
@@ -468,7 +501,7 @@ func TestPythonStrings(t *testing.T) {
 	runTest := func(lexer *Lexer) {
 		for _, test := range tests {
 			fmt.Printf("test %q\n", test.text)
-			scanner, err := lexer.Scanner([]byte(test.text))
+			scanner, err := lexer.TextScanner([]byte(test.text))
 			if err != nil {
 				t.Fatal(err)
 			}
@@ -516,7 +549,7 @@ func TestPythonStrings(t *testing.T) {
 }
 
 func TestNoEmptyStrings(t *testing.T) {
-	skip := func(*Scanner, *machines.Match) (interface{}, error) {
+	skip := func(Scanner, *machines.Match) (interface{}, error) {
 		return nil, nil
 	}
 	lexer := NewLexer()
diff --git a/scanner.go b/scanner.go
index 9db5e11..4c35fd9 100644
--- a/scanner.go
+++ b/scanner.go
@@ -6,6 +6,11 @@ import (
 	"github.com/timtadh/lexmachine/machines"
 )
 
+type Scanner interface {
+	Next() (tok interface{}, err error, eos bool)
+	Token(typ int, value interface{}, m *machines.Match) *Token
+}
+
 // Scanner tokenizes a byte string based on the patterns provided to the lexer
 // object which constructed the scanner. This object works as functional
 // iterator using the Next method.
@@ -27,7 +32,7 @@ import (
 //         fmt.Println(tok)
 //     }
 //
-type Scanner struct {
+type TextScanner struct {
 	lexer   *Lexer
 	matches map[int]int
 	scan    machines.Scanner
@@ -57,7 +62,7 @@ type Scanner struct {
 //
 // For more information on functional iterators see:
 // http://hackthology.com/functional-iteration-in-go.html
-func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
+func (s *TextScanner) Next() (tok interface{}, err error, eos bool) {
 	var token interface{}
 	for token == nil {
 		tc, match, err, scan := s.scan(s.TC)
@@ -81,7 +86,7 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
 }
 
 // Token is a helper function for constructing a Token type inside of a Action.
-func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token {
+func (s *TextScanner) Token(typ int, value interface{}, m *machines.Match) *Token {
 	return &Token{
 		Type:        typ,
 		Value:       value,
diff --git a/stream.go b/stream.go
index 753de52..1617cd0 100644
--- a/stream.go
+++ b/stream.go
@@ -1 +1,80 @@
 package lexmachine
+
+import (
+	"fmt"
+
+	"github.com/timtadh/lexmachine/machines"
+	"github.com/timtadh/lexmachine/stream"
+	"github.com/timtadh/lexmachine/stream_machines"
+)
+
+// StreamScanner tokenizes a stream of bytes (see stream.Stream) which can be
+// constructed from an io.Reader. This object works analogously to the
+// TextScanner. Note: if the stream you are scanning fits in memory, using a
+// TextScanner is likely more efficient. Finally, stream.Stream objects can
+// only advance the text forwards so an Action cannot move the text counter
+// backwards (as is possible with TextScanner).
+type StreamScanner struct {
+	lexer   *Lexer
+	matches map[int]int
+	scan    stream_machines.Scanner
+	Text    stream.Stream
+}
+
+// Next iterates through the string being scanned returning one token at a time
+// until either an error is encountered or the end of the string is reached.
+// The token is returned by the tok value. An error is indicated by err.
+// Finally, eos (a bool) indicates the End Of String when it returns as true.
+//
+// Example
+//
+//     for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
+//         if err != nil {
+//             // handle the error and exit the loop. For example:
+//             return err
+//         }
+//         // do some processing on tok or store it somewhere. eg.
+//         fmt.Println(tok)
+//     }
+//
+// One useful error type which could be returned by Next() is a
+// machines.UnconsumedInput which provides the position information for where in
+// the text the scanning failed.
+//
+// For more information on functional iterators see:
+// http://hackthology.com/functional-iteration-in-go.html
+func (s *StreamScanner) Next() (tok interface{}, err error, eos bool) {
+	var token interface{}
+	for token == nil {
+		match, err, scan := s.scan()
+		if scan == nil {
+			return nil, nil, true
+		} else if err != nil {
+			return nil, err, false
+		} else if match == nil {
+			return nil, fmt.Errorf("No match but no error"), false
+		}
+		s.scan = scan
+
+		pattern := s.lexer.patterns[s.matches[match.PC]]
+		token, err = pattern.action(s, match)
+		if err != nil {
+			return nil, err, false
+		}
+	}
+	return token, nil, false
+}
+
+// Token is a helper function for constructing a Token type inside of a Action.
+func (s *StreamScanner) Token(typ int, value interface{}, m *machines.Match) *Token {
+	return &Token{
+		Type:        typ,
+		Value:       value,
+		Lexeme:      m.Bytes,
+		TC:          m.TC,
+		StartLine:   m.StartLine,
+		StartColumn: m.StartColumn,
+		EndLine:     m.EndLine,
+		EndColumn:   m.EndColumn,
+	}
+}
diff --git a/stream/buffered.go b/stream/buffered.go
index 607c2ef..2397f02 100644
--- a/stream/buffered.go
+++ b/stream/buffered.go
@@ -92,7 +92,7 @@ func (b *bufferedStream) Peek(i int) (char Character, has bool) {
 func (b *bufferedStream) Started() bool {
 	b.lock.Lock()
 	defer b.lock.Unlock()
-	return b.eos
+	return b.started
 }
 
 // EOS indicates whether the stream has reached End Of Stream

From b762b7bd420496ba85cefbacf0aa30b8e30f2421 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Wed, 28 Mar 2018 09:08:35 -0700
Subject: [PATCH 7/9] Updated stream API to be easier to use.

Not being able to call Peek(0) was a real pain and made using streams
much more difficult. This changes the behavior of streams to make
Peek(0) be ok but Peek() can only be called after Advance(>=1).

Signed-off-by: Tim Henderson <tadh@google.com>
---
 stream/buffered.go             | 21 +++++++++++----------
 stream/stream.go               | 17 ++++++++++-------
 stream/stream_test.go          | 31 ++++++++++++++++++++++++-------
 stream_machines/dfa_machine.go |  9 +--------
 4 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/stream/buffered.go b/stream/buffered.go
index 2397f02..8156dae 100644
--- a/stream/buffered.go
+++ b/stream/buffered.go
@@ -69,15 +69,13 @@ func (b *bufferedStream) Position() (tc, line, column int) {
 func (b *bufferedStream) Peek(i int) (char Character, has bool) {
 	b.lock.Lock()
 	defer b.lock.Unlock()
-	if b.eos {
-		panic(fmt.Errorf("Call to Byte() after first call to Advance returned false"))
-	}
-	if i <= 0 {
-		panic(fmt.Errorf("Peek() must be called with positive lookahead got %d", i))
-	}
-	// the "cursor" technically starts at -1, this does that adjustment
 	if !b.started {
-		i--
+		panic(fmt.Errorf("Call to Peek() before first call to Advance"))
+	} else if b.eos {
+		panic(fmt.Errorf("Call to Peek() after first call to Advance returned false"))
+	}
+	if i < 0 {
+		panic(fmt.Errorf("Peek() must be called with lookahead >= 0 got %d", i))
 	}
 	if len(b.buf) >= i+1 {
 		return b.buf[i], true
@@ -124,8 +122,11 @@ func (b *bufferedStream) Advance(i int) bool {
 
 // advance moves the cursor forward by i
 func (b *bufferedStream) advance(i int) bool {
-	if i <= 0 {
-		panic(fmt.Errorf("Advance() must be called with positive move got %d", i))
+	if i == 0 {
+		return true
+	}
+	if i < 0 {
+		panic(fmt.Errorf("Advance() must be called with move >= 0 got %d", i))
 	}
 	// the "cursor" technically starts at -1, this does that adjustment
 	if !b.started {
diff --git a/stream/stream.go b/stream/stream.go
index 12a2f02..6c58bcb 100644
--- a/stream/stream.go
+++ b/stream/stream.go
@@ -32,11 +32,12 @@ type Stream interface {
 	Position() (tc, line, column int)
 
 	// Peek returns byte at the current cursor + the lookahead in the stream if
-	// one exists. If lookahead == 0, Peek will panic, if lookahead == 1, it
-	// returns the next byte, and so on. Peek does not advance the cursor. If
-	// there are no further bytes in the stream (or lookahead causes a read
-	// past the end of the stream) Peek returns has == false. You may call this
-	// method before Advance.
+	// one exists. If lookahead == 0, it returns the same character Character()
+	// returns. If lookahead == 1, it returns the next byte, and so on. Peek
+	// does not advance the cursor. If there are no further bytes in the stream
+	// (or lookahead causes a read past the end of the stream) Peek returns has
+	// == false. If you call Peek() before Advance() has been called it will
+	// panic.
 	Peek(lookahead int) (char Character, has bool)
 
 	// Advance moves the cursor i bytes forward in the stream. If there is a
@@ -44,10 +45,12 @@ type Stream interface {
 	// it returns false. Advance with i > than number of bytes remaining moves
 	// the cursor to the end of stream (may be less than i) and returns false
 	// (as you cannot read past the end of the stream). Advance must be called
-	// with positive movement otherwise it will panic.
+	// with movement >= 0 otherwise it will panic. If Advance is called with
+	// i == 0 it does nothing (in particular it does not mark the stream started).
 	Advance(i int) bool
 
-	// Started returns true if at least 1 call to Advance has been made.
+	// Started returns true if the stream has been started (e.g. a call to
+	// Advance has been made with a positive movement).
 	Started() bool
 
 	// EOS returns true if the stream has reached the end of the stream.
diff --git a/stream/stream_test.go b/stream/stream_test.go
index bc4b004..93d8337 100644
--- a/stream/stream_test.go
+++ b/stream/stream_test.go
@@ -69,7 +69,10 @@ func TestPeekTillW(t *testing.T) {
 	expected := "world"
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
-	for i := 1; ; i++ {
+	if !s.Started() {
+		s.Advance(1)
+	}
+	for i := 0; ; i++ {
 		b, has := s.Peek(i)
 		if !has {
 			break
@@ -105,7 +108,10 @@ func TestPeekTillWThenL(t *testing.T) {
 	expected := "ld"
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
-	for i := 1; ; i++ {
+	if !s.Started() {
+		s.Advance(1)
+	}
+	for i := 0; ; i++ {
 		b, has := s.Peek(i)
 		if !has {
 			break
@@ -154,7 +160,10 @@ func TestPeekTillWThenLThenEnd(t *testing.T) {
 	expected := ""
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
-	for i := 1; ; i++ {
+	if !s.Started() {
+		s.Advance(1)
+	}
+	for i := 0; ; i++ {
 		b, has := s.Peek(i)
 		if !has {
 			break
@@ -203,7 +212,10 @@ func TestPeekThenReadFullStream(t *testing.T) {
 	var peek bytes.Buffer
 	var read bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
-	for i := 1; ; i++ {
+	if !s.Started() {
+		s.Advance(1)
+	}
+	for i := 0; ; i++ {
 		b, has := s.Peek(i)
 		if !has {
 			break
@@ -214,12 +226,13 @@ func TestPeekThenReadFullStream(t *testing.T) {
 			}
 		}
 	}
-	for s.Advance(1) {
+	for !s.EOS() {
 		if err := read.WriteByte(s.Byte()); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
 		}
+		s.Advance(1)
 	}
 	if s.Err() != nil {
 		t.Fatalf("stream err %v", s.Err())
@@ -265,13 +278,16 @@ func TestLineColumns(t *testing.T) {
 	}
 	s := BufferedStream(bytes.NewBufferString(text))
 	// pre-peek everything just to futz with the interior state
-	for i := 1; ; i++ {
+	if !s.Started() {
+		s.Advance(1)
+	}
+	for i := 0; ; i++ {
 		_, has := s.Peek(i)
 		if !has {
 			break
 		}
 	}
-	for i := 0; s.Advance(1); i++ {
+	for i := 0; !s.EOS(); i++ {
 		tc, line, column := s.Position()
 		char := s.Byte()
 		if char != expected[i].char {
@@ -286,6 +302,7 @@ func TestLineColumns(t *testing.T) {
 		if column != expected[i].column {
 			t.Fatalf("got %v expected %v", column, expected[i].column)
 		}
+		s.Advance(1)
 	}
 }
 
diff --git a/stream_machines/dfa_machine.go b/stream_machines/dfa_machine.go
index 33e22f6..2c845b6 100644
--- a/stream_machines/dfa_machine.go
+++ b/stream_machines/dfa_machine.go
@@ -29,14 +29,7 @@ func DFALexerEngine(startState, errorState int, trans machines.DFATrans, accepti
 				return nil, nil, nil
 			}
 		}
-		startChar := text.Character()
-		buf = append(buf, startChar)
-		state = trans[state][startChar.Byte]
-		if match, has := accepting[state]; has {
-			matchID = match
-			matchLH = 0
-		}
-		for lh := 1; state != errorState; lh++ {
+		for lh := 0; state != errorState; lh++ {
 			c, has := text.Peek(lh)
 			if !has {
 				break

From 8c242ca51392dece6d88f10a46915cf19eccba01 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Wed, 28 Mar 2018 09:47:42 -0700
Subject: [PATCH 8/9] attempt to harmonize Stream/Text scanning

Signed-off-by: Tim Henderson <tadh@google.com>
---
 buffer.go     |  84 +++++++++++++++++++++++++++++++++++++++
 lexer_test.go | 107 ++++++++++++++++++++++++++++++++------------------
 scanner.go    |  12 ++++++
 stream.go     |  11 ++++++
 4 files changed, 175 insertions(+), 39 deletions(-)
 create mode 100644 buffer.go

diff --git a/buffer.go b/buffer.go
new file mode 100644
index 0000000..609d876
--- /dev/null
+++ b/buffer.go
@@ -0,0 +1,84 @@
+package lexmachine
+
+import (
+	"fmt"
+
+	"github.com/timtadh/lexmachine/stream"
+)
+
+// Buffer abstracts two implementations of "text": SliceBuffer and StreamBuffer.
+type Buffer interface {
+	Byte(i int) byte
+	HasByte(i int) bool
+	TC() int
+	SetTC(i int)
+}
+
+type SliceBuffer struct {
+	Text        []byte
+	TextCounter int
+}
+
+func sliceBuffer(text []byte, tc int) *SliceBuffer {
+	return &SliceBuffer{
+		Text:        text,
+		TextCounter: tc,
+	}
+}
+
+func (s *SliceBuffer) Byte(i int) byte {
+	return s.Text[i]
+}
+
+func (s *SliceBuffer) HasByte(i int) bool {
+	return i >= 0 && i < len(s.Text)
+}
+
+func (s *SliceBuffer) TC() int {
+	return s.TextCounter
+}
+
+func (s *SliceBuffer) SetTC(tc int) {
+	s.TextCounter = tc
+}
+
+func (s *SliceBuffer) finalize() int {
+	return s.TextCounter
+}
+
+type StreamBuffer struct {
+	Text      stream.Stream
+	Lookahead int
+}
+
+func streamBuffer(text stream.Stream) *StreamBuffer {
+	return &StreamBuffer{
+		Text:      text,
+		Lookahead: 0,
+	}
+}
+
+func (s *StreamBuffer) Byte(i int) byte {
+	c, has := s.Text.Peek(i)
+	if !has {
+		panic(fmt.Errorf("read past the end of the buffer"))
+	}
+	return c.Byte
+}
+
+func (s *StreamBuffer) HasByte(i int) bool {
+	_, has := s.Text.Peek(i)
+	return has
+}
+
+func (s *StreamBuffer) TC() int {
+	return s.Lookahead
+}
+
+func (s *StreamBuffer) SetTC(tc int) {
+	s.Lookahead = tc
+}
+
+func (s *StreamBuffer) finalize() {
+	s.Text.Advance(s.Lookahead)
+}
diff --git a/lexer_test.go b/lexer_test.go
index f3f20ac..7f3ab5f 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -84,34 +84,50 @@ func TestSimple(x *testing.T) {
 		//			match.TC, match.StartLine, match.StartColumn)
 		//},
 		func(s Scanner, match *machines.Match) (interface{}, error) {
-			scan := s.(*StreamScanner)
-			if scan.Text.EOS() {
-				return nil,
-					fmt.Errorf("unclosed comment starting at %d, (%d, %d)",
-						match.TC, match.StartLine, match.StartColumn)
-			}
-			buf := make([]stream.Character, 0, 10)
-			buf = append(buf, scan.Text.Character())
-			for lh := 1; ; lh++ {
-				c, has := scan.Text.Peek(lh)
-				if !has {
-					break
+			text := s.Buffer()
+			buf := make([]byte, 0, 10)
+			buf = append(buf, match.Bytes...)
+			open := 1
+			tc := text.TC()
+			for ; open > 0; tc++ {
+				if !text.HasByte(tc) {
+					return nil,
+						fmt.Errorf("unclosed comment starting at %d, (%d, %d) containing %q",
+							match.TC, match.StartLine, match.StartColumn, buf)
 				}
-				if c.Byte == '\\' {
+				char := text.Byte(tc)
+				buf = append(buf, char)
+				if char == '\\' {
 					// the next character is skipped
-					lh++
-				} else if c.Byte == '*' {
-					if n, has := scan.Text.Peek(lh + 1); has {
-						if n.Byte == '/' {
-							scan.Text.Advance(lh + 2)
-							return nil, nil
+					tc++
+					if text.HasByte(tc) {
+						buf = append(buf, text.Byte(tc))
+					}
+				} else if char == '/' {
+					if text.HasByte(tc + 1) {
+						next := text.Byte(tc + 1)
+						if next == '*' {
+							buf = append(buf, next)
+							tc++
+							open++
+							continue
+						}
+					}
+				} else if char == '*' {
+					if text.HasByte(tc + 1) {
+						next := text.Byte(tc + 1)
+						if next == '/' {
+							buf = append(buf, next)
+							tc++
+							open--
+							continue
 						}
 					}
 				}
 			}
-			return nil,
-				fmt.Errorf("unclosed comment starting at %d, (%d, %d)",
-					match.TC, match.StartLine, match.StartColumn)
+			fmt.Printf("%q\n", buf)
+			text.SetTC(tc)
+			return nil, nil
 		},
 	)
 
@@ -121,10 +137,10 @@ func TestSimple(x *testing.T) {
 		print fred
 		name =12
 		// asdf comment
-		/*awef  oiwe
+		/**//*awef  oiwe
 		 ooiwje \*/ weoi
 		 weoi*/ printname = 13
-		print printname
+		print printname/*/**/*/
 	`)
 
 	expected := []*Token{
@@ -138,20 +154,14 @@ func TestSimple(x *testing.T) {
 		{NAME, "name", []byte("name"), 41, 5, 3, 5, 6},
 		{EQUALS, nil, []byte("="), 46, 5, 8, 5, 8},
 		{NUMBER, 12, []byte("12"), 47, 5, 9, 5, 10},
-		{NAME, "printname", []byte("printname"), 112, 9, 11, 9, 19},
-		{EQUALS, nil, []byte("="), 122, 9, 21, 9, 21},
-		{NUMBER, 13, []byte("13"), 124, 9, 23, 9, 24},
-		{PRINT, nil, []byte("print"), 129, 10, 3, 10, 7},
-		{NAME, "printname", []byte("printname"), 135, 10, 9, 10, 17},
+		{NAME, "printname", []byte("printname"), 116, 9, 11, 9, 19},
+		{EQUALS, nil, []byte("="), 126, 9, 21, 9, 21},
+		{NUMBER, 13, []byte("13"), 128, 9, 23, 9, 24},
+		{PRINT, nil, []byte("print"), 133, 10, 3, 10, 7},
+		{NAME, "printname", []byte("printname"), 139, 10, 9, 10, 17},
 	}
 
-	scan := func(lexer *Lexer) {
-		scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text)))
-		if err != nil {
-			t.Error(err)
-			t.Log(lexer.program.Serialize())
-		}
-
+	scan := func(scanner Scanner) {
 		i := 0
 		for tk, err, eof := scanner.Next(); !eof; tk, err, eof = scanner.Next() {
 			if err != nil {
@@ -166,14 +176,33 @@ func TestSimple(x *testing.T) {
 	}
 
 	// // first do the test with the NFA
-	// t.AssertNil(lexer.CompileNFA())
-	// scan(lexer)
+	t.AssertNil(lexer.CompileNFA())
+	{
+		scanner, err := lexer.TextScanner(text)
+		if err != nil {
+			t.Fatal(err)
+		}
+		scan(scanner)
+	}
 
 	// then do the test with the DFA
 	lexer.program = nil
 	lexer.nfaMatches = nil
 	t.AssertNil(lexer.CompileDFA())
-	scan(lexer)
+	{
+		scanner, err := lexer.TextScanner(text)
+		if err != nil {
+			t.Fatal(err)
+		}
+		scan(scanner)
+	}
+	{
+		scanner, err := lexer.StreamScanner(stream.BufferedStream(bytes.NewBuffer(text)))
+		if err != nil {
+			t.Fatal(err)
+		}
+		scan(scanner)
+	}
 }
 
 func TestPartialLexer(x *testing.T) {
diff --git a/scanner.go b/scanner.go
index 4c35fd9..e029ff4 100644
--- a/scanner.go
+++ b/scanner.go
@@ -9,6 +9,7 @@ import (
 type Scanner interface {
 	Next() (tok interface{}, err error, eos bool)
 	Token(typ int, value interface{}, m *machines.Match) *Token
+	Buffer() Buffer
 }
 
 // Scanner tokenizes a byte string based on the patterns provided to the lexer
@@ -38,6 +39,14 @@ type TextScanner struct {
 	scan    machines.Scanner
 	Text    []byte
 	TC      int
+	buf     *SliceBuffer
+}
+
+func (s *TextScanner) Buffer() Buffer {
+	if s.buf == nil {
+		panic(fmt.Errorf("Buffer called outside of an Action"))
+	}
+	return s.buf
 }
 
 // Next iterates through the string being scanned returning one token at a time
@@ -76,8 +85,11 @@ func (s *TextScanner) Next() (tok interface{}, err error, eos bool) {
 		s.scan = scan
 		s.TC = tc
 
+		s.buf = sliceBuffer(s.Text, s.TC)
 		pattern := s.lexer.patterns[s.matches[match.PC]]
 		token, err = pattern.action(s, match)
+		s.TC = s.buf.finalize()
+		s.buf = nil
 		if err != nil {
 			return nil, err, false
 		}
diff --git a/stream.go b/stream.go
index 1617cd0..f3c31ce 100644
--- a/stream.go
+++ b/stream.go
@@ -19,6 +19,14 @@ type StreamScanner struct {
 	matches map[int]int
 	scan    stream_machines.Scanner
 	Text    stream.Stream
+	buf     *StreamBuffer
+}
+
+func (s *StreamScanner) Buffer() Buffer {
+	if s.buf == nil {
+		panic(fmt.Errorf("Buffer called outside of an Action"))
+	}
+	return s.buf
 }
 
 // Next iterates through the string being scanned returning one token at a time
@@ -56,8 +64,11 @@ func (s *StreamScanner) Next() (tok interface{}, err error, eos bool) {
 		}
 		s.scan = scan
 
+		s.buf = streamBuffer(s.Text)
 		pattern := s.lexer.patterns[s.matches[match.PC]]
 		token, err = pattern.action(s, match)
+		s.buf.finalize()
+		s.buf = nil
 		if err != nil {
 			return nil, err, false
 		}

From ff2a8174f9c2fe115f6d3ee8bb6ae9774cc86d02 Mon Sep 17 00:00:00 2001
From: Tim Henderson <tadh@google.com>
Date: Wed, 28 Mar 2018 09:57:27 -0700
Subject: [PATCH 9/9] removed unneeded methods from stream

Signed-off-by: Tim Henderson <tadh@google.com>
---
 stream/buffered.go    | 24 --------------------
 stream/stream.go      | 12 +---------
 stream/stream_test.go | 51 +++++++++++++++++++++----------------------
 3 files changed, 26 insertions(+), 61 deletions(-)

diff --git a/stream/buffered.go b/stream/buffered.go
index 8156dae..5f6a37a 100644
--- a/stream/buffered.go
+++ b/stream/buffered.go
@@ -29,18 +29,6 @@ func BufferedStream(r io.Reader) Stream {
 	return b
 }
 
-// Byte returns the byte at the cursor
-func (b *bufferedStream) Byte() byte {
-	b.lock.Lock()
-	defer b.lock.Unlock()
-	if !b.started {
-		panic(fmt.Errorf("Call to Byte() before first call to Advance"))
-	} else if b.eos {
-		panic(fmt.Errorf("Call to Byte() after first call to Advance returned false"))
-	}
-	return b.buf[0].Byte
-}
-
 // Character returns the character at the cursor
 func (b *bufferedStream) Character() Character {
 	b.lock.Lock()
@@ -53,18 +41,6 @@ func (b *bufferedStream) Character() Character {
 	return b.buf[0]
 }
 
-// Position gives the current position of the cursor
-func (b *bufferedStream) Position() (tc, line, column int) {
-	b.lock.Lock()
-	defer b.lock.Unlock()
-	if !b.started {
-		panic(fmt.Errorf("Call to Position() before first call to Advance"))
-	} else if b.eos {
-		panic(fmt.Errorf("Call to Position() after first call to Advance returned false"))
-	}
-	return b.buf[0].TC, b.buf[0].Line, b.buf[0].Column
-}
-
 // Peek gets the character at lookahead i
 func (b *bufferedStream) Peek(i int) (char Character, has bool) {
 	b.lock.Lock()
diff --git a/stream/stream.go b/stream/stream.go
index 6c58bcb..a27770c 100644
--- a/stream/stream.go
+++ b/stream/stream.go
@@ -8,7 +8,7 @@ import "fmt"
 //
 //     s := BufferedStream(reader)
 //     for s.Advance(1) {
-//         fmt.Println(s.Byte())
+//         fmt.Println(s.Character().Byte)
 //     }
 //     if s.Err() != nil {
 //         return s.Err()
@@ -16,21 +16,11 @@ import "fmt"
 //
 type Stream interface {
 
-	// Byte returns the current byte in the stream. This method will panic if
-	// Advance has not been called before this method or Advance has returned
-	// false.
-	Byte() byte
-
 	// Character returns the current byte in the stream. This method will panic
 	// if Advance has not been called before this method or Advance has
 	// returned false.
 	Character() Character
 
-	// Position returns the position of the current byte: text counter, line,
-	// and column. This method will panic if Advance has not been called before
-	// this method or Advance has returned false.
-	Position() (tc, line, column int)
-
 	// Peek returns byte at the current cursor + the lookahead in the stream if
 	// one exists. If lookahead == 0, it returns the same character Character()
 	// returns. If lookahead == 1, it returns the next byte, and so on. Peek
diff --git a/stream/stream_test.go b/stream/stream_test.go
index 93d8337..ba52835 100644
--- a/stream/stream_test.go
+++ b/stream/stream_test.go
@@ -10,7 +10,7 @@ func TestReadFullStream(t *testing.T) {
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
 	for s.Advance(1) {
-		if err := buf.WriteByte(s.Byte()); err != nil {
+		if err := buf.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -30,7 +30,7 @@ func TestReadEveryOther(t *testing.T) {
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
 	for s.Advance(2) {
-		if err := buf.WriteByte(s.Byte()); err != nil {
+		if err := buf.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -50,7 +50,7 @@ func TestReadEvery3(t *testing.T) {
 	var buf bytes.Buffer
 	s := BufferedStream(bytes.NewBufferString(text))
 	for s.Advance(3) {
-		if err := buf.WriteByte(s.Byte()); err != nil {
+		if err := buf.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -82,11 +82,11 @@ func TestPeekTillW(t *testing.T) {
 			break
 		}
 	}
-	if s.Byte() != 'w' {
-		t.Fatalf("expected w got %v", s.Byte())
+	if s.Character().Byte != 'w' {
+		t.Fatalf("expected w got %v", s.Character().Byte)
 	}
 	for {
-		if err := buf.WriteByte(s.Byte()); err != nil {
+		if err := buf.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -121,8 +121,8 @@ func TestPeekTillWThenL(t *testing.T) {
 			break
 		}
 	}
-	if s.Byte() != 'w' {
-		t.Fatalf("expected w got %v", s.Byte())
+	if s.Character().Byte != 'w' {
+		t.Fatalf("expected w got %v", s.Character().Byte)
 	}
 	for i := 1; ; i++ {
 		b, has := s.Peek(i)
@@ -134,11 +134,11 @@ func TestPeekTillWThenL(t *testing.T) {
 			break
 		}
 	}
-	if s.Byte() != 'l' {
-		t.Fatalf("expected l got %v", s.Byte())
+	if s.Character().Byte != 'l' {
+		t.Fatalf("expected l got %v", s.Character().Byte)
 	}
 	for {
-		if err := buf.WriteByte(s.Byte()); err != nil {
+		if err := buf.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -173,8 +173,8 @@ func TestPeekTillWThenLThenEnd(t *testing.T) {
 			break
 		}
 	}
-	if s.Byte() != 'w' {
-		t.Fatalf("expected w got %v", s.Byte())
+	if s.Character().Byte != 'w' {
+		t.Fatalf("expected w got %v", s.Character().Byte)
 	}
 	for i := 1; ; i++ {
 		b, has := s.Peek(i)
@@ -186,8 +186,8 @@ func TestPeekTillWThenLThenEnd(t *testing.T) {
 			break
 		}
 	}
-	if s.Byte() != 'l' {
-		t.Fatalf("expected l got %v", s.Byte())
+	if s.Character().Byte != 'l' {
+		t.Fatalf("expected l got %v", s.Character().Byte)
 	}
 	for i := 1; ; i++ {
 		_, has := s.Peek(i)
@@ -227,7 +227,7 @@ func TestPeekThenReadFullStream(t *testing.T) {
 		}
 	}
 	for !s.EOS() {
-		if err := read.WriteByte(s.Byte()); err != nil {
+		if err := read.WriteByte(s.Character().Byte); err != nil {
 			if err != nil {
 				t.Fatalf("err writing %v", err)
 			}
@@ -288,19 +288,18 @@ func TestLineColumns(t *testing.T) {
 		}
 	}
 	for i := 0; !s.EOS(); i++ {
-		tc, line, column := s.Position()
-		char := s.Byte()
-		if char != expected[i].char {
-			t.Fatalf("got %v expected %v", char, expected[i].char)
+		char := s.Character()
+		if char.Byte != expected[i].char {
+			t.Fatalf("got %v expected %v", char.Byte, expected[i].char)
 		}
-		if tc != expected[i].tc {
-			t.Fatalf("got %v expected %v", tc, expected[i].tc)
+		if char.TC != expected[i].tc {
+			t.Fatalf("got %v expected %v", char.TC, expected[i].tc)
 		}
-		if line != expected[i].line {
-			t.Fatalf("got %v expected %v", line, expected[i].line)
+		if char.Line != expected[i].line {
+			t.Fatalf("got %v expected %v", char.Line, expected[i].line)
 		}
-		if column != expected[i].column {
-			t.Fatalf("got %v expected %v", column, expected[i].column)
+		if char.Column != expected[i].column {
+			t.Fatalf("got %v expected %v", char.Column, expected[i].column)
 		}
 		s.Advance(1)
 	}