Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package lexmachine

import (
"fmt"

"github.com/timtadh/lexmachine/stream"
)

// Buffer abstracts over the two implementations of "text" that the scanner
// reads from: SliceBuffer, backed by an in-memory []byte, and StreamBuffer,
// backed by a stream.Stream.
type Buffer interface {
	// Byte returns the byte at index i. Behavior when i is out of range is
	// implementation-defined (SliceBuffer panics via slice indexing;
	// StreamBuffer panics explicitly) — check HasByte first.
	Byte(i int) byte
	// HasByte reports whether a byte is available at index i.
	HasByte(i int) bool
	// TC returns the current text counter (the scanner's position).
	TC() int
	// SetTC sets the text counter to i.
	SetTC(i int)
}

// SliceBuffer implements Buffer on top of an in-memory byte slice.
type SliceBuffer struct {
	Text        []byte // the complete text being scanned
	TextCounter int    // current position within Text
}

// sliceBuffer wraps text in a SliceBuffer positioned at text counter tc.
func sliceBuffer(text []byte, tc int) *SliceBuffer {
	s := &SliceBuffer{Text: text}
	s.TextCounter = tc
	return s
}

// Byte returns the byte at index i. It panics (via slice indexing) when i is
// out of range; use HasByte to check first.
func (s *SliceBuffer) Byte(i int) byte {
	return s.Text[i]
}

// HasByte reports whether i is a valid index into the text.
func (s *SliceBuffer) HasByte(i int) bool {
	if i < 0 {
		return false
	}
	return i < len(s.Text)
}

// TC returns the current text counter.
func (s *SliceBuffer) TC() int {
	return s.TextCounter
}

// SetTC moves the text counter to tc.
func (s *SliceBuffer) SetTC(tc int) {
	s.TextCounter = tc
}

// finalize reports the final text counter position.
func (s *SliceBuffer) finalize() int {
	return s.TextCounter
}

// StreamBuffer implements Buffer on top of a stream.Stream, tracking how far
// ahead of the stream's own position the scanner has read.
type StreamBuffer struct {
	Text      stream.Stream
	Lookahead int // offset the scanner has consumed ahead of the stream's position
}

// streamBuffer wraps text in a StreamBuffer with zero lookahead.
func streamBuffer(text stream.Stream) *StreamBuffer {
	return &StreamBuffer{
		Text:      text,
		Lookahead: 0,
	}
}

// Byte returns the byte at offset i via the stream's Peek. It panics when the
// stream has no byte at that offset; callers should check HasByte first.
// (Presumably Peek(i) looks i bytes ahead without consuming — confirm against
// the stream package.)
func (s *StreamBuffer) Byte(i int) byte {
	c, has := s.Text.Peek(i)
	if !has {
		panic(fmt.Errorf("read past the end of the buffer"))
	}
	return c.Byte
}

// HasByte reports whether the stream can supply a byte at offset i.
func (s *StreamBuffer) HasByte(i int) bool {
	_, has := s.Text.Peek(i)
	return has
}

// TC returns the current position, represented here as the lookahead offset.
func (s *StreamBuffer) TC() int {
	return s.Lookahead
}

// SetTC sets the current position (the lookahead offset).
func (s *StreamBuffer) SetTC(tc int) {
	s.Lookahead = tc
}

// finalize advances the underlying stream by the consumed lookahead, so the
// stream's position catches up with what the scanner has read.
func (s *StreamBuffer) finalize() {
	s.Text.Advance(s.Lookahead)
}
192 changes: 35 additions & 157 deletions lexer.go
Original file line number Diff line number Diff line change
@@ -1,61 +1,19 @@
package lexmachine

import (
"bytes"
"fmt"
)

import (
dfapkg "github.com/timtadh/lexmachine/dfa"
"github.com/timtadh/lexmachine/frontend"
"github.com/timtadh/lexmachine/inst"
"github.com/timtadh/lexmachine/machines"
"github.com/timtadh/lexmachine/stream"
"github.com/timtadh/lexmachine/stream_machines"
)

// Token is an optional token representation you could use to represent the
// tokens produced by a lexer built with lexmachine.
//
// Here is an example for constructing a lexer Action which turns a
// machines.Match struct into a token using the scanners Token helper function.
//
//	func token(name string, tokenIds map[string]int) lex.Action {
//	    return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
//	        return s.Token(tokenIds[name], string(m.Bytes), m), nil
//	    }
//	}
type Token struct {
	Type        int         // token category id
	Value       interface{} // user-supplied value (not considered by Equals)
	Lexeme      []byte      // the matched bytes
	TC          int         // text counter where the match began
	StartLine   int
	StartColumn int
	EndLine     int
	EndColumn   int
}

// Equals checks the equality of two tokens ignoring the Value field.
func (t *Token) Equals(other *Token) bool {
	if t == nil || other == nil {
		// Two nil tokens are equal; exactly one nil is not.
		return t == nil && other == nil
	}
	return t.Type == other.Type &&
		t.TC == other.TC &&
		t.StartLine == other.StartLine &&
		t.StartColumn == other.StartColumn &&
		t.EndLine == other.EndLine &&
		t.EndColumn == other.EndColumn &&
		bytes.Equal(t.Lexeme, other.Lexeme)
}

// String formats the token in a human readable form.
func (t *Token) String() string {
return fmt.Sprintf("%d %q %d (%d, %d)-(%d, %d)", t.Type, t.Value, t.TC, t.StartLine, t.StartColumn, t.EndLine, t.EndColumn)
type pattern struct {
regex []byte
action Action
}

// An Action is a function which get called when the Scanner finds a match
Expand All @@ -64,12 +22,7 @@ func (t *Token) String() string {
// have different needs Actions merely return an interface{}. This allows you
// to represent a token in anyway you wish. An example Token struct is provided
// above.
type Action func(scan *Scanner, match *machines.Match) (interface{}, error)

type pattern struct {
regex []byte
action Action
}
type Action func(scan Scanner, match *machines.Match) (interface{}, error)

// Lexer is a "builder" object which lets you construct a Scanner type which
// does the actual work of tokenizing (splitting up and categorizing) a byte
Expand All @@ -84,111 +37,13 @@ type Lexer struct {
dfa *dfapkg.DFA
}

// Scanner tokenizes a byte string based on the patterns provided to the lexer
// object which constructed the scanner. This object works as functional
// iterator using the Next method.
//
// Example
//
//	lexer, err := CreateLexer()
//	if err != nil {
//		return err
//	}
//	scanner, err := lexer.Scanner(someBytes)
//	if err != nil {
//		return err
//	}
//	for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
//		if err != nil {
//			return err
//		}
//		fmt.Println(tok)
//	}
type Scanner struct {
	lexer   *Lexer
	matches map[int]int // machine program counter -> index into lexer.patterns
	scan    machines.Scanner
	Text    []byte // the full text being tokenized
	TC      int    // text counter: where the next scan resumes
	pTC     int    // text counter before the most recent match
	sLine   int    // start line of the most recent match
	sColumn int    // start column of the most recent match
	eLine   int    // end line of the most recent match
	eColumn int    // end column of the most recent match
}

// Next iterates through the string being scanned returning one token at a time
// until either an error is encountered or the end of the string is reached.
// The token is returned by the tok value. An error is indicated by err.
// Finally, eos (a bool) indicates the End Of String when it returns as true.
//
// Example
//
//	for tok, err, eos := scanner.Next(); !eos; tok, err, eos = scanner.Next() {
//		if err != nil {
//			// handle the error and exit the loop. For example:
//			return err
//		}
//		// do some processing on tok or store it somewhere. eg.
//		fmt.Println(tok)
//	}
//
// One useful error type which could be returned by Next() is a
// match.UnconsumedInput which provides the position information for where in
// the text the scanning failed.
//
// For more information on functional iterators see:
// http://hackthology.com/functional-iteration-in-go.html
func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
	var token interface{}
	// Keep scanning until an action produces a non-nil token; an action may
	// return a nil token to skip its match (the loop then continues).
	for token == nil {
		// s.scan is a functional continuation: each call returns the next
		// match and a new continuation. NOTE: the inner err shadows the named
		// return; it is always returned explicitly below, so nothing is lost.
		tc, match, err, scan := s.scan(s.TC)
		if scan == nil {
			// A nil continuation signals the end of the string.
			return nil, nil, true
		} else if err != nil {
			return nil, err, false
		} else if match == nil {
			return nil, fmt.Errorf("No match but no error"), false
		}
		// Save the continuation and record the match's position bookkeeping.
		s.scan = scan
		s.pTC = s.TC
		s.TC = tc
		s.sLine = match.StartLine
		s.sColumn = match.StartColumn
		s.eLine = match.EndLine
		s.eColumn = match.EndColumn

		// Dispatch to the action registered for the pattern that matched
		// (matches maps the machine's PC back to the pattern index).
		pattern := s.lexer.patterns[s.matches[match.PC]]
		token, err = pattern.action(s, match)
		if err != nil {
			return nil, err, false
		}
	}
	return token, nil, false
}

// Token is a helper function for constructing a Token type inside of a Action.
// It copies the match's lexeme and position information into the new Token.
func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token {
	tok := new(Token)
	tok.Type = typ
	tok.Value = value
	tok.Lexeme = m.Bytes
	tok.TC = m.TC
	tok.StartLine = m.StartLine
	tok.StartColumn = m.StartColumn
	tok.EndLine = m.EndLine
	tok.EndColumn = m.EndColumn
	return tok
}

// NewLexer constructs a new lexer object. The zero value is ready for
// patterns to be registered via Add.
func NewLexer() *Lexer {
	return new(Lexer)
}

// Scanner creates a scanner for a particular byte string from the lexer.
func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
// TextScanner creates a scanner for a particular byte string from the lexer.
func (l *Lexer) TextScanner(text []byte) (*TextScanner, error) {
if l.program == nil && l.dfa == nil {
err := l.Compile()
if err != nil {
Expand All @@ -200,17 +55,17 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
textCopy := make([]byte, len(text))
copy(textCopy, text)

var s *Scanner
var s *TextScanner
if l.dfa != nil {
s = &Scanner{
s = &TextScanner{
lexer: l,
matches: l.dfaMatches,
scan: machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, textCopy),
Text: textCopy,
TC: 0,
}
} else {
s = &Scanner{
s = &TextScanner{
lexer: l,
matches: l.nfaMatches,
scan: machines.LexerEngine(l.program, textCopy),
Expand All @@ -221,6 +76,29 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
return s, nil
}

// StreamScanner creates a scanner for a particular stream from the lexer.
func (l *Lexer) StreamScanner(text stream.Stream) (*StreamScanner, error) {
	// Lazily compile if neither the NFA program nor the DFA has been built.
	if l.program == nil && l.dfa == nil {
		if err := l.Compile(); err != nil {
			return nil, err
		}
	}
	if l.dfa == nil {
		// Stream scanning is only implemented for the DFA backend.
		panic("not implemented")
	}
	return &StreamScanner{
		lexer:   l,
		matches: l.dfaMatches,
		scan:    stream_machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, text),
		Text:    text,
	}, nil
}

// Add pattern to match on. When a match occurs during scanning the action
// function will be called by the Scanner to turn the low level machines.Match
// struct into a token.
Expand Down Expand Up @@ -325,7 +203,7 @@ func (l *Lexer) CompileDFA() error {
}

func (l *Lexer) matchesEmptyString() (bool, error) {
s, err := l.Scanner([]byte(""))
s, err := l.TextScanner([]byte(""))
if err != nil {
return false, err
}
Expand Down
Loading