From d497d4b5d41b352da78548f6cdcf975724bd8d3f Mon Sep 17 00:00:00 2001 From: chewxy Date: Wed, 23 Dec 2020 10:12:29 +1100 Subject: [PATCH 1/3] added functionality to parse \\xHH with a decent fall back --- lexer/lexer.go | 23 +++++++++++++++++++++++ lexer/lexer_test.yml | 19 +++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 720dd5b4..24fedcad 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -2,6 +2,7 @@ package lexer import ( "bytes" + "strconv" "unicode/utf8" "github.com/vektah/gqlparser/v2/ast" @@ -392,6 +393,20 @@ func (s *Lexer) readString() (Token, *gqlerror.Error) { buf.WriteByte('\r') case 't': buf.WriteByte('\t') + case 'x': + // look two ahead + r, ok := unhex2(s.Input[s.end+2 : s.end+4]) + if !ok { + // if it's not a correct rune, then we treat it as a literal and move o + buf.WriteString(s.Input[s.end : s.end+2]) + s.end += 2 + s.endRunes += 2 + continue + } + buf.WriteRune(r) + s.end += 2 + s.endRunes += 2 + default: s.end += 1 s.endRunes += 1 @@ -491,6 +506,14 @@ func unhex(b string) (v rune, ok bool) { return v, true } +func unhex2(b string) (v rune, ok bool) { + r, err := strconv.ParseUint(b, 16, 32) + if err != nil { + return 0, false + } + return rune(r), true +} + // readName from the input // // [_A-Za-z][_0-9A-Za-z]* diff --git a/lexer/lexer_test.yml b/lexer/lexer_test.yml index e2c26696..770f8287 100644 --- a/lexer/lexer_test.yml +++ b/lexer/lexer_test.yml @@ -124,6 +124,15 @@ lexes strings: end: 15 value: 'slashes \ /' + - name: correct hex literals + input: '"\xaa \xab \xac ."' + tokens: + - + kind: STRING + start: 0 + end: 18 + value: 'ª « ¬ .' + - name: unicode input: '"unicode \u1234\u5678\u90AB\uCDEF"' tokens: @@ -184,9 +193,12 @@ lex reports useful string errors: - name: hex escape sequence input: '"bad \x esc"' - error: - message: 'Invalid character escape sequence: \x.' 
- locations: [{ line: 1, column: 7 }] + tokens: + - + kind: STRING + start: 0 + end: 12 + value: 'bad \x esc' - name: short escape sequence input: '"bad \u1 esc"' @@ -669,4 +681,3 @@ lex reports useful unknown character error: error: message: 'Cannot parse the unexpected character "â".' locations: [{ line: 1, column: 1 }] - From 444fa1afd27d396fea9a91c5ba10897ffb25f7de Mon Sep 17 00:00:00 2001 From: chewxy Date: Wed, 23 Dec 2020 11:03:27 +1100 Subject: [PATCH 2/3] go-fuzz found a crasher. Fixed it --- lexer/lexer.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lexer/lexer.go b/lexer/lexer.go index 24fedcad..48043b41 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -394,6 +394,11 @@ func (s *Lexer) readString() (Token, *gqlerror.Error) { case 't': buf.WriteByte('\t') case 'x': + if s.end+4 >= inputLen { + s.end++ + s.endRunes++ + break + } // look two ahead r, ok := unhex2(s.Input[s.end+2 : s.end+4]) if !ok { From 9de70ae2dac7a5b5a2ab891e6086c82c196b1321 Mon Sep 17 00:00:00 2001 From: chewxy Date: Wed, 23 Dec 2020 11:51:27 +1100 Subject: [PATCH 3/3] boyscout: clean up switch cases so they all line up vertically. 
Makes the cases easier to read --- lexer/lexer.go | 74 ++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 48043b41..54b3543f 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -363,7 +363,8 @@ func (s *Lexer) readString() (Token, *gqlerror.Error) { escape := s.Input[s.end+1] - if escape == 'u' { + switch escape { + case 'u': if s.end+6 >= inputLen { s.end++ s.endRunes++ @@ -377,49 +378,46 @@ func (s *Lexer) readString() (Token, *gqlerror.Error) { return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:s.end+5]) } buf.WriteRune(r) - s.end += 6 - s.endRunes += 6 - } else { - switch escape { - case '"', '/', '\\': - buf.WriteByte(escape) - case 'b': - buf.WriteByte('\b') - case 'f': - buf.WriteByte('\f') - case 'n': - buf.WriteByte('\n') - case 'r': - buf.WriteByte('\r') - case 't': - buf.WriteByte('\t') - case 'x': - if s.end+4 >= inputLen { - s.end++ - s.endRunes++ - break - } - // look two ahead - r, ok := unhex2(s.Input[s.end+2 : s.end+4]) - if !ok { - // if it's not a correct rune, then we treat it as a literal and move o - buf.WriteString(s.Input[s.end : s.end+2]) - s.end += 2 - s.endRunes += 2 - continue - } - buf.WriteRune(r) + s.end += 4 // because at the end of this we're going to += 2 + s.endRunes += 4 + case '"', '/', '\\': + buf.WriteByte(escape) + case 'b': + buf.WriteByte('\b') + case 'f': + buf.WriteByte('\f') + case 'n': + buf.WriteByte('\n') + case 'r': + buf.WriteByte('\r') + case 't': + buf.WriteByte('\t') + case 'x': + if s.end+4 >= inputLen { + s.end++ + s.endRunes++ + break + } + // look two ahead + r, ok := unhex2(s.Input[s.end+2 : s.end+4]) + if !ok { + // if it's not a correct rune, then we treat it as a literal and move on + buf.WriteString(s.Input[s.end : s.end+2]) s.end += 2 s.endRunes += 2 - - default: - s.end += 1 - s.endRunes += 1 - return s.makeError("Invalid character escape sequence: \\%s.", string(escape)) + 
continue } + buf.WriteRune(r) s.end += 2 s.endRunes += 2 + + default: + s.end += 1 + s.endRunes += 1 + return s.makeError("Invalid character escape sequence: \\%s.", string(escape)) } + s.end += 2 + s.endRunes += 2 } }