12
12
class Tokens {
13
13
const DELIMITERS = " \r\n\t' \$\"`=,;.:?!(){}[]#+-*/|&^@%~<> " ;
14
14
const OPERATORS = [
15
- '< ' => ['<=> ' , '<<= ' , '<= ' , '<< ' , '<> ' , '<? ' ],
15
+ '< ' => ['<=> ' , '<<= ' , '<<< ' , ' < = ' , '<< ' , '<> ' , '<? ' ],
16
16
'> ' => ['>>= ' , '>= ' , '>> ' ],
17
17
'= ' => ['=== ' , '=> ' , '== ' ],
18
18
'! ' => ['!== ' , '!= ' ],
@@ -22,7 +22,7 @@ class Tokens {
22
22
'+ ' => ['+= ' , '++ ' ],
23
23
'- ' => ['-= ' , '-- ' , '-> ' ],
24
24
'* ' => ['**= ' , '*= ' , '** ' ],
25
- '/ ' => ['/= ' ],
25
+ '/ ' => ['/= ' , ' // ' , ' /* ' ],
26
26
'~ ' => ['~= ' ],
27
27
'% ' => ['%= ' ],
28
28
'? ' => ['?-> ' , '??= ' , '?: ' , '?? ' ],
@@ -106,10 +106,10 @@ public function iterator($language) {
106
106
$ end = '\\' .$ token ;
107
107
do {
108
108
$ chunk = $ next ($ end );
109
- if (null === $ chunk ) {
110
- throw new FormatException ('Unclosed string literal starting at line ' .$ line );
111
- } else if ('\\' === $ chunk ) {
109
+ if ('\\' === $ chunk ) {
112
110
$ string .= $ chunk .$ next ($ end );
111
+ } else if (null === $ chunk ) {
112
+ throw new FormatException ('Unclosed string literal starting at line ' .$ line );
113
113
} else {
114
114
$ string .= $ chunk ;
115
115
}
@@ -162,43 +162,59 @@ public function iterator($language) {
162
162
goto number;
163
163
}
164
164
$ offset -= strlen ($ t );
165
- } else if ('/ ' === $ token ) {
166
- $ t = $ next (self ::DELIMITERS );
167
- if ('/ ' === $ t ) {
168
- yield new Token (null , 'comment ' , '// ' .$ next ("\r\n" ), $ line );
169
- continue ;
170
- } else if ('* ' === $ t ) {
171
- $ comment = '' ;
172
- do {
173
- $ chunk = $ next ('/ ' );
174
- $ comment .= $ chunk ;
175
- } while (null !== $ chunk && '* ' !== $ chunk [strlen ($ chunk ) - 1 ]);
176
- $ comment .= $ next ('/ ' );
177
- yield new Token (null , '* ' === $ comment [0 ] ? 'apidoc ' : 'comment ' , '/* ' .$ comment , $ line );
178
- $ line += substr_count ($ comment , "\n" );
179
- continue ;
180
- }
181
- null === $ t || $ offset -= strlen ($ t );
182
165
}
183
166
184
167
// Handle combined operators. First, ensure we have enough bytes in our buffer
185
168
// Our longest operator is 3 characters, hardcode this here.
186
- if (self ::OPERATORS [$ token ]) {
169
+ if ($ combined = self ::OPERATORS [$ token ]) {
187
170
$ offset --;
188
171
while ($ offset + 3 > $ length && $ this ->in ->available ()) {
189
172
$ buffer .= $ this ->in ->read (8192 );
190
173
$ length = strlen ($ buffer );
191
174
}
192
- foreach (self :: OPERATORS [ $ token ] as $ operator ) {
175
+ foreach ($ combined as $ operator ) {
193
176
if ($ offset + strlen ($ operator ) > $ length ) continue ;
194
177
if (0 === substr_compare ($ buffer , $ operator , $ offset , strlen ($ operator ))) {
195
178
$ token = $ operator ;
196
179
break ;
197
180
}
198
181
}
199
182
$ offset += strlen ($ token );
200
- }
201
183
184
+ // Distinguish single- and multiline comments as well as heredoc from operators
185
+ if ('// ' === $ token ) {
186
+ yield new Token (null , 'comment ' , '// ' .$ next ("\r\n" ), $ line );
187
+ continue ;
188
+ } else if ('/* ' === $ token ) {
189
+ $ comment = '' ;
190
+ do {
191
+ $ chunk = $ next ('/ ' );
192
+ $ comment .= $ chunk ;
193
+ } while (null !== $ chunk && '* ' !== $ chunk [strlen ($ chunk ) - 1 ]);
194
+ $ comment .= $ next ('/ ' );
195
+ yield new Token (null , '* ' === $ comment [0 ] ? 'apidoc ' : 'comment ' , '/* ' .$ comment , $ line );
196
+ $ line += substr_count ($ comment , "\n" );
197
+ continue ;
198
+ } else if ('<<< ' === $ token ) {
199
+ $ label = $ next ("\r\n" );
200
+ $ end = trim ($ label , '" \'' );
201
+ $ l = strlen ($ end );
202
+ $ string = "<<< {$ label }" ;
203
+
204
+ heredoc: $ token = $ next ("\r\n" );
205
+ if (0 === substr_compare ($ token , $ end , $ p = strspn ($ token , ' ' ), $ l )) {
206
+ $ p += $ l ;
207
+ $ offset -= strlen ($ token ) - $ p ;
208
+ yield new Token ($ language ->symbol ('(literal) ' ), 'heredoc ' , $ string .substr ($ token , 0 , $ p ), $ line );
209
+ $ line += substr_count ($ string , "\n" );
210
+ continue ;
211
+ } else if (null === $ token ) {
212
+ throw new FormatException ('Unclosed heredoc literal starting at line ' .$ line );
213
+ }
214
+ $ string .= $ token ;
215
+ goto heredoc;
216
+ }
217
+ }
202
218
yield new Token ($ language ->symbol ($ token ), 'operator ' , $ token , $ line );
203
219
} else {
204
220
yield new Token ($ language ->symbols [$ token ] ?? $ language ->symbol ('(name) ' ), 'name ' , $ token , $ line );
0 commit comments