Skip to content

Commit a7b7b95

Browse files
authored
Merge pull request #55 from xp-framework/feature/heredoc
Add support for heredoc (and its nowdoc variant)
2 parents d8b7370 + af447f3 commit a7b7b95

File tree

3 files changed

+98
-25
lines changed

3 files changed

+98
-25
lines changed

src/main/php/lang/ast/Tokens.class.php

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class Tokens {
1313
const DELIMITERS = " \r\n\t'\$\"`=,;.:?!(){}[]#+-*/|&^@%~<>";
1414
const OPERATORS = [
15-
'<' => ['<=>', '<<=', '<=', '<<', '<>', '<?'],
15+
'<' => ['<=>', '<<=', '<<<', '<=', '<<', '<>', '<?'],
1616
'>' => ['>>=', '>=', '>>'],
1717
'=' => ['===', '=>', '=='],
1818
'!' => ['!==', '!='],
@@ -22,7 +22,7 @@ class Tokens {
2222
'+' => ['+=', '++'],
2323
'-' => ['-=', '--', '->'],
2424
'*' => ['**=', '*=', '**'],
25-
'/' => ['/='],
25+
'/' => ['/=', '//', '/*'],
2626
'~' => ['~='],
2727
'%' => ['%='],
2828
'?' => ['?->', '??=', '?:', '??'],
@@ -106,10 +106,10 @@ public function iterator($language) {
106106
$end= '\\'.$token;
107107
do {
108108
$chunk= $next($end);
109-
if (null === $chunk) {
110-
throw new FormatException('Unclosed string literal starting at line '.$line);
111-
} else if ('\\' === $chunk) {
109+
if ('\\' === $chunk) {
112110
$string.= $chunk.$next($end);
111+
} else if (null === $chunk) {
112+
throw new FormatException('Unclosed string literal starting at line '.$line);
113113
} else {
114114
$string.= $chunk;
115115
}
@@ -162,43 +162,59 @@ public function iterator($language) {
162162
goto number;
163163
}
164164
$offset-= strlen($t);
165-
} else if ('/' === $token) {
166-
$t= $next(self::DELIMITERS);
167-
if ('/' === $t) {
168-
yield new Token(null, 'comment', '//'.$next("\r\n"), $line);
169-
continue;
170-
} else if ('*' === $t) {
171-
$comment= '';
172-
do {
173-
$chunk= $next('/');
174-
$comment.= $chunk;
175-
} while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]);
176-
$comment.= $next('/');
177-
yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line);
178-
$line+= substr_count($comment, "\n");
179-
continue;
180-
}
181-
null === $t || $offset-= strlen($t);
182165
}
183166

184167
// Handle combined operators. First, ensure we have enough bytes in our buffer
185168
// Our longest operator is 3 characters, hardcode this here.
186-
if (self::OPERATORS[$token]) {
169+
if ($combined= self::OPERATORS[$token]) {
187170
$offset--;
188171
while ($offset + 3 > $length && $this->in->available()) {
189172
$buffer.= $this->in->read(8192);
190173
$length= strlen($buffer);
191174
}
192-
foreach (self::OPERATORS[$token] as $operator) {
175+
foreach ($combined as $operator) {
193176
if ($offset + strlen($operator) > $length) continue;
194177
if (0 === substr_compare($buffer, $operator, $offset, strlen($operator))) {
195178
$token= $operator;
196179
break;
197180
}
198181
}
199182
$offset+= strlen($token);
200-
}
201183

184+
// Distinguish single- and multiline comments as well as heredoc from operators
185+
if ('//' === $token) {
186+
yield new Token(null, 'comment', '//'.$next("\r\n"), $line);
187+
continue;
188+
} else if ('/*' === $token) {
189+
$comment= '';
190+
do {
191+
$chunk= $next('/');
192+
$comment.= $chunk;
193+
} while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]);
194+
$comment.= $next('/');
195+
yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line);
196+
$line+= substr_count($comment, "\n");
197+
continue;
198+
} else if ('<<<' === $token) {
199+
$label= $next("\r\n");
200+
$end= trim($label, '"\'');
201+
$l= strlen($end);
202+
$string= "<<<{$label}";
203+
204+
heredoc: $token= $next("\r\n");
205+
if (0 === substr_compare($token, $end, $p= strspn($token, ' '), $l)) {
206+
$p+= $l;
207+
$offset-= strlen($token) - $p;
208+
yield new Token($language->symbol('(literal)'), 'heredoc', $string.substr($token, 0, $p), $line);
209+
$line+= substr_count($string, "\n");
210+
continue;
211+
} else if (null === $token) {
212+
throw new FormatException('Unclosed heredoc literal starting at line '.$line);
213+
}
214+
$string.= $token;
215+
goto heredoc;
216+
}
217+
}
202218
yield new Token($language->symbol($token), 'operator', $token, $line);
203219
} else {
204220
yield new Token($language->symbols[$token] ?? $language->symbol('(name)'), 'name', $token, $line);

src/test/php/lang/ast/unittest/TokensTest.class.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ public function unclosed_string_literals($input) {
4343
(new Tokens($input))->iterator($this->language)->current();
4444
}
4545

46+
#[Test, Expect(class: FormatException::class, message: '/Unclosed heredoc literal/'), Values(['<<<EOD', "<<<EOD\n", "<<<EOD\nLine 1"])]
47+
public function unclosed_heredoc_literals($input) {
48+
(new Tokens($input))->iterator($this->language)->current();
49+
}
50+
4651
#[Test, Values(['0', '1', '1_000_000_000'])]
4752
public function integer_literal($input) {
4853
$this->assertTokens([['integer' => str_replace('_', '', $input)]], new Tokens($input));

src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,56 @@ public function dangling_comma_in_key_value_map($declaration) {
8686
$pair= [new Literal('"key"', self::LINE), new Literal('"value"', self::LINE)];
8787
$this->assertParsed([new ArrayLiteral([$pair], self::LINE)], $declaration);
8888
}
89+
90+
#[Test, Values(['EOD', '"EOD"', "'EOD'"])]
91+
public function heredoc($label) {
92+
$nowdoc= (
93+
"<<<{$label}\n".
94+
"Line 1\n".
95+
"Line 2\n".
96+
"\n".
97+
"Line 4\n".
98+
"EOD"
99+
);
100+
$this->assertParsed([new Literal($nowdoc, self::LINE)], $nowdoc.';');
101+
}
102+
103+
#[Test]
104+
public function heredoc_indentation() {
105+
$nowdoc= (
106+
"<<<EOD\n".
107+
" Line 1\n".
108+
" Line 2\n".
109+
"\n".
110+
" Line 4\n".
111+
" EOD"
112+
);
113+
$this->assertParsed([new Literal($nowdoc, self::LINE)], $nowdoc.';');
114+
}
115+
116+
#[Test]
117+
public function line_number_after_multiline_string() {
118+
$string= (
119+
"'<html>\n".
120+
" ...\n".
121+
"</html>'"
122+
);
123+
$this->assertParsed(
124+
[new Literal($string, self::LINE), new Literal('null', self::LINE + 3)],
125+
$string.";\nnull;"
126+
);
127+
}
128+
129+
#[Test]
130+
public function line_number_after_heredoc() {
131+
$nowdoc= (
132+
"<<<EOD\n".
133+
" Line 1\n".
134+
" EOD"
135+
);
136+
$this->assertParsed(
137+
[new Literal($nowdoc, self::LINE), new Literal('null', self::LINE + 3)],
138+
$nowdoc.";\nnull;"
139+
);
140+
}
89141
}

0 commit comments

Comments
 (0)