Skip to content

Commit 9ee04ae

Browse files
authored
Don't extract ERB comments as Ruby comments in herb_extract_ruby (#98)
This pull request updates the `herb_extract_ruby` and `herb_extract_ruby_to_buffer_with_semicolons` methods in `extract.c` to not extract the content of ERB Comment Nodes (`<%#`) as Ruby code. So a source file like this: ```html+erb <%# This is a comment %> <h1><%= title %></h1> ``` Was extracted to Ruby code as: ```ruby # This is a comment title ``` With the changes included in this pull request it's going to be: ```ruby title ``` This is in order to resolve #91. It's valid to have multi-line ERB Comments like: ```html+erb <%# This is a comment over multiple lines %> ``` Which before this pull request was extracted to Ruby as: ```ruby # This is a comment over multiple lines ``` Which is not a valid Ruby comment anymore, but treated as actual Ruby code from the second line on. If the comment itself included Ruby keywords it would cause syntax errors. For now, we don't extract the ERB Comments at all - which is the change this pull request introduces. In the future, we can implement #100 (and/or #102) and also make sure that multi-line ERB Comments get extracted to multi-line Ruby Comments, like: ```ruby # This is # a comment # over multiple # lines ``` or maybe even cleverer: replace the `<%` with a `=begin` and the `%>` with a `=end`: ```ruby =begin This is a comment over multiple lines =end ``` Another case where it would break Ruby syntax is in this example: ```html+erb <% if true %><%# Comment here %><% end %> ``` Which is going to comment out the `end` as well: ```ruby if true # Comment here end ``` This use-case is also fixed with this pull request, since we just skip over the ERB Comments content: ```ruby if true end ``` This last example could be solved even more elegantly if Ruby shipped the Inline Comments feature: https://bugs.ruby-lang.org/issues/20405 This following example is still broken and this pull request does not address that use-case. I opened #101 for this. ```html+erb <% if true %><% # Comment here %><% end %> ``` Currently it does not address the case, where the comment is part of the Ruby Code itself, so the comment is not seen as a "ERB Comment Node":
1 parent 92486f4 commit 9ee04ae

5 files changed

+154
-18
lines changed

src/extract.c

+48-17
Original file line numberDiff line numberDiff line change
@@ -8,58 +8,89 @@
88

99
void herb_extract_ruby_to_buffer_with_semicolons(const char* source, buffer_T* output) {
1010
const array_T* tokens = herb_lex(source);
11+
bool skip_erb_content = false;
1112

1213
for (size_t i = 0; i < array_size(tokens); i++) {
1314
const token_T* token = array_get(tokens, i);
1415

1516
switch (token->type) {
16-
case TOKEN_NEWLINE:
17-
case TOKEN_ERB_CONTENT: buffer_append(output, token->value); break;
18-
case TOKEN_ERB_END: {
19-
buffer_append_char(output, ';');
20-
buffer_append_whitespace(output, range_length(token->range) - 1);
17+
case TOKEN_NEWLINE: {
18+
buffer_append(output, token->value);
2119
break;
2220
}
2321

2422
case TOKEN_ERB_START: {
25-
if (strcmp(token->value, "<%#") == 0) {
26-
buffer_append_char(output, ' ');
27-
buffer_append_char(output, ' ');
28-
buffer_append_char(output, '#');
23+
if (strcmp(token->value, "<%#") == 0) { skip_erb_content = true; }
24+
25+
buffer_append_whitespace(output, range_length(token->range));
26+
break;
27+
}
28+
29+
case TOKEN_ERB_CONTENT: {
30+
if (skip_erb_content == false) {
31+
buffer_append(output, token->value);
2932
} else {
3033
buffer_append_whitespace(output, range_length(token->range));
3134
}
3235

3336
break;
3437
}
3538

36-
default: buffer_append_whitespace(output, range_length(token->range));
39+
case TOKEN_ERB_END: {
40+
skip_erb_content = false;
41+
42+
buffer_append_char(output, ';');
43+
buffer_append_whitespace(output, range_length(token->range) - 1);
44+
break;
45+
}
46+
47+
default: {
48+
buffer_append_whitespace(output, range_length(token->range));
49+
}
3750
}
3851
}
3952
}
4053

4154
void herb_extract_ruby_to_buffer(const char* source, buffer_T* output) {
4255
const array_T* tokens = herb_lex(source);
56+
bool skip_erb_content = false;
4357

4458
for (size_t i = 0; i < array_size(tokens); i++) {
4559
const token_T* token = array_get(tokens, i);
4660

4761
switch (token->type) {
48-
case TOKEN_NEWLINE:
49-
case TOKEN_ERB_CONTENT: buffer_append(output, token->value); break;
62+
case TOKEN_NEWLINE: {
63+
buffer_append(output, token->value);
64+
break;
65+
}
66+
5067
case TOKEN_ERB_START: {
51-
if (strcmp(token->value, "<%#") == 0) {
52-
buffer_append_char(output, ' ');
53-
buffer_append_char(output, ' ');
54-
buffer_append_char(output, '#');
68+
if (strcmp(token->value, "<%#") == 0) { skip_erb_content = true; }
69+
70+
buffer_append_whitespace(output, range_length(token->range));
71+
break;
72+
}
73+
74+
case TOKEN_ERB_CONTENT: {
75+
if (skip_erb_content == false) {
76+
buffer_append(output, token->value);
5577
} else {
5678
buffer_append_whitespace(output, range_length(token->range));
5779
}
5880

5981
break;
6082
}
6183

62-
default: buffer_append_whitespace(output, range_length(token->range));
84+
case TOKEN_ERB_END: {
85+
skip_erb_content = false;
86+
87+
buffer_append_whitespace(output, range_length(token->range));
88+
break;
89+
}
90+
91+
default: {
92+
buffer_append_whitespace(output, range_length(token->range));
93+
}
6394
}
6495
}
6596
}

test/extractor/extract_ruby_test.rb

+64-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,70 @@ class ExtractRubyTest < Minitest::Spec
4747
<%# comment ' %>
4848
HTML
4949

50-
expected = " # comment ' \n"
50+
expected = " \n"
51+
52+
assert_equal expected, actual
53+
end
54+
55+
test "erb comment with ruby keyword" do
56+
actual = Herb.extract_ruby(<<~HTML)
57+
<%# end %>
58+
HTML
59+
60+
expected = " \n"
61+
62+
assert_equal expected, actual
63+
end
64+
65+
test "erb comment broken up over multiple lines" do
66+
actual = Herb.extract_ruby(<<~HTML)
67+
<%#
68+
end
69+
%>
70+
HTML
71+
72+
expected = " \n"
73+
74+
# TODO: it should also preserve the newlines in the ERB content
75+
# expected = "\n #\n end\n "
76+
77+
assert_equal expected, actual
78+
end
79+
80+
test "multi-line erb comment" do
81+
actual = Herb.extract_ruby(<<~HTML)
82+
<%#
83+
end
84+
end
85+
end
86+
end
87+
%>
88+
HTML
89+
90+
expected = " \n"
91+
92+
# TODO: it should also preserve the newlines in the ERB content
93+
# expected = " \n \n \n \n \n \n"
94+
95+
assert_equal expected, actual
96+
end
97+
98+
test "erb if/end and comment on same line" do
99+
actual = Herb.extract_ruby(<<~HTML)
100+
<% if %><%# comment %><% end %>
101+
HTML
102+
103+
expected = " if end \n"
104+
105+
assert_equal expected, actual
106+
end
107+
108+
xtest "erb if/end and Ruby comment on same line" do
109+
actual = Herb.extract_ruby(<<~HTML)
110+
<% if %><% # comment %><% end %>
111+
HTML
112+
113+
expected = " if # comment end \n"
51114

52115
assert_equal expected, actual
53116
end

test/parser/erb_test.rb

+16
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,21 @@ class ERBTest < Minitest::Spec
6565
test "comment" do
6666
assert_parsed_snapshot(%(<%# comment with a single qutote(') and double quote (") %>))
6767
end
68+
69+
test "multi-line comment" do
70+
assert_parsed_snapshot(<<~HTML)
71+
<%#
72+
comment
73+
%>
74+
HTML
75+
end
76+
77+
test "multi-line comment with Ruby keyword" do
78+
assert_parsed_snapshot(<<~HTML)
79+
<%#
80+
end
81+
%>
82+
HTML
83+
end
6884
end
6985
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
@ DocumentNode (location: (1:0)-(2:0))
2+
└── children: (2 items)
3+
├── @ ERBContentNode (location: (1:0)-(1:14))
4+
│ ├── tag_opening: "<%#" (location: (1:0)-(1:3))
5+
│ ├── content: "
6+
│ comment
7+
│ " (location: (1:3)-(1:12))
8+
│ ├── tag_closing: "%>" (location: (1:12)-(1:14))
9+
│ ├── parsed: true
10+
│ └── valid: true
11+
12+
└── @ HTMLTextNode (location: (1:14)-(2:0))
13+
└── content: "\n"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
@ DocumentNode (location: (1:0)-(2:0))
2+
└── children: (2 items)
3+
├── @ ERBContentNode (location: (1:0)-(1:10))
4+
│ ├── tag_opening: "<%#" (location: (1:0)-(1:3))
5+
│ ├── content: "
6+
│ end
7+
│ " (location: (1:3)-(1:8))
8+
│ ├── tag_closing: "%>" (location: (1:8)-(1:10))
9+
│ ├── parsed: true
10+
│ └── valid: false
11+
12+
└── @ HTMLTextNode (location: (1:10)-(2:0))
13+
└── content: "\n"

0 commit comments

Comments
 (0)