Skip to content

Commit 46862a3

Browse files
authored
Merge pull request #43 from embulk/fix-quote-escape-null
Fix for explicit null in CSV quote and escape
2 parents 543d5d1 + 5d3d3a7 commit 46862a3

File tree

2 files changed: +141 -2 lines changed

2 files changed: +141 -2 lines changed

embulk-parser-csv/src/main/java/org/embulk/parser/csv/CsvParserPlugin.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -478,11 +478,15 @@ static class CsvRecordValidateException extends DataException {
478478
}
479479
}
480480

481+
static CsvTokenizer.Builder buildCsvTokenizerBuilderForTesting(final PluginTask task) {
482+
return buildCsvTokenizerBuilder(task);
483+
}
484+
481485
private static CsvTokenizer.Builder buildCsvTokenizerBuilder(final PluginTask task) {
482486
try {
483487
final CsvTokenizer.Builder builder = CsvTokenizer.builder(task.getDelimiter());
484-
task.getQuoteChar().ifPresent(q -> builder.setQuote(q.getCharacter()));
485-
task.getEscapeChar().ifPresent(e -> builder.setEscape(e.getCharacter()));
488+
builder.setQuote(task.getQuoteChar().orElse(QuoteCharacter.noQuote()).getCharacter());
489+
builder.setEscape(task.getEscapeChar().orElse(EscapeCharacter.noEscape()).getCharacter());
486490
builder.setNewline(task.getNewline().getString());
487491
if (task.getTrimIfNotQuoted()) {
488492
builder.enableTrimIfNotQuoted();

embulk-parser-csv/src/test/java/org/embulk/parser/csv/TestCsvParserPlugin.java

Lines changed: 135 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
import org.embulk.config.ConfigException;
2727
import org.embulk.config.ConfigSource;
2828
import org.embulk.util.config.ConfigMapperFactory;
29+
import org.embulk.util.csv.CsvTokenizer;
2930
import org.embulk.util.text.Newline;
3031
import org.junit.Rule;
3132
import org.junit.Test;
@@ -86,4 +87,138 @@ public void checkLoadConfig() {
8687
assertEquals(Optional.of(new CsvParserPlugin.QuoteCharacter('\\')), task.getQuoteChar());
8788
assertEquals(true, task.getAllowOptionalColumns());
8889
}
90+
91+
@SuppressWarnings("deprecation")
92+
@Test
93+
public void testCsvTokenizerQuoteBackslash() {
94+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
95+
.set("charset", "utf-16")
96+
.set("newline", "LF")
97+
.set("header_line", true)
98+
.set("delimiter", "\t")
99+
.set("quote", "\\")
100+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
101+
final CsvParserPlugin.PluginTask task =
102+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
103+
104+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
105+
assertEquals('\\', builder.peekQuote());
106+
}
107+
108+
@SuppressWarnings("deprecation")
109+
@Test
110+
public void testCsvTokenizerQuoteQuotation() {
111+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
112+
.set("charset", "utf-16")
113+
.set("newline", "LF")
114+
.set("header_line", true)
115+
.set("delimiter", "\t")
116+
.set("quote", "\"")
117+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
118+
final CsvParserPlugin.PluginTask task =
119+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
120+
121+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
122+
assertEquals('\"', builder.peekQuote());
123+
}
124+
125+
@SuppressWarnings("deprecation")
126+
@Test
127+
public void testCsvTokenizerQuoteUnspecified() {
128+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
129+
.set("charset", "utf-16")
130+
.set("newline", "LF")
131+
.set("header_line", true)
132+
.set("delimiter", "\t")
133+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
134+
final CsvParserPlugin.PluginTask task =
135+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
136+
137+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
138+
assertEquals('\"', builder.peekQuote());
139+
}
140+
141+
@SuppressWarnings("deprecation")
142+
@Test
143+
public void testCsvTokenizerQuoteNull() {
144+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
145+
.set("charset", "utf-16")
146+
.set("newline", "LF")
147+
.set("header_line", true)
148+
.set("delimiter", "\t")
149+
.setNested("quote", null) // #setNested is needed to set null
150+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
151+
final CsvParserPlugin.PluginTask task =
152+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
153+
154+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
155+
assertEquals(CsvTokenizer.NO_QUOTE, builder.peekQuote());
156+
}
157+
158+
@SuppressWarnings("deprecation")
159+
@Test
160+
public void testCsvTokenizerEscapeBackslash() {
161+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
162+
.set("charset", "utf-16")
163+
.set("newline", "LF")
164+
.set("header_line", true)
165+
.set("delimiter", "\t")
166+
.set("escape", "\\")
167+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
168+
final CsvParserPlugin.PluginTask task =
169+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
170+
171+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
172+
assertEquals('\\', builder.peekEscape());
173+
}
174+
175+
@SuppressWarnings("deprecation")
176+
@Test
177+
public void testCsvTokenizerEscapeSlash() {
178+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
179+
.set("charset", "utf-16")
180+
.set("newline", "LF")
181+
.set("header_line", true)
182+
.set("delimiter", "\t")
183+
.set("escape", "/")
184+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
185+
final CsvParserPlugin.PluginTask task =
186+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
187+
188+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
189+
assertEquals('/', builder.peekEscape());
190+
}
191+
192+
@SuppressWarnings("deprecation")
193+
@Test
194+
public void testCsvTokenizerEscapeUnspecified() {
195+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
196+
.set("charset", "utf-16")
197+
.set("newline", "LF")
198+
.set("header_line", true)
199+
.set("delimiter", "\t")
200+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
201+
final CsvParserPlugin.PluginTask task =
202+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
203+
204+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
205+
assertEquals('\\', builder.peekEscape());
206+
}
207+
208+
@SuppressWarnings("deprecation")
209+
@Test
210+
public void testCsvTokenizerEscapeNull() {
211+
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
212+
.set("charset", "utf-16")
213+
.set("newline", "LF")
214+
.set("header_line", true)
215+
.set("delimiter", "\t")
216+
.setNested("escape", null) // #setNested is needed to set null
217+
.set("columns", ImmutableList.of(ImmutableMap.of("name", "id", "type", "string")));
218+
final CsvParserPlugin.PluginTask task =
219+
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
220+
221+
final CsvTokenizer.Builder builder = CsvParserPlugin.buildCsvTokenizerBuilderForTesting(task);
222+
assertEquals(CsvTokenizer.NO_ESCAPE, builder.peekEscape());
223+
}
89224
}

0 commit comments

Comments
 (0)