Skip to content

Commit b596ade

Browse files
authored
Merge pull request #10 from embulk/reenable-guess-csv-tests
Re-enable tests for CSV Guess
2 parents 3542735 + fd3b8d4 commit b596ade

File tree

3 files changed

+47
-20
lines changed

3 files changed

+47
-20
lines changed

embulk-guess-csv/src/main/java/org/embulk/guess/csv/CsvGuessPlugin.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.embulk.parser.csv.CsvParserPlugin;
3333
import org.embulk.parser.csv.CsvTokenizer;
3434
import org.embulk.spi.Buffer;
35+
import org.embulk.spi.BufferAllocator;
3536
import org.embulk.spi.Exec;
3637
import org.embulk.spi.GuessPlugin;
3738
import org.embulk.util.config.ConfigMapperFactory;
@@ -59,10 +60,11 @@ public ConfigDiff guess(final ConfigSource config, final Buffer sample) {
5960
return NewlineGuess.of(CONFIG_MAPPER_FACTORY).guess(config, sample);
6061
}
6162

62-
return this.guessLines(config, LineGuessHelper.of(CONFIG_MAPPER_FACTORY).toLines(config, sample));
63+
final BufferAllocator bufferAllocator = Exec.getBufferAllocator();
64+
return this.guessLines(config, LineGuessHelper.of(CONFIG_MAPPER_FACTORY).toLines(config, sample), bufferAllocator);
6365
}
6466

65-
ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines) {
67+
ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines, final BufferAllocator bufferAllocator) {
6668
final ConfigDiff configDiff = newConfigDiff();
6769

6870
// return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
@@ -127,7 +129,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
127129
// skipping empty lines is also disabled here because skipping header lines is done by
128130
// CsvParser which doesn't skip empty lines automatically
129131

130-
final List<List<String>> sampleRecordsBeforeSkip = splitLines(parserGuessed, false, sampleLines, delim, null);
132+
final List<List<String>> sampleRecordsBeforeSkip =
133+
splitLines(parserGuessed, false, sampleLines, delim, null, bufferAllocator);
131134
final int skipHeaderLines = guessSkipHeaderLines(sampleRecordsBeforeSkip);
132135
final List<String> skippedSampleLines = sampleLines.subList(skipHeaderLines, sampleLines.size());
133136
final List<List<String>> skippedSampleRecords = sampleRecordsBeforeSkip.subList(skipHeaderLines, sampleRecordsBeforeSkip.size());
@@ -144,7 +147,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
144147
parserGuessed);
145148
}
146149

147-
final List<List<String>> sampleRecords = splitLines(parserGuessed, true, uncommentedSampleLines, delim, null);
150+
final List<List<String>> sampleRecords =
151+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, null, bufferAllocator);
148152

149153
// It should fail if CSV parser cannot parse sample_lines.
150154
if (sampleRecords == null || sampleRecords.isEmpty()) {
@@ -160,7 +164,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
160164
if (parserGuessed.has("trim_if_not_quoted")) {
161165
columnTypes = SCHEMA_GUESS.typesFromListRecords(sampleRecords.subList(0, 1));
162166
} else {
163-
final List<List<String>> sampleRecordsTrimmed = splitLines(parserGuessed, true, uncommentedSampleLines, delim, true);
167+
final List<List<String>> sampleRecordsTrimmed =
168+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
164169
final List<SchemaGuess.GuessedType> columnTypesTrimmed = SCHEMA_GUESS.typesFromListRecords(sampleRecordsTrimmed);
165170

166171
final List<SchemaGuess.GuessedType> columnTypesUntrimmed = SCHEMA_GUESS.typesFromListRecords(sampleRecords.subList(0, 1));
@@ -187,7 +192,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
187192
if (parserGuessed.has("trim_if_not_quoted")) {
188193
otherTypes = otherTypesUntrimmed;
189194
} else {
190-
final List<List<String>> sampleRecordsTrimmed = splitLines(parserGuessed, true, uncommentedSampleLines, delim, true);
195+
final List<List<String>> sampleRecordsTrimmed =
196+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
191197
final List<SchemaGuess.GuessedType> otherTypesTrimmed =
192198
SCHEMA_GUESS.typesFromListRecords(sampleRecordsTrimmed.subList(1, sampleRecordsTrimmed.size()));
193199
if (otherTypesUntrimmed.equals(otherTypesTrimmed)) {
@@ -274,7 +280,8 @@ private static List<List<String>> splitLines(
274280
final boolean skipEmptyLines,
275281
final List<String> sampleLines,
276282
final String delim,
277-
final Boolean trimIfNotQuoted) {
283+
final Boolean trimIfNotQuoted,
284+
final BufferAllocator bufferAllocator) {
278285
try {
279286
final String nullString = parserConfig.get(String.class, "null_string", null);
280287
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
@@ -289,7 +296,7 @@ private static List<List<String>> splitLines(
289296
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
290297

291298
final byte[] data = joinBytes(sampleLines, parserTask.getNewline());
292-
final Buffer sample = Exec.getBufferAllocator().allocate(data.length);
299+
final Buffer sample = bufferAllocator.allocate(data.length);
293300
sample.setBytes(0, data, 0, data.length);
294301
sample.limit(data.length);
295302

embulk-guess-csv/src/test/java/org/embulk/guess/csv/TestCsvGuessPlugin.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,18 @@
2323
import java.util.LinkedHashMap;
2424
import java.util.List;
2525
import java.util.Map;
26-
import org.embulk.EmbulkTestRuntime;
2726
import org.embulk.config.ConfigDiff;
2827
import org.embulk.config.ConfigSource;
28+
import org.embulk.spi.Buffer;
29+
import org.embulk.spi.BufferAllocator;
30+
import org.embulk.spi.BufferImpl;
2931
import org.embulk.util.config.ConfigMapperFactory;
30-
import org.junit.Rule;
3132
import org.junit.Test;
3233

3334
/**
3435
* Tests CsvGuessPlugin.
3536
*/
3637
public class TestCsvGuessPlugin {
37-
@Rule
38-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
39-
4038
@Test
4139
public void testLargeLong() {
4240
final ConfigDiff actual = guess(
@@ -397,12 +395,24 @@ public void testComplexLine() {
397395
assertEquals("%Y%m%d%H%M%S%z", columnsActual.get(3).get("format"));
398396
}
399397

398+
private static class MockBufferAllocator implements BufferAllocator {
399+
@Override
400+
public Buffer allocate() {
401+
return this.allocate(32 * 1024);
402+
}
403+
404+
@Override
405+
public Buffer allocate(final int minimumCapacity) {
406+
return BufferImpl.allocate(minimumCapacity);
407+
}
408+
}
409+
400410
private static ConfigDiff guess(final String... sampleLines) {
401411
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
402412
final ConfigSource parserConfig = CONFIG_MAPPER_FACTORY.newConfigSource();
403413
parserConfig.set("type", "csv");
404414
config.set("parser", parserConfig);
405-
return new CsvGuessPlugin().guessLines(config, Arrays.asList(sampleLines));
415+
return new CsvGuessPlugin().guessLines(config, Arrays.asList(sampleLines), new MockBufferAllocator());
406416
}
407417

408418
private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();

embulk-guess-csv_all_strings/src/test/java/org/embulk/guess/csv/TestCsvAllStringsGuessPlugin.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,18 @@
2121
import java.util.Arrays;
2222
import java.util.List;
2323
import java.util.Map;
24-
import org.embulk.EmbulkTestRuntime;
2524
import org.embulk.config.ConfigDiff;
2625
import org.embulk.config.ConfigSource;
26+
import org.embulk.spi.Buffer;
27+
import org.embulk.spi.BufferAllocator;
28+
import org.embulk.spi.BufferImpl;
2729
import org.embulk.util.config.ConfigMapperFactory;
28-
import org.junit.Rule;
2930
import org.junit.Test;
3031

3132
/**
3233
* Tests CsvAllStringsGuessPlugin.
3334
*/
3435
public class TestCsvAllStringsGuessPlugin {
35-
@Rule
36-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
37-
3836
/*
3937
def test_columns_without_header
4038
actual = guess([
@@ -103,12 +101,24 @@ public void testColumnsWithHeader() {
103101
assertEquals("string", columnsActual.get(2).get("type"));
104102
}
105103

104+
private static class MockBufferAllocator implements BufferAllocator {
105+
@Override
106+
public Buffer allocate() {
107+
return this.allocate(32 * 1024);
108+
}
109+
110+
@Override
111+
public Buffer allocate(final int minimumCapacity) {
112+
return BufferImpl.allocate(minimumCapacity);
113+
}
114+
}
115+
106116
private static ConfigDiff guess(final String... sampleLines) {
107117
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
108118
final ConfigSource parserConfig = CONFIG_MAPPER_FACTORY.newConfigSource();
109119
parserConfig.set("type", "csv");
110120
config.set("parser", parserConfig);
111-
return new CsvAllStringsGuessPlugin().guessLines(config, Arrays.asList(sampleLines));
121+
return new CsvAllStringsGuessPlugin().guessLines(config, Arrays.asList(sampleLines), new MockBufferAllocator());
112122
}
113123

114124
private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();

0 commit comments

Comments
 (0)