Skip to content

Commit fd3b8d4

Browse files
author
Dai MIKURUBE
committed
Pass BufferAllocator to CsvGuessPlugin#splitLines, instead of calling Exec.getBufferAllocator there
1 parent ed13907 commit fd3b8d4

File tree

3 files changed

+47
-10
lines changed

3 files changed

+47
-10
lines changed

embulk-guess-csv/src/main/java/org/embulk/guess/csv/CsvGuessPlugin.java

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
import org.embulk.parser.csv.CsvParserPlugin;
3333
import org.embulk.parser.csv.CsvTokenizer;
3434
import org.embulk.spi.Buffer;
35+
import org.embulk.spi.BufferAllocator;
3536
import org.embulk.spi.Exec;
3637
import org.embulk.spi.GuessPlugin;
3738
import org.embulk.util.config.ConfigMapperFactory;
@@ -59,10 +60,11 @@ public ConfigDiff guess(final ConfigSource config, final Buffer sample) {
5960
return NewlineGuess.of(CONFIG_MAPPER_FACTORY).guess(config, sample);
6061
}
6162

62-
return this.guessLines(config, LineGuessHelper.of(CONFIG_MAPPER_FACTORY).toLines(config, sample));
63+
final BufferAllocator bufferAllocator = Exec.getBufferAllocator();
64+
return this.guessLines(config, LineGuessHelper.of(CONFIG_MAPPER_FACTORY).toLines(config, sample), bufferAllocator);
6365
}
6466

65-
ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines) {
67+
ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines, final BufferAllocator bufferAllocator) {
6668
final ConfigDiff configDiff = newConfigDiff();
6769

6870
// return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
@@ -127,7 +129,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
127129
// skipping empty lines is also disabled here because skipping header lines is done by
128130
// CsvParser which doesn't skip empty lines automatically
129131

130-
final List<List<String>> sampleRecordsBeforeSkip = splitLines(parserGuessed, false, sampleLines, delim, null);
132+
final List<List<String>> sampleRecordsBeforeSkip =
133+
splitLines(parserGuessed, false, sampleLines, delim, null, bufferAllocator);
131134
final int skipHeaderLines = guessSkipHeaderLines(sampleRecordsBeforeSkip);
132135
final List<String> skippedSampleLines = sampleLines.subList(skipHeaderLines, sampleLines.size());
133136
final List<List<String>> skippedSampleRecords = sampleRecordsBeforeSkip.subList(skipHeaderLines, sampleRecordsBeforeSkip.size());
@@ -144,7 +147,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
144147
parserGuessed);
145148
}
146149

147-
final List<List<String>> sampleRecords = splitLines(parserGuessed, true, uncommentedSampleLines, delim, null);
150+
final List<List<String>> sampleRecords =
151+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, null, bufferAllocator);
148152

149153
// It should fail if CSV parser cannot parse sample_lines.
150154
if (sampleRecords == null || sampleRecords.isEmpty()) {
@@ -160,7 +164,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
160164
if (parserGuessed.has("trim_if_not_quoted")) {
161165
columnTypes = SCHEMA_GUESS.typesFromListRecords(sampleRecords.subList(0, 1));
162166
} else {
163-
final List<List<String>> sampleRecordsTrimmed = splitLines(parserGuessed, true, uncommentedSampleLines, delim, true);
167+
final List<List<String>> sampleRecordsTrimmed =
168+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
164169
final List<SchemaGuess.GuessedType> columnTypesTrimmed = SCHEMA_GUESS.typesFromListRecords(sampleRecordsTrimmed);
165170

166171
final List<SchemaGuess.GuessedType> columnTypesUntrimmed = SCHEMA_GUESS.typesFromListRecords(sampleRecords.subList(0, 1));
@@ -187,7 +192,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
187192
if (parserGuessed.has("trim_if_not_quoted")) {
188193
otherTypes = otherTypesUntrimmed;
189194
} else {
190-
final List<List<String>> sampleRecordsTrimmed = splitLines(parserGuessed, true, uncommentedSampleLines, delim, true);
195+
final List<List<String>> sampleRecordsTrimmed =
196+
splitLines(parserGuessed, true, uncommentedSampleLines, delim, true, bufferAllocator);
191197
final List<SchemaGuess.GuessedType> otherTypesTrimmed =
192198
SCHEMA_GUESS.typesFromListRecords(sampleRecordsTrimmed.subList(1, sampleRecordsTrimmed.size()));
193199
if (otherTypesUntrimmed.equals(otherTypesTrimmed)) {
@@ -274,7 +280,8 @@ private static List<List<String>> splitLines(
274280
final boolean skipEmptyLines,
275281
final List<String> sampleLines,
276282
final String delim,
277-
final Boolean trimIfNotQuoted) {
283+
final Boolean trimIfNotQuoted,
284+
final BufferAllocator bufferAllocator) {
278285
try {
279286
final String nullString = parserConfig.get(String.class, "null_string", null);
280287
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
@@ -289,7 +296,7 @@ private static List<List<String>> splitLines(
289296
CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, CsvParserPlugin.PluginTask.class);
290297

291298
final byte[] data = joinBytes(sampleLines, parserTask.getNewline());
292-
final Buffer sample = Exec.getBufferAllocator().allocate(data.length);
299+
final Buffer sample = bufferAllocator.allocate(data.length);
293300
sample.setBytes(0, data, 0, data.length);
294301
sample.limit(data.length);
295302

embulk-guess-csv/src/test/java/org/embulk/guess/csv/TestCsvGuessPlugin.java

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@
2525
import java.util.Map;
2626
import org.embulk.config.ConfigDiff;
2727
import org.embulk.config.ConfigSource;
28+
import org.embulk.spi.Buffer;
29+
import org.embulk.spi.BufferAllocator;
30+
import org.embulk.spi.BufferImpl;
2831
import org.embulk.util.config.ConfigMapperFactory;
2932
import org.junit.Test;
3033

@@ -392,12 +395,24 @@ public void testComplexLine() {
392395
assertEquals("%Y%m%d%H%M%S%z", columnsActual.get(3).get("format"));
393396
}
394397

398+
private static class MockBufferAllocator implements BufferAllocator {
399+
@Override
400+
public Buffer allocate() {
401+
return this.allocate(32 * 1024);
402+
}
403+
404+
@Override
405+
public Buffer allocate(final int minimumCapacity) {
406+
return BufferImpl.allocate(minimumCapacity);
407+
}
408+
}
409+
395410
private static ConfigDiff guess(final String... sampleLines) {
396411
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
397412
final ConfigSource parserConfig = CONFIG_MAPPER_FACTORY.newConfigSource();
398413
parserConfig.set("type", "csv");
399414
config.set("parser", parserConfig);
400-
return new CsvGuessPlugin().guessLines(config, Arrays.asList(sampleLines));
415+
return new CsvGuessPlugin().guessLines(config, Arrays.asList(sampleLines), new MockBufferAllocator());
401416
}
402417

403418
private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();

embulk-guess-csv_all_strings/src/test/java/org/embulk/guess/csv/TestCsvAllStringsGuessPlugin.java

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@
2323
import java.util.Map;
2424
import org.embulk.config.ConfigDiff;
2525
import org.embulk.config.ConfigSource;
26+
import org.embulk.spi.Buffer;
27+
import org.embulk.spi.BufferAllocator;
28+
import org.embulk.spi.BufferImpl;
2629
import org.embulk.util.config.ConfigMapperFactory;
2730
import org.junit.Test;
2831

@@ -98,12 +101,24 @@ public void testColumnsWithHeader() {
98101
assertEquals("string", columnsActual.get(2).get("type"));
99102
}
100103

104+
private static class MockBufferAllocator implements BufferAllocator {
105+
@Override
106+
public Buffer allocate() {
107+
return this.allocate(32 * 1024);
108+
}
109+
110+
@Override
111+
public Buffer allocate(final int minimumCapacity) {
112+
return BufferImpl.allocate(minimumCapacity);
113+
}
114+
}
115+
101116
private static ConfigDiff guess(final String... sampleLines) {
102117
final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource();
103118
final ConfigSource parserConfig = CONFIG_MAPPER_FACTORY.newConfigSource();
104119
parserConfig.set("type", "csv");
105120
config.set("parser", parserConfig);
106-
return new CsvAllStringsGuessPlugin().guessLines(config, Arrays.asList(sampleLines));
121+
return new CsvAllStringsGuessPlugin().guessLines(config, Arrays.asList(sampleLines), new MockBufferAllocator());
107122
}
108123

109124
private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();

0 commit comments

Comments
 (0)