3232import org .embulk .parser .csv .CsvParserPlugin ;
3333import org .embulk .parser .csv .CsvTokenizer ;
3434import org .embulk .spi .Buffer ;
35+ import org .embulk .spi .BufferAllocator ;
3536import org .embulk .spi .Exec ;
3637import org .embulk .spi .GuessPlugin ;
3738import org .embulk .util .config .ConfigMapperFactory ;
@@ -59,10 +60,11 @@ public ConfigDiff guess(final ConfigSource config, final Buffer sample) {
5960 return NewlineGuess .of (CONFIG_MAPPER_FACTORY ).guess (config , sample );
6061 }
6162
62- return this .guessLines (config , LineGuessHelper .of (CONFIG_MAPPER_FACTORY ).toLines (config , sample ));
63+ final BufferAllocator bufferAllocator = Exec .getBufferAllocator ();
64+ return this .guessLines (config , LineGuessHelper .of (CONFIG_MAPPER_FACTORY ).toLines (config , sample ), bufferAllocator );
6365 }
6466
65- ConfigDiff guessLines (final ConfigSource config , final List <String > sampleLines ) {
67+ ConfigDiff guessLines (final ConfigSource config , final List <String > sampleLines , final BufferAllocator bufferAllocator ) {
6668 final ConfigDiff configDiff = newConfigDiff ();
6769
6870 // return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
@@ -127,7 +129,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
127129 // skipping empty lines is also disabled here because skipping header lines is done by
128130 // CsvParser which doesn't skip empty lines automatically
129131
130- final List <List <String >> sampleRecordsBeforeSkip = splitLines (parserGuessed , false , sampleLines , delim , null );
132+ final List <List <String >> sampleRecordsBeforeSkip =
133+ splitLines (parserGuessed , false , sampleLines , delim , null , bufferAllocator );
131134 final int skipHeaderLines = guessSkipHeaderLines (sampleRecordsBeforeSkip );
132135 final List <String > skippedSampleLines = sampleLines .subList (skipHeaderLines , sampleLines .size ());
133136 final List <List <String >> skippedSampleRecords = sampleRecordsBeforeSkip .subList (skipHeaderLines , sampleRecordsBeforeSkip .size ());
@@ -144,7 +147,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
144147 parserGuessed );
145148 }
146149
147- final List <List <String >> sampleRecords = splitLines (parserGuessed , true , uncommentedSampleLines , delim , null );
150+ final List <List <String >> sampleRecords =
151+ splitLines (parserGuessed , true , uncommentedSampleLines , delim , null , bufferAllocator );
148152
149153 // It should fail if CSV parser cannot parse sample_lines.
150154 if (sampleRecords == null || sampleRecords .isEmpty ()) {
@@ -160,7 +164,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
160164 if (parserGuessed .has ("trim_if_not_quoted" )) {
161165 columnTypes = SCHEMA_GUESS .typesFromListRecords (sampleRecords .subList (0 , 1 ));
162166 } else {
163- final List <List <String >> sampleRecordsTrimmed = splitLines (parserGuessed , true , uncommentedSampleLines , delim , true );
167+ final List <List <String >> sampleRecordsTrimmed =
168+ splitLines (parserGuessed , true , uncommentedSampleLines , delim , true , bufferAllocator );
164169 final List <SchemaGuess .GuessedType > columnTypesTrimmed = SCHEMA_GUESS .typesFromListRecords (sampleRecordsTrimmed );
165170
166171 final List <SchemaGuess .GuessedType > columnTypesUntrimmed = SCHEMA_GUESS .typesFromListRecords (sampleRecords .subList (0 , 1 ));
@@ -187,7 +192,8 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines)
187192 if (parserGuessed .has ("trim_if_not_quoted" )) {
188193 otherTypes = otherTypesUntrimmed ;
189194 } else {
190- final List <List <String >> sampleRecordsTrimmed = splitLines (parserGuessed , true , uncommentedSampleLines , delim , true );
195+ final List <List <String >> sampleRecordsTrimmed =
196+ splitLines (parserGuessed , true , uncommentedSampleLines , delim , true , bufferAllocator );
191197 final List <SchemaGuess .GuessedType > otherTypesTrimmed =
192198 SCHEMA_GUESS .typesFromListRecords (sampleRecordsTrimmed .subList (1 , sampleRecordsTrimmed .size ()));
193199 if (otherTypesUntrimmed .equals (otherTypesTrimmed )) {
@@ -274,7 +280,8 @@ private static List<List<String>> splitLines(
274280 final boolean skipEmptyLines ,
275281 final List <String > sampleLines ,
276282 final String delim ,
277- final Boolean trimIfNotQuoted ) {
283+ final Boolean trimIfNotQuoted ,
284+ final BufferAllocator bufferAllocator ) {
278285 try {
279286 final String nullString = parserConfig .get (String .class , "null_string" , null );
280287 final ConfigSource config = CONFIG_MAPPER_FACTORY .newConfigSource ();
@@ -289,7 +296,7 @@ private static List<List<String>> splitLines(
289296 CONFIG_MAPPER_FACTORY .createConfigMapper ().map (config , CsvParserPlugin .PluginTask .class );
290297
291298 final byte [] data = joinBytes (sampleLines , parserTask .getNewline ());
292- final Buffer sample = Exec . getBufferAllocator () .allocate (data .length );
299+ final Buffer sample = bufferAllocator .allocate (data .length );
293300 sample .setBytes (0 , data , 0 , data .length );
294301 sample .limit (data .length );
295302
0 commit comments