3838import org .embulk .util .config .ConfigMapperFactory ;
3939import org .embulk .util .file .ListFileInput ;
4040import org .embulk .util .guess .CharsetGuess ;
41+ import org .embulk .util .guess .GuesstimatedType ;
4142import org .embulk .util .guess .LineGuessHelper ;
4243import org .embulk .util .guess .NewlineGuess ;
44+ import org .embulk .util .guess .SchemaGuess ;
4345import org .embulk .util .text .LineDecoder ;
4446import org .embulk .util .text .Newline ;
4547import org .slf4j .Logger ;
@@ -156,19 +158,19 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines,
156158 }
157159
158160 final boolean headerLine ;
159- final List <SchemaGuess . GuessedType > columnTypes ;
161+ final List <GuesstimatedType > columnTypes ;
160162 if (uncommentedSampleLines .size () == 1 ) {
161163 // The file contains only 1 line. Assume that there is no header line.
162164 headerLine = false ;
163165
164166 if (parserGuessed .has ("trim_if_not_quoted" )) {
165- columnTypes = SCHEMA_GUESS . typesFromListRecords (sampleRecords .subList (0 , 1 ));
167+ columnTypes = typesFromListRecords (sampleRecords .subList (0 , 1 ));
166168 } else {
167169 final List <List <String >> sampleRecordsTrimmed =
168170 splitLines (parserGuessed , true , uncommentedSampleLines , delim , true , bufferAllocator );
169- final List <SchemaGuess . GuessedType > columnTypesTrimmed = SCHEMA_GUESS . typesFromListRecords (sampleRecordsTrimmed );
171+ final List <GuesstimatedType > columnTypesTrimmed = typesFromListRecords (sampleRecordsTrimmed );
170172
171- final List <SchemaGuess . GuessedType > columnTypesUntrimmed = SCHEMA_GUESS . typesFromListRecords (sampleRecords .subList (0 , 1 ));
173+ final List <GuesstimatedType > columnTypesUntrimmed = typesFromListRecords (sampleRecords .subList (0 , 1 ));
172174 if (columnTypesUntrimmed .equals (columnTypesTrimmed )) {
173175 parserGuessed .set ("trim_if_not_quoted" , false );
174176 columnTypes = columnTypesUntrimmed ;
@@ -180,22 +182,22 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines,
180182 } else {
181183 // The file contains more than 1 line. If guessed first line's column types are all strings or boolean, and the types are
182184 // different from the other lines, assume that the first line is column names.
183- final List <SchemaGuess . GuessedType > firstTypes = SCHEMA_GUESS . typesFromListRecords (sampleRecords .subList (0 , 1 ));
184- final List <SchemaGuess . GuessedType > otherTypesUntrimmed =
185- SCHEMA_GUESS . typesFromListRecords (sampleRecords .subList (1 , sampleRecords .size ()));
185+ final List <GuesstimatedType > firstTypes = typesFromListRecords (sampleRecords .subList (0 , 1 ));
186+ final List <GuesstimatedType > otherTypesUntrimmed =
187+ typesFromListRecords (sampleRecords .subList (1 , sampleRecords .size ()));
186188
187189 logger .debug ("Types of the first line : {}" , firstTypes );
188190 logger .debug ("Types of the other lines (untrimmed): {}" , otherTypesUntrimmed );
189191
190- final List <SchemaGuess . GuessedType > otherTypes ;
192+ final List <GuesstimatedType > otherTypes ;
191193
192194 if (parserGuessed .has ("trim_if_not_quoted" )) {
193195 otherTypes = otherTypesUntrimmed ;
194196 } else {
195197 final List <List <String >> sampleRecordsTrimmed =
196198 splitLines (parserGuessed , true , uncommentedSampleLines , delim , true , bufferAllocator );
197- final List <SchemaGuess . GuessedType > otherTypesTrimmed =
198- SCHEMA_GUESS . typesFromListRecords (sampleRecordsTrimmed .subList (1 , sampleRecordsTrimmed .size ()));
199+ final List <GuesstimatedType > otherTypesTrimmed =
200+ typesFromListRecords (sampleRecordsTrimmed .subList (1 , sampleRecordsTrimmed .size ()));
199201 if (otherTypesUntrimmed .equals (otherTypesTrimmed )) {
200202 parserGuessed .set ("trim_if_not_quoted" , false );
201203 otherTypes = otherTypesUntrimmed ;
@@ -208,7 +210,7 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines,
208210 logger .debug ("Types of the other lines: {}" , otherTypes );
209211
210212 headerLine = ((!firstTypes .equals (otherTypes )
211- && firstTypes .stream ().allMatch (t -> SchemaGuess . GuessedType . STRING .equals (t ) || SchemaGuess . GuessedType .BOOLEAN .equals (t )))
213+ && firstTypes .stream ().allMatch (t -> GuesstimatedType . STRING .equals (t ) || GuesstimatedType .BOOLEAN .equals (t )))
212214 || guessStringHeaderLine (sampleRecords ));
213215
214216 logger .debug ("headerLine: {}" , headerLine );
@@ -261,7 +263,7 @@ ConfigDiff guessLines(final ConfigSource config, final List<String> sampleLines,
261263 * @param type a guessed type
262264 * @return a new column config
263265 */
264- protected ConfigDiff newColumn (final String name , final SchemaGuess . GuessedType type ) {
266+ protected ConfigDiff newColumn (final String name , final GuesstimatedType type ) {
265267 final ConfigDiff column = newConfigDiff ();
266268 column .set ("name" , name );
267269 column .set ("type" , type .toString ());
@@ -604,41 +606,47 @@ private static byte[] joinBytes(final List<String> sampleLines, final Newline ne
604606 return data .toByteArray ();
605607 }
606608
// Guesses column types for string-only sample records by delegating to SCHEMA_GUESS.typesFromListRecords.
// The samples are re-typed from List<List<String>> to List<List<Object>> purely by casting; no copy is made.
// The second cast is unchecked, hence the @SuppressWarnings on this helper only.
// NOTE(review): the cast is only safe if SchemaGuess reads the nested lists and never stores non-String
// elements into them -- confirm against the SchemaGuess implementation.
609+ @ SuppressWarnings ("unchecked" )
610+ private static List <GuesstimatedType > typesFromListRecords (final List <List <String >> samples ) {
// Step 1: widen to a wildcard type. Generics are invariant, so List<List<String>> cannot be cast
// directly to List<List<Object>>; going through the wildcard form makes the cast legal.
611+ final List <? extends List <? extends Object >> sampleObjects = (List <? extends List <? extends Object >>) samples ;
// Step 2: narrow the wildcard type to the List<List<Object>> passed to SchemaGuess (unchecked cast).
612+ return SCHEMA_GUESS .typesFromListRecords ((List <List <Object >>) sampleObjects );
613+ }
614+
// Shared ConfigMapperFactory built with the default modules. It is declared before SCHEMA_GUESS because the
// new SCHEMA_GUESS initializer references it and static initializers run in textual order.
607615 private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory .builder ().addDefaultModules ().build ();
608616
// Candidate field delimiters: comma, tab, pipe, semicolon. (This change also makes the field final.)
609- private static List <Character > DELIMITER_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
617+ private static final List <Character > DELIMITER_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
610618 ',' ,
611619 '\t' ,
612620 '|' ,
613621 ';'
614622 ));
615623
// Candidate quote characters: double quote and single quote. (Made final by this change.)
616- private static List <Character > QUOTE_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
624+ private static final List <Character > QUOTE_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
617625 '\"' ,
618626 '\''
619627 ));
620628
// Candidate escape strings. (Made final by this change.)
621- private static List <String > ESCAPE_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
629+ private static final List <String > ESCAPE_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
622630 "\\ " ,
623631 "\" "
624632 ));
625633
// Candidate textual spellings of a null value. (Made final by this change.)
626- private static List <String > NULL_STRING_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
634+ private static final List <String > NULL_STRING_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
627635 "null" ,
628636 "NULL" ,
629637 "#N/A" ,
630638 "\\ N" // MySQL LOAD, Hive STORED AS TEXTFILE
631639 ));
632640
// Candidate markers for comment lines. (Made final by this change.)
633- private static List <String > COMMENT_LINE_MARKER_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
641+ private static final List <String > COMMENT_LINE_MARKER_CANDIDATES = Collections .unmodifiableList (Arrays .asList (
634642 "#" ,
635643 "//"
636644 ));
637645
// Schema guesser used by typesFromListRecords. This change makes it final and builds it from
// CONFIG_MAPPER_FACTORY instead of calling the no-argument SchemaGuess.of().
638- private static SchemaGuess SCHEMA_GUESS = SchemaGuess .of ();
646+ private static final SchemaGuess SCHEMA_GUESS = SchemaGuess .of (CONFIG_MAPPER_FACTORY );
639647
// Line-count limits, made final by this change.
// NOTE(review): presumably these bound the skip-header-lines detection -- confirm against their usage,
// which is outside this hunk.
640- private static int MAX_SKIP_LINES = 10 ;
641- private static int NO_SKIP_DETECT_LINES = 10 ;
648+ private static final int MAX_SKIP_LINES = 10 ;
649+ private static final int NO_SKIP_DETECT_LINES = 10 ;
642650
// SLF4J logger for this plugin class.
643651 private static final Logger logger = LoggerFactory .getLogger (CsvGuessPlugin .class );
644652}
0 commit comments