Skip to content

Commit ff17dba

Browse files
authored
Merge pull request #23 from embulk/reorganize-CsvTokenizer-exceptions
Reorganize CsvTokenizer-related Exceptions
2 parents bff2547 + e8f212e commit ff17dba

File tree

11 files changed: +114 −56 lines changed

embulk-guess-csv/src/main/java/org/embulk/guess/csv/CsvGuessPlugin.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,8 @@
3131
import org.embulk.config.ConfigSource;
3232
import org.embulk.parser.csv.CsvParserPlugin;
3333
import org.embulk.parser.csv.CsvTokenizer;
34-
import org.embulk.parser.csv.InvalidValueException;
35-
import org.embulk.parser.csv.TooFewColumnsException;
34+
import org.embulk.parser.csv.InvalidCsvQuotationException;
35+
import org.embulk.parser.csv.RecordDoesNotHaveExpectedColumnException;
3636
import org.embulk.spi.Buffer;
3737
import org.embulk.spi.BufferAllocator;
3838
import org.embulk.spi.Exec;
@@ -319,12 +319,12 @@ private static List<List<String>> splitLines(
319319
} else {
320320
columns.add(column);
321321
}
322-
} catch (final TooFewColumnsException ex) {
322+
} catch (final RecordDoesNotHaveExpectedColumnException ex) {
323323
rows.add(Collections.unmodifiableList(columns));
324324
break;
325325
}
326326
}
327-
} catch (final InvalidValueException ex) {
327+
} catch (final InvalidCsvQuotationException ex) {
328328
// TODO warning
329329
tokenizer.skipCurrentLine();
330330
}

embulk-parser-csv/src/main/java/org/embulk/parser/csv/CsvParserPlugin.java

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -334,9 +334,13 @@ public void run(TaskSource taskSource, final Schema schema,
334334
}
335335
}
336336

337-
if (!tokenizer.nextRecord()) {
338-
// empty file
339-
continue;
337+
try {
338+
if (!tokenizer.nextRecord()) {
339+
// empty file
340+
continue;
341+
}
342+
} catch (final InvalidCsvFormatException ex) {
343+
throw new DataException(ex);
340344
}
341345

342346
while (true) {
@@ -431,7 +435,7 @@ private String nextColumn() {
431435

432436
try {
433437
hasNextRecord = tokenizer.nextRecord();
434-
} catch (TooManyColumnsException ex) {
438+
} catch (final RecordHasUnexpectedTrailingColumnException ex) {
435439
if (allowExtraColumns) {
436440
String tooManyColumnsLine = tokenizer.skipCurrentLine();
437441
// TODO warning
@@ -443,7 +447,7 @@ private String nextColumn() {
443447
}
444448
pageBuilder.addRecord();
445449

446-
} catch (InvalidFormatException | InvalidValueException | CsvRecordValidateException e) {
450+
} catch (final InvalidCsvFormatException | CsvRecordValidateException e) {
447451
String skippedLine = tokenizer.skipCurrentLine();
448452
long lineNumber = tokenizer.getCurrentLineNumber();
449453
if (stopOnInvalidRecord) {
@@ -472,20 +476,24 @@ static class CsvRecordValidateException extends DataException {
472476
}
473477

474478
private static CsvTokenizer.Builder buildCsvTokenizerBuilder(final PluginTask task) {
475-
final CsvTokenizer.Builder builder = CsvTokenizer.builder(task.getDelimiter());
476-
task.getQuoteChar().ifPresent(q -> builder.setQuote(q.getCharacter()));
477-
task.getEscapeChar().ifPresent(e -> builder.setEscape(e.getCharacter()));
478-
builder.setNewline(task.getNewline().getString());
479-
if (task.getTrimIfNotQuoted()) {
480-
builder.enableTrimIfNotQuoted();
481-
}
482-
if (task.getQuotesInQuotedFields() == QuotesInQuotedFields.ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS) {
483-
builder.acceptStrayQuotesAssumingNoDelimitersInFields();
479+
try {
480+
final CsvTokenizer.Builder builder = CsvTokenizer.builder(task.getDelimiter());
481+
task.getQuoteChar().ifPresent(q -> builder.setQuote(q.getCharacter()));
482+
task.getEscapeChar().ifPresent(e -> builder.setEscape(e.getCharacter()));
483+
builder.setNewline(task.getNewline().getString());
484+
if (task.getTrimIfNotQuoted()) {
485+
builder.enableTrimIfNotQuoted();
486+
}
487+
if (task.getQuotesInQuotedFields() == QuotesInQuotedFields.ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS) {
488+
builder.acceptStrayQuotesAssumingNoDelimitersInFields();
489+
}
490+
builder.setMaxQuotedFieldLength(task.getMaxQuotedSizeLimit());
491+
task.getCommentLineMarker().ifPresent(m -> builder.setCommentLineMarker(m));
492+
task.getNullString().ifPresent(n -> builder.setNullString(n));
493+
return builder;
494+
} catch (final RuntimeException ex) {
495+
throw new ConfigException(ex);
484496
}
485-
builder.setMaxQuotedFieldLength(task.getMaxQuotedSizeLimit());
486-
task.getCommentLineMarker().ifPresent(m -> builder.setCommentLineMarker(m));
487-
task.getNullString().ifPresent(n -> builder.setNullString(n));
488-
return builder;
489497
}
490498

491499
@SuppressWarnings("deprecation") // For the use of new PageBuilder().

embulk-parser-csv/src/main/java/org/embulk/parser/csv/CsvTokenizer.java

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ public boolean nextRecord() {
221221
public boolean nextRecord(final boolean skipEmptyLine) {
222222
// If at the end of record, read the next line and initialize the state
223223
if (this.recordState != RecordState.END) {
224-
throw new TooManyColumnsException("Too many columns");
224+
throw new RecordHasUnexpectedTrailingColumnException();
225225
}
226226

227227
final boolean hasNext = this.nextLine(skipEmptyLine);
@@ -260,7 +260,7 @@ public boolean hasNextColumn() {
260260

261261
public String nextColumn() {
262262
if (!this.hasNextColumn()) {
263-
throw new TooFewColumnsException("Too few columns");
263+
throw new RecordDoesNotHaveExpectedColumnException();
264264
}
265265

266266
// reset last state
@@ -402,7 +402,7 @@ public String nextColumn() {
402402
quotedValue.append(this.newline);
403403
this.quotedValueLines.add(this.line);
404404
if (!this.nextLine(false)) {
405-
throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
405+
throw new EndOfFileInQuotedFieldException();
406406
}
407407
valueStartPos = 0;
408408

@@ -423,7 +423,7 @@ public String nextColumn() {
423423
// A non-escaped stray "quote character" in the field is processed as a regular character
424424
// if ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS is specified,
425425
if ((this.linePos - valueStartPos) + quotedValue.length() > this.maxQuotedFieldLength) {
426-
throw new QuotedSizeLimitExceededException("The size of the quoted value exceeds the limit size (" + this.maxQuotedFieldLength + ")");
426+
throw new QuotedFieldLengthLimitExceededException(this.maxQuotedFieldLength);
427427
}
428428
} else {
429429
quotedValue.append(this.line.substring(valueStartPos, this.linePos - 1));
@@ -438,7 +438,7 @@ public String nextColumn() {
438438
quotedValue.append(this.line.substring(valueStartPos, this.linePos));
439439
this.quotedValueLines.add(this.line);
440440
if (!this.nextLine(false)) {
441-
throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
441+
throw new EndOfFileInQuotedFieldException();
442442
}
443443
valueStartPos = 0;
444444
} else if (this.isQuote(next) || this.isEscape(next)) { // escaped quote
@@ -449,7 +449,7 @@ public String nextColumn() {
449449

450450
} else {
451451
if ((this.linePos - valueStartPos) + quotedValue.length() > this.maxQuotedFieldLength) {
452-
throw new QuotedSizeLimitExceededException("The size of the quoted value exceeds the limit size (" + this.maxQuotedFieldLength + ")");
452+
throw new QuotedFieldLengthLimitExceededException(this.maxQuotedFieldLength);
453453
}
454454
// keep QUOTED_VALUE state
455455
}
@@ -473,7 +473,7 @@ public String nextColumn() {
473473
// column has trailing spaces and quoted. TODO should this be rejected?
474474

475475
} else {
476-
throw new InvalidValueException(String.format("Unexpected extra character '%c' after a value quoted by '%c'", c, this.quote));
476+
throw new InvalidCharacterAfterQuoteException(c, this.quote);
477477
}
478478
break;
479479

embulk-parser-csv/src/main/java/org/embulk/parser/csv/TooFewColumnsException.java renamed to embulk-parser-csv/src/main/java/org/embulk/parser/csv/EndOfFileInQuotedFieldException.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2014 The Embulk project
2+
* Copyright 2022 The Embulk project
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
1616

1717
package org.embulk.parser.csv;
1818

19-
public class TooFewColumnsException extends InvalidFormatException {
20-
public TooFewColumnsException(final String message) {
21-
super(message);
19+
public class EndOfFileInQuotedFieldException extends InvalidCsvQuotationException {
20+
public EndOfFileInQuotedFieldException() {
21+
super("Unexpected end of file in a quoted field.");
2222
}
2323
}
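
embulk-parser-csv/src/main/java/org/embulk/parser/csv/InvalidCharacterAfterQuoteException.java

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change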
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright 2022 The Embulk project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.embulk.parser.csv;
18+
19+
public class InvalidCharacterAfterQuoteException extends InvalidCsvQuotationException {
20+
public InvalidCharacterAfterQuoteException(final char extraChar, final char quoteChar) {
21+
super(String.format("Unexpected extra character '%c' after a quote by '%c'.", extraChar, quoteChar));
22+
this.extraChar = extraChar;
23+
this.quoteChar = quoteChar;
24+
}
25+
26+
private final char extraChar;
27+
private final char quoteChar;
28+
}

embulk-parser-csv/src/main/java/org/embulk/parser/csv/TooManyColumnsException.java renamed to embulk-parser-csv/src/main/java/org/embulk/parser/csv/InvalidCsvFormatException.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
package org.embulk.parser.csv;
1818

19-
public class TooManyColumnsException extends InvalidFormatException {
20-
public TooManyColumnsException(final String message) {
19+
public abstract class InvalidCsvFormatException extends RuntimeException {
20+
public InvalidCsvFormatException(final String message) {
2121
super(message);
2222
}
2323
}

embulk-parser-csv/src/main/java/org/embulk/parser/csv/QuotedSizeLimitExceededException.java renamed to embulk-parser-csv/src/main/java/org/embulk/parser/csv/InvalidCsvQuotationException.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616

1717
package org.embulk.parser.csv;
1818

19-
public class QuotedSizeLimitExceededException extends InvalidValueException {
20-
public QuotedSizeLimitExceededException(final String message) {
19+
public abstract class InvalidCsvQuotationException extends InvalidCsvFormatException {
20+
public InvalidCsvQuotationException(final String message) {
2121
super(message);
2222
}
2323
}
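
embulk-parser-csv/src/main/java/org/embulk/parser/csv/QuotedFieldLengthLimitExceededException.java

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change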
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright 2014 The Embulk project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.embulk.parser.csv;
18+
19+
public class QuotedFieldLengthLimitExceededException extends InvalidCsvQuotationException {
20+
public QuotedFieldLengthLimitExceededException(final long quotedFieldLengthLimit) {
21+
super("The length of the quoted field exceeds the limit (" + quotedFieldLengthLimit + ")");
22+
this.quotedFieldLengthLimit = quotedFieldLengthLimit;
23+
}
24+
25+
private final long quotedFieldLengthLimit;
26+
}

embulk-parser-csv/src/main/java/org/embulk/parser/csv/InvalidFormatException.java renamed to embulk-parser-csv/src/main/java/org/embulk/parser/csv/RecordDoesNotHaveExpectedColumnException.java

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,8 @@
1616

1717
package org.embulk.parser.csv;
1818

19-
import org.embulk.spi.DataException;
20-
21-
public class InvalidFormatException extends DataException {
22-
public InvalidFormatException(final String message) {
23-
super(message);
19+
public class RecordDoesNotHaveExpectedColumnException extends InvalidCsvFormatException {
20+
public RecordDoesNotHaveExpectedColumnException() {
21+
super("A record does not have an expected column (i.e. too few columns).");
2422
}
2523
}

embulk-parser-csv/src/main/java/org/embulk/parser/csv/InvalidValueException.java renamed to embulk-parser-csv/src/main/java/org/embulk/parser/csv/RecordHasUnexpectedTrailingColumnException.java

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,8 @@
1616

1717
package org.embulk.parser.csv;
1818

19-
import org.embulk.spi.DataException;
20-
21-
public class InvalidValueException extends DataException {
22-
public InvalidValueException(final String message) {
23-
super(message);
19+
public class RecordHasUnexpectedTrailingColumnException extends InvalidCsvFormatException {
20+
public RecordHasUnexpectedTrailingColumnException() {
21+
super("A record has an unexpected trailing column (i.e. too many columns).");
2422
}
2523
}

0 commit comments

Comments
 (0)