Commit 921a51e

Enhancement and simplification of CSV error management
1 parent ef5bfa6 · commit 921a51e

4 files changed (+76, -56 lines)

src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java

Lines changed: 46 additions & 36 deletions
@@ -161,41 +161,44 @@ protected Set<Path> getIndividualTimeSeriesFilePaths() {
    *     occurred
    */
   protected Map<String, String> buildFieldsToAttributes(
-      final String csvRow, final String[] headline) {
+      final String csvRow, final String[] headline) throws SourceException {
 
     TreeMap<String, String> insensitiveFieldsToAttributes =
         new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
 
-    try {
-      String[] fieldVals = parseCsvRow(csvRow, csvSep);
+    String[] fieldVals = parseCsvRow(csvRow, csvSep);
+    insensitiveFieldsToAttributes.putAll(
+        IntStream.range(0, Math.min(fieldVals.length, headline.length))
+            .boxed()
+            .collect(
+                Collectors.toMap(
+                    k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
 
-      if (fieldVals.length != headline.length) {
-        throw new SourceException(
-            "The size of the headline does not fit to the size of the attribute fields.\nHeadline: "
-                + String.join(", ", headline)
-                + "\nCsvRow: "
-                + csvRow.trim()
-                + ".\nPlease check:"
-                + "\n - is the csv separator in the file matching the separator provided in the constructor ('"
-                + csvSep
-                + "')"
-                + "\n - does the number of columns match the number of headline fields "
-                + "\n - are you using a valid RFC 4180 formatted csv row?");
-      }
+    if (fieldVals.length != headline.length) {
+      throw new SourceException(
+          "The size of the headline ("
+              + headline.length
+              + ") does not fit to the size of the attribute fields ("
+              + fieldVals.length
+              + ").\nHeadline: "
+              + String.join(", ", headline)
+              + "\nRow: "
+              + csvRow.trim()
+              + ".\nPlease check:"
+              + "\n - is the csv separator in the file matching the separator provided in the constructor ('"
+              + csvSep
+              + "')"
+              + "\n - does the number of columns match the number of headline fields "
+              + "\n - are you using a valid RFC 4180 formatted csv row?");
+    }
 
-      insensitiveFieldsToAttributes.putAll(
-          IntStream.range(0, headline.length)
-              .boxed()
-              .collect(
-                  Collectors.toMap(
-                      k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
-    } catch (SourceException e) {
-      log.error(
-          "Cannot build fields to attributes map for row '{}' with headline '{}'.",
-          csvRow.trim(),
-          String.join(",", headline),
-          e);
+    if (insensitiveFieldsToAttributes.size() != fieldVals.length) {
+      throw new SourceException(
+          "There might be duplicate headline elements.\nHeadline: "
+              + String.join(", ", headline)
+              + ".\nPlease keep in mind that headlines are case-insensitive and underscores from snake case are ignored.");
    }
+
     return insensitiveFieldsToAttributes;
   }
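
The new size comparison works because insensitiveFieldsToAttributes is a case-insensitive TreeMap keyed by the camel-cased headline fields: two headline entries that differ only in case (or in snake-case underscores) collapse onto the same key, so the map ends up smaller than the parsed value array. A minimal, self-contained sketch of that effect (not part of the commit; the headline here is already camel-cased, so the StringUtils.snakeCaseToCamelCase step is skipped):

import java.util.TreeMap;

// Standalone illustration: keys that differ only in case collapse in a
// case-insensitive TreeMap, so 4 values end up as only 3 entries.
public class DuplicateHeadlineSketch {
  public static void main(String[] args) {
    String[] headline = {"uuid", "activePowerGradient", "ActivePowerGradient", "id"};
    String[] fieldVals = {"5ebd8f7e", "25.0", "30.0", "test_bmTypeInput"};

    TreeMap<String, String> fieldsToAttributes = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
    for (int i = 0; i < headline.length; i++) {
      fieldsToAttributes.put(headline[i], fieldVals[i]); // the second gradient entry overwrites the first
    }

    // prints "3 entries for 4 values" -> the patched buildFieldsToAttributes throws at this point
    System.out.println(fieldsToAttributes.size() + " entries for " + fieldVals.length + " values");
  }
}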

@@ -252,7 +255,7 @@ Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsToAttribu
       // is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
       // As we still want to consume the data at other places, we start a new stream instead of
       // returning the original one
-      return Success.of(csvRowFieldValueMapping(reader, headline).parallelStream());
+      return csvRowFieldValueMapping(reader, headline);
     } catch (FileNotFoundException e) {
       if (allowFileNotExisting) {
         log.warn("Unable to find file '{}': {}", filePath, e.getMessage());
@@ -282,13 +285,20 @@ private Try<Path, SourceException> getFilePath(Class<? extends Entity> entityCla
    * @param headline of the file
    * @return a list of mapping
    */
-  protected List<Map<String, String>> csvRowFieldValueMapping(
+  protected Try<Stream<Map<String, String>>, SourceException> csvRowFieldValueMapping(
       BufferedReader reader, String[] headline) {
-    return reader
-        .lines()
-        .parallel()
-        .map(csvRow -> buildFieldsToAttributes(csvRow, headline))
-        .filter(map -> !map.isEmpty())
-        .toList();
+    return Try.scanStream(
+            reader
+                .lines()
+                .parallel()
+                .map(
+                    csvRow ->
+                        Try.of(
+                            () -> buildFieldsToAttributes(csvRow, headline),
+                            SourceException.class)),
+            "Map<String, String>")
+        .transform(
+            stream -> stream.filter(map -> !map.isEmpty()),
+            e -> new SourceException("Parsing csv row failed.", e));
   }
 }
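
Try.scanStream and Try.of belong to the library's Try utility and are only called here, not defined in this commit. Conceptually, scanStream turns a stream of per-row Try results into a single Try of a stream, which fails if any row failed. A rough, hypothetical plain-Java sketch of that idea (simplified names and types, not the library's actual implementation):

import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Stream;

// Hypothetical, simplified model of "scanning" a stream of per-row results:
// each row yields either parsed fields or an exception; the scan reports the
// first exception, if any, instead of silently dropping the broken row.
final class ScanStreamSketch {

  /** Holds either the parsed fields of a row or the exception the row produced. */
  record RowResult(String[] fields, Exception error) {}

  /** Returns the first row error, if any element of the stream failed. */
  static Optional<Exception> scan(Stream<RowResult> results) {
    return results.map(RowResult::error).filter(Objects::nonNull).findFirst();
  }

  static RowResult parse(String row, String csvSep, int expectedColumns) {
    String[] fields = row.split(csvSep);
    return fields.length == expectedColumns
        ? new RowResult(fields, null)
        : new RowResult(null, new IllegalArgumentException("Cannot parse row: " + row));
  }

  public static void main(String[] args) {
    List<String> rows = List.of("a,b", "c,d", "malformed row");
    scan(rows.stream().map(row -> parse(row, ",", 2)))
        .ifPresent(e -> System.out.println("Parsing csv row failed: " + e.getMessage()));
  }
}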

src/main/java/edu/ie3/datamodel/io/source/csv/CsvIdCoordinateSource.java

Lines changed: 2 additions & 2 deletions
@@ -179,7 +179,7 @@ public List<CoordinateDistance> findCornerPoints(
   }
 
   public int getCoordinateCount() {
-    return idToCoordinate.keySet().size();
+    return idToCoordinate.size();
   }
 
   private Collection<Point> getCoordinatesInBoundingBox(
@@ -209,7 +209,7 @@ private Collection<Point> getCoordinatesInBoundingBox(
       // is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
       // As we still want to consume the data at other places, we start a new stream instead of
       // returning the original one
-      return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
+      return dataSource.csvRowFieldValueMapping(reader, headline);
     } catch (IOException e) {
       return Failure.of(
           new SourceException("Cannot read the file for coordinate id to coordinate mapping.", e));

src/main/java/edu/ie3/datamodel/io/source/csv/CsvWeatherSource.java

Lines changed: 1 addition & 1 deletion
@@ -240,7 +240,7 @@ private Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsT
       // is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
       // As we still want to consume the data at other places, we start a new stream instead of
       // returning the original one
-      return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
+      return dataSource.csvRowFieldValueMapping(reader, headline);
     } catch (IOException e) {
       return Failure.of(
           new SourceException(

src/test/groovy/edu/ie3/datamodel/io/source/csv/CsvDataSourceTest.groovy

Lines changed: 27 additions & 17 deletions
@@ -5,6 +5,7 @@
  */
 package edu.ie3.datamodel.io.source.csv
 
+import edu.ie3.datamodel.exceptions.SourceException
 import edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation
 import edu.ie3.datamodel.io.naming.FileNamingStrategy
 import edu.ie3.datamodel.io.naming.timeseries.ColumnScheme
@@ -274,7 +275,7 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
     ]
   }
 
-  def "A CsvDataSource should be able to handle several errors when the csvRow is invalid or cannot be processed"() {
+  def "A CsvDataSource should throw an exception if the headline and CSV row have different sizes"() {
     given:
     def validHeadline = [
       "uuid",
@@ -287,33 +288,42 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
       "s_rated"
     ] as String[]
 
-    expect:
-    dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline) == [:]
+    when:
+    dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline)
+
+    then:
+    def exception = thrown(SourceException)
+    exception.getMessage().startsWith("The size of the headline (8) does not fit to the size of the attribute fields")
 
     where:
     invalidCsvRow || explaination
     "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;25.0;100.0;0.95;98.0;test_bmTypeInput;50.0;25.0" || "wrong separator"
-    "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too less columns"
-    "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too much columns"
+    "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too little columns"
+    "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too many columns"
   }
 
-  def "A CsvDataSource should be able to handle invalid headlines"() {
-    given:
-    def validCsvRow = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,0.95,test_bmTypeInput,25.0"
 
-    expect:
-    dummyCsvSource.buildFieldsToAttributes(validCsvRow, invalidHeadline) == [:]
-
-    where:
-    invalidHeadline || explaination
-    ["uuid", "cosphi_rated", "id"] as String[] || "headline too short"
-    [
+  def "A CsvDataSource should throw an exception if there are duplicate headlines"() {
+    given:
+    def invalidHeadline = [
       "uuid",
+      "active_power_gradient",
+      "Active_Power_Gradient",
+      "capex",
       "cosphi_rated",
+      "eta_conv",
       "id",
+      "opex",
       "s_rated",
-      "capex"
-    ] as String[] || "headline too long"
+    ] as String[]
+    def validCsvRow = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,25.0,100.0,0.95,98.0,test_bmTypeInput,50.0,25.0"
+
+    when:
+    dummyCsvSource.buildFieldsToAttributes(validCsvRow, invalidHeadline)
+
+    then:
+    def exception = thrown(SourceException)
+    exception.getMessage().startsWith("There might be duplicate headline elements.")
   }
 
   def "The CsvDataSource is able to provide correct paths to time series files"() {
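
For the "wrong separator" case in the table above, the length mismatch that now raises the exception can be reproduced in isolation. A self-contained sketch (using a plain String.split instead of the RFC 4180-aware parseCsvRow, so only an approximation of the real parsing):

// Standalone sketch of the "wrong separator" test case: a ';'-separated row read
// with csvSep ',' stays a single field, so the 8-vs-1 length check fires and the
// patched buildFieldsToAttributes throws a SourceException instead of logging.
public class WrongSeparatorSketch {
  public static void main(String[] args) {
    String[] headline = {
      "uuid", "active_power_gradient", "capex", "cosphi_rated", "eta_conv", "id", "opex", "s_rated"
    };
    String row =
        "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;25.0;100.0;0.95;98.0;test_bmTypeInput;50.0;25.0";

    String[] fieldVals = row.split(","); // the whole row remains one single field
    System.out.printf("headline: %d fields, row: %d field(s)%n", headline.length, fieldVals.length);
  }
}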
