Skip to content

Commit e392ecc

Browse files
Merge pull request #1031 from ie3-institute/ms/#1007-refactor-CsvFileConnector-and-CsvDataSource
Refactor `CsvFileConnector` and `CsvDataSource`.
2 parents b6b1508 + 97913de commit e392ecc

File tree

10 files changed

+353
-335
lines changed

10 files changed

+353
-335
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616

1717
### Changed
1818
- Improvements to the search for corner points in `IdCoordinateSource` [#1016](https://github.com/ie3-institute/PowerSystemDataModel/issues/1016)
19+
- Refactor `CsvFileConnector` and `CsvDataSource` [#1007](https://github.com/ie3-institute/PowerSystemDataModel/issues/1007)
1920

2021

2122
## [5.0.1] - 2024-03-07

src/main/java/edu/ie3/datamodel/io/connectors/CsvFileConnector.java

Lines changed: 15 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,23 @@
77

88
import edu.ie3.datamodel.exceptions.ConnectorException;
99
import edu.ie3.datamodel.io.IoUtil;
10-
import edu.ie3.datamodel.io.csv.*;
11-
import edu.ie3.datamodel.io.naming.FileNamingStrategy;
12-
import edu.ie3.datamodel.io.naming.TimeSeriesMetaInformation;
13-
import edu.ie3.datamodel.io.naming.timeseries.ColumnScheme;
14-
import edu.ie3.datamodel.io.naming.timeseries.IndividualTimeSeriesMetaInformation;
10+
import edu.ie3.datamodel.io.csv.BufferedCsvWriter;
11+
import edu.ie3.datamodel.io.csv.CsvFileDefinition;
1512
import edu.ie3.datamodel.models.Entity;
1613
import edu.ie3.datamodel.models.timeseries.TimeSeries;
1714
import edu.ie3.datamodel.models.timeseries.TimeSeriesEntry;
1815
import edu.ie3.datamodel.models.value.Value;
1916
import java.io.*;
2017
import java.nio.charset.StandardCharsets;
21-
import java.nio.file.Files;
2218
import java.nio.file.Path;
2319
import java.util.*;
24-
import java.util.function.Function;
25-
import java.util.stream.Collectors;
2620
import java.util.stream.Stream;
2721
import org.slf4j.Logger;
2822
import org.slf4j.LoggerFactory;
2923

3024
/**
3125
* Provides the connector (here: buffered writer) for specific files to be used by a {@link
32-
* edu.ie3.datamodel.io.sink.CsvFileSink}
26+
* edu.ie3.datamodel.io.sink.CsvFileSink} or {@link edu.ie3.datamodel.io.source.csv.CsvDataSource}
3327
*
3428
* @version 0.1
3529
* @since 19.03.20
@@ -39,27 +33,26 @@ public class CsvFileConnector implements DataConnector {
3933

4034
private final Map<Class<? extends Entity>, BufferedCsvWriter> entityWriters = new HashMap<>();
4135
private final Map<UUID, BufferedCsvWriter> timeSeriesWriters = new HashMap<>();
42-
43-
private final FileNamingStrategy fileNamingStrategy;
4436
private final Path baseDirectory;
45-
4637
private static final String FILE_ENDING = ".csv";
4738

48-
public CsvFileConnector(Path baseDirectory, FileNamingStrategy fileNamingStrategy) {
39+
public CsvFileConnector(Path baseDirectory) {
4940
this.baseDirectory = baseDirectory;
50-
this.fileNamingStrategy = fileNamingStrategy;
41+
}
42+
43+
/** Returns the base directory of this connector. */
44+
public Path getBaseDirectory() {
45+
return baseDirectory;
5146
}
5247

5348
public synchronized BufferedCsvWriter getOrInitWriter(
54-
Class<? extends Entity> clz, String[] headerElements, String csvSep)
55-
throws ConnectorException {
56-
/* Try to the the right writer */
49+
Class<? extends Entity> clz, CsvFileDefinition fileDefinition) throws ConnectorException {
50+
/* Try to get the right writer */
5751
BufferedCsvWriter predefinedWriter = entityWriters.get(clz);
5852
if (predefinedWriter != null) return predefinedWriter;
5953

6054
/* If it is not available, build and register one */
6155
try {
62-
CsvFileDefinition fileDefinition = buildFileDefinition(clz, headerElements, csvSep);
6356
BufferedCsvWriter newWriter = initWriter(baseDirectory, fileDefinition);
6457

6558
entityWriters.put(clz, newWriter);
@@ -71,15 +64,14 @@ public synchronized BufferedCsvWriter getOrInitWriter(
7164
}
7265

7366
public synchronized <T extends TimeSeries<E, V>, E extends TimeSeriesEntry<V>, V extends Value>
74-
BufferedCsvWriter getOrInitWriter(T timeSeries, String[] headerElements, String csvSep)
67+
BufferedCsvWriter getOrInitWriter(T timeSeries, CsvFileDefinition fileDefinition)
7568
throws ConnectorException {
76-
/* Try to the the right writer */
69+
/* Try to get the right writer */
7770
BufferedCsvWriter predefinedWriter = timeSeriesWriters.get(timeSeries.getUuid());
7871
if (predefinedWriter != null) return predefinedWriter;
7972

8073
/* If it is not available, build and register one */
8174
try {
82-
CsvFileDefinition fileDefinition = buildFileDefinition(timeSeries, headerElements, csvSep);
8375
BufferedCsvWriter newWriter = initWriter(baseDirectory, fileDefinition);
8476

8577
timeSeriesWriters.put(timeSeries.getUuid(), newWriter);
@@ -131,8 +123,7 @@ public synchronized void closeTimeSeriesWriter(UUID uuid) throws IOException {
131123
Optional<BufferedCsvWriter> maybeWriter = Optional.ofNullable(timeSeriesWriters.get(uuid));
132124
if (maybeWriter.isPresent()) {
133125
log.debug("Remove reference to time series writer for UUID '{}'.", uuid);
134-
timeSeriesWriters.remove(uuid);
135-
maybeWriter.get().close();
126+
timeSeriesWriters.remove(uuid).close();
136127
} else {
137128
log.warn("No writer found for time series '{}'.", uuid);
138129
}
@@ -149,8 +140,7 @@ public synchronized <C extends Entity> void closeEntityWriter(Class<C> clz) thro
149140
Optional<BufferedCsvWriter> maybeWriter = Optional.ofNullable(entityWriters.get(clz));
150141
if (maybeWriter.isPresent()) {
151142
log.debug("Remove reference to entity writer for class '{}'.", clz);
152-
entityWriters.remove(clz);
153-
maybeWriter.get().close();
143+
entityWriters.remove(clz).close();
154144
} else {
155145
log.warn("No writer found for class '{}'.", clz);
156146
}
@@ -170,106 +160,6 @@ public BufferedReader initReader(Path filePath) throws FileNotFoundException {
170160
new InputStreamReader(new FileInputStream(fullPath), StandardCharsets.UTF_8), 16384);
171161
}
172162

173-
/**
174-
* Receive the information for specific time series. They are given back filtered by the column
175-
* scheme in order to allow for accounting the different content types.
176-
*
177-
* @param columnSchemes the column schemes to initialize readers for. If no scheme is given, all
178-
* possible readers will be initialized.
179-
* @return A mapping from column scheme to the individual time series meta information
180-
*/
181-
public Map<UUID, CsvIndividualTimeSeriesMetaInformation>
182-
getCsvIndividualTimeSeriesMetaInformation(final ColumnScheme... columnSchemes) {
183-
return getIndividualTimeSeriesFilePaths().parallelStream()
184-
.map(
185-
filePath -> {
186-
/* Extract meta information from file path and enhance it with the file path itself */
187-
IndividualTimeSeriesMetaInformation metaInformation =
188-
fileNamingStrategy.individualTimeSeriesMetaInformation(filePath.toString());
189-
return new CsvIndividualTimeSeriesMetaInformation(
190-
metaInformation, FileNamingStrategy.removeFileNameEnding(filePath.getFileName()));
191-
})
192-
.filter(
193-
metaInformation ->
194-
columnSchemes == null
195-
|| columnSchemes.length == 0
196-
|| Stream.of(columnSchemes)
197-
.anyMatch(scheme -> scheme.equals(metaInformation.getColumnScheme())))
198-
.collect(Collectors.toMap(TimeSeriesMetaInformation::getUuid, Function.identity()));
199-
}
200-
201-
/**
202-
* Returns a set of relative paths strings to time series files, with respect to the base folder
203-
* path
204-
*
205-
* @return A set of relative paths to time series files, with respect to the base folder path
206-
*/
207-
private Set<Path> getIndividualTimeSeriesFilePaths() {
208-
try (Stream<Path> pathStream = Files.walk(baseDirectory)) {
209-
return pathStream
210-
.map(baseDirectory::relativize)
211-
.filter(
212-
path -> {
213-
Path withoutEnding =
214-
Path.of(FileNamingStrategy.removeFileNameEnding(path.toString()));
215-
return fileNamingStrategy
216-
.getIndividualTimeSeriesPattern()
217-
.matcher(withoutEnding.toString())
218-
.matches();
219-
})
220-
.collect(Collectors.toSet());
221-
} catch (IOException e) {
222-
log.error("Unable to determine time series files readers for time series.", e);
223-
return Collections.emptySet();
224-
}
225-
}
226-
227-
/**
228-
* Builds a new file definition consisting of file name and head line elements
229-
*
230-
* @param timeSeries Time series to derive naming information from
231-
* @param headLineElements Array of head line elements
232-
* @param csvSep Separator for csv columns
233-
* @return A suitable file definition
234-
* @throws ConnectorException If the definition cannot be determined
235-
*/
236-
private <T extends TimeSeries<E, V>, E extends TimeSeriesEntry<V>, V extends Value>
237-
CsvFileDefinition buildFileDefinition(T timeSeries, String[] headLineElements, String csvSep)
238-
throws ConnectorException {
239-
Path directoryPath = fileNamingStrategy.getDirectoryPath(timeSeries).orElse(Path.of(""));
240-
String fileName =
241-
fileNamingStrategy
242-
.getEntityName(timeSeries)
243-
.orElseThrow(
244-
() ->
245-
new ConnectorException(
246-
"Cannot determine the file name for time series '" + timeSeries + "'."));
247-
return new CsvFileDefinition(fileName, directoryPath, headLineElements, csvSep);
248-
}
249-
250-
/**
251-
* Builds a new file definition consisting of file name and head line elements
252-
*
253-
* @param clz Class that is meant to be serialized into this file
254-
* @param headLineElements Array of head line elements
255-
* @param csvSep Separator for csv columns
256-
* @return A suitable file definition
257-
* @throws ConnectorException If the definition cannot be determined
258-
*/
259-
private CsvFileDefinition buildFileDefinition(
260-
Class<? extends Entity> clz, String[] headLineElements, String csvSep)
261-
throws ConnectorException {
262-
Path directoryPath = fileNamingStrategy.getDirectoryPath(clz).orElse(Path.of(""));
263-
String fileName =
264-
fileNamingStrategy
265-
.getEntityName(clz)
266-
.orElseThrow(
267-
() ->
268-
new ConnectorException(
269-
"Cannot determine the file name for class '" + clz.getSimpleName() + "'."));
270-
return new CsvFileDefinition(fileName, directoryPath, headLineElements, csvSep);
271-
}
272-
273163
@Override
274164
public void shutdown() {
275165
Stream.of(entityWriters.values(), timeSeriesWriters.values())

src/main/java/edu/ie3/datamodel/io/csv/CsvFileDefinition.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
*/
66
package edu.ie3.datamodel.io.csv;
77

8+
import edu.ie3.datamodel.exceptions.FileException;
9+
import edu.ie3.datamodel.io.naming.FileNamingStrategy;
10+
import edu.ie3.datamodel.models.Entity;
11+
import edu.ie3.datamodel.models.timeseries.TimeSeries;
12+
import edu.ie3.datamodel.models.timeseries.TimeSeriesEntry;
13+
import edu.ie3.datamodel.models.value.Value;
814
import edu.ie3.datamodel.utils.FileUtils;
915
import java.nio.file.Path;
1016
import java.util.Arrays;
@@ -23,6 +29,67 @@ public CsvFileDefinition(
2329
this(FileUtils.ofCsv(fileName, directoryPath), headLineElements, csvSep);
2430
}
2531

32+
/**
33+
* Builds a new file definition consisting of file name and headline elements
34+
*
35+
* @param clz Class that is meant to be serialized into this file
36+
* @param headLineElements Array of headline elements
37+
* @param csvSep Separator for csv columns
38+
* @param fileNamingStrategy File naming strategy that should be used
39+
* @throws FileException If the definition cannot be determined
40+
*/
41+
public CsvFileDefinition(
42+
Class<? extends Entity> clz,
43+
String[] headLineElements,
44+
String csvSep,
45+
FileNamingStrategy fileNamingStrategy)
46+
throws FileException {
47+
this(
48+
FileUtils.ofCsv(
49+
fileNamingStrategy
50+
.getEntityName(clz)
51+
.orElseThrow(
52+
() ->
53+
new FileException(
54+
"Cannot determine the file name for class '"
55+
+ clz.getSimpleName()
56+
+ "'.")),
57+
fileNamingStrategy.getDirectoryPath(clz).orElse(Path.of(""))),
58+
headLineElements,
59+
csvSep);
60+
}
61+
62+
/**
63+
* Builds a new file definition consisting of file name and headline elements
64+
*
65+
* @param timeSeries Time series to derive naming information from
66+
* @param headLineElements Array of headline elements
67+
* @param csvSep Separator for csv columns
68+
* @param fileNamingStrategy File naming strategy that should be used
69+
* @throws FileException If the definition cannot be determined
70+
*/
71+
public <T extends TimeSeries<E, V>, E extends TimeSeriesEntry<V>, V extends Value>
72+
CsvFileDefinition(
73+
T timeSeries,
74+
String[] headLineElements,
75+
String csvSep,
76+
FileNamingStrategy fileNamingStrategy)
77+
throws FileException {
78+
this(
79+
FileUtils.ofCsv(
80+
fileNamingStrategy
81+
.getEntityName(timeSeries)
82+
.orElseThrow(
83+
() ->
84+
new FileException(
85+
"Cannot determine the file name for time series '"
86+
+ timeSeries
87+
+ "'.")),
88+
fileNamingStrategy.getDirectoryPath(timeSeries).orElse(Path.of(""))),
89+
headLineElements,
90+
csvSep);
91+
}
92+
2693
/**
2794
* @return The path to the file relative to a not explicitly defined base directory, including the
2895
* file extension

src/main/java/edu/ie3/datamodel/io/sink/CsvFileSink.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import edu.ie3.datamodel.exceptions.*;
99
import edu.ie3.datamodel.io.connectors.CsvFileConnector;
1010
import edu.ie3.datamodel.io.csv.BufferedCsvWriter;
11+
import edu.ie3.datamodel.io.csv.CsvFileDefinition;
1112
import edu.ie3.datamodel.io.extractor.Extractor;
1213
import edu.ie3.datamodel.io.extractor.NestedEntity;
1314
import edu.ie3.datamodel.io.naming.FileNamingStrategy;
@@ -53,7 +54,7 @@ public class CsvFileSink implements InputDataSink, OutputDataSink {
5354

5455
private final CsvFileConnector connector;
5556
private final ProcessorProvider processorProvider;
56-
57+
private final FileNamingStrategy fileNamingStrategy;
5758
private final String csvSep;
5859

5960
public CsvFileSink(Path baseFolderPath) throws EntityProcessorException {
@@ -95,7 +96,8 @@ public CsvFileSink(
9596
String csvSep) {
9697
this.csvSep = csvSep;
9798
this.processorProvider = processorProvider;
98-
this.connector = new CsvFileConnector(baseFolderPath, fileNamingStrategy);
99+
this.connector = new CsvFileConnector(baseFolderPath);
100+
this.fileNamingStrategy = fileNamingStrategy;
99101
}
100102

101103
@Override
@@ -246,13 +248,16 @@ public <E extends TimeSeriesEntry<V>, V extends Value> void persistTimeSeries(
246248
try {
247249
TimeSeriesProcessorKey key = new TimeSeriesProcessorKey(timeSeries);
248250
String[] headerElements = csvHeaderElements(processorProvider.getHeaderElements(key));
249-
BufferedCsvWriter writer = connector.getOrInitWriter(timeSeries, headerElements, csvSep);
251+
BufferedCsvWriter writer =
252+
connector.getOrInitWriter(
253+
timeSeries,
254+
new CsvFileDefinition(timeSeries, headerElements, csvSep, fileNamingStrategy));
250255
persistTimeSeries(timeSeries, writer);
251256
connector.closeTimeSeriesWriter(timeSeries.getUuid());
252257
} catch (ProcessorProviderException e) {
253258
log.error(
254259
"Exception occurred during receiving of header elements. Cannot write this element.", e);
255-
} catch (ConnectorException e) {
260+
} catch (ConnectorException | FileException e) {
256261
log.error("Exception occurred during acquisition of writer.", e);
257262
} catch (IOException e) {
258263
log.error("Exception occurred during closing of writer.", e);
@@ -292,12 +297,14 @@ private <C extends Entity> void write(C entity) {
292297
processorProvider.handleEntity(entity).map(this::csvEntityFieldData).getOrThrow();
293298
String[] headerElements = processorProvider.getHeaderElements(entity.getClass());
294299
BufferedCsvWriter writer =
295-
connector.getOrInitWriter(entity.getClass(), headerElements, csvSep);
300+
connector.getOrInitWriter(
301+
entity.getClass(),
302+
new CsvFileDefinition(entity.getClass(), headerElements, csvSep, fileNamingStrategy));
296303
writer.write(entityFieldData);
297304
} catch (ProcessorProviderException e) {
298305
log.error(
299306
"Exception occurred during receiving of header elements. Cannot write this element.", e);
300-
} catch (ConnectorException e) {
307+
} catch (ConnectorException | FileException e) {
301308
log.error("Exception occurred during retrieval of writer. Cannot write this element.", e);
302309
} catch (IOException e) {
303310
log.error("Exception occurred during writing of this element. Cannot write this element.", e);

0 commit comments

Comments
 (0)