From 7b15447a9d00e50edfacbb37591db9a2e9bff4c3 Mon Sep 17 00:00:00 2001 From: juanan Date: Wed, 10 Apr 2024 13:14:23 +0200 Subject: [PATCH 1/6] Adding possibility to add a list of files to TRestDataSet instead of a file pattern --- source/framework/core/inc/TRestDataSet.h | 4 ++++ source/framework/core/src/TRestDataSet.cxx | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index 483351510..7d1480ec7 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -105,6 +105,9 @@ class TRestDataSet : public TRestMetadata { /// A list of new columns together with its corresponding expressions added to the dataset std::vector> fColumnNameExpressions; //< + /// List of files to generate the dataSet + std::vector fFileList; //! + /// A flag to enable Multithreading during dataframe generation Bool_t fMT = false; //< @@ -181,6 +184,7 @@ class TRestDataSet : public TRestMetadata { inline auto GetCut() const { return fCut; } inline auto IsMergedDataSet() const { return fMergedDataset; } + inline void SetFileList(const std::vector& fileList) { fFileList = fileList; } inline void SetObservablesList(const std::vector& obsList) { fObservablesList = obsList; } inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; } inline void SetQuantity(const std::map& quantity) { fQuantity = quantity; } diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 56011722a..26808ce57 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -389,16 +389,16 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } - std::vector fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern); + if(fFileList.empty())fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl; - RESTInfo << "Total files : " << fileNames.size() << RESTendl; + RESTInfo << "Total files : " << fFileList.size() << RESTendl; RESTInfo << "This process may take long computation time in case there are many files." << RESTendl; fTotalDuration = 0; std::cout << "Processing file selection."; int cnt = 1; - for (const auto& file : fileNames) { + for (const auto& file : fFileList) { if (cnt % 100 == 0) { std::cout << std::endl; std::cout << "Files processed: " << cnt << " ." << std::flush; From c612c9c5491d75dfd99edf1f40dfb89c660b10a2 Mon Sep 17 00:00:00 2001 From: juanan Date: Wed, 10 Apr 2024 13:16:54 +0200 Subject: [PATCH 2/6] Increasing ClassDef version on TRestDataSet --- source/framework/core/inc/TRestDataSet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index 7d1480ec7..5289f3a85 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -213,6 +213,6 @@ class TRestDataSet : public TRestMetadata { TRestDataSet(const char* cfgFileName, const std::string& name = ""); ~TRestDataSet(); - ClassDefOverride(TRestDataSet, 7); + ClassDefOverride(TRestDataSet, 8); }; #endif From 0b81d3761936c020e7cfa432c980a57974368ea5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 11:21:24 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/framework/core/src/TRestDataSet.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 26808ce57..fe964e066 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -389,7 +389,7 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } - if(fFileList.empty())fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); + if (fFileList.empty()) fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl; RESTInfo << "Total files : " << fFileList.size() << RESTendl; From b788e507f8e0794eaeecc51b34ba64fe11b8c3b9 Mon Sep 17 00:00:00 2001 From: juanan Date: Fri, 12 Apr 2024 10:36:50 +0200 Subject: [PATCH 4/6] Moving file pattern from string to a vector of string and adding the possibility to add several patterns to a single dataSet --- source/framework/core/inc/TRestDataSet.h | 10 +++----- source/framework/core/src/TRestDataSet.cxx | 27 +++++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index 5289f3a85..a35393d70 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -55,7 +55,7 @@ class TRestDataSet : public TRestMetadata { std::string fFilterEndTime = "3000/12/31"; //< /// A glob file pattern that must be satisfied by all files - std::string fFilePattern = ""; //< + std::vector fFilePatternList; //< /// It contains a list of the observables that will be added to the final tree or exported file std::vector fObservablesList; //< @@ -105,9 +105,6 @@ class TRestDataSet : public TRestMetadata { /// A list of new columns together with its corresponding expressions added to the dataset std::vector> fColumnNameExpressions; //< - /// List of files to generate the dataSet - std::vector fFileList; //! - /// A flag to enable Multithreading during dataframe generation Bool_t fMT = false; //< @@ -170,7 +167,7 @@ class TRestDataSet : public TRestMetadata { inline auto GetFilterEndTime() const { return fFilterEndTime; } inline auto GetStartTime() const { return fStartTime; } inline auto GetEndTime() const { return fEndTime; } - inline auto GetFilePattern() const { return fFilePattern; } + inline auto GetFilePattern() const { return fFilePatternList; } inline auto GetObservablesList() const { return fObservablesList; } inline auto GetFileSelection() const { return fFileSelection; } inline auto GetProcessObservablesList() const { return fProcessObservablesList; } @@ -184,9 +181,8 @@ class TRestDataSet : public TRestMetadata { inline auto GetCut() const { return fCut; } inline auto IsMergedDataSet() const { return fMergedDataset; } - inline void SetFileList(const std::vector& fileList) { fFileList = fileList; } inline void SetObservablesList(const std::vector& obsList) { fObservablesList = obsList; } - inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; } + inline void SetFilePattern(const std::string& pattern) { fFilePatternList.push_back(pattern); } inline void SetQuantity(const std::map& quantity) { fQuantity = quantity; } void SetTotalTimeInSeconds(Double_t seconds) { fTotalDuration = seconds; } diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 26808ce57..651b63331 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -389,16 +389,20 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } - if(fFileList.empty())fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); + std::vector fileList; + for(const auto &pattern : fFilePatternList){ + auto list = TRestTools::GetFilesMatchingPattern(pattern); + fileList.insert(end(fileList), begin(list), end(list)); + } RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl; - RESTInfo << "Total files : " << fFileList.size() << RESTendl; + RESTInfo << "Total files : " << fileList.size() << RESTendl; RESTInfo << "This process may take long computation time in case there are many files." << RESTendl; fTotalDuration = 0; std::cout << "Processing file selection."; int cnt = 1; - for (const auto& file : fFileList) { + for (const auto& file : fileList) { if (cnt % 100 == 0) { std::cout << std::endl; std::cout << "Files processed: " << cnt << " ." << std::flush; @@ -558,8 +562,10 @@ void TRestDataSet::PrintMetadata() { RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl; RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl; - RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl; - RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl; + for(const auto& pattern : fFilePatternList){ + RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(pattern).first << RESTendl; + RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(pattern).second << RESTendl; + } RESTMetadata << " " << RESTendl; RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl; RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl; @@ -655,6 +661,9 @@ void TRestDataSet::PrintMetadata() { void TRestDataSet::InitFromConfigFile() { TRestMetadata::InitFromConfigFile(); + std::string filePattern = GetParameter("filePattern",""); + if(!filePattern.empty())fFilePatternList.push_back(filePattern); + /// Reading filters TiXmlElement* filterDefinition = GetElement("filter"); while (filterDefinition != nullptr) { @@ -843,8 +852,10 @@ void TRestDataSet::Export(const std::string& filename, std::vector fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.); fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.); fprintf(f, "###\n"); - fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str()); - fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str()); + for(const auto& pattern : fFilePatternList){ + fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(pattern).first.c_str()); + fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(pattern).second.c_str()); + } fprintf(f, "###\n"); if (!fFilterMetadata.empty()) { fprintf(f, "### Metadata filters : \n"); @@ -926,7 +937,7 @@ TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) { fFilterEndTime = dS.GetFilterEndTime(); fStartTime = dS.GetStartTime(); fEndTime = dS.GetEndTime(); - fFilePattern = dS.GetFilePattern(); + fFilePatternList = dS.GetFilePattern(); fObservablesList = dS.GetObservablesList(); fFileSelection = dS.GetFileSelection(); fProcessObservablesList = dS.GetProcessObservablesList(); From 8124fa48376322d3fdaa946b5e8716559e551b79 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 08:39:48 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/framework/core/src/TRestDataSet.cxx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 9883d761e..acaacb095 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -391,10 +391,10 @@ std::vector TRestDataSet::FileSelection() { <<<<<<< HEAD std::vector fileList; - for(const auto &pattern : fFilePatternList){ + for (const auto& pattern : fFilePatternList) { auto list = TRestTools::GetFilesMatchingPattern(pattern); fileList.insert(end(fileList), begin(list), end(list)); - } + } ======= if (fFileList.empty()) fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); >>>>>>> 0b81d3761936c020e7cfa432c980a57974368ea5 @@ -566,9 +566,9 @@ void TRestDataSet::PrintMetadata() { RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl; RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl; - for(const auto& pattern : fFilePatternList){ - RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(pattern).first << RESTendl; - RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(pattern).second << RESTendl; + for (const auto& pattern : fFilePatternList) { + RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(pattern).first << RESTendl; + RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(pattern).second << RESTendl; } RESTMetadata << " " << RESTendl; RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl; @@ -665,8 +665,8 @@ void TRestDataSet::PrintMetadata() { void TRestDataSet::InitFromConfigFile() { TRestMetadata::InitFromConfigFile(); - std::string filePattern = GetParameter("filePattern",""); - if(!filePattern.empty())fFilePatternList.push_back(filePattern); + std::string filePattern = GetParameter("filePattern", ""); + if (!filePattern.empty()) fFilePatternList.push_back(filePattern); /// Reading filters TiXmlElement* filterDefinition = GetElement("filter"); @@ -856,9 +856,9 @@ void TRestDataSet::Export(const std::string& filename, std::vector fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.); fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.); fprintf(f, "###\n"); - for(const auto& pattern : fFilePatternList){ - fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(pattern).first.c_str()); - fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(pattern).second.c_str()); + for (const auto& pattern : fFilePatternList) { + fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(pattern).first.c_str()); + fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(pattern).second.c_str()); } fprintf(f, "###\n"); if (!fFilterMetadata.empty()) { From 8314255d236dead0eb806932749054e6b5c27f42 Mon Sep 17 00:00:00 2001 From: juanan Date: Fri, 12 Apr 2024 10:44:55 +0200 Subject: [PATCH 6/6] Addressing error while pushing --- source/framework/core/src/TRestDataSet.cxx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 9883d761e..651b63331 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -389,15 +389,11 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } -<<<<<<< HEAD std::vector fileList; for(const auto &pattern : fFilePatternList){ auto list = TRestTools::GetFilesMatchingPattern(pattern); fileList.insert(end(fileList), begin(list), end(list)); } -======= - if (fFileList.empty()) fFileList = TRestTools::GetFilesMatchingPattern(fFilePattern); ->>>>>>> 0b81d3761936c020e7cfa432c980a57974368ea5 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl; RESTInfo << "Total files : " << fileList.size() << RESTendl;