From e69fc712b7cf59ca382743e779a16c09fff31aeb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 14:05:06 -0500 Subject: [PATCH 01/42] Removing c++20 features unsupported by GCC versions < 12.2 --- CMakeLists.txt | 3 ++- rnn/dnas_node.cxx | 4 ++-- rnn/dnas_node.hxx | 7 ++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b34e8f0..9093410b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set (EXACT_VERSION_MINOR 33) #add_definitions( -DEXACT_VERSION="${EXACT_VERSION_MAJOR}.${EXACT_VERSION_MINOR}" ) SET (PLATFORM 64) +set(CMAKE_CXX_STANDARD 20) #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++") #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS") @@ -23,7 +24,7 @@ SET (PLATFORM 64) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "-std=c++20 -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS "-Wall -O3 -funroll-loops -msse3") SET (CMAKE_CXX_FLAGS_DEBUG "-g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -funroll-loops -DNDEBUG") diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 2f040703..957eaba5 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -67,13 +67,13 @@ DNASNode::~DNASNode() { delete node; } -template +template void DNASNode::gumbel_noise(Rng &rng, vector &output) { for (int i = 0; i < output.size(); i++) output[i] = -log(-log(uniform_real_distribution(0.0, 1.0)(rng))); } -template +template void DNASNode::sample_gumbel_softmax(Rng &rng) { z.assign(pi.size(), 0.0); x.assign(pi.size(), 0.0); diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index daa26605..1b63532c 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -7,7 +7,6 @@ using std::string; #include using std::minstd_rand0; using std::uniform_real_distribution; 
-using std::uniform_random_bit_generator; using std::generate_canonical; #include @@ -27,9 +26,8 @@ using std::unique_ptr; #define CRYSTALLIZATION_THRESHOLD 50000 class DNASNode : public RNN_Node_Interface { - private: - template + template static void gumbel_noise(R &rng, vector &output); void calculate_maxi(); @@ -75,12 +73,11 @@ class DNASNode : public RNN_Node_Interface { vector> node_outputs; public: - DNASNode(vector &&nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1); DNASNode(const DNASNode &node); ~DNASNode(); - template + template void sample_gumbel_softmax(Rng &rng); void calculate_z(); From 903dcfb40baf8d06fa770875693a0d198efd95e2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:26:23 -0500 Subject: [PATCH 02/42] Fix minimum c++ requirement in CMakeLists.txt to be compatible with GCC and clang --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9093410b..2ec362ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.6) +cmake_minimum_required (VERSION 2.8) project (EXACT) # The version number. 
@@ -24,10 +24,10 @@ set(CMAKE_CXX_STANDARD 20) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "-Wall -O3 -funroll-loops -msse3") -SET (CMAKE_CXX_FLAGS_DEBUG "-g") -SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") -SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -funroll-loops -DNDEBUG") +SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") +SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") +SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib) From 09c5cbcafdb848f4a493c3502baba7fd5db275ca Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:37:33 -0500 Subject: [PATCH 03/42] properly specify minimum CMake version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ec362ca..558fbefe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 3.1) project (EXACT) # The version number. 
From 8ec09ca9826a8e7022c46382c14a7934e02a0731 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:41:37 -0500 Subject: [PATCH 04/42] Fixed bug introduced during merge --- rnn/dnas_node.hxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index f56b404b..82ebff65 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -27,7 +27,7 @@ using std::unique_ptr; class DNASNode : public RNN_Node_Interface { private: - template + template static void gumbel_noise(R &rng, vector &output); void calculate_maxi(); @@ -77,7 +77,7 @@ class DNASNode : public RNN_Node_Interface { DNASNode(const DNASNode &node); ~DNASNode(); - template + template void sample_gumbel_softmax(Rng &rng); void calculate_z(); From 1c9bcfff4042964727bc41f944738f4e82c2d5d2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:12:56 -0500 Subject: [PATCH 05/42] Tweaking for clusteR --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 558fbefe..1af3a314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.8) project (EXACT) # The version number. 
@@ -9,6 +9,7 @@ set (EXACT_VERSION_MINOR 33) SET (PLATFORM 64) set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++") #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS") @@ -24,13 +25,14 @@ set(CMAKE_CXX_STANDARD 20) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3") SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib) +message(STATUS "${CMAKE_CXX_FLAGS}") message(STATUS "project source dir is ${PROJECT_SOURCE_DIR}") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/") From 9c6be46a3141e5cbde600156f9f7b08134600acb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:35:42 -0500 Subject: [PATCH 06/42] Added updated cluster instructions to the README.md --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7fc588a3..449e7194 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Getting Started and Prerequisites EXONA has been developed to compile using CMake, which should be installed before attempting to compile. To use the MPI version, a version of MPI (such as OpenMPI) should be installed. EXACT currently requires libtiff and libpng -The EXACT algorithm can also checkpoint to a database, however this is not required. To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX. 
Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++11 compatible compiler. +The EXACT algorithm can also checkpoint to a database, however this is not required. To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX. Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++20 compatible compiler. If you are using OSX, to set up the environment: @@ -15,6 +15,18 @@ brew install libpng xcode-select --install ``` +On the RIT Cluster Computer, load the following packages using spack: +``` +# CMake +spack load /ux27hbj + +# GCC +spack load gcc@11.2.0 + +# libtiff +spack load /ycf67m3 +``` + To build: ``` @@ -24,6 +36,8 @@ To build: ~/exact/build $ make ``` +You can add `-DCMAKE_BUILD_TYPE=Release` to the invocation of `cmake` for a release build (slower compile times, faster execution). + You may also want to have graphviz installed so you can generate images of the evolved neural networks. EXACT/EXALT/EXAMM will write out evolved genomes in a .gv (graphviz) format for this. For example, can generate a pdf from a gv file (assuming graphviz is installed with): ``` From d7023b64e771dad351ae946f962018de2c4a1acc Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:41:24 -0500 Subject: [PATCH 07/42] Updated format script --- scripts/util/format.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/util/format.sh b/scripts/util/format.sh index 85dddbfd..76fb9efa 100755 --- a/scripts/util/format.sh +++ b/scripts/util/format.sh @@ -1,2 +1,4 @@ #!/bin/bash -find . 
-type f -name "*.*xx" -exec clang-format -style=file -i {} \; +for folder in common examm mpi multithreaded rnn rnn_examples rnn_tests time_series weights word_series; do + find $folder -type f -name "*.*xx" -exec clang-format -style=file -i {} \; +done From 89b3410cebc573430cd244b43ea070ca203fc98f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:41:46 -0500 Subject: [PATCH 08/42] Formatting --- rnn/dnas_node.hxx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 3b341195..435e5400 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -8,7 +8,6 @@ using std::string; using std::generate_canonical; using std::minstd_rand0; using std::uniform_real_distribution; -using std::generate_canonical; #include using std::vector; @@ -28,7 +27,7 @@ using std::unique_ptr; class DNASNode : public RNN_Node_Interface { private: template - static void gumbel_noise(R &rng, vector &output); + static void gumbel_noise(R& rng, vector& output); void calculate_maxi(); @@ -81,7 +80,7 @@ class DNASNode : public RNN_Node_Interface { ~DNASNode(); template - void sample_gumbel_softmax(Rng &rng); + void sample_gumbel_softmax(Rng& rng); void calculate_z(); virtual void initialize_lamarckian( From cbf757457e0d56235e6c58a64114d3143f3c0f58 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 12:49:34 -0500 Subject: [PATCH 09/42] Added OpenMPI package to cluster instructions --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 449e7194..c187316a 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@ spack load /ux27hbj # GCC spack load gcc@11.2.0 +# OpenMPI +spack load openmpi@4.1.2 + # libtiff spack load /ycf67m3 ``` From 06f12b588b1bcf1ba4380192eaa7ad2eb18dbf0f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 7 Feb 2023 12:09:54 -0500 Subject: [PATCH 10/42] Adding argument parsing for DNAS --- common/process_arguments.cxx | 9 +++++ examm/examm.cxx 
| 18 +-------- examm/examm.hxx | 2 +- rnn/dnas_node.cxx | 73 +++++++++++++++++++++++++----------- rnn/dnas_node.hxx | 2 +- rnn/generate_nn.cxx | 11 ++++++ rnn/rnn_genome.cxx | 30 ++++----------- rnn/rnn_genome.hxx | 2 + rnn/rnn_node_interface.cxx | 35 ++++++++++++++--- rnn/rnn_node_interface.hxx | 9 ++++- 10 files changed, 124 insertions(+), 67 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index f4bf87a8..885f28a0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -32,6 +32,15 @@ EXAMM* generate_examm_from_arguments( // get_argument(arguments, "--sequence_length_lower_bound", false, sequence_length_lower_bound); // get_argument(arguments, "--sequence_length_upper_bound", false, sequence_length_upper_bound); + vector dnas_node_type_strings; + get_argument_vector(arguments, "--dnas_node_types", false, dnas_node_type_strings); + if (dnas_node_type_strings.size() != 0) { + dnas_node_types.clear(); + for (auto node_type : dnas_node_type_strings) { + dnas_node_types.push_back(node_type_from_string(node_type)); + } + } + GenomeProperty* genome_property = new GenomeProperty(); genome_property->generate_genome_property_from_arguments(arguments); genome_property->get_time_series_parameters(time_series_sets); diff --git a/examm/examm.cxx b/examm/examm.cxx index ce137d6f..f017ab8b 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -205,22 +205,8 @@ void EXAMM::update_log() { void EXAMM::set_possible_node_types(vector possible_node_type_strings) { possible_node_types.clear(); - for (int32_t i = 0; i < (int32_t) possible_node_type_strings.size(); i++) { - string node_type_s = possible_node_type_strings[i]; - - bool found = false; - - for (int32_t j = 0; j < NUMBER_NODE_TYPES; j++) { - if (NODE_TYPES[j].compare(node_type_s) == 0) { - found = true; - possible_node_types.push_back(j); - } - } - - if (!found) { - Log::error("unknown node type: '%s'\n", node_type_s.c_str()); - exit(1); - } + for (auto node_type : 
possible_node_type_strings) { + possible_node_types.push_back(node_type_from_string(node_type)); } } diff --git a/examm/examm.hxx b/examm/examm.hxx index 5ccb545e..ac5d56eb 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -63,7 +63,7 @@ class EXAMM { double split_node_rate; double merge_node_rate; - vector possible_node_types; + vector possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; vector op_log_ordering; map inserted_counts; diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index ba6f3ba3..46a20c69 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -1,4 +1,9 @@ #include +using std::sort; + +#include +using std::pair; + #include #include using std::max; @@ -72,7 +77,7 @@ DNASNode::~DNASNode() { template void DNASNode::gumbel_noise(Rng& rng, vector& output) { - for (int i = 0; i < output.size(); i++) { + for (auto i = 0; i < output.size(); i++) { output[i] = -log(-log(uniform_real_distribution(0.0, 1.0)(rng))); } } @@ -92,18 +97,45 @@ void DNASNode::calculate_z() { xtotal = 0.0; double emax = -10000000; - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { x[i] = g[i] + log(pi[i]); x[i] /= tao; emax = max(emax, x[i]); } - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { x[i] = exp(emax - x[i]); xtotal += x[i]; } - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { z[i] = x[i] / xtotal; } + + if (k > 0) { + pair ps_with_indices[z.size()]; + for (int32_t i = 0; i < (int32_t) z.size(); i++) { + ps_with_indices[i] = pair(i, z[i]); + } + + std::sort(ps_with_indices, ps_with_indices + z.size(), + [](const pair& a, const pair& b) { + // Descending order + return a.second > b.second; + } + ); + + double total = 0.0; + for (int i = 0; i < k; i++) { + total += ps_with_indices[i].second; + } + + for (int i = 0; i < z.size(); i++) { + z[i] = 0.0; + } + + for (int i = 0; i < k; i++) { + z[ps_with_indices[i].first] = 
ps_with_indices[i].second / total; + } + } } void DNASNode::reset(int32_t series_length) { @@ -151,7 +183,7 @@ void DNASNode::input_fired(int32_t time, double incoming_output) { node_outputs[time][maxi] = nodes[maxi]->output_values[time]; output_values[time] = nodes[maxi]->output_values[time]; } else { - for (int i = 0; i < nodes.size(); i++) { + for (auto i = 0; i < nodes.size(); i++) { auto node = nodes[i]; node->input_fired(time, input_values[time]); node_outputs[time][i] = node->output_values[time]; @@ -190,7 +222,7 @@ void DNASNode::try_update_deltas(int32_t time) { d_input[time] += nodes[maxi]->d_input[time]; } else { - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { nodes[i]->output_fired(time, delta * z[i]); double p = (x[i] / pi[i]); p *= ((delta * node_outputs[time][i]) / xtotal); @@ -254,7 +286,7 @@ void DNASNode::set_weights(const vector& parameters) { void DNASNode::get_weights(int32_t& offset, vector& parameters) const { // Log::info("pi start %d; ", offset); - for (int i = 0; i < pi.size(); i++) { + for (auto i = 0; i < pi.size(); i++) { parameters[offset++] = pi[i]; } // Log::info_no_header("pi end %d \n", offset); @@ -265,24 +297,23 @@ void DNASNode::get_weights(int32_t& offset, vector& parameters) const { void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; - for (int i = 0; i < pi.size(); i++) { + for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; } // Log::info("Pi indices: %d-%d\n", start, offset); for (auto node : nodes) { node->set_weights(offset, parameters); } - Log::info("Just set weights\n"); calculate_z(); - string s = "Pi = { "; - for (auto p : pi) { - s += std::to_string(p) + ", "; - } - Log::info("%s }\n", s.c_str()); + // string s = "Pi = { "; + // for (auto p : pi) { + // s += std::to_string(p) + ", "; + // } + // Log::info("%s }\n", s.c_str()); } void DNASNode::set_pi(const vector& new_pi) { - for (int i = 0; i < pi.size(); i++) { + for (auto i 
= 0; i < pi.size(); i++) { pi[i] = new_pi[i]; } calculate_maxi(); @@ -293,7 +324,7 @@ void DNASNode::calculate_maxi() { maxi = 0; double max_pi = pi[0]; - for (int i = 1; i < nodes.size(); i++) { + for (auto i = 1; i < nodes.size(); i++) { if (pi[i] > max_pi) { max_pi = pi[i]; maxi = i; @@ -314,11 +345,11 @@ void DNASNode::get_gradients(vector& gradients) { if (counter >= CRYSTALLIZATION_THRESHOLD) { offset += pi.size(); - for (int i = 0; i < nodes.size(); i++) { + for (auto i = 0; i < nodes.size(); i++) { RNN_Node_Interface* node = nodes[i]; if (i == maxi) { node->get_gradients(temp); - for (int j = 0; j < temp.size(); j++) { + for (auto j = 0; j < temp.size(); j++) { gradients[offset++] = temp[j]; } } else { @@ -328,13 +359,13 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; - for (int i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i]; + for (auto i = 0; i < pi.size(); i++) { + gradients[offset++] = d_pi[i] * 0.1; } for (auto node : nodes) { node->get_gradients(temp); - for (int i = 0; i < temp.size(); i++) { + for (auto i = 0; i < temp.size(); i++) { gradients[offset++] = temp[i]; } } diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 435e5400..776119cc 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -22,7 +22,7 @@ using std::unique_ptr; #include "rnn_node.hxx" #include "rnn_node_interface.hxx" -#define CRYSTALLIZATION_THRESHOLD 50000 +#define CRYSTALLIZATION_THRESHOLD 1000 class DNASNode : public RNN_Node_Interface { private: diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index c451a098..f9e3c61a 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -39,12 +39,23 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co case DNAS_NODE: Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n"); exit(1); + default: + Log::fatal("If you are seeing this, an invalid node_kind was used to 
create a node (node_kind = %d\n", node_kind); + exit(1); } + + // Unreachable + return nullptr; } DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types) { vector nodes(node_types.size()); + if (node_types.size() == 0) { + Log::fatal("Node types cannot be empty - failed to create DNAS node!\n"); + exit(1); + } + int i = 0; for (auto node_type : node_types) { nodes[i++] = create_hidden_node(node_type, innovation_counter, depth); diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index c7b60b43..370998ee 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -59,6 +59,9 @@ using std::vector; #include "rnn_node.hxx" #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" +#include "generate_nn.hxx" + +extern vector dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; string parse_fitness(double fitness) { if (fitness == EXAMM_MAX_DOUBLE) { @@ -1628,27 +1631,10 @@ RNN_Node_Interface* RNN_Genome::create_node( WeightType weight_initialize = weight_rules->get_weight_initialize_method(); Log::trace("CREATING NODE, type: '%s'\n", NODE_TYPES[node_type].c_str()); - if (node_type == LSTM_NODE) { - n = new LSTM_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == DELTA_NODE) { - n = new Delta_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == GRU_NODE) { - n = new GRU_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == ENARC_NODE) { - n = new ENARC_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == ENAS_DAG_NODE) { - n = new ENAS_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == RANDOM_DAG_NODE) { - n = new RANDOM_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == MGU_NODE) { - n = new MGU_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == UGRNN_NODE) { - n = new 
UGRNN_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == SIMPLE_NODE || node_type == JORDAN_NODE || node_type == ELMAN_NODE) { - n = new RNN_Node(++node_innovation_count, HIDDEN_LAYER, depth, node_type); + if (node_type != DNAS_NODE) { + n = create_hidden_node(node_type, node_innovation_count, depth); } else { - Log::fatal("ERROR: attempted to create a node with an unknown node type: %d\n", node_type); - exit(1); + n = create_dnas_node(node_innovation_count, depth, dnas_node_types); } if (mutated_component_weight == WeightType::LAMARCKIAN) { @@ -3213,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { } else if (node_type == DNAS_NODE) { int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); - + int32_t counter; bin_istream.read((char*) &counter, sizeof(int32_t)); vector pi(n_nodes, 0.0); @@ -3224,7 +3210,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { nodes[i] = RNN_Genome::read_node_from_stream(bin_istream); } - DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, node_type, depth, counter); + DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, layer_type, depth, counter); dnas_node->set_pi(pi); node = (RNN_Node_Interface*) dnas_node; } else { diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index acba093c..deaf8bce 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -32,6 +32,8 @@ using std::vector; // mysql can't handle the max float value for some reason #define EXAMM_MAX_DOUBLE 10000000 +extern vector dnas_node_types; + string parse_fitness(double fitness); class RNN_Genome { diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index 2ad8d065..55f5e057 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -1,20 +1,45 @@ #include +using std::max; + +#include + #include using std::ostream; #include using std::string; -#include -using std::max; - #include 
"common/log.hxx" #include "rnn/rnn_genome.hxx" #include "rnn_node_interface.hxx" -extern const int32_t NUMBER_NODE_TYPES = 9; +extern const int32_t NUMBER_NODE_TYPES = 11; extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", - "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG"}; + "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const unordered_map string_to_node_type = { + { "simple", SIMPLE_NODE }, + { "jordan", JORDAN_NODE }, + { "elman", ELMAN_NODE }, + { "ugrnn", UGRNN_NODE }, + { "mgu", MGU_NODE }, + { "gru", GRU_NODE }, + { "delta", DELTA_NODE }, + { "lstm", LSTM_NODE }, + { "enarc", ENARC_NODE }, + { "enas", ENAS_DAG_NODE }, + { "dnas", DNAS_NODE } +}; + +int32_t node_type_from_string(string& node_type) { + std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); }); + + if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) { + return it->second; + } else { + Log::fatal("Invalid node type '%s'\n", node_type.c_str()); + exit(1); + } +} double bound(double value) { if (value < -10.0) { diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx index 26dc0f3c..15ec45cd 100644 --- a/rnn/rnn_node_interface.hxx +++ b/rnn/rnn_node_interface.hxx @@ -12,6 +12,9 @@ using std::uniform_real_distribution; #include using std::string; +#include +using std::unordered_map; + #include using std::vector; @@ -25,6 +28,8 @@ class RNN; extern const int32_t NUMBER_NODE_TYPES; extern const string NODE_TYPES[]; +extern const unordered_map string_to_node_type; +int32_t node_type_from_string(string& node_type); #define SIMPLE_NODE 0 #define JORDAN_NODE 1 @@ -39,6 +44,8 @@ extern const string NODE_TYPES[]; #define RANDOM_DAG_NODE 10 #define DNAS_NODE 11 +int32_t node_type_from_string(string& node_type); + double sigmoid(double value); double sigmoid_derivative(double value); double tanh_derivative(double value); @@ -112,7 +119,7 @@ class 
RNN_Node_Interface { virtual RNN_Node_Interface* copy() const = 0; - void write_to_stream(ostream& out); + virtual void write_to_stream(ostream& out); int32_t get_node_type() const; int32_t get_layer_type() const; From ad5a7a3523fa05a965fb28d3baf4f089b2c67910 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Thu, 9 Feb 2023 13:54:11 -0500 Subject: [PATCH 11/42] Formatting --- common/process_arguments.cxx | 6 ++++++ examm/examm.hxx | 3 ++- rnn/dnas_node.cxx | 3 ++- rnn/dnas_node.hxx | 2 +- rnn/generate_nn.cxx | 4 +++- rnn/rnn_genome.cxx | 6 +++--- rnn/rnn_node_interface.cxx | 31 ++++++++++++++++--------------- 7 files changed, 33 insertions(+), 22 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 885f28a0..f2e29ac0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -39,6 +39,12 @@ EXAMM* generate_examm_from_arguments( for (auto node_type : dnas_node_type_strings) { dnas_node_types.push_back(node_type_from_string(node_type)); } + + Log::info("Using following node types for dnas: "); + for (auto s : dnas_node_type_strings) { + Log::info_no_header("%s", s.c_str()); + } + Log::info_no_header("\n"); } GenomeProperty* genome_property = new GenomeProperty(); diff --git a/examm/examm.hxx b/examm/examm.hxx index ac5d56eb..c0c0ee03 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -63,7 +63,8 @@ class EXAMM { double split_node_rate; double merge_node_rate; - vector possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; + vector possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, + MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; vector op_log_ordering; map inserted_counts; diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 46a20c69..dcdab7e0 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -116,7 +116,8 @@ void DNASNode::calculate_z() { ps_with_indices[i] = pair(i, z[i]); } - std::sort(ps_with_indices, 
ps_with_indices + z.size(), + std::sort( + ps_with_indices, ps_with_indices + z.size(), [](const pair& a, const pair& b) { // Descending order return a.second > b.second; diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 776119cc..c3d74e6b 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -62,7 +62,7 @@ class DNASNode : public RNN_Node_Interface { int32_t maxi = -1; // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) - int32_t k = -1; + int32_t k = 1; // Whether to re-sample the gumbel softmax distribution when resetting the node. // Can be set externally using DNASNode::set_stochastic diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index f9e3c61a..a84fb36f 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -40,7 +40,9 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n"); exit(1); default: - Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind); + Log::fatal( + "If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind + ); exit(1); } diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 370998ee..7e452ad0 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -50,6 +50,7 @@ using std::vector; #include "dnas_node.hxx" #include "enarc_node.hxx" #include "enas_dag_node.hxx" +#include "generate_nn.hxx" #include "gru_node.hxx" #include "lstm_node.hxx" #include "mgu_node.hxx" @@ -59,9 +60,8 @@ using std::vector; #include "rnn_node.hxx" #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" -#include "generate_nn.hxx" -extern vector dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; +extern vector dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; string parse_fitness(double fitness) 
{ if (fitness == EXAMM_MAX_DOUBLE) { @@ -3199,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { } else if (node_type == DNAS_NODE) { int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); - + int32_t counter; bin_istream.read((char*) &counter, sizeof(int32_t)); vector pi(n_nodes, 0.0); diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index 55f5e057..ab5796b2 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -2,7 +2,6 @@ using std::max; #include - #include using std::ostream; @@ -14,24 +13,26 @@ using std::string; #include "rnn_node_interface.hxx" extern const int32_t NUMBER_NODE_TYPES = 11; -extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", - "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", + "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; extern const unordered_map string_to_node_type = { - { "simple", SIMPLE_NODE }, - { "jordan", JORDAN_NODE }, - { "elman", ELMAN_NODE }, - { "ugrnn", UGRNN_NODE }, - { "mgu", MGU_NODE }, - { "gru", GRU_NODE }, - { "delta", DELTA_NODE }, - { "lstm", LSTM_NODE }, - { "enarc", ENARC_NODE }, - { "enas", ENAS_DAG_NODE }, - { "dnas", DNAS_NODE } + {"simple", SIMPLE_NODE}, + {"jordan", JORDAN_NODE}, + { "elman", ELMAN_NODE}, + { "ugrnn", UGRNN_NODE}, + { "mgu", MGU_NODE}, + { "gru", GRU_NODE}, + { "delta", DELTA_NODE}, + { "lstm", LSTM_NODE}, + { "enarc", ENARC_NODE}, + { "enas", ENAS_DAG_NODE}, + { "dnas", DNAS_NODE} }; int32_t node_type_from_string(string& node_type) { - std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); }); + std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c) { + return std::tolower(c); + }); if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) { return it->second; 
From eda79a02f422d6b1a5dc600acefcccac5f09f7f9 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 12 Apr 2023 10:54:22 -0400 Subject: [PATCH 12/42] Committing experiment scripts --- ground_truth_experiments/cell_experiments.sh | 40 ++++++++++++++++++++ ground_truth_experiments/source_genomes.sh | 33 ++++++++++++++++ rnn/dnas_node.cxx | 30 +++++++++++++-- rnn/dnas_node.hxx | 5 ++- rnn_examples/train_rnn.cxx | 25 +++++++----- 5 files changed, 118 insertions(+), 15 deletions(-) create mode 100755 ground_truth_experiments/cell_experiments.sh create mode 100755 ground_truth_experiments/source_genomes.sh diff --git a/ground_truth_experiments/cell_experiments.sh b/ground_truth_experiments/cell_experiments.sh new file mode 100755 index 00000000..9c0e29d4 --- /dev/null +++ b/ground_truth_experiments/cell_experiments.sh @@ -0,0 +1,40 @@ +#!/usr/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 +bp_epoch=1000 + +for SIZE in 1 2 4; do + for CELL_TYPE in dnas; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + output_dir=ground_truth_experiments/results/$CELL_TYPE/$SIZE/$fold + mkdir -p $output_dir + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --stochastic \ + --rnn_type $CELL_TYPE \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --random_sequence_length \ + --sequence_length_lower_bound 50 \ + --sequence_length_upper_bound 100 \ + --max_recurrent_depth 1 \ + --weight_update 
adagrad \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level ERROR \ + --file_message_level INFO & + done + done + wait +done + diff --git a/ground_truth_experiments/source_genomes.sh b/ground_truth_experiments/source_genomes.sh new file mode 100755 index 00000000..1c251134 --- /dev/null +++ b/ground_truth_experiments/source_genomes.sh @@ -0,0 +1,33 @@ +#!/usr/bin/zsh +# This is an example of running EXAMM MPI version on c172 dataset +# +# The c172 dataset is not normalized +# To run datasets that's not normalized, make sure to add arguments: +# --normalize min_max for Min Max normalization, or +# --normalize avg_std_dev for Z-score normalization + +INPUT_PARAMETERS="AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd" +OUTPUT_PARAMETERS="Pitch" + +for i in 0 1 2 3 4 5 6 7 8 9; do + exp_name="ground_truth_experiments/results/source_genomes/$i" + mkdir -p $exp_name + echo $exp_name + mpirun -np 5 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --number_islands 8 \ + --island_size 8 \ + --max_genomes 10000 \ + --bp_iterations 5 \ + --num_mutations 2 \ + --normalize min_max \ + --output_directory $exp_name \ + --possible_node_types simple UGRNN MGU GRU delta LSTM \ + --std_message_level ERROR \ + --file_message_level INFO & +done +wait diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index dcdab7e0..f87868f3 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -49,7 +49,6 @@ DNASNode::DNASNode(const DNASNode& src) : RNN_Node_Interface(src.innovation_numb g = src.g; x = src.x; xtotal = src.xtotal; - tao = 
src.tao; stochastic = src.stochastic; counter = src.counter; maxi = src.maxi; @@ -92,8 +91,32 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) { calculate_z(); } +double DNASNode::calculate_pi_lr() { + double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; + if (percentage_done < 0.33) { + return 0.0; + } else if (percentage_done < 0.66) { + double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; + return 0.5 + percentage_done_with_phase * .5; + } else { + return 0.1; + } +} + +double DNASNode::calculate_tao() { + double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; + if (percentage_done < 0.33) { + return 1.33; + } else if (percentage_done < 0.66) { + double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; + return 1.33 - percentage_done_with_phase * 0.66; + } else { + return 0.33; + } +} + void DNASNode::calculate_z() { - tao = max(1.0 / 3.0, 1.0 / (1.0 + (double) counter * 0.05)); + tao = calculate_tao(); xtotal = 0.0; double emax = -10000000; @@ -360,8 +383,9 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; + double pi_lr = calculate_pi_lr(); for (auto i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i] * 0.1; + gradients[offset++] = d_pi[i] * pi_lr; } for (auto node : nodes) { diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index c3d74e6b..76aa6969 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -56,10 +56,9 @@ class DNASNode : public RNN_Node_Interface { // A vector to put gumbel noise into; just to avoid re-allocation vector noise; - // Temperature used when drawing samples from Gumbel-Softmax(pi) - double tao = 1.0; int32_t counter = 0; int32_t maxi = -1; + double tao; // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) int32_t k = 1; @@ -82,6 +81,8 @@ class DNASNode : public RNN_Node_Interface { template void sample_gumbel_softmax(Rng& rng); void 
calculate_z(); + double calculate_tao(); + double calculate_pi_lr(); virtual void initialize_lamarckian( minstd_rand0& generator, NormalDistribution& normal_distribution, double mu, double sigma diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index ffdf8999..8c5b0b1c 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -100,6 +100,9 @@ int main(int argc, char** argv) { int32_t max_recurrent_depth; get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + int32_t hidden_layer_size = number_inputs; + get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); + WeightRules* weight_rules = new WeightRules(arguments); weight_update_method = new WeightUpdate(); @@ -112,59 +115,59 @@ int main(int argc, char** argv) { Log::info("RNN TYPE = %s\n", rnn_type.c_str()); if (rnn_type == "lstm") { genome = create_lstm( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "gru") { genome = create_gru( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "delta") { genome = create_delta( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "mgu") { genome = create_mgu( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == 
"ugrnn") { genome = create_ugrnn( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "ff") { genome = create_ff( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "jordan") { genome = create_jordan( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "elman") { genome = create_elman( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "dnas") { - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE}; + vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; genome = create_dnas_nn( input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, weight_rules ); } else { - Log::fatal("ERROR: incorrect rnn type\n"); + Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); Log::fatal("Possibilities are:\n"); Log::fatal(" lstm\n"); Log::fatal(" gru\n"); @@ -232,6 +235,8 @@ int main(int argc, char** argv) { genome->get_weights(best_parameters); rnn->set_weights(best_parameters); + genome->write_to_file(output_directory + "/output_genome.bin"); + Log::info("TRAINING ERRORS:\n"); Log::info("MSE: %lf\n", genome->get_mse(best_parameters, training_inputs, training_outputs)); Log::info("MAE: 
%lf\n", genome->get_mae(best_parameters, training_inputs, training_outputs)); From cd597a43b03a901a23ed5fdbb4a0f97a964ae997 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 20 Sep 2023 18:01:43 -0400 Subject: [PATCH 13/42] Commit for AISTATS results --- common/files.hxx | 2 + common/log.cxx | 8 +- common/process_arguments.cxx | 17 +- initial_integration_experiments/dnas.zsh | 55 ++++++ .../post_training_dnas.zsh | 31 +++ mpi/examm_mpi.cxx | 7 +- rnn/dnas_node.cxx | 49 +++-- rnn/dnas_node.hxx | 12 +- rnn/rnn_genome.cxx | 76 +++++++- rnn/rnn_genome.hxx | 4 + rnn_examples/CMakeLists.txt | 3 + rnn_examples/dnas_info.cxx | 96 +++++++++ rnn_examples/train_rnn.cxx | 182 ++++++++++-------- 13 files changed, 427 insertions(+), 115 deletions(-) create mode 100755 initial_integration_experiments/dnas.zsh create mode 100755 initial_integration_experiments/post_training_dnas.zsh create mode 100644 rnn_examples/dnas_info.cxx diff --git a/common/files.hxx b/common/files.hxx index ac23ff0d..8c4c8a43 100644 --- a/common/files.hxx +++ b/common/files.hxx @@ -1,6 +1,8 @@ #ifndef EXACT_BOINC_COMMON_HXX #define EXACT_BOINC_COMMON_HXX +#include + #include using std::runtime_error; diff --git a/common/log.cxx b/common/log.cxx index 623475e8..6f82e67f 100644 --- a/common/log.cxx +++ b/common/log.cxx @@ -79,11 +79,11 @@ int8_t Log::parse_level_from_string(string level) { void Log::initialize(const vector& arguments) { // TODO: should read these from the CommandLine (to be created) - string std_message_level_str, file_message_level_str; + string std_message_level_str = "INFO", file_message_level_str = "NONE"; - get_argument(arguments, "--std_message_level", true, std_message_level_str); - get_argument(arguments, "--file_message_level", true, file_message_level_str); - get_argument(arguments, "--output_directory", true, output_directory); + get_argument(arguments, "--std_message_level", false, std_message_level_str); + get_argument(arguments, "--file_message_level", false, 
file_message_level_str); + get_argument(arguments, "--output_directory", false, output_directory); std_message_level = parse_level_from_string(std_message_level_str); file_message_level = parse_level_from_string(file_message_level_str); diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index f2e29ac0..4577d86c 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -10,7 +10,7 @@ using std::vector; EXAMM* generate_examm_from_arguments( const vector& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules, RNN_Genome* seed_genome -) { +) { Log::info("Getting arguments for EXAMM\n"); int32_t island_size; get_argument(arguments, "--island_size", true, island_size); @@ -186,11 +186,18 @@ void get_train_validation_data( time_series_sets->export_training_series(time_offset, train_inputs, train_outputs); time_series_sets->export_test_series(time_offset, validation_inputs, validation_outputs); - int32_t sequence_length = 0; - if (get_argument(arguments, "--sequence_length", false, sequence_length)) { - Log::info("Slicing input training data with time sequence length: %d\n", sequence_length); - slice_input_data(train_inputs, train_outputs, sequence_length); + int32_t train_sequence_length = 0; + if (get_argument(arguments, "--train_sequence_length", false, train_sequence_length)) { + Log::info("Slicing input training data with time sequence length: %d\n", train_sequence_length); + slice_input_data(train_inputs, train_outputs, train_sequence_length); + } + + int32_t validation_sequence_length = 0; + if (get_argument(arguments, "--validation_sequence_length", false, validation_sequence_length)) { + Log::info("Slicing input validation data with time sequence length: %d\n", validation_sequence_length); + slice_input_data(validation_inputs, validation_outputs, validation_sequence_length); } + Log::info("Generating time series data finished! 
\n"); } diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh new file mode 100755 index 00000000..490e0c3b --- /dev/null +++ b/initial_integration_experiments/dnas.zsh @@ -0,0 +1,55 @@ +#!/usr/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --stochastic \ + --possible_node_types DNAS \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --crystalize_iters $crystalize_iters \ + --max_genomes 10000 \ + --island_size 8 \ + --number_islands 8 \ + --dnas_k $k + + best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128 256 512 1024; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 
7; do + run_examm + done + wait + done + done +done diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh new file mode 100755 index 00000000..f3d355f1 --- /dev/null +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -0,0 +1,31 @@ +#!/usr/bin/zsh +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $OUTPUT_DIRECTORY \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --crystalize_iters $CRYSTALIZE_ITERS \ + --dnas_k $k + +} + +post_training diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index 7886d91d..c1f1dd1c 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -203,13 +203,18 @@ void worker(int32_t rank) { } else if (tag == GENOME_LENGTH_TAG) { Log::debug("received genome!\n"); RNN_Genome* genome = receive_genome_from(0); - + // have each worker write the backproagation to a separate log file string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank); Log::set_id(log_id); + + vector params; + genome->get_weights(params); + genome->backpropagate_stochastic( training_inputs, training_outputs, 
validation_inputs, validation_outputs, weight_update_method ); + Log::release_id(log_id); // go back to the worker's log for MPI communication diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index f87868f3..ac0e0aa1 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -1,6 +1,8 @@ #include using std::sort; +#include + #include using std::pair; @@ -13,13 +15,16 @@ using std::max; #include "common/log.hxx" #include "dnas_node.hxx" +int32_t DNASNode::CRYSTALLIZATION_THRESHOLD = 1000; +int32_t DNASNode::k = -1; + DNASNode::DNASNode( vector&& _nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter ) : RNN_Node_Interface(_innovation_number, _type, _depth), nodes(_nodes), pi(vector(nodes.size(), 1.0)), - z(vector(nodes.size())), + z(vector(nodes.size(), 0.0)), x(vector(nodes.size())), g(vector(nodes.size())), d_pi(vector(nodes.size())), @@ -87,20 +92,11 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) { x.assign(pi.size(), 0.0); gumbel_noise(rng, g); - calculate_z(); } double DNASNode::calculate_pi_lr() { - double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; - if (percentage_done < 0.33) { - return 0.0; - } else if (percentage_done < 0.66) { - double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 0.5 + percentage_done_with_phase * .5; - } else { - return 0.1; - } + return 0.1; } double DNASNode::calculate_tao() { @@ -109,9 +105,9 @@ double DNASNode::calculate_tao() { return 1.33; } else if (percentage_done < 0.66) { double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 1.33 - percentage_done_with_phase * 0.66; + return 1.33 - percentage_done_with_phase * 1.15; } else { - return 0.33; + return 0.18; } } @@ -162,6 +158,23 @@ void DNASNode::calculate_z() { } } +void DNASNode::print_info() { + printf(" "); + int best_pi_idx = 0; + for (int i = 0; i < nodes.size(); i++) { + printf("%-10s & ", std::to_string(pi[i]).c_str()); + if (pi[i] > pi[best_pi_idx]) + best_pi_idx 
= i; + } + printf("\n"); + Log::info("Node types: "); + for (auto node : nodes) { + Log::info_no_header("%d ", node->node_type); + } + Log::info_no_header("\n "); + Log::info("Best node: %i, node type: %d\n", best_pi_idx, nodes[best_pi_idx]->node_type); +} + void DNASNode::reset(int32_t series_length) { d_pi = vector(pi.size(), 0.0); d_input = vector(series_length, 0.0); @@ -309,6 +322,7 @@ void DNASNode::set_weights(const vector& parameters) { } void DNASNode::get_weights(int32_t& offset, vector& parameters) const { + int start = offset; // Log::info("pi start %d; ", offset); for (auto i = 0; i < pi.size(); i++) { parameters[offset++] = pi[i]; @@ -323,17 +337,14 @@ void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; + if (pi[i] < 0.1) + pi[i] = 0.1; } - // Log::info("Pi indices: %d-%d\n", start, offset); + for (auto node : nodes) { node->set_weights(offset, parameters); } calculate_z(); - // string s = "Pi = { "; - // for (auto p : pi) { - // s += std::to_string(p) + ", "; - // } - // Log::info("%s }\n", s.c_str()); } void DNASNode::set_pi(const vector& new_pi) { diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 76aa6969..00867ffe 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -22,8 +22,6 @@ using std::unique_ptr; #include "rnn_node.hxx" #include "rnn_node_interface.hxx" -#define CRYSTALLIZATION_THRESHOLD 1000 - class DNASNode : public RNN_Node_Interface { private: template @@ -60,9 +58,6 @@ class DNASNode : public RNN_Node_Interface { int32_t maxi = -1; double tao; - // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) - int32_t k = 1; - // Whether to re-sample the gumbel softmax distribution when resetting the node. 
// Can be set externally using DNASNode::set_stochastic bool stochastic = true; @@ -71,6 +66,11 @@ class DNASNode : public RNN_Node_Interface { vector> node_outputs; public: + static int32_t CRYSTALLIZATION_THRESHOLD; + + // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) + static int32_t k; + DNASNode( vector&& nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1 @@ -110,6 +110,8 @@ class DNASNode : public RNN_Node_Interface { virtual void reset(int32_t _series_length); virtual void write_to_stream(ostream& out); + void print_info(); + virtual RNN_Node_Interface* copy() const; void set_stochastic(bool stochastic); diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 7e452ad0..72868d5e 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -3221,16 +3221,35 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { node->enabled = enabled; return node; } + +#define MAGIC 0xFA + +#define read_magic(place) \ + { \ + uint8_t boo = MAGIC;\ + bin_istream.read((char *) &boo, sizeof(uint8_t)); \ + if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \ + } + +#define write_magic() \ + {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));} + void RNN_Genome::read_from_stream(istream& bin_istream) { Log::debug("READING GENOME FROM STREAM\n"); + + read_magic(__LINE__); bin_istream.read((char*) &generation_id, sizeof(int32_t)); bin_istream.read((char*) &group_id, sizeof(int32_t)); bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); + read_magic(__LINE__); + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); + read_magic(__LINE__); + WeightType weight_initialize = WeightType::NONE; WeightType weight_inheritance = WeightType::NONE; WeightType mutated_component_weight = WeightType::NONE; @@ -3239,6 
+3258,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &weight_inheritance, sizeof(int32_t)); bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t)); + read_magic(__LINE__); + weight_rules = new WeightRules(); weight_rules->set_weight_initialize_method(weight_initialize); weight_rules->set_weight_inheritance_method(weight_inheritance); @@ -3260,8 +3281,10 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generator_iss(generator_str); generator_iss >> generator; - string rng_0_1_str; - read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); + read_magic(__LINE__); + + // string rng_0_1_str; + // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); // So for some reason this was serialized incorrectly for some genomes, // but the value should always be the same so we really don't need to de-serialize it anways and can just // assign it a constant value @@ -3275,6 +3298,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generated_by_map_iss(generated_by_map_str); read_map(generated_by_map_iss, generated_by_map); + read_magic(__LINE__); + bin_istream.read((char*) &best_validation_mse, sizeof(double)); bin_istream.read((char*) &best_validation_mae, sizeof(double)); @@ -3286,6 +3311,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { initial_parameters.assign(initial_parameters_v, initial_parameters_v + n_initial_parameters); delete[] initial_parameters_v; + read_magic(__LINE__); + int32_t n_best_parameters; bin_istream.read((char*) &n_best_parameters, sizeof(int32_t)); Log::debug("reading %d best parameters.\n", n_best_parameters); @@ -3294,6 +3321,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { best_parameters.assign(best_parameters_v, best_parameters_v + n_best_parameters); delete[] best_parameters_v; + read_magic(__LINE__); + input_parameter_names.clear(); int32_t n_input_parameter_names; bin_istream.read((char*) 
&n_input_parameter_names, sizeof(int32_t)); @@ -3304,6 +3333,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { input_parameter_names.push_back(input_parameter_name); } + read_magic(__LINE__); + output_parameter_names.clear(); int32_t n_output_parameter_names; bin_istream.read((char*) &n_output_parameter_names, sizeof(int32_t)); @@ -3314,6 +3345,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { output_parameter_names.push_back(output_parameter_name); } + read_magic(__LINE__); + int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); Log::debug("reading %d nodes.\n", n_nodes); @@ -3321,6 +3354,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { nodes.clear(); for (int32_t i = 0; i < n_nodes; i++) { nodes.push_back(RNN_Genome::read_node_from_stream(bin_istream)); + read_magic(__LINE__); } int32_t n_edges; @@ -3347,6 +3381,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); edge->enabled = enabled; edges.push_back(edge); + read_magic(__LINE__); } int32_t n_recurrent_edges; @@ -3378,6 +3413,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); recurrent_edge->enabled = enabled; recurrent_edges.push_back(recurrent_edge); + read_magic(__LINE__); } read_binary_string(bin_istream, normalize_type, "normalize_type"); @@ -3402,6 +3438,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream normalize_std_devs_iss(normalize_std_devs_str); read_map(normalize_std_devs_iss, normalize_std_devs); + read_magic(__LINE__); + assign_reachability(); } @@ -3425,13 +3463,20 @@ void RNN_Genome::write_to_file(string bin_filename) { void RNN_Genome::write_to_stream(ostream& bin_ostream) { Log::debug("WRITING GENOME TO STREAM\n"); + + write_magic(); + bin_ostream.write((char*) &generation_id, sizeof(int32_t)); bin_ostream.write((char*) &group_id, sizeof(int32_t)); bin_ostream.write((char*) &bp_iterations, 
sizeof(int32_t)); + write_magic(); + bin_ostream.write((char*) &use_dropout, sizeof(bool)); bin_ostream.write((char*) &dropout_probability, sizeof(double)); + write_magic(); + WeightType weight_initialize = weight_rules->get_weight_initialize_method(); WeightType weight_inheritance = weight_rules->get_weight_inheritance_method(); WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -3439,6 +3484,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &weight_inheritance, sizeof(int32_t)); bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t)); + write_magic(); + Log::debug("generation_id: %d\n", generation_id); Log::debug("bp_iterations: %d\n", bp_iterations); @@ -3456,16 +3503,20 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { string generator_str = generator_oss.str(); write_binary_string(bin_ostream, generator_str, "generator"); - ostringstream rng_0_1_oss; - rng_0_1_oss << rng_0_1; - string rng_0_1_str = rng_0_1_oss.str(); - write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); + write_magic(); + + // ostringstream rng_0_1_oss; + // rng_0_1_oss << rng_0_1; + // string rng_0_1_str = rng_0_1_oss.str(); + // write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); ostringstream generated_by_map_oss; write_map(generated_by_map_oss, generated_by_map); string generated_by_map_str = generated_by_map_oss.str(); write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map"); + write_magic(); + bin_ostream.write((char*) &best_validation_mse, sizeof(double)); bin_ostream.write((char*) &best_validation_mae, sizeof(double)); @@ -3474,18 +3525,24 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &n_initial_parameters, sizeof(int32_t)); bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size()); + write_magic(); + int32_t n_best_parameters = (int32_t) best_parameters.size(); 
bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t)); if (n_best_parameters) { bin_ostream.write((char*) &best_parameters[0], sizeof(double) * best_parameters.size()); } + write_magic(); + int32_t n_input_parameter_names = (int32_t) input_parameter_names.size(); bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) { write_binary_string(bin_ostream, input_parameter_names[i], "input_parameter_names[" + std::to_string(i) + "]"); } + write_magic(); + int32_t n_output_parameter_names = (int32_t) output_parameter_names.size(); bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) { @@ -3494,6 +3551,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); } + write_magic(); + int32_t n_nodes = (int32_t) nodes.size(); bin_ostream.write((char*) &n_nodes, sizeof(int32_t)); Log::debug("writing %d nodes.\n", n_nodes); @@ -3504,6 +3563,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { nodes[i]->depth, nodes[i]->parameter_name.c_str() ); nodes[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_edges = (int32_t) edges.size(); @@ -3516,6 +3576,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { edges[i]->output_innovation_number ); edges[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_recurrent_edges = (int32_t) recurrent_edges.size(); @@ -3529,6 +3590,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); recurrent_edges[i]->write_to_stream(bin_ostream); + write_magic(); } write_binary_string(bin_ostream, normalize_type, "normalize_type"); @@ -3552,6 +3614,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { write_map(normalize_std_devs_oss, normalize_std_devs); string normalize_std_devs_str = normalize_std_devs_oss.str(); write_binary_string(bin_ostream, normalize_std_devs_str, "normalize_std_devs"); + + write_magic(); } 
void RNN_Genome::update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count) { diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index deaf8bce..d6330512 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -325,6 +325,10 @@ class RNN_Genome { ); vector pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type); + const vector &get_nodes() { + return this->nodes; + } + void update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count); vector get_innovation_list(); diff --git a/rnn_examples/CMakeLists.txt b/rnn_examples/CMakeLists.txt index 2bfda532..f5e294c6 100644 --- a/rnn_examples/CMakeLists.txt +++ b/rnn_examples/CMakeLists.txt @@ -16,3 +16,6 @@ target_link_libraries(evaluate_rnns_multi_offset examm_strategy exact_common exa add_executable(rnn_statistics rnn_statistics.cxx) target_link_libraries(rnn_statistics examm_strategy exact_common exact_time_series exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) +add_executable(dnas_info dnas_info.cxx) +target_link_libraries(dnas_info examm_strategy exact_common exact_time_series exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) + diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx new file mode 100644 index 00000000..74fd6519 --- /dev/null +++ b/rnn_examples/dnas_info.cxx @@ -0,0 +1,96 @@ +#include +#include +using std::getline; +using std::ifstream; +using std::ofstream; + +#include +using std::minstd_rand0; +using std::uniform_real_distribution; + +#include +using std::string; + +#include +using std::vector; + +#include "common/arguments.hxx" +#include "common/files.hxx" +#include "common/log.hxx" +#include "rnn/generate_nn.hxx" +#include "rnn/gru_node.hxx" +#include "rnn/lstm_node.hxx" +#include "rnn/rnn_edge.hxx" +#include "rnn/rnn_genome.hxx" +#include "rnn/rnn_node.hxx" +#include "rnn/rnn_node_interface.hxx" +#include "time_series/time_series.hxx" 
+#include "weights/weight_rules.hxx" +#include "weights/weight_update.hxx" + +vector > > training_inputs; +vector > > training_outputs; +vector > > test_inputs; +vector > > test_outputs; + +bool random_sequence_length; +int32_t sequence_length_lower_bound = 30; +int32_t sequence_length_upper_bound = 100; + +RNN_Genome* genome; +RNN* rnn; +WeightUpdate* weight_update_method; +int32_t bp_iterations; +bool using_dropout; +double dropout_probability; + +ofstream* log_file; +string output_directory; + +double objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double error = 0.0; + + for (int32_t i = 0; i < (int32_t) training_inputs.size(); i++) { + error += rnn->prediction_mae(training_inputs[i], training_outputs[i], false, true, 0.0); + } + + return -error; +} + +double test_objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double total_error = 0.0; + + for (int32_t i = 0; i < (int32_t) test_inputs.size(); i++) { + double error = rnn->prediction_mse(test_inputs[i], test_outputs[i], false, true, 0.0); + total_error += error; + + Log::info("output for series[%d]: %lf\n", i, error); + } + + return -total_error; +} + +int main(int argc, char** argv) { + vector arguments = vector(argv, argv + argc); + + Log::initialize(arguments); + Log::set_id("main"); + + string filename; + get_argument(arguments, "--filename", true, filename); + + RNN_Genome genome(filename); + + for (auto node : genome.get_nodes()) { + if (DNASNode *d = dynamic_cast(node)) { + std::cout << "'" << filename << "': "; + d->print_info(); + } + } + + Log::release_id("main"); +} diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 8c5b0b1c..02d7db80 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -85,24 +85,20 @@ int main(int argc, char** argv) { int32_t time_offset = 1; get_argument(arguments, "--time_offset", true, time_offset); + int32_t crystallization_threshold = 1000; + 
get_argument(arguments, "--crystalize_iters", false, crystallization_threshold); + DNASNode::CRYSTALLIZATION_THRESHOLD = crystallization_threshold; + + int32_t k = -1; + get_argument(arguments, "--dnas_k", false, k); + DNASNode::k = k; + time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); int number_inputs = time_series_sets->get_number_inputs(); // int number_outputs = time_series_sets->get_number_outputs(); - string rnn_type; - get_argument(arguments, "--rnn_type", true, rnn_type); - - int32_t num_hidden_layers; - get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); - - int32_t max_recurrent_depth; - get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); - - int32_t hidden_layer_size = number_inputs; - get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); - WeightRules* weight_rules = new WeightRules(arguments); weight_update_method = new WeightUpdate(); @@ -111,74 +107,110 @@ int main(int argc, char** argv) { vector input_parameter_names = time_series_sets->get_input_parameter_names(); vector output_parameter_names = time_series_sets->get_output_parameter_names(); - RNN_Genome* genome; - Log::info("RNN TYPE = %s\n", rnn_type.c_str()); - if (rnn_type == "lstm") { - genome = create_lstm( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "gru") { - genome = create_gru( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "delta") { - genome = create_delta( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); + string genome_file; + get_argument(arguments, "--genome_file", false, genome_file); + 
Log::info("RNN_GENOME = <%s> \n", genome_file.c_str()); - } else if (rnn_type == "mgu") { - genome = create_mgu( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ugrnn") { - genome = create_ugrnn( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ff") { - genome = create_ff( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "jordan") { - genome = create_jordan( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); + RNN_Genome* genome; - } else if (rnn_type == "elman") { - genome = create_elman( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - } else if (rnn_type == "dnas") { - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; - genome = create_dnas_nn( - input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, - weight_rules - ); + if (genome_file.size() != 0) { + genome = new RNN_Genome(genome_file); + Log::info("best weights: { "); + for (double &d : genome->get_best_parameters()) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); + + vector params; + genome->get_weights(params); + Log::info("current weights: { "); + for (double &d : params) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); } else { - Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); - Log::fatal("Possibilities are:\n"); - Log::fatal(" lstm\n"); - Log::fatal(" gru\n"); - Log::fatal(" ff\n"); - Log::fatal(" jordan\n"); - Log::fatal(" elman\n"); - exit(1); + + string rnn_type; + get_argument(arguments, "--rnn_type", 
true, rnn_type); + + Log::info("RNN TYPE = %s\n", rnn_type.c_str()); + + int32_t num_hidden_layers; + get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); + + int32_t max_recurrent_depth; + get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + + int32_t hidden_layer_size = number_inputs; + get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); + + if (rnn_type == "lstm") { + genome = create_lstm( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "gru") { + genome = create_gru( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "delta") { + genome = create_delta( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "mgu") { + genome = create_mgu( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "ugrnn") { + genome = create_ugrnn( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "ff") { + genome = create_ff( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "jordan") { + genome = create_jordan( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "elman") { + genome = create_elman( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + } else if (rnn_type == "dnas") { + vector node_types = {SIMPLE_NODE, LSTM_NODE, 
GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; + genome = create_dnas_nn( + input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, + weight_rules + ); + } else { + Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); + Log::fatal("Possibilities are:\n"); + Log::fatal(" lstm\n"); + Log::fatal(" gru\n"); + Log::fatal(" ff\n"); + Log::fatal(" jordan\n"); + Log::fatal(" elman\n"); + exit(1); + } } get_argument(arguments, "--bp_iterations", true, bp_iterations); - genome->set_bp_iterations(bp_iterations); + genome->set_bp_iterations(bp_iterations + genome->get_bp_iterations()); get_argument(arguments, "--output_directory", true, output_directory); if (output_directory != "") { @@ -211,7 +243,7 @@ int main(int argc, char** argv) { using_dropout = false; - genome->initialize_randomly(); + genome->set_weights(genome->get_best_parameters()); double learning_rate = 0.001; get_argument(arguments, "--learning_rate", false, learning_rate); From fa03e78c21bd98f20b5e4301a80d569eae9b8c67 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 12 Dec 2023 19:18:05 -0500 Subject: [PATCH 14/42] Preparing for gecco 2024 experiments --- common/color_table.cxx | 3 +- common/process_arguments.cxx | 2 +- initial_integration_experiments/dnas.zsh | 4 +- .../post_training_dnas.zsh | 2 +- mpi/examm_mpi.cxx | 2 +- rnn/dnas_node.cxx | 9 +-- rnn/rnn_genome.cxx | 60 ++++++++++--------- rnn/rnn_genome.hxx | 2 +- rnn_examples/dnas_info.cxx | 6 +- rnn_examples/train_rnn.cxx | 39 ++++++------ 10 files changed, 68 insertions(+), 61 deletions(-) diff --git a/common/color_table.cxx b/common/color_table.cxx index d9e743b0..d0c42a21 100644 --- a/common/color_table.cxx +++ b/common/color_table.cxx @@ -1026,7 +1026,8 @@ const static double bent_cool_warm[] = { 1.0, 177, 1, - 39}; + 39, +}; Color get_colormap(double value) { Color c; diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 4577d86c..65efa7b6 100644 --- 
a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -10,7 +10,7 @@ using std::vector; EXAMM* generate_examm_from_arguments( const vector& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules, RNN_Genome* seed_genome -) { +) { Log::info("Getting arguments for EXAMM\n"); int32_t island_size; get_argument(arguments, "--island_size", true, island_size); diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 490e0c3b..9ef90cca 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -1,12 +1,12 @@ #!/usr/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' +OUTPUT_PARAMETERS='E1_EGT1' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index f3d355f1..8117dadb 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -1,6 +1,6 @@ #!/usr/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' +OUTPUT_PARAMETERS='E1_EGT1' offset=1 
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index c1f1dd1c..227c3a85 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -203,7 +203,7 @@ void worker(int32_t rank) { } else if (tag == GENOME_LENGTH_TAG) { Log::debug("received genome!\n"); RNN_Genome* genome = receive_genome_from(0); - + // have each worker write the backproagation to a separate log file string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank); Log::set_id(log_id); diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index ac0e0aa1..465c024c 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -2,7 +2,6 @@ using std::sort; #include - #include using std::pair; @@ -162,9 +161,10 @@ void DNASNode::print_info() { printf(" "); int best_pi_idx = 0; for (int i = 0; i < nodes.size(); i++) { - printf("%-10s & ", std::to_string(pi[i]).c_str()); - if (pi[i] > pi[best_pi_idx]) + printf("%-10s & ", std::to_string(pi[i]).c_str()); + if (pi[i] > pi[best_pi_idx]) { best_pi_idx = i; + } } printf("\n"); Log::info("Node types: "); @@ -337,8 +337,9 @@ void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; - if (pi[i] < 0.1) + if (pi[i] < 0.1) { pi[i] = 0.1; + } } for (auto node : nodes) { diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 72868d5e..833feee6 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -3224,19 +3224,25 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { #define MAGIC 0xFA -#define read_magic(place) \ - { \ - uint8_t boo = MAGIC;\ - bin_istream.read((char *) &boo, sizeof(uint8_t)); \ - if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \ - } +#define read_magic(place) \ + { \ + uint8_t boo = MAGIC; \ + bin_istream.read((char*) &boo, sizeof(uint8_t)); \ + if (boo != MAGIC) { \ + Log::error("ERROR IN SERIALIZING - 
FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); \ + exit(-1); \ + } \ + } -#define write_magic() \ - {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));} +#define write_magic() \ + { \ + uint8_t xxmagic = MAGIC; \ + bin_ostream.write((char*) &xxmagic, sizeof(uint8_t)); \ + } void RNN_Genome::read_from_stream(istream& bin_istream) { Log::debug("READING GENOME FROM STREAM\n"); - + read_magic(__LINE__); bin_istream.read((char*) &generation_id, sizeof(int32_t)); @@ -3244,12 +3250,12 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); read_magic(__LINE__); - + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); read_magic(__LINE__); - + WeightType weight_initialize = WeightType::NONE; WeightType weight_inheritance = WeightType::NONE; WeightType mutated_component_weight = WeightType::NONE; @@ -3259,7 +3265,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t)); read_magic(__LINE__); - + weight_rules = new WeightRules(); weight_rules->set_weight_initialize_method(weight_initialize); weight_rules->set_weight_inheritance_method(weight_inheritance); @@ -3282,7 +3288,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { generator_iss >> generator; read_magic(__LINE__); - + // string rng_0_1_str; // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); // So for some reason this was serialized incorrectly for some genomes, @@ -3299,7 +3305,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { read_map(generated_by_map_iss, generated_by_map); read_magic(__LINE__); - + bin_istream.read((char*) &best_validation_mse, sizeof(double)); bin_istream.read((char*) &best_validation_mae, sizeof(double)); @@ -3312,7 +3318,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { delete[] initial_parameters_v; 
read_magic(__LINE__); - + int32_t n_best_parameters; bin_istream.read((char*) &n_best_parameters, sizeof(int32_t)); Log::debug("reading %d best parameters.\n", n_best_parameters); @@ -3322,7 +3328,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { delete[] best_parameters_v; read_magic(__LINE__); - + input_parameter_names.clear(); int32_t n_input_parameter_names; bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t)); @@ -3346,7 +3352,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { } read_magic(__LINE__); - + int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); Log::debug("reading %d nodes.\n", n_nodes); @@ -3439,7 +3445,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { read_map(normalize_std_devs_iss, normalize_std_devs); read_magic(__LINE__); - + assign_reachability(); } @@ -3471,12 +3477,12 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &bp_iterations, sizeof(int32_t)); write_magic(); - + bin_ostream.write((char*) &use_dropout, sizeof(bool)); bin_ostream.write((char*) &dropout_probability, sizeof(double)); write_magic(); - + WeightType weight_initialize = weight_rules->get_weight_initialize_method(); WeightType weight_inheritance = weight_rules->get_weight_inheritance_method(); WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -3485,7 +3491,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t)); write_magic(); - + Log::debug("generation_id: %d\n", generation_id); Log::debug("bp_iterations: %d\n", bp_iterations); @@ -3504,7 +3510,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { write_binary_string(bin_ostream, generator_str, "generator"); write_magic(); - + // ostringstream rng_0_1_oss; // rng_0_1_oss << rng_0_1; // string rng_0_1_str = rng_0_1_oss.str(); @@ -3516,7 +3522,7 @@ void RNN_Genome::write_to_stream(ostream& 
bin_ostream) { write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map"); write_magic(); - + bin_ostream.write((char*) &best_validation_mse, sizeof(double)); bin_ostream.write((char*) &best_validation_mae, sizeof(double)); @@ -3526,7 +3532,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size()); write_magic(); - + int32_t n_best_parameters = (int32_t) best_parameters.size(); bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t)); if (n_best_parameters) { @@ -3534,7 +3540,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_input_parameter_names = (int32_t) input_parameter_names.size(); bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) { @@ -3542,7 +3548,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_output_parameter_names = (int32_t) output_parameter_names.size(); bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) { @@ -3552,7 +3558,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_nodes = (int32_t) nodes.size(); bin_ostream.write((char*) &n_nodes, sizeof(int32_t)); Log::debug("writing %d nodes.\n", n_nodes); diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index d6330512..01c7e9e3 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -325,7 +325,7 @@ class RNN_Genome { ); vector pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type); - const vector &get_nodes() { + const vector& get_nodes() { return this->nodes; } diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx index 74fd6519..fac60c84 100644 --- a/rnn_examples/dnas_info.cxx +++ b/rnn_examples/dnas_info.cxx @@ -86,9 +86,9 @@ int 
main(int argc, char** argv) { RNN_Genome genome(filename); for (auto node : genome.get_nodes()) { - if (DNASNode *d = dynamic_cast(node)) { - std::cout << "'" << filename << "': "; - d->print_info(); + if (DNASNode* d = dynamic_cast(node)) { + std::cout << "'" << filename << "': "; + d->print_info(); } } diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 02d7db80..7bd5647c 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -116,7 +116,7 @@ int main(int argc, char** argv) { if (genome_file.size() != 0) { genome = new RNN_Genome(genome_file); Log::info("best weights: { "); - for (double &d : genome->get_best_parameters()) { + for (double& d : genome->get_best_parameters()) { Log::info_no_header("%f, ", d); } Log::info("}\n"); @@ -124,15 +124,14 @@ int main(int argc, char** argv) { vector params; genome->get_weights(params); Log::info("current weights: { "); - for (double &d : params) { + for (double& d : params) { Log::info_no_header("%f, ", d); } Log::info("}\n"); } else { - string rnn_type; get_argument(arguments, "--rnn_type", true, rnn_type); - + Log::info("RNN TYPE = %s\n", rnn_type.c_str()); int32_t num_hidden_layers; @@ -146,50 +145,50 @@ int main(int argc, char** argv) { if (rnn_type == "lstm") { genome = create_lstm( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "gru") { genome = create_gru( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "delta") { genome = create_delta( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, 
max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "mgu") { genome = create_mgu( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "ugrnn") { genome = create_ugrnn( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "ff") { genome = create_ff( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "jordan") { genome = create_jordan( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "elman") { genome = create_elman( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "dnas") { vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; From c0264d9ad2f82ae86d3369f7cd578e77d0a074eb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 12 Dec 2023 19:36:16 -0500 Subject: [PATCH 15/42] Tweak experimental parameters --- 
initial_integration_experiments/dnas.zsh | 8 ++++---- rnn/dnas_node.cxx | 17 ++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 9ef90cca..0c2a615f 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -8,7 +8,7 @@ offset=1 run_examm() { output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ + mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ @@ -29,7 +29,7 @@ run_examm() { --std_message_level INFO \ --file_message_level INFO \ --crystalize_iters $crystalize_iters \ - --max_genomes 10000 \ + --max_genomes 4000 \ --island_size 8 \ --number_islands 8 \ --dnas_k $k @@ -39,7 +39,7 @@ run_examm() { } CELL_TYPE='dnas' -for crystalize_iters in 128 256 512 1024; do +for crystalize_iters in 64 128 256 512; do for bp_epoch in 8 16 32 64 128; do for k in 1; do for fold in 0 1 2 3; do @@ -47,7 +47,7 @@ for crystalize_iters in 128 256 512 1024; do done wait for fold in 4 5 6 7; do - run_examm + run_examm & done wait done diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 465c024c..87e05d9b 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -99,15 +99,7 @@ double DNASNode::calculate_pi_lr() { } double DNASNode::calculate_tao() { - double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; - if (percentage_done < 0.33) { - return 1.33; - } else if (percentage_done < 0.66) { - double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 1.33 - percentage_done_with_phase * 1.15; - } else { - return 0.18; - } + return 6.0; } void DNASNode::calculate_z() { @@ -337,8 +329,8 @@ void 
DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; - if (pi[i] < 0.1) { - pi[i] = 0.1; + if (pi[i] < 0.01) { + pi[i] = 0.01; } } @@ -395,9 +387,8 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; - double pi_lr = calculate_pi_lr(); for (auto i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i] * pi_lr; + gradients[offset++] = d_pi[i]; } for (auto node : nodes) { From 1607c268ee922a8b042e555a0cf32f96fdae94bb Mon Sep 17 00:00:00 2001 From: Josh Karns Date: Tue, 26 Dec 2023 13:27:35 -0500 Subject: [PATCH 16/42] Tweaking experiments --- examm/examm.cxx | 3 + initial_integration_experiments/analyze.py | 77 +++++++++++++++++++ initial_integration_experiments/analyze.zsh | 12 +++ initial_integration_experiments/dnas.zsh | 31 ++++---- .../post_training_dnas.zsh | 2 +- rnn/generate_nn.cxx | 1 + rnn/rnn_node_interface.cxx | 3 +- 7 files changed, 110 insertions(+), 19 deletions(-) create mode 100644 initial_integration_experiments/analyze.py create mode 100644 initial_integration_experiments/analyze.zsh diff --git a/examm/examm.cxx b/examm/examm.cxx index f017ab8b..e0be2d07 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -182,10 +182,12 @@ void EXAMM::update_log() { } (*op_log_file) << endl; } + RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { best_genome = speciation_strategy->get_global_best_genome(); } + std::chrono::time_point currentClock = std::chrono::system_clock::now(); long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds @@ -193,6 +195,7 @@ void EXAMM::update_log() { << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() << 
speciation_strategy->get_strategy_information_values() << endl; + Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } } diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py new file mode 100644 index 00000000..58ba95b9 --- /dev/null +++ b/initial_integration_experiments/analyze.py @@ -0,0 +1,77 @@ +import pandas + +import numpy as np + +import matplotlib.pyplot as plt + +fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1) + +plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128} + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + if k == 8: + continue + v.sharey(a8) + v.sharex(a8) + +results = {} +for ci in [64, 128, 256, 512]: + results[ci] = {} + for bpe in [8, 16, 32, 64, 128]: + results[ci][bpe] = {} + for k in [1]: + x = [] + results[ci][bpe][k] = x + + for fold in range(8): + f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") + results[ci][bpe][k].append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0] + plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd, + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + v.legend(fontsize=12, loc="upper right") + +plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh new file mode 100644 index 00000000..5c2876f3 --- /dev/null +++ b/initial_integration_experiments/analyze.zsh @@ -0,0 +1,12 @@ +#!/usr/bin/zsh +# +for crystalize_iters in 64 128 256 512; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3 4 5 6 7; do + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + tail -1 $output_dir/fitness_log.csv + done + done + done +done diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 0c2a615f..995d072a 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -1,26 +1,24 @@ -#!/usr/bin/zsh +#!/bin/zsh 
INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \ + mpirun -np 8 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ --input_parameter_names ${=INPUT_PARAMETERS} \ --output_parameter_names ${=OUTPUT_PARAMETERS} \ --bp_iterations $bp_epoch \ - --stochastic \ - --possible_node_types DNAS \ --normalize min_max \ --num_hidden_layers $SIZE \ --hidden_layer_size $SIZE \ - --train_sequence_length 100 \ + --train_sequence_length 1000 \ --validation_sequence_length 100 \ --max_recurrent_depth 1 \ --output_directory $output_dir \ @@ -30,7 +28,7 @@ run_examm() { --file_message_level INFO \ --crystalize_iters $crystalize_iters \ --max_genomes 4000 \ - --island_size 8 \ + --island_size 32 \ --number_islands 8 \ --dnas_k $k @@ -40,16 +38,17 @@ run_examm() { CELL_TYPE='dnas' for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do + for bp_epoch in 1 2 4 8 16 32 64 128; do for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait + fold=1 run_examm +# for fold in 0 1 2 3; do +# run_examm & +# done +# wait +# for fold in 4 5 6 7; do +# run_examm & +# done +# wait done done done diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index 8117dadb..b25171a1 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ 
b/initial_integration_experiments/post_training_dnas.zsh @@ -1,4 +1,4 @@ -#!/usr/bin/zsh +#!/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' OUTPUT_PARAMETERS='E1_EGT1' diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index a84fb36f..c4068495 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -190,6 +190,7 @@ RNN_Genome* get_seed_genome( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); + // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); } diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index ab5796b2..f86eddd7 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -1,7 +1,6 @@ -#include +#include using std::max; -#include #include using std::ostream; From 778d24aa8d695394203cc7d86b8d6d835725a1f2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 8 Jan 2024 13:34:21 -0500 Subject: [PATCH 17/42] Modified scripts --- initial_integration_experiments/analyze.py | 17 ++++++++------- initial_integration_experiments/dnas.zsh | 24 ++++++++++------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py index 58ba95b9..cee900d6 100644 --- a/initial_integration_experiments/analyze.py +++ b/initial_integration_experiments/analyze.py @@ -4,28 +4,31 @@ import matplotlib.pyplot as plt -fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1) +fig, subplts = plt.subplots(6, 1) -plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128} +bprange = [1, 2, 4, 8, 16, 
32] +plts = {k:v for k, v in zip(bprange, subplts)} +print(plts) +base = plts[bprange[0]] for k, v in plts.items(): v.set_title(f"{k} BPI") - if k == 8: + if k == bprange[0]: continue - v.sharey(a8) - v.sharex(a8) + v.sharey(base) + v.sharex(base) results = {} for ci in [64, 128, 256, 512]: results[ci] = {} - for bpe in [8, 16, 32, 64, 128]: + for bpe in bprange: results[ci][bpe] = {} for k in [1]: x = [] results[ci][bpe][k] = x for fold in range(8): - f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") + f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") results[ci][bpe][k].append(f) diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 995d072a..5acc8b06 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -6,7 +6,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir mpirun -np 8 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ @@ -29,7 +29,7 @@ run_examm() { --crystalize_iters $crystalize_iters \ --max_genomes 4000 \ --island_size 32 \ - --number_islands 8 \ + --number_islands 4 \ --dnas_k $k best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) @@ -38,17 +38,15 @@ run_examm() { CELL_TYPE='dnas' for crystalize_iters in 64 128 256 512; do - for bp_epoch in 1 2 4 8 16 32 64 128; do - for k in 1; do - fold=1 run_examm -# for fold in 0 1 2 3; do -# run_examm & -# done -# wait -# for fold in 4 5 6 7; do -# run_examm & -# done -# wait + for bp_epoch in 1 2 4 8 16 32; do + for k in 1 2; do + for fold in 0 1 2 3; do + run_examm & + done + for fold in 4 5 6 7; do + run_examm & + done + wait 
done done done From ffa684a5b3bcbd1d2533efe61fcd3ca18c6f600c Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 29 Jan 2024 19:44:50 -0500 Subject: [PATCH 18/42] Prepping for cluster --- CMakeLists.txt | 3 +- examm/examm.cxx | 1 + initial_integration_experiments/analyze.py | 122 +++++++++++------- initial_integration_experiments/aviation.zsh | 37 ++++++ initial_integration_experiments/control.zsh | 50 +++++++ initial_integration_experiments/debug.zsh | 55 ++++++++ initial_integration_experiments/dnas.zsh | 25 ++-- .../gp_control.zsh | 59 +++++++++ .../post_training_dnas.zsh | 8 +- initial_integration_experiments/posttrain.zsh | 3 + initial_integration_experiments/run_examm.zsh | 25 ++++ .../run_experiments.zsh | 4 + initial_integration_experiments/wind.zsh | 39 ++++++ rnn/dnas_node.cxx | 21 +-- rnn/generate_nn.cxx | 1 - rnn/rnn_edge.cxx | 3 +- rnn/rnn_node.cxx | 4 +- rnn/rnn_node_interface.cxx | 12 +- rnn/rnn_node_interface.hxx | 5 +- rnn_examples/train_rnn.cxx | 11 +- time_series/time_series.cxx | 8 +- 21 files changed, 404 insertions(+), 92 deletions(-) create mode 100644 initial_integration_experiments/aviation.zsh create mode 100644 initial_integration_experiments/control.zsh create mode 100755 initial_integration_experiments/debug.zsh mode change 100755 => 100644 initial_integration_experiments/dnas.zsh create mode 100644 initial_integration_experiments/gp_control.zsh create mode 100644 initial_integration_experiments/posttrain.zsh create mode 100644 initial_integration_experiments/run_examm.zsh create mode 100755 initial_integration_experiments/run_experiments.zsh create mode 100644 initial_integration_experiments/wind.zsh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1af3a314..5d62df91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS " 
-Wall -O3 -funroll-loops -msse3") +# SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 -fsanitize=address") +SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 ") SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") diff --git a/examm/examm.cxx b/examm/examm.cxx index e0be2d07..1e1c2314 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -324,6 +324,7 @@ void EXAMM::mutate(int32_t max_mutations, RNN_Genome* g) { g->assign_reachability(); double rng = rng_0_1(generator) * total; int32_t new_node_type = get_random_node_type(); + Log::info("%d %d\n", new_node_type, NODE_TYPES.size()); string node_type_str = NODE_TYPES[new_node_type]; Log::debug("rng: %lf, total: %lf, new node type: %d (%s)\n", rng, total, new_node_type, node_type_str.c_str()); diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py index cee900d6..78d51466 100644 --- a/initial_integration_experiments/analyze.py +++ b/initial_integration_experiments/analyze.py @@ -6,7 +6,7 @@ fig, subplts = plt.subplots(6, 1) -bprange = [1, 2, 4, 8, 16, 32] +bprange = [8, 16] plts = {k:v for k, v in zip(bprange, subplts)} print(plts) base = plts[bprange[0]] @@ -18,63 +18,93 @@ v.sharey(base) v.sharex(base) +def avg(files, slice_at=-1): + r = {} + for file in files: + x = [] + + for fold in range(8): + f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] + print(f"{file}/{fold} -> {len(f)}") + x.append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + r[file] = { + 'mean_nodes': nodesmean, + 'mean_edges': edgesmean, + 'mean_rec_edges':redgesmean, + 'bpi': bpimean, + 'mean_mse': msemean, + 'std_mse': msestd, + } + return r + results = {} -for ci in [64, 128, 256, 512]: +for ci in [64]: results[ci] = {} for bpe in bprange: results[ci][bpe] = {} for k in [1]: - x = [] + f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" + x = avg([f])[f] results[ci][bpe][k] = x + print(x) - for fold in range(8): - f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") - results[ci][bpe][k].append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - + print(x['mean_mse'] - x['std_mse']) + g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] + plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) +control_results = {} +for bp in [8, 16]: + key = f"initial_integration_experiments/results/control_v7/{bp}" + r = avg([key])[key] + control_results[bp] = r + print(list(r.keys())) + g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] + plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0] - plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd, - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) for k, v in plts.items(): v.set_title(f"{k} BPI") v.legend(fontsize=12, loc="upper right") + plt.show() diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh new file mode 100644 index 
00000000..7059da3e --- /dev/null +++ b/initial_integration_experiments/aviation.zsh @@ -0,0 +1,37 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + +for output_params in "E1_CHT1" "Pitch"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/initial_integration_experiments/control.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + 
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh new file mode 100755 index 00000000..ce159c01 --- /dev/null +++ b/initial_integration_experiments/debug.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames 
datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes 8192 \ + --island_size 32 \ + --number_islands 4 \ + --stochastic \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128; do + for bp_epoch in 8; do + for k in 1; do + for fold in 0; do + run_examm + done + # wait + # for fold in 4 5 6 7; do + # run_examm & + # done + # wait + done + done +done diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh old mode 100755 new mode 100644 index 5acc8b06..8b525b09 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -6,43 +6,46 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ + mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ + 
--possible_node_types dnas \ + --stochastic 1 \ --input_parameter_names ${=INPUT_PARAMETERS} \ --output_parameter_names ${=OUTPUT_PARAMETERS} \ --bp_iterations $bp_epoch \ --normalize min_max \ --num_hidden_layers $SIZE \ --hidden_layer_size $SIZE \ - --train_sequence_length 1000 \ --validation_sequence_length 100 \ --max_recurrent_depth 1 \ --output_directory $output_dir \ --log_filename fitness.csv \ --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ + --std_message_level WARNING \ + --file_message_level WARNING \ --crystalize_iters $crystalize_iters \ - --max_genomes 4000 \ + --max_genomes $max_genomes \ --island_size 32 \ --number_islands 4 \ --dnas_k $k - best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh } CELL_TYPE='dnas' -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 1 2 4 8 16 32; do - for k in 1 2; do +bp_ge=(8 8192 16 4096 32 2048) +for crystalize_iters in 256; do + for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for k in 1; do for fold in 0 1 2 3; do run_examm & done + wait for fold in 4 5 6 7; do run_examm & done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh new file mode 100644 index 00000000..049e9750 --- /dev/null +++ b/initial_integration_experiments/gp_control.zsh @@ -0,0 +1,59 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=test_results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + 
--test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done + done +} + +INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" +training_filenames=(datasets/2018_coal/burner_[0-9].csv) +test_filenames=(datasets/2018_coal/burner_1[0-1].csv) +OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") +run_group + + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUTS=("E1_CHT1" "Pitch") +training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) +test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) +run_group + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" +OUTPUTS=("Cm_avg" "P_avg") +training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) 
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) +run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index b25171a1..1c226178 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -1,7 +1,4 @@ #!/bin/zsh -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1' - offset=1 post_training() { @@ -21,11 +18,12 @@ post_training() { --log_filename post_training.csv \ --learning_rate 0.01 \ --weight_update adagrad \ - --train_sequence_length 100 \ + --train_sequence_length 1000 \ --validation_sequence_length 100 \ --crystalize_iters $CRYSTALIZE_ITERS \ --dnas_k $k - + + tail -1 $OUTPUT_DIRECTORY/post_training.csv } post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh new file mode 100644 index 00000000..cc54a2eb --- /dev/null +++ b/initial_integration_experiments/posttrain.zsh @@ -0,0 +1,3 @@ +#!/bin/zsh + + diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh new file mode 100644 index 00000000..77d2893f --- /dev/null +++ b/initial_integration_experiments/run_examm.zsh @@ -0,0 +1,25 @@ +#!/bin/zsh + +output_dir=results/v0/$bp_epoch/$fold +mkdir -p $output_dir + +mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize 
min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 4000 \ + --island_size 32 \ + --number_islands 4 + +touch $output_dir/completed + diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh new file mode 100755 index 00000000..7dd8e956 --- /dev/null +++ b/initial_integration_experiments/run_experiments.zsh @@ -0,0 +1,4 @@ +#!/bin/zsh + +initial_integration_experiments/control.zsh +initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh new file mode 100644 index 00000000..7e68f482 --- /dev/null +++ b/initial_integration_experiments/wind.zsh @@ -0,0 +1,39 @@ +#!/bin/zsh + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ + --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch 
$output_dir/completed +} + + +for output_params in "Cm_avg" "P_avg"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 87e05d9b..f6d42bfe 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -168,14 +168,15 @@ void DNASNode::print_info() { } void DNASNode::reset(int32_t series_length) { - d_pi = vector(pi.size(), 0.0); - d_input = vector(series_length, 0.0); - node_outputs = vector>(series_length, vector(pi.size(), 0.0)); - output_values = vector(series_length, 0.0); - error_values = vector(series_length, 0.0); - inputs_fired = vector(series_length, 0); - outputs_fired = vector(series_length, 0); - input_values = vector(series_length, 0.0); + d_pi.assign(pi.size(), 0.0); + d_input.assign(series_length, 0.0); + node_outputs.clear(); + for (int i = 0; i < series_length; i++) node_outputs.emplace_back(pi.size(), 0.0); + output_values.assign(series_length, 0.0); + error_values.assign(series_length, 0.0); + inputs_fired.assign(series_length, 0); + outputs_fired.assign(series_length, 0); + input_values.assign(series_length, 0.0); if (counter >= CRYSTALLIZATION_THRESHOLD) { nodes[maxi]->reset(series_length); @@ -206,8 +207,10 @@ void DNASNode::input_fired(int32_t time, double incoming_output) { } if (counter >= CRYSTALLIZATION_THRESHOLD) { + Log::info("%d hmm\n", maxi >= 0); assert(maxi >= 0); - + + Log::info("%d %d %p\n", maxi, time, nodes[maxi]); nodes[maxi]->input_fired(time, input_values[time]); node_outputs[time][maxi] = nodes[maxi]->output_values[time]; output_values[time] = nodes[maxi]->output_values[time]; diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index c4068495..a84fb36f 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -190,7 +190,6 @@ RNN_Genome* get_seed_genome( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); - // seed_genome = 
create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); } diff --git a/rnn/rnn_edge.cxx b/rnn/rnn_edge.cxx index 3227e961..babb2552 100644 --- a/rnn/rnn_edge.cxx +++ b/rnn/rnn_edge.cxx @@ -92,7 +92,7 @@ RNN_Edge* RNN_Edge::copy(const vector new_nodes) { } void RNN_Edge::propagate_forward(int32_t time) { - if (input_node->inputs_fired[time] != input_node->total_inputs) { + if (input_node->inputs_fired[time] != input_node->total_inputs || time < 0 || time >= input_node->output_values.size()) { Log::fatal( "ERROR! propagate forward called on edge %d where input_node->inputs_fired[%d] (%d) != total_inputs (%d)\n", innovation_number, time, input_node->inputs_fired[time], input_node->total_inputs @@ -104,7 +104,6 @@ void RNN_Edge::propagate_forward(int32_t time) { exit(1); } - // Log::debug("input_node %p %d\n", input_node, input_node->output_values.size()); double output = input_node->output_values[time] * weight; // Log::debug("propagating forward at time %d from %d to %d, value: %lf, input: %lf, weight: %lf\n", time, diff --git a/rnn/rnn_node.cxx b/rnn/rnn_node.cxx index 075c11ed..3e79a1df 100644 --- a/rnn/rnn_node.cxx +++ b/rnn/rnn_node.cxx @@ -57,8 +57,6 @@ void RNN_Node::input_fired(int32_t time, double incoming_output) { exit(1); } - Log::debug("node %d - input value[%d]: %lf\n", innovation_number, time, input_values[time]); - output_values[time] = tanh(input_values[time] + bias); ld_output[time] = tanh_derivative(output_values[time]); @@ -86,6 +84,8 @@ void RNN_Node::try_update_deltas(int32_t time) { outputs_fired[time], total_outputs ); exit(1); + } else if (time >= d_input.size() || time < 0) { + Log::fatal("invalid time %d\n", time); } d_input[time] *= ld_output[time]; diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index f86eddd7..210706a2 100644 
--- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -7,13 +7,16 @@ using std::ostream; #include using std::string; +#include +using std::vector; + #include "common/log.hxx" #include "rnn/rnn_genome.hxx" #include "rnn_node_interface.hxx" -extern const int32_t NUMBER_NODE_TYPES = 11; -extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", - "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const vector NODE_TYPES = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", + "delta", "LSTM", "ENARC", "ENAS_DAG", "random_dag", "dnas"}; + extern const unordered_map string_to_node_type = { {"simple", SIMPLE_NODE}, {"jordan", JORDAN_NODE}, @@ -25,7 +28,8 @@ extern const unordered_map string_to_node_type = { { "lstm", LSTM_NODE}, { "enarc", ENARC_NODE}, { "enas", ENAS_DAG_NODE}, - { "dnas", DNAS_NODE} + { "dnas", DNAS_NODE}, + { "random_dag", RANDOM_DAG_NODE}, }; int32_t node_type_from_string(string& node_type) { diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx index 15ec45cd..d1b56fa3 100644 --- a/rnn/rnn_node_interface.hxx +++ b/rnn/rnn_node_interface.hxx @@ -26,8 +26,9 @@ class RNN; #define HIDDEN_LAYER 1 #define OUTPUT_LAYER 2 -extern const int32_t NUMBER_NODE_TYPES; -extern const string NODE_TYPES[]; +extern const vector NODE_TYPES; +#define NUMBER_NODE_TYPES NODE_TYPES.size() + extern const unordered_map string_to_node_type; int32_t node_type_from_string(string& node_type); diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 7bd5647c..265b9669 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -15,6 +15,7 @@ using std::string; using std::vector; #include "common/arguments.hxx" +#include "common/process_arguments.hxx" #include "common/files.hxx" #include "common/log.hxx" #include "rnn/generate_nn.hxx" @@ -81,9 +82,9 @@ int main(int argc, char** argv) { Log::set_id("main"); TimeSeriesSets* time_series_sets = 
TimeSeriesSets::generate_from_arguments(arguments); - - int32_t time_offset = 1; - get_argument(arguments, "--time_offset", true, time_offset); + get_train_validation_data( + arguments, time_series_sets, training_inputs, training_outputs, test_inputs, test_outputs + ); int32_t crystallization_threshold = 1000; get_argument(arguments, "--crystalize_iters", false, crystallization_threshold); @@ -93,8 +94,8 @@ int main(int argc, char** argv) { get_argument(arguments, "--dnas_k", false, k); DNASNode::k = k; - time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); - time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); + // time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); + // time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); int number_inputs = time_series_sets->get_number_inputs(); // int number_outputs = time_series_sets->get_number_outputs(); diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx index de143147..e315164e 100644 --- a/time_series/time_series.cxx +++ b/time_series/time_series.cxx @@ -472,7 +472,7 @@ void TimeSeriesSet::export_time_series( if (time_offset == 0) { for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = 0; j < number_rows; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } @@ -480,7 +480,7 @@ void TimeSeriesSet::export_time_series( // output data, ignore the first N values for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = time_offset; j < number_rows; j++) { - data[i][j - time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j - time_offset] = time_series.at(requested_fields[i])->get_value(j); } } @@ -492,13 +492,13 @@ void TimeSeriesSet::export_time_series( Log::debug("doing shift for field: '%s'\n", requested_fields[i].c_str()); 
// shift the shifted fields to the same as the output, not the input for (int32_t j = -time_offset; j < number_rows; j++) { - data[i][j + time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j + time_offset] = time_series.at(requested_fields[i])->get_value(j); // Log::info("\tdata[%d][%d]: %lf\n", i, j + time_offset, data[i][j + time_offset]); } } else { Log::debug("not doing shift for field: '%s'\n", requested_fields[i].c_str()); for (int32_t j = 0; j < number_rows + time_offset; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } } From 60acb2c8c08d60844fdc161843f0a94771aa5158 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 31 Jan 2024 04:28:06 -0500 Subject: [PATCH 19/42] gecco 2024 related experiment files + hacky changes --- dnas_cluster.zsh | 69 ++++++++++++++++++++++++++++ dnas_control.zsh | 60 ++++++++++++++++++++++++ examm/island_speciation_strategy.cxx | 5 +- key | 7 +++ rnn/generate_nn.cxx | 16 ++++++- rnn/genome_property.cxx | 31 +++++++++++-- rnn/genome_property.hxx | 12 ++++- 7 files changed, 191 insertions(+), 9 deletions(-) create mode 100644 dnas_cluster.zsh create mode 100644 dnas_control.zsh create mode 100644 key diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh new file mode 100644 index 00000000..55823c0c --- /dev/null +++ b/dnas_cluster.zsh @@ -0,0 +1,69 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' 
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --use_dnas_seed true \ + --use_burn_in_bp_epoch \ + --burn_in_period 1024 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 512; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/dnas_control.zsh b/dnas_control.zsh new file mode 100644 index 00000000..5e6982c8 --- /dev/null +++ b/dnas_control.zsh @@ -0,0 +1,60 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o 
/home/jak5763/exact/results_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --burn_in_period 1024 \ + --use_burn_in_bp_epoch + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done +} + +run_group diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 920eb203..d8eaabab 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ 
-347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island( Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str()); exit(1); } - return new_genome; + return new_genome17731515; } RNN_Genome* IslandSpeciationStrategy::generate_genome( @@ -370,6 +370,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( Log::info("Island %d: new genome is still null, regenerating\n", generation_island); new_genome = generate_genome(rng_0_1, generator, mutate, crossover); } + generated_genomes++; new_genome->set_generation_id(generated_genomes); islands[generation_island]->set_latest_generation_id(generated_genomes); @@ -577,4 +578,4 @@ void IslandSpeciationStrategy::set_erased_islands_status() { RNN_Genome* IslandSpeciationStrategy::get_seed_genome() { return seed_genome; -} \ No newline at end of file +} diff --git a/key b/key new file mode 100644 index 00000000..391a7405 --- /dev/null +++ b/key @@ -0,0 +1,7 @@ +v11 -> burn in schedule with 0.001 lr 4 mut +v12 -> burn in schedule with 0.01 lr 4 mut +v13 -> burn in schedule with 0.01 lr and period of 1024 and total genoms 8k 4 mut +v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut +v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut +v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut + diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index a84fb36f..d9fd2eac 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -185,13 +185,27 @@ RNN_Genome* get_seed_genome( ); Log::info("Finished transfering seed genome\n"); } else { - if (seed_genome == NULL) { + bool use_dnas_seed = argument_exists(arguments, "--use_dnas_seed"); + + if (!use_dnas_seed) { seed_genome = create_ff( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); + } 
else { + vector node_types = { + SIMPLE_NODE, + UGRNN_NODE, + MGU_NODE, + GRU_NODE, + DELTA_NODE, + LSTM_NODE + }; + seed_genome = create_dnas_nn( + time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, node_types, weight_rules + ); } } diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx index 6bf061b9..09ea1ae8 100644 --- a/rnn/genome_property.cxx +++ b/rnn/genome_property.cxx @@ -10,6 +10,20 @@ GenomeProperty::GenomeProperty() { max_recurrent_depth = 10; } +int32_t GenomeProperty::compute_bp_iterations(RNN_Genome* genome) { + if (use_burn_in_bp_epoch) { + int32_t n = genome->generation_id / burn_in_period; + n = n > max_burn_in_cycles ? max_burn_in_cycles : n; + + float epochs = bp_epochs_start; + for (int i = 0; i < n; i++) epochs *= burn_in_ratio; + + return (int32_t) epochs; + } else { + return bp_iterations; + } +} + void GenomeProperty::generate_genome_property_from_arguments(const vector& arguments) { get_argument(arguments, "--bp_iterations", true, bp_iterations); use_dropout = get_argument(arguments, "--dropout_probability", false, dropout_probability); @@ -17,6 +31,13 @@ void GenomeProperty::generate_genome_property_from_arguments(const vectorset_bp_iterations(bp_iterations); - if (use_dropout) { - genome->enable_dropout(dropout_probability); - } + genome->set_bp_iterations(compute_bp_iterations(genome)); + + if (use_dropout) genome->enable_dropout(dropout_probability); + genome->normalize_type = normalize_type; genome->set_parameter_names(input_parameter_names, output_parameter_names); genome->set_normalize_bounds(normalize_type, normalize_mins, normalize_maxs, normalize_avgs, normalize_std_devs); @@ -48,4 +69,4 @@ void GenomeProperty::get_time_series_parameters(TimeSeriesSets* time_series_sets uniform_int_distribution GenomeProperty::get_recurrent_depth_dist() { return uniform_int_distribution(this->min_recurrent_depth, this->max_recurrent_depth); -} \ No newline at end of file +} diff --git 
a/rnn/genome_property.hxx b/rnn/genome_property.hxx index 7d220ff6..130b26de 100644 --- a/rnn/genome_property.hxx +++ b/rnn/genome_property.hxx @@ -18,6 +18,12 @@ class GenomeProperty { int32_t min_recurrent_depth; int32_t max_recurrent_depth; + bool use_burn_in_bp_epoch; + int32_t burn_in_period = 2048; + int32_t max_burn_in_cycles = 4; + double bp_epochs_start = 0.5; + double burn_in_ratio = 2.0; + // TimeSeriesSets *time_series_sets; int32_t number_inputs; int32_t number_outputs; @@ -30,12 +36,16 @@ class GenomeProperty { map normalize_avgs; map normalize_std_devs; + int32_t compute_bp_iterations(RNN_Genome* genome); + public: GenomeProperty(); + void generate_genome_property_from_arguments(const vector& arguments); void set_genome_properties(RNN_Genome* genome); void get_time_series_parameters(TimeSeriesSets* time_series_sets); + uniform_int_distribution get_recurrent_depth_dist(); }; -#endif \ No newline at end of file +#endif From 5730472f918a1ebf459e68c88092bf2e37ea1ba1 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Fri, 2 Feb 2024 03:16:10 -0500 Subject: [PATCH 20/42] BP schedule --- dnas_control.zsh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dnas_control.zsh b/dnas_control.zsh index 5e6982c8..88a7c882 100644 --- a/dnas_control.zsh +++ b/dnas_control.zsh @@ -17,7 +17,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold + output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold mkdir -p $output_dir srun -n 36 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ @@ -41,7 +41,7 @@ run_examm() { --max_genomes $max_genomes \ --island_size 32 \ --number_islands 8 \ - --num_mutations 2 \ + --num_mutations 4 \ --burn_in_period 1024 \ --use_burn_in_bp_epoch From 70e79d442b1d4fea6c2c752739778f096a9b70ff Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 
2024 14:30:54 -0500 Subject: [PATCH 21/42] moving scripts --- scripts/dnas/analyze.py | 110 ++++++++++++++++++++++++++++ scripts/dnas/analyze.zsh | 12 +++ scripts/dnas/aviation.zsh | 37 ++++++++++ scripts/dnas/coal_dnas_control.zsh | 22 ++++++ scripts/dnas/coal_gp.zsh | 22 ++++++ scripts/dnas/control.zsh | 50 +++++++++++++ scripts/dnas/control_cluster.zsh | 50 +++++++++++++ scripts/dnas/debug.zsh | 55 ++++++++++++++ scripts/dnas/dnas.zsh | 55 ++++++++++++++ scripts/dnas/dnas_cluster.zsh | 69 +++++++++++++++++ scripts/dnas/dnas_control.zsh | 60 +++++++++++++++ scripts/dnas/dnas_r2_cluster.zsh | 67 +++++++++++++++++ scripts/dnas/experiment.zsh | 34 +++++++++ scripts/dnas/gp_control.zsh | 59 +++++++++++++++ scripts/dnas/lib.zsh | 65 ++++++++++++++++ scripts/dnas/mk_jobs.zsh | 6 ++ scripts/dnas/populate_queue.zsh | 29 ++++++++ scripts/dnas/post_training.zsh | 28 +++++++ scripts/dnas/post_training_dnas.zsh | 29 ++++++++ scripts/dnas/posttrain.zsh | 3 + scripts/dnas/run_examm.zsh | 25 +++++++ scripts/dnas/run_experiments.zsh | 4 + scripts/dnas/wind.zsh | 39 ++++++++++ 23 files changed, 930 insertions(+) create mode 100644 scripts/dnas/analyze.py create mode 100644 scripts/dnas/analyze.zsh create mode 100644 scripts/dnas/aviation.zsh create mode 100644 scripts/dnas/coal_dnas_control.zsh create mode 100644 scripts/dnas/coal_gp.zsh create mode 100644 scripts/dnas/control.zsh create mode 100644 scripts/dnas/control_cluster.zsh create mode 100755 scripts/dnas/debug.zsh create mode 100644 scripts/dnas/dnas.zsh create mode 100644 scripts/dnas/dnas_cluster.zsh create mode 100644 scripts/dnas/dnas_control.zsh create mode 100644 scripts/dnas/dnas_r2_cluster.zsh create mode 100755 scripts/dnas/experiment.zsh create mode 100644 scripts/dnas/gp_control.zsh create mode 100644 scripts/dnas/lib.zsh create mode 100644 scripts/dnas/mk_jobs.zsh create mode 100755 scripts/dnas/populate_queue.zsh create mode 100755 scripts/dnas/post_training.zsh create mode 100755 
scripts/dnas/post_training_dnas.zsh create mode 100644 scripts/dnas/posttrain.zsh create mode 100644 scripts/dnas/run_examm.zsh create mode 100755 scripts/dnas/run_experiments.zsh create mode 100644 scripts/dnas/wind.zsh diff --git a/scripts/dnas/analyze.py b/scripts/dnas/analyze.py new file mode 100644 index 00000000..78d51466 --- /dev/null +++ b/scripts/dnas/analyze.py @@ -0,0 +1,110 @@ +import pandas + +import numpy as np + +import matplotlib.pyplot as plt + +fig, subplts = plt.subplots(6, 1) + +bprange = [8, 16] +plts = {k:v for k, v in zip(bprange, subplts)} +print(plts) +base = plts[bprange[0]] + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + if k == bprange[0]: + continue + v.sharey(base) + v.sharex(base) + +def avg(files, slice_at=-1): + r = {} + for file in files: + x = [] + + for fold in range(8): + f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] + print(f"{file}/{fold} -> {len(f)}") + x.append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + r[file] = { + 'mean_nodes': nodesmean, + 'mean_edges': edgesmean, + 'mean_rec_edges':redgesmean, + 'bpi': bpimean, + 'mean_mse': msemean, + 'std_mse': msestd, + } + return r + +results = {} +for ci in [64]: + results[ci] = {} + for bpe in bprange: + results[ci][bpe] = {} + for k in [1]: + f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" + x = avg([f])[f] + results[ci][bpe][k] = x + print(x) + + print(x['mean_mse'] - x['std_mse']) + g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] + plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + +control_results = {} +for bp in [8, 16]: + key = f"initial_integration_experiments/results/control_v7/{bp}" + r = avg([key])[key] + control_results[bp] = r + print(list(r.keys())) + g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] + plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + + +for k, v in 
plts.items(): + v.set_title(f"{k} BPI") + v.legend(fontsize=12, loc="upper right") + + +plt.show() diff --git a/scripts/dnas/analyze.zsh b/scripts/dnas/analyze.zsh new file mode 100644 index 00000000..5c2876f3 --- /dev/null +++ b/scripts/dnas/analyze.zsh @@ -0,0 +1,12 @@ +#!/usr/bin/zsh +# +for crystalize_iters in 64 128 256 512; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3 4 5 6 7; do + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + tail -1 $output_dir/fitness_log.csv + done + done + done +done diff --git a/scripts/dnas/aviation.zsh b/scripts/dnas/aviation.zsh new file mode 100644 index 00000000..7059da3e --- /dev/null +++ b/scripts/dnas/aviation.zsh @@ -0,0 +1,37 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + +for output_params in "E1_CHT1" "Pitch"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/scripts/dnas/coal_dnas_control.zsh b/scripts/dnas/coal_dnas_control.zsh new file mode 100644 index 
00000000..9543cc09 --- /dev/null +++ b/scripts/dnas/coal_dnas_control.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +let np=8 +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8 16 32 64 128) +nfolds=20 +MAX_GENOMES=4000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/coal_gp.zsh b/scripts/dnas/coal_gp.zsh new file mode 100644 index 00000000..c1318793 --- /dev/null +++ b/scripts/dnas/coal_gp.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +let np=8 +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8) +nfolds=20 +MAX_GENOMES=10000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/scripts/dnas/control.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + 
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/scripts/dnas/control_cluster.zsh b/scripts/dnas/control_cluster.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/scripts/dnas/control_cluster.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset 
\ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/scripts/dnas/debug.zsh b/scripts/dnas/debug.zsh new file mode 100755 index 00000000..ce159c01 --- /dev/null +++ b/scripts/dnas/debug.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names 
${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes 8192 \ + --island_size 32 \ + --number_islands 4 \ + --stochastic \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128; do + for bp_epoch in 8; do + for k in 1; do + for fold in 0; do + run_examm + done + # wait + # for fold in 4 5 6 7; do + # run_examm & + # done + # wait + done + done +done diff --git a/scripts/dnas/dnas.zsh b/scripts/dnas/dnas.zsh new file mode 100644 index 00000000..8b525b09 --- /dev/null +++ b/scripts/dnas/dnas.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} 
\ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +bp_ge=(8 8192 16 4096 32 2048) +for crystalize_iters in 256; do + for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for k in 1; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait + done + done +done diff --git a/scripts/dnas/dnas_cluster.zsh b/scripts/dnas/dnas_cluster.zsh new file mode 100644 index 00000000..55823c0c --- /dev/null +++ b/scripts/dnas/dnas_cluster.zsh @@ -0,0 +1,69 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + 
--training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --use_dnas_seed true \ + --use_burn_in_bp_epoch \ + --burn_in_period 1024 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 512; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/dnas_control.zsh b/scripts/dnas/dnas_control.zsh new file mode 100644 index 00000000..88a7c882 --- /dev/null +++ b/scripts/dnas/dnas_control.zsh @@ -0,0 +1,60 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 
E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 4 \ + --burn_in_period 1024 \ + --use_burn_in_bp_epoch + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done +} + +run_group diff --git a/scripts/dnas/dnas_r2_cluster.zsh b/scripts/dnas/dnas_r2_cluster.zsh new file mode 100644 index 00000000..a8bce387 --- /dev/null +++ b/scripts/dnas/dnas_r2_cluster.zsh @@ -0,0 +1,67 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o 
/home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v9/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.001 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 16 \ + --number_islands 8 \ + --num_mutations 4 \ + --use_dnas_seed true \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 1000000; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/experiment.zsh 
b/scripts/dnas/experiment.zsh new file mode 100755 index 00000000..32a1db55 --- /dev/null +++ b/scripts/dnas/experiment.zsh @@ -0,0 +1,34 @@ +#!/bin/zsh +#SBATCH -n 1 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -o /home/jak5763/exact/aistats/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/aistats/slurm_out/%x.%j.err +#SBATCH --mem=10G + +spack load gcc +spack load openmpi +spack load /5aoa7oi +spack load /dd7nzzh + +for i in $(seq 0 19); do + export i=$i + export output_dir=/home/jak5763/exact/aistats/$control/maxt$maxt/crystal$crystal/bp$bp/$i + + if [ "$control" = "control" ]; then + node_types="simple UGRNN MGU GRU delta LSTM" + else + node_types="DNAS" + fi + + echo $node_types $control + + export node_types=$node_types + + # ./run_examm.zsh + + best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + export BP_ITERS=1 + export GENOME=$best_genome_file + ./post_training.zsh +done diff --git a/scripts/dnas/gp_control.zsh b/scripts/dnas/gp_control.zsh new file mode 100644 index 00000000..049e9750 --- /dev/null +++ b/scripts/dnas/gp_control.zsh @@ -0,0 +1,59 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=test_results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + 
done + done +} + +INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" +training_filenames=(datasets/2018_coal/burner_[0-9].csv) +test_filenames=(datasets/2018_coal/burner_1[0-1].csv) +OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") +run_group + + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUTS=("E1_CHT1" "Pitch") +training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) +test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) +run_group + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" +OUTPUTS=("Cm_avg" "P_avg") +training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) +test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) +run_group diff --git a/scripts/dnas/lib.zsh b/scripts/dnas/lib.zsh new file mode 100644 index 00000000..49ebc581 --- /dev/null +++ b/scripts/dnas/lib.zsh @@ -0,0 +1,65 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=$output_dir_prefix/bp_$bp_epoch/output_$output_params/$fold + mkdir -p $output_dir + echo srun -n $np Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + 
--output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in $bp_epoch_set; do + for fold in $(seq 1 $nfolds); do + run_examm + done + done + done +} + +coal() { + INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" + training_filenames=(datasets/2018_coal/burner_[0-9].csv) + test_filenames=(datasets/2018_coal/burner_1[0-1].csv) + OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") + run_group +} + +aviation() { + INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + OUTPUTS=("E1_CHT1" "Pitch") + training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) + test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) + run_group +} + +wind() { + INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + OUTPUTS=("Cm_avg" "P_avg") + training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) + test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) + run_group +} + diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh new file mode 100644 index 00000000..38a5526c --- /dev/null +++ b/scripts/dnas/mk_jobs.zsh @@ -0,0 +1,6 @@ +bp_ge=(8 8192 16 4096 32 2048 64 1024) +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh +done diff --git a/scripts/dnas/populate_queue.zsh b/scripts/dnas/populate_queue.zsh new file mode 100755 index 00000000..43a09dbb --- /dev/null +++ b/scripts/dnas/populate_queue.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +export INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +export OUTPUT_PARAMETERS='E1_EGT1' + +export offset=1 +export k=1 + +push_job() { + export maxt=$maxt + export crystal=$crystal + export bp=$bp + export control=$control + sbatch -J $control.maxt$maxt.cr$crystal.bp$bp ./experiment.zsh + +} + +export control="exp" +for maxt in 1.66 1.33 1.0; do + for crystal in 64 128 256; do + for bp in 4 8 16; do + push_job + done + done +done + +export control="control" +for bp in 4 8 16; do + push_job +done diff --git a/scripts/dnas/post_training.zsh b/scripts/dnas/post_training.zsh new file mode 100755 index 00000000..38c2d39d --- /dev/null +++ b/scripts/dnas/post_training.zsh @@ -0,0 +1,28 @@ +#!/usr/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names 
${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $output_dir \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --crystalize_iters $crystal \ + --dnas_k $k + +} + +post_training diff --git a/scripts/dnas/post_training_dnas.zsh b/scripts/dnas/post_training_dnas.zsh new file mode 100755 index 00000000..1c226178 --- /dev/null +++ b/scripts/dnas/post_training_dnas.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $OUTPUT_DIRECTORY \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 1000 \ + --validation_sequence_length 100 \ + --crystalize_iters $CRYSTALIZE_ITERS \ + --dnas_k $k + + tail -1 $OUTPUT_DIRECTORY/post_training.csv +} + +post_training diff --git a/scripts/dnas/posttrain.zsh b/scripts/dnas/posttrain.zsh new file mode 100644 index 00000000..cc54a2eb --- /dev/null +++ b/scripts/dnas/posttrain.zsh @@ -0,0 +1,3 @@ +#!/bin/zsh + + diff --git a/scripts/dnas/run_examm.zsh b/scripts/dnas/run_examm.zsh new file mode 100644 index 00000000..77d2893f --- /dev/null +++ b/scripts/dnas/run_examm.zsh @@ -0,0 +1,25 @@ +#!/bin/zsh + +output_dir=results/v0/$bp_epoch/$fold +mkdir -p $output_dir + +mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames 
datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 4000 \ + --island_size 32 \ + --number_islands 4 + +touch $output_dir/completed + diff --git a/scripts/dnas/run_experiments.zsh b/scripts/dnas/run_experiments.zsh new file mode 100755 index 00000000..7dd8e956 --- /dev/null +++ b/scripts/dnas/run_experiments.zsh @@ -0,0 +1,4 @@ +#!/bin/zsh + +initial_integration_experiments/control.zsh +initial_integration_experiments/dnas.zsh diff --git a/scripts/dnas/wind.zsh b/scripts/dnas/wind.zsh new file mode 100644 index 00000000..7e68f482 --- /dev/null +++ b/scripts/dnas/wind.zsh @@ -0,0 +1,39 @@ +#!/bin/zsh + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ + --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + 
--file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + + +for output_params in "Cm_avg" "P_avg"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done From 4c3ebfc64a020a4ec0ae343a1f328dfc14715c64 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:35:20 -0500 Subject: [PATCH 22/42] removed old fileS --- initial_integration_experiments/analyze.py | 110 ------------------ initial_integration_experiments/analyze.zsh | 12 -- initial_integration_experiments/aviation.zsh | 37 ------ initial_integration_experiments/control.zsh | 50 -------- initial_integration_experiments/debug.zsh | 55 --------- initial_integration_experiments/dnas.zsh | 55 --------- .../gp_control.zsh | 59 ---------- .../post_training_dnas.zsh | 29 ----- initial_integration_experiments/posttrain.zsh | 3 - initial_integration_experiments/run_examm.zsh | 25 ---- .../run_experiments.zsh | 4 - initial_integration_experiments/wind.zsh | 39 ------- 12 files changed, 478 deletions(-) delete mode 100644 initial_integration_experiments/analyze.py delete mode 100644 initial_integration_experiments/analyze.zsh delete mode 100644 initial_integration_experiments/aviation.zsh delete mode 100644 initial_integration_experiments/control.zsh delete mode 100755 initial_integration_experiments/debug.zsh delete mode 100644 initial_integration_experiments/dnas.zsh delete mode 100644 initial_integration_experiments/gp_control.zsh delete mode 100755 initial_integration_experiments/post_training_dnas.zsh delete mode 100644 initial_integration_experiments/posttrain.zsh delete mode 100644 initial_integration_experiments/run_examm.zsh delete mode 100755 initial_integration_experiments/run_experiments.zsh delete mode 100644 initial_integration_experiments/wind.zsh diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py deleted file mode 
100644 index 78d51466..00000000 --- a/initial_integration_experiments/analyze.py +++ /dev/null @@ -1,110 +0,0 @@ -import pandas - -import numpy as np - -import matplotlib.pyplot as plt - -fig, subplts = plt.subplots(6, 1) - -bprange = [8, 16] -plts = {k:v for k, v in zip(bprange, subplts)} -print(plts) -base = plts[bprange[0]] - -for k, v in plts.items(): - v.set_title(f"{k} BPI") - if k == bprange[0]: - continue - v.sharey(base) - v.sharex(base) - -def avg(files, slice_at=-1): - r = {} - for file in files: - x = [] - - for fold in range(8): - f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] - print(f"{file}/{fold} -> {len(f)}") - x.append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - - - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) - - r[file] = { - 'mean_nodes': nodesmean, - 'mean_edges': edgesmean, - 'mean_rec_edges':redgesmean, - 'bpi': bpimean, - 'mean_mse': msemean, - 'std_mse': msestd, - } - return r - -results = {} -for ci in [64]: - results[ci] = {} - for bpe in bprange: - results[ci][bpe] = {} - for k in [1]: - f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" - x = avg([f])[f] - results[ci][bpe][k] = x - print(x) - - print(x['mean_mse'] - x['std_mse']) - g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] - plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - -control_results = {} -for bp in [8, 16]: - key = f"initial_integration_experiments/results/control_v7/{bp}" - r = avg([key])[key] - control_results[bp] = r - print(list(r.keys())) - g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] - plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - - -for k, v in 
plts.items(): - v.set_title(f"{k} BPI") - v.legend(fontsize=12, loc="upper right") - - -plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh deleted file mode 100644 index 5c2876f3..00000000 --- a/initial_integration_experiments/analyze.zsh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/zsh -# -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do - for k in 1; do - for fold in 0 1 2 3 4 5 6 7; do - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold - tail -1 $output_dir/fitness_log.csv - done - done - done -done diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh deleted file mode 100644 index 7059da3e..00000000 --- a/initial_integration_experiments/aviation.zsh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - -for output_params in "E1_CHT1" "Pitch"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done 
-done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh deleted file mode 100644 index a848302b..00000000 --- a/initial_integration_experiments/control.zsh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh -} - -bp_ge=(8 8192 16 4096 32 2048) - -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait -done diff --git 
a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh deleted file mode 100755 index ce159c01..00000000 --- a/initial_integration_experiments/debug.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes 8192 \ - --island_size 32 \ - --number_islands 4 \ - --stochastic \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -for crystalize_iters in 128; do - for bp_epoch in 8; do - for k in 1; do - for fold in 0; do - run_examm - done - # wait - # for fold in 4 5 6 7; do - # run_examm & - # done - # wait - done - done -done diff --git 
a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh deleted file mode 100644 index 8b525b09..00000000 --- a/initial_integration_experiments/dnas.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -bp_ge=(8 8192 16 4096 32 2048) -for crystalize_iters in 256; do - for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done 
- wait - done - done -done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh deleted file mode 100644 index 049e9750..00000000 --- a/initial_integration_experiments/gp_control.zsh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/zsh - -offset=1 -MAX_GENOMES=10 -N_ISLANDS=4 -ISLAND_SIZE=32 - -run_examm() { - output_dir=test_results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames ${=training_filenames} \ - --test_filenames ${=test_filenames} \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names $output_params \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes $MAX_GENOMES \ - --island_size $ISLAND_SIZE \ - --number_islands $N_ISLANDS - - touch $output_dir/completed -} - -run_group() { - for output_params in $OUTPUTS; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done - done -} - -INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" -training_filenames=(datasets/2018_coal/burner_[0-9].csv) -test_filenames=(datasets/2018_coal/burner_1[0-1].csv) -OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") -run_group - - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUTS=("E1_CHT1" "Pitch") -training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) 
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) -run_group - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" -OUTPUTS=("Cm_avg" "P_avg") -training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) -test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) -run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh deleted file mode 100755 index 1c226178..00000000 --- a/initial_integration_experiments/post_training_dnas.zsh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/zsh -offset=1 - -post_training() { - - echo "genome = $GENOME" - Release/rnn_examples/train_rnn \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $BP_ITERS \ - --stochastic \ - --normalize min_max \ - --genome_file $GENOME \ - --output_directory $OUTPUT_DIRECTORY \ - --log_filename post_training.csv \ - --learning_rate 0.01 \ - --weight_update adagrad \ - --train_sequence_length 1000 \ - --validation_sequence_length 100 \ - --crystalize_iters $CRYSTALIZE_ITERS \ - --dnas_k $k - - tail -1 $OUTPUT_DIRECTORY/post_training.csv -} - -post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh deleted file mode 100644 index cc54a2eb..00000000 --- a/initial_integration_experiments/posttrain.zsh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/zsh - - diff --git 
a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh deleted file mode 100644 index 77d2893f..00000000 --- a/initial_integration_experiments/run_examm.zsh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/zsh - -output_dir=results/v0/$bp_epoch/$fold -mkdir -p $output_dir - -mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 4000 \ - --island_size 32 \ - --number_islands 4 - -touch $output_dir/completed - diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh deleted file mode 100755 index 7dd8e956..00000000 --- a/initial_integration_experiments/run_experiments.zsh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh - -initial_integration_experiments/control.zsh -initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh deleted file mode 100644 index 7e68f482..00000000 --- a/initial_integration_experiments/wind.zsh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" - - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ - --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - - -for output_params in "Cm_avg" "P_avg"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done -done From c0b9e41e02ecdcaa98a501032f466d73ddedd42a Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:36:23 -0500 Subject: [PATCH 23/42] removed old fileS --- initial_integration_experiments/analyze.py | 110 ------------------ initial_integration_experiments/analyze.zsh | 12 -- initial_integration_experiments/aviation.zsh | 37 ------ initial_integration_experiments/control.zsh | 50 -------- initial_integration_experiments/debug.zsh | 55 --------- initial_integration_experiments/dnas.zsh | 55 --------- .../gp_control.zsh | 59 ---------- .../post_training_dnas.zsh | 29 ----- initial_integration_experiments/posttrain.zsh | 3 - initial_integration_experiments/run_examm.zsh | 25 ---- .../run_experiments.zsh | 4 - initial_integration_experiments/wind.zsh | 39 ------- 12 files changed, 478 deletions(-) delete mode 100644 initial_integration_experiments/analyze.py delete mode 100644 initial_integration_experiments/analyze.zsh delete mode 100644 initial_integration_experiments/aviation.zsh delete mode 100644 initial_integration_experiments/control.zsh delete mode 100755 
initial_integration_experiments/debug.zsh delete mode 100644 initial_integration_experiments/dnas.zsh delete mode 100644 initial_integration_experiments/gp_control.zsh delete mode 100755 initial_integration_experiments/post_training_dnas.zsh delete mode 100644 initial_integration_experiments/posttrain.zsh delete mode 100644 initial_integration_experiments/run_examm.zsh delete mode 100755 initial_integration_experiments/run_experiments.zsh delete mode 100644 initial_integration_experiments/wind.zsh diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py deleted file mode 100644 index 78d51466..00000000 --- a/initial_integration_experiments/analyze.py +++ /dev/null @@ -1,110 +0,0 @@ -import pandas - -import numpy as np - -import matplotlib.pyplot as plt - -fig, subplts = plt.subplots(6, 1) - -bprange = [8, 16] -plts = {k:v for k, v in zip(bprange, subplts)} -print(plts) -base = plts[bprange[0]] - -for k, v in plts.items(): - v.set_title(f"{k} BPI") - if k == bprange[0]: - continue - v.sharey(base) - v.sharex(base) - -def avg(files, slice_at=-1): - r = {} - for file in files: - x = [] - - for fold in range(8): - f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] - print(f"{file}/{fold} -> {len(f)}") - x.append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - - - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) - - r[file] = { - 'mean_nodes': nodesmean, - 'mean_edges': edgesmean, - 'mean_rec_edges':redgesmean, - 'bpi': bpimean, - 'mean_mse': msemean, - 'std_mse': msestd, - } - return r - -results = {} -for ci in [64]: - results[ci] = {} - for bpe in bprange: - results[ci][bpe] = {} - for k in [1]: - f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" - x = avg([f])[f] - results[ci][bpe][k] = x - print(x) - - print(x['mean_mse'] - x['std_mse']) - g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] - plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - -control_results = {} -for bp in [8, 16]: - key = f"initial_integration_experiments/results/control_v7/{bp}" - r = avg([key])[key] - control_results[bp] = r - print(list(r.keys())) - g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] - plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - - -for k, v in 
plts.items(): - v.set_title(f"{k} BPI") - v.legend(fontsize=12, loc="upper right") - - -plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh deleted file mode 100644 index 5c2876f3..00000000 --- a/initial_integration_experiments/analyze.zsh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/zsh -# -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do - for k in 1; do - for fold in 0 1 2 3 4 5 6 7; do - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold - tail -1 $output_dir/fitness_log.csv - done - done - done -done diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh deleted file mode 100644 index 7059da3e..00000000 --- a/initial_integration_experiments/aviation.zsh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - -for output_params in "E1_CHT1" "Pitch"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done 
-done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh deleted file mode 100644 index a848302b..00000000 --- a/initial_integration_experiments/control.zsh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh -} - -bp_ge=(8 8192 16 4096 32 2048) - -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait -done diff --git 
a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh deleted file mode 100755 index ce159c01..00000000 --- a/initial_integration_experiments/debug.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes 8192 \ - --island_size 32 \ - --number_islands 4 \ - --stochastic \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -for crystalize_iters in 128; do - for bp_epoch in 8; do - for k in 1; do - for fold in 0; do - run_examm - done - # wait - # for fold in 4 5 6 7; do - # run_examm & - # done - # wait - done - done -done diff --git 
a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh deleted file mode 100644 index 8b525b09..00000000 --- a/initial_integration_experiments/dnas.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -bp_ge=(8 8192 16 4096 32 2048) -for crystalize_iters in 256; do - for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done 
- wait - done - done -done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh deleted file mode 100644 index 049e9750..00000000 --- a/initial_integration_experiments/gp_control.zsh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/zsh - -offset=1 -MAX_GENOMES=10 -N_ISLANDS=4 -ISLAND_SIZE=32 - -run_examm() { - output_dir=test_results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames ${=training_filenames} \ - --test_filenames ${=test_filenames} \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names $output_params \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes $MAX_GENOMES \ - --island_size $ISLAND_SIZE \ - --number_islands $N_ISLANDS - - touch $output_dir/completed -} - -run_group() { - for output_params in $OUTPUTS; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done - done -} - -INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" -training_filenames=(datasets/2018_coal/burner_[0-9].csv) -test_filenames=(datasets/2018_coal/burner_1[0-1].csv) -OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") -run_group - - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUTS=("E1_CHT1" "Pitch") -training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) 
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) -run_group - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" -OUTPUTS=("Cm_avg" "P_avg") -training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) -test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) -run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh deleted file mode 100755 index 1c226178..00000000 --- a/initial_integration_experiments/post_training_dnas.zsh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/zsh -offset=1 - -post_training() { - - echo "genome = $GENOME" - Release/rnn_examples/train_rnn \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $BP_ITERS \ - --stochastic \ - --normalize min_max \ - --genome_file $GENOME \ - --output_directory $OUTPUT_DIRECTORY \ - --log_filename post_training.csv \ - --learning_rate 0.01 \ - --weight_update adagrad \ - --train_sequence_length 1000 \ - --validation_sequence_length 100 \ - --crystalize_iters $CRYSTALIZE_ITERS \ - --dnas_k $k - - tail -1 $OUTPUT_DIRECTORY/post_training.csv -} - -post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh deleted file mode 100644 index cc54a2eb..00000000 --- a/initial_integration_experiments/posttrain.zsh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/zsh - - diff --git 
a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh deleted file mode 100644 index 77d2893f..00000000 --- a/initial_integration_experiments/run_examm.zsh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/zsh - -output_dir=results/v0/$bp_epoch/$fold -mkdir -p $output_dir - -mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 4000 \ - --island_size 32 \ - --number_islands 4 - -touch $output_dir/completed - diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh deleted file mode 100755 index 7dd8e956..00000000 --- a/initial_integration_experiments/run_experiments.zsh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh - -initial_integration_experiments/control.zsh -initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh deleted file mode 100644 index 7e68f482..00000000 --- a/initial_integration_experiments/wind.zsh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" - - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ - --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - - -for output_params in "Cm_avg" "P_avg"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done -done From 79df69ab941e0d6783d20ba31d1929624d9601b7 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:37:52 -0500 Subject: [PATCH 24/42] Fixed bug caused by accidental paste --- examm/island_speciation_strategy.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index d8eaabab..b0a7b5e0 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island( Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str()); exit(1); } - return new_genome17731515; + return new_genome; } RNN_Genome* IslandSpeciationStrategy::generate_genome( From 68752460cb9a79e554212b3f0c9a97305defdfc4 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 15:59:40 -0500 Subject: [PATCH 25/42] Synchronous EXAMM flag added --sychronous --- mpi/examm_mpi.cxx | 75 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git 
a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index 227c3a85..e350be0a 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -114,10 +114,62 @@ void receive_terminate_message(int32_t source) { MPI_Recv(terminate_message, 1, MPI_INT, source, TERMINATE_TAG, MPI_COMM_WORLD, &status); } -void master(int32_t max_rank) { - // the "main" id will have already been set by the main function so we do not need to re-set it here - Log::debug("MAX int32_t: %d\n", numeric_limits::max()); +void master_sync(int32_t max_rank) { + max_rank -= 1; + int32_t generation = 0; + while (true) { + + // Wait for N work requests + int32_t nreqs = 0; + while (nreqs < max_rank) { + MPI_Status status; + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + + int32_t source = status.MPI_SOURCE; + int32_t tag = status.MPI_TAG; + // Log::info("probe returned message from: %d with tag: %d\n", source, tag); + + if (tag == WORK_REQUEST_TAG) { + receive_work_request(source); + nreqs++; + } else if (tag == GENOME_LENGTH_TAG) { + Log::debug("received genome from: %d\n", source); + RNN_Genome* genome = receive_genome_from(source); + + examm->insert_genome(genome); + + // delete the genome as it won't be used again, a copy was inserted + delete genome; + } else { + Log::fatal("ERROR: received message from %d with unknown tag: %d", source, tag); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + vector genomes(max_rank); + for (int32_t i = 1; i <= max_rank; i++) { + RNN_Genome* genome = examm->generate_genome(); + if (genome == NULL) + break; + genomes[i - 1] = genome; + } + + if (genomes.size() != max_rank) { + break; + } + + for (int i = 1; i <= max_rank; i++) { + send_genome_to(i, genomes[i - 1]); + delete genomes[i - 1]; + } + } + + for (int i = 1; i <= max_rank; i++) { + send_terminate_message(i); + } +} + +void master(int32_t max_rank) { int32_t terminates_sent = 0; while (true) { @@ -134,12 +186,7 @@ void master(int32_t max_rank) { if (tag == WORK_REQUEST_TAG) { receive_work_request(source); - // if 
(transfer_learning_version.compare("v3") == 0 || transfer_learning_version.compare("v1+v3") == 0) { - // seed_stirs = 3; - // } - examm_mutex.lock(); RNN_Genome* genome = examm->generate_genome(); - examm_mutex.unlock(); if (genome == NULL) { // search was completed if it returns NULL for an individual // send terminate message @@ -167,9 +214,7 @@ void master(int32_t max_rank) { Log::debug("received genome from: %d\n", source); RNN_Genome* genome = receive_genome_from(source); - examm_mutex.lock(); examm->insert_genome(genome); - examm_mutex.unlock(); // delete the genome as it won't be used again, a copy was inserted delete genome; @@ -264,12 +309,20 @@ int main(int argc, char** argv) { RNN_Genome* seed_genome = get_seed_genome(arguments, time_series_sets, weight_rules); + bool synchronous = argument_exists(arguments, "--synchronous"); + Log::warning("synchronous? %d\n", synchronous); + Log::clear_rank_restriction(); if (rank == 0) { write_time_series_to_file(arguments, time_series_sets); examm = generate_examm_from_arguments(arguments, time_series_sets, weight_rules, seed_genome); - master(max_rank); + + if (synchronous) { + master_sync(max_rank); + } else { + master(max_rank); + } } else { worker(rank); } From a6006064aaeddb6160bd665d5208a4e135b2f5af Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Fri, 23 Feb 2024 21:47:51 -0500 Subject: [PATCH 26/42] Adding additional log information --- examm/examm.cxx | 26 ++++++++++++++++++------- examm/examm.hxx | 4 +++- examm/island_speciation_strategy.cxx | 29 +++++++++++++++++++++++----- examm/island_speciation_strategy.hxx | 16 ++++++++++----- examm/neat_speciation_strategy.cxx | 10 +++++----- examm/neat_speciation_strategy.hxx | 10 +++++----- examm/speciation_strategy.hxx | 12 +++++++----- scripts/dnas/control.zsh | 21 +++++++++----------- 8 files changed, 83 insertions(+), 45 deletions(-) diff --git a/examm/examm.cxx b/examm/examm.cxx index 1e1c2314..a90034f2 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ 
-95,8 +95,8 @@ void EXAMM::generate_log() { Log::info("Generating fitness log\n"); mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); - (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled " - "Edges, Enabled Rec. Edges"; + (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" + "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -151,7 +151,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position } } -void EXAMM::update_log() { +void EXAMM::update_log(RNN_Genome *genome) { if (log_file != NULL) { // make sure the log file is still good if (!log_file->good()) { @@ -193,8 +193,12 @@ void EXAMM::update_log() { (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," - << best_genome->get_enabled_recurrent_edge_count() - << speciation_strategy->get_strategy_information_values() << endl; + << best_genome->get_enabled_recurrent_edge_count() << "," + << genome->best_validation_mse << "," + << pre_insert_best_mse << "," + << (int32_t) (last_genome_inserted ? 
1 : 0) << "," + << genome->get_number_weights() + << speciation_strategy->get_strategy_information_values(genome) << endl; Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } } @@ -243,17 +247,25 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // updates EXAMM's mapping of which genomes have been generated by what genome->update_generation_map(generated_from_map); + pre_insert_best_mse = this->get_best_fitness(); + int32_t insert_position = speciation_strategy->insert_genome(genome); + // write this genome to disk if it was a new best found genome if (insert_position == 0) { // genome->normalize_type = normalize_type; genome->write_graphviz(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".gv"); genome->write_to_file(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".bin"); } + + last_genome_inserted = insert_position >= 0; + speciation_strategy->print(); + update_op_log_statistics(genome, insert_position); - update_log(); - return insert_position >= 0; + update_log(genome); + + return last_genome_inserted; } RNN_Genome* EXAMM::generate_genome() { diff --git a/examm/examm.hxx b/examm/examm.hxx index c0c0ee03..a95d8af4 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -73,6 +73,8 @@ class EXAMM { string output_directory; ofstream* log_file; ofstream* op_log_file; + double pre_insert_best_mse = 1000000; + bool last_genome_inserted = false; std::chrono::time_point startClock; @@ -87,7 +89,7 @@ class EXAMM { ~EXAMM(); void print(); - void update_log(); + void update_log(RNN_Genome *genome); void set_possible_node_types(vector possible_node_type_strings); diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index b0a7b5e0..2e7a91be 100644 --- a/examm/island_speciation_strategy.cxx +++ 
b/examm/island_speciation_strategy.cxx @@ -100,12 +100,12 @@ int32_t IslandSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* IslandSpeciationStrategy::get_best_genome() { +RNN_Genome* IslandSpeciationStrategy::get_best_genome() const { // the global_best_genome is updated every time a genome is inserted return global_best_genome; } -RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { +RNN_Genome* IslandSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_island = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -126,7 +126,7 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { } } -double IslandSpeciationStrategy::get_best_fitness() { +double IslandSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -135,7 +135,7 @@ double IslandSpeciationStrategy::get_best_fitness() { } } -double IslandSpeciationStrategy::get_worst_fitness() { +double IslandSpeciationStrategy::get_worst_fitness() const { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -376,6 +376,9 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( islands[generation_island]->set_latest_generation_id(generated_genomes); new_genome->set_group_id(generation_island); + pair perf = {this->get_best_fitness(), this->get_worst_fitness()}; + genome_performance.emplace(new_genome->generation_id, perf); + if (current_island->is_initializing()) { RNN_Genome* genome_copy = new_genome->copy(); Log::debug("inserting genome copy!\n"); @@ -386,6 +389,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( generation_island = 0; } + return new_genome; } @@ -456,6 +460,7 @@ void IslandSpeciationStrategy::print(string indent) const { */ string IslandSpeciationStrategy::get_strategy_information_headers() const { string info_header = ""; + 
info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"); for (int32_t i = 0; i < (int32_t) islands.size(); i++) { info_header.append(","); info_header.append("Island_"); @@ -472,8 +477,22 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string IslandSpeciationStrategy::get_strategy_information_values() const { +string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const { string info_value = ""; + + auto &[min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); + info_value.append(","); + info_value.append(to_string(min_mse_pre)); + info_value.append(","); + info_value.append(to_string(max_mse_pre)); + + float min_mse_post = this->get_best_fitness(); + float max_mse_post = this->get_worst_fitness(); + info_value.append(","); + info_value.append(to_string(min_mse_post)); + info_value.append(","); + info_value.append(to_string(max_mse_post)); + for (int32_t i = 0; i < (int32_t) islands.size(); i++) { double best_fitness = islands[i]->get_best_fitness(); double worst_fitness = islands[i]->get_worst_fitness(); diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index 0eed891c..683e2a39 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -62,6 +62,12 @@ class IslandSpeciationStrategy : public SpeciationStrategy { vector islands; RNN_Genome* global_best_genome; + ofstream *island_log_file; + + // Maps genome number to a pair representing (worst island mse, best island mse) at + // the time of genome generation. 
+ unordered_map> genome_performance; + // Transfer learning class properties: bool transfer_learning; @@ -114,25 +120,25 @@ class IslandSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * \return true if all the islands are full @@ -207,7 +213,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome *genome) const; /** * Island repopulation through two random parents from two seperate islands, diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx index 8d5f18ac..e71470e1 100644 --- a/examm/neat_speciation_strategy.cxx +++ b/examm/neat_speciation_strategy.cxx @@ -74,7 +74,7 @@ int32_t NeatSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* NeatSpeciationStrategy::get_best_genome() { +RNN_Genome* NeatSpeciationStrategy::get_best_genome() const { int32_t best_genome_species = -1; double best_fitness = EXAMM_MAX_DOUBLE; @@ -95,7 +95,7 @@ RNN_Genome* NeatSpeciationStrategy::get_best_genome() { } } -RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { +RNN_Genome* 
NeatSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_species = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -116,7 +116,7 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { } } -double NeatSpeciationStrategy::get_best_fitness() { +double NeatSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -125,7 +125,7 @@ double NeatSpeciationStrategy::get_best_fitness() { } } -double NeatSpeciationStrategy::get_worst_fitness() { +double NeatSpeciationStrategy::get_worst_fitness() const { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -399,7 +399,7 @@ string NeatSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string NeatSpeciationStrategy::get_strategy_information_values() const { +string NeatSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const { string info_value = ""; for (int32_t i = 0; i < (int32_t) Neat_Species.size(); i++) { double best_fitness = Neat_Species[i]->get_best_fitness(); diff --git a/examm/neat_speciation_strategy.hxx b/examm/neat_speciation_strategy.hxx index 3416de03..645aabdd 100644 --- a/examm/neat_speciation_strategy.hxx +++ b/examm/neat_speciation_strategy.hxx @@ -64,25 +64,25 @@ class NeatSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all 
the islands * \return the worst genome of all islands */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * Inserts a copy of the genome into this speciation strategy. @@ -130,7 +130,7 @@ class NeatSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome *genome) const; RNN_Genome* get_global_best_genome(); diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx index 9d790ab0..2d66f990 100644 --- a/examm/speciation_strategy.hxx +++ b/examm/speciation_strategy.hxx @@ -9,6 +9,8 @@ using std::string; using std::minstd_rand0; using std::uniform_real_distribution; +#include "rnn/rnn_genome.hxx" + class SpeciationStrategy { public: /** @@ -25,25 +27,25 @@ class SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - virtual double get_best_fitness() = 0; + virtual double get_best_fitness() const = 0; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - virtual double get_worst_fitness() = 0; + virtual double get_worst_fitness() const = 0; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - virtual RNN_Genome* get_best_genome() = 0; + virtual RNN_Genome* get_best_genome() const = 0; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands */ - virtual RNN_Genome* get_worst_genome() = 0; + virtual RNN_Genome* get_worst_genome() const = 0; /** * Inserts a copy of the genome into this speciation strategy. 
@@ -86,7 +88,7 @@ class SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - virtual string get_strategy_information_values() const = 0; + virtual string get_strategy_information_values(RNN_Genome *genome) const = 0; virtual RNN_Genome* get_global_best_genome() = 0; virtual void initialize_population(function& mutate) = 0; diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh index a848302b..f3532525 100644 --- a/scripts/dnas/control.zsh +++ b/scripts/dnas/control.zsh @@ -6,9 +6,9 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + output_dir=results/control_v8/$bp_epoch/$fold mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ + mpirun -np 14 build/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ @@ -25,26 +25,23 @@ run_examm() { --output_directory $output_dir \ --log_filename fitness.csv \ --learning_rate 0.01 \ - --std_message_level WARNING \ + --std_message_level INFO \ --file_message_level WARNING \ --crystalize_iters $crystalize_iters \ --max_genomes $max_genomes \ --island_size 32 \ - --number_islands 4 + --number_islands 4 \ + --synchronous # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh } -bp_ge=(8 8192 16 4096 32 2048) +# bp_ge=(8 8192 16 4096 32 2048) +bp_ge=(8 8192) for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & + for fold in $(seq 0 1); do + run_examm done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait done From 72ce5d4a42e6435cff23e5c0984527235f786d7f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Thu, 29 Feb 2024 15:20:33 
-0500 Subject: [PATCH 27/42] Additional log data --- examm/examm.cxx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examm/examm.cxx b/examm/examm.cxx index a90034f2..d0c784b9 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -96,7 +96,7 @@ void EXAMM::generate_log() { mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" - "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters"; + "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -197,7 +197,8 @@ void EXAMM::update_log(RNN_Genome *genome) { << genome->best_validation_mse << "," << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << "," - << genome->get_number_weights() + << genome->get_number_weights() << "," + << genome->get_generation_id() << speciation_strategy->get_strategy_information_values(genome) << endl; Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } From 5949736f7bc61d174a7b0465dd104769bcd73781 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 6 Mar 2024 15:17:26 -0500 Subject: [PATCH 28/42] Cluster script updates --- scripts/dnas/examm_bias_exp.zsh | 58 ++++++++++++++++++++++++++++++ scripts/dnas/examm_synchronous.zsh | 55 ++++++++++++++++++++++++++++ scripts/dnas/mk_jobs.zsh | 12 ++++--- 3 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 scripts/dnas/examm_bias_exp.zsh create mode 100644 scripts/dnas/examm_synchronous.zsh diff --git a/scripts/dnas/examm_bias_exp.zsh b/scripts/dnas/examm_bias_exp.zsh new file mode 100644 
index 00000000..52816f00 --- /dev/null +++ b/scripts/dnas/examm_bias_exp.zsh @@ -0,0 +1,58 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=18 +#SBATCH --exclude theocho +#SBATCH --time=48:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_bias_ablation +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +cd /home/jak5763/exact + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/$synchronous/$scramble_weights/$max_genomes/$fold + mkdir -p $output_dir + srun -n 18 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --$synchronous \ + --$scramble_weights +} + +run_group() { + for fold in $(seq 0 19); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/examm_synchronous.zsh b/scripts/dnas/examm_synchronous.zsh new file mode 100644 index 00000000..1d970272 --- /dev/null +++ 
b/scripts/dnas/examm_synchronous.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/synchronous/$max_genomes/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --synchronous +} + +run_group() { + for fold in $(seq 0 9); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh index 38a5526c..b996883e 100644 --- a/scripts/dnas/mk_jobs.zsh +++ b/scripts/dnas/mk_jobs.zsh @@ -1,6 +1,8 @@ -bp_ge=(8 8192 16 4096 32 2048 64 1024) -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - 
bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh - bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh - bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh +bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200) +for bp_epoch in $bp; do + for synchronous in "async" "synchronous"; do + for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do + bp_epoch=$bp_epoch synchronous="$synchronous" scramble_weights="$scramble_weights" sbatch examm_bias_exp.zsh + done + done done From 95277159201b2e0f7b5469223d76794820cd8ccd Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 13 Mar 2024 13:56:00 -0400 Subject: [PATCH 29/42] Remove scripts in root directory --- dnas_cluster.zsh | 69 ------------------------------------------------ dnas_control.zsh | 60 ----------------------------------------- 2 files changed, 129 deletions(-) delete mode 100644 dnas_cluster.zsh delete mode 100644 dnas_control.zsh diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh deleted file mode 100644 index 55823c0c..00000000 --- a/dnas_cluster.zsh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - 
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 2 \ - --use_dnas_seed true \ - --use_burn_in_bp_epoch \ - --burn_in_period 1024 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for crystalize_iters in 512; do - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done - done -} - -CELL_TYPE='dnas' -# bp_ge=(8 8192 16 4096 32 2048 64 1024) -# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do -run_group -# done diff --git a/dnas_control.zsh b/dnas_control.zsh deleted file mode 100644 index 88a7c882..00000000 --- a/dnas_control.zsh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS 
LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 4 \ - --burn_in_period 1024 \ - --use_burn_in_bp_epoch - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done -} - -run_group From fbb32b2aa3b2adb282e87022b2394169971eb159 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 13 Mar 2024 13:56:17 -0400 Subject: [PATCH 30/42] Remove junk file --- key | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 key diff --git a/key b/key deleted file mode 100644 index 391a7405..00000000 --- a/key +++ /dev/null @@ -1,7 +0,0 @@ -v11 -> burn in schedule with 0.001 lr 4 mut -v12 -> burn in schedule with 0.01 lr 4 mut -v13 -> burn in schedule with 0.01 lr and period of 1024 and total genoms 8k 4 mut 
-v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut -v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut -v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut - From 217e5bedae3fcce907d10ae823d6caff367241d2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 18 Mar 2024 13:58:35 -0400 Subject: [PATCH 31/42] First stage of removing old-style architectural hashes --- examm/island.cxx | 171 +++------------------------ examm/island.hxx | 7 +- examm/island_speciation_strategy.cxx | 1 - rnn/rnn_genome.cxx | 15 ++- rnn/rnn_genome.hxx | 13 +- 5 files changed, 49 insertions(+), 158 deletions(-) diff --git a/examm/island.cxx b/examm/island.cxx index 6d8b0b5f..6a26bbcd 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -160,104 +160,26 @@ int32_t Island::insert_genome(RNN_Genome* genome) { // check and see if the structural hash of the genome is in the // set of hashes for this population Log::info("getting structural hash\n"); - string structural_hash = genome->get_structural_hash(); - if (structure_map.count(structural_hash) > 0) { - vector& potential_matches = structure_map.find(structural_hash)->second; - Log::debug( - "potential duplicate for hash '%s', had %d potential matches.\n", structural_hash.c_str(), - potential_matches.size() - ); + auto duplicate_it = structure_set.find(genome); - for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) { - Log::debug( - "on potential match %d of %d\n", potential_match - potential_matches.begin(), potential_matches.size() - ); - if ((*potential_match)->equals(genome)) { - if ((*potential_match)->get_fitness() > new_fitness) { - Log::debug( - "REPLACING DUPLICATE GENOME, fitness of genome in search: %s, new fitness: %s\n", - parse_fitness((*potential_match)->get_fitness()).c_str(), - parse_fitness(genome->get_fitness()).c_str() - ); - // we have an exact match for this genome in the island and its 
fitness is worse - // than the genome we're trying to remove, so remove the duplicate it from the genomes - // as well from the potential matches vector - - auto duplicate_genome_iterator = - lower_bound(genomes.begin(), genomes.end(), *potential_match, sort_genomes_by_fitness()); - bool found = false; - for (; duplicate_genome_iterator != genomes.end(); duplicate_genome_iterator++) { - Log::debug( - "duplicate_genome_iterator: %p, (*potential_match): %p\n", (*duplicate_genome_iterator), - (*potential_match) - ); - if ((*duplicate_genome_iterator) == (*potential_match)) { - found = true; - break; - } - } - if (!found) { - Log::fatal( - "ERROR: could not find duplicate genome even though its structural hash was in the island, " - "this should never happen!\n" - ); - exit(1); - } - Log::debug( - "potential_match->get_fitness(): %lf, duplicate_genome_iterator->get_fitness(): %lf, " - "new_fitness: %lf\n", - (*potential_match)->get_fitness(), (*duplicate_genome_iterator)->get_fitness(), new_fitness - ); - int32_t duplicate_genome_index = duplicate_genome_iterator - genomes.begin(); - Log::debug("duplicate_genome_index: %d\n", duplicate_genome_index); - // int32_t test_index = contains(genome); - // Log::info("test_index: %d\n", test_index); - RNN_Genome* duplicate = genomes[duplicate_genome_index]; - // Log::info("duplicate.equals(potential_match)? 
%d\n", duplicate->equals(*potential_match)); - genomes.erase(genomes.begin() + duplicate_genome_index); - Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size()); - - // erase the potential match from the structure map as well - // returns an iterator to next element after the deleted one so - // we don't need to increment it - potential_match = potential_matches.erase(potential_match); - delete duplicate; - - Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size()); - Log::debug( - "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(), - structure_map[structural_hash].size() - ); - if (potential_matches.size() == 0) { - Log::debug( - "deleting the potential_matches vector for hash '%s' because it was empty.\n", - structural_hash.c_str() - ); - structure_map.erase(structural_hash); - break; // break because this vector is now empty and deleted - } - } else { - Log::info( - "Island %d: island already contains a duplicate genome with a better fitness! 
not inserting.\n", - id - ); - do_population_check(__LINE__, initial_size); - return -1; - } - } else { - // increment potential match because we didn't delete an entry (or return from the method) - potential_match++; - } + if (duplicate_it != structure_set.end()) { + RNN_Genome* duplicate = *duplicate_it; + // TODO: Add annealment here + if (duplicate->get_fitness() > genome->get_fitness()) { + genomes.erase(std::find(genomes.begin(), genomes.end(), duplicate)); } } + // inorder insert the new individual RNN_Genome* copy = genome->copy(); + copy->set_generation_id(genome->get_generation_id()); + vector best = copy->get_best_parameters(); if (best.size() != 0) { copy->set_weights(best); } - copy->set_generation_id(genome->get_generation_id()); + Log::debug("created copy to insert to island: %d\n", copy->get_group_id()); auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness()); int32_t insert_index = index_iterator - genomes.begin(); @@ -274,12 +196,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) { } genomes.insert(index_iterator, copy); - // calculate the index the genome was inseretd at from the iterator - - structural_hash = copy->get_structural_hash(); - // add the genome to the vector for this structural hash - structure_map[structural_hash].push_back(copy); - Log::debug("adding to structure_map[%s] : %p\n", structural_hash.c_str(), ©); + structure_set.insert(copy); if (insert_index == 0) { // this was a new best genome for this island @@ -309,51 +226,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) { Log::debug("deleting worst genome\n"); RNN_Genome* worst = genomes.back(); genomes.pop_back(); - structural_hash = worst->get_structural_hash(); - - vector& potential_matches = structure_map.find(structural_hash)->second; - - bool found = false; - for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) { - // make sure the addresses of the pointers are the same - 
Log::debug( - "checking to remove worst from structure_map - &worst: %p, &(*potential_match): %p\n", worst, - (*potential_match) - ); - if ((*potential_match) == worst) { - found = true; - Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size()); - - // erase the potential match from the structure map as well - potential_match = potential_matches.erase(potential_match); - - Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size()); - Log::debug( - "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(), - structure_map[structural_hash].size() - ); - - // clean up the structure_map if no genomes in the population have this hash - if (potential_matches.size() == 0) { - Log::debug( - "deleting the potential_matches vector for hash '%s' because it was empty.\n", - structural_hash.c_str() - ); - structure_map.erase(structural_hash); - break; - } - } else { - potential_match++; - } - } - - if (!found) { - Log::debug( - "could not erase from structure_map[%s], genome not found! 
This should never happen.\n", - structural_hash.c_str() - ); - exit(1); - } + structure_set.erase(worst); delete worst; } @@ -382,24 +255,18 @@ void Island::print(string indent) { } void Island::erase_island() { - erased_generation_id = latest_generation_id; - for (int32_t i = 0; i < (int32_t) genomes.size(); i++) { + structure_set.clear(); + + for (int32_t i = 0; i < (int32_t) genomes.size(); i++) delete genomes[i]; - } + genomes.clear(); + erased = true; erase_again = 5; + erased_generation_id = latest_generation_id; + Log::debug("Worst island size after erased: %d\n", genomes.size()); - - if (genomes.size() != 0) { - Log::error("The worst island is not fully erased!\n"); - } -} - -void Island::erase_structure_map() { - Log::debug("Erasing the structure map in the worst performing island\n"); - structure_map.clear(); - Log::debug("after erase structure map size is %d\n", structure_map.size()); } int32_t Island::get_erased_generation_id() { diff --git a/examm/island.hxx b/examm/island.hxx index c75921aa..86ffdbbc 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -18,6 +18,9 @@ using std::string; #include using std::unordered_map; +#include +using std::unordered_set; + #include "rnn/rnn_genome.hxx" class Island { @@ -34,8 +37,8 @@ class Island { * The genomes on this island, stored in sorted order best (front) to worst (back). */ vector genomes; + unordered_set structure_set; - unordered_map> structure_map; int32_t status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or Island::REPOPULATING */ @@ -172,8 +175,6 @@ class Island { */ void erase_island(); - void erase_structure_map(); - /** * returns the get_erased_generation_id. 
*/ diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index a2463b2d..6fd15bb4 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -228,7 +228,6 @@ void IslandSpeciationStrategy::repopulate() { if (rank[i] >= 0) { Log::info("found island: %d is the worst island \n", rank[0]); islands[rank[i]]->erase_island(); - islands[rank[i]]->erase_structure_map(); islands[rank[i]]->set_status(Island::REPOPULATING); } else { Log::error("Didn't find the worst island!"); diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index d0887808..47bd934f 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -1337,7 +1337,7 @@ bool RNN_Genome::has_node_with_innovation(int32_t innovation_number) const { return false; } -bool RNN_Genome::equals(RNN_Genome* other) { +bool RNN_Genome::equals(const RNN_Genome* other) const { if (nodes.size() != other->nodes.size()) { return false; } @@ -1369,6 +1369,19 @@ bool RNN_Genome::equals(RNN_Genome* other) { return true; } +bool RNN_Genome::operator==(const RNN_Genome& other) const { + return other.equals(this); +} + +size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome* genome) const { + return this->operator()(*genome); +} + +size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome& genome) const { + std::hash hasher; + return hasher(genome.get_structural_hash()); +} + void RNN_Genome::assign_reachability() { Log::trace("assigning reachability!\n"); Log::trace("%6d nodes, %6d edges, %6d recurrent edges\n", nodes.size(), edges.size(), recurrent_edges.size()); diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index 56977e76..74c174c9 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -295,7 +295,18 @@ class RNN_Genome { */ bool has_node_with_innovation(int32_t innovation_number) const; - bool equals(RNN_Genome* other); + bool equals(const RNN_Genome* other) const; + bool operator==(const RNN_Genome& other) const; + + /** + * Hash function 
implementation. + * Based on the hash code of the structural hash. + * */ + struct StructuralHash { + size_t operator()(const RNN_Genome& other) const; + size_t operator()(const RNN_Genome* other) const; + }; + string get_color(double weight, bool is_recurrent); void write_graphviz(string filename); From ed5ad176215af9c550947f94c3a2bb76ead39de3 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 18 Mar 2024 18:39:48 -0400 Subject: [PATCH 32/42] Add annealing policy --- examm/CMakeLists.txt | 2 +- examm/annealing.cxx | 81 ++++++++++++++++++++++++++++ examm/annealing.hxx | 65 ++++++++++++++++++++++ examm/island.cxx | 16 +++--- examm/island.hxx | 11 ++-- examm/island_speciation_strategy.cxx | 20 +++---- examm/island_speciation_strategy.hxx | 7 +-- 7 files changed, 169 insertions(+), 33 deletions(-) create mode 100644 examm/annealing.cxx create mode 100644 examm/annealing.hxx diff --git a/examm/CMakeLists.txt b/examm/CMakeLists.txt index d5c532f9..2f5942b7 100644 --- a/examm/CMakeLists.txt +++ b/examm/CMakeLists.txt @@ -1 +1 @@ -add_library(examm_strategy examm.cxx species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx) +add_library(examm_strategy examm.cxx species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx annealing.cxx) diff --git a/examm/annealing.cxx b/examm/annealing.cxx new file mode 100644 index 00000000..d3c9db9c --- /dev/null +++ b/examm/annealing.cxx @@ -0,0 +1,81 @@ +#include "annealing.hxx" +#include "common/arguments.hxx" +#include "common/log.hxx" + +#include +#include + +unique_ptr AnnealingPolicy::from_arguments(const vector &arguments) { + string type; + get_argument(arguments, "--annealing_policy", false, type); + + if (type == "linear") { + return unique_ptr(new LinearAnnealingPolicy(arguments)); + } else if (type == "inv_exp") { + return unique_ptr(new InvExpAnnealingPolicy(arguments)); + } else if (type == "sin") { + return unique_ptr(new 
SinAnnealingPolicy(arguments)); + } else { + Log::info("Using default annealing policy\n"); + return make_unique(); + } +} + +double AnnealingPolicy::operator()(int32_t genome_number) { + return 0.0; +} + +LinearAnnealingPolicy::LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes) + : start_value(start_value), + end_value(end_value), + start_genomes(start_genomes), + interp_genomes(interp_genomes) {} + +LinearAnnealingPolicy::LinearAnnealingPolicy(const vector &arguments) { + get_argument(arguments, "--linear_start_value", true, start_value); + get_argument(arguments, "--linear_end_value", true, end_value); + get_argument(arguments, "--linear_start_genomes", true, start_genomes); + get_argument(arguments, "--linear_interp_genomes", true, interp_genomes); +} + +double LinearAnnealingPolicy::operator()(int32_t genome_number) { + if (genome_number <= start_genomes) { + return start_value; + } else if (genome_number <= interp_genomes + start_genomes) { + double weight = (double) (genome_number - (interp_genomes + start_genomes)) / (double) interp_genomes; + return weight * end_value + (1 - weight) * start_value; + } else { + return end_value; + } +} + +InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) {} +InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector &arguments) { + get_argument(arguments, "--exp_decay_factor", true, decay_factor); +} + +double InvExpAnnealingPolicy::operator()(int32_t genome_number) { + return std::pow(1. 
+ genome_number, decay_factor); +} + +SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) : period(period), min_p(min_p), max_p(max_p) { + if (min_p > max_p) + std::swap(min_p, max_p); + + if (min_p > 1.0 || min_p < 0.0) + throw "Invalid min_p supplied to SinAnnealingPolicyConstructor"; + if (max_p > 1.0 || max_p < 0.0) + throw "Invalid max_p supplied to SinAnnealingPolicyConstructor"; + +} +SinAnnealingPolicy::SinAnnealingPolicy(const vector &arguments) { + get_argument(arguments, "--sin_min_p", true, min_p); + get_argument(arguments, "--sin_max_p", true, max_p); + get_argument(arguments, "--sin_period", true, period); +} + +double SinAnnealingPolicy::operator()(int32_t genome_number) { + double range = max_p - min_p; + + return (max_p + min_p) / 2. + range / 2. + std::sin(2. * M_PI * genome_number / period); +} diff --git a/examm/annealing.hxx b/examm/annealing.hxx new file mode 100644 index 00000000..1cfada54 --- /dev/null +++ b/examm/annealing.hxx @@ -0,0 +1,65 @@ +#include +#include +using std::unique_ptr; + +#include +using std::string; + +#include +using std::vector; + +struct AnnealingPolicy { + + static unique_ptr from_arguments(const vector &arguments); + + /** + * Compute the probability to be used during genome insertion. + * This represents the probability of inserting the genome, even if it + * has a fitness value that is worse than the worst member in the population. + */ + double operator()(int32_t genome_number); +}; + +/** + * Interpolate between two values for a set number of genomes. + * The `start_value` will be returned for `start_genomes`, + * then a linear interpolation of `start_value` and `end_value` for + * `interp_genomes`. Then, `end_value` is given indefinitely. 
+ */ +class LinearAnnealingPolicy : public AnnealingPolicy { + double start_value, end_value; + int32_t start_genomes, interp_genomes; + + public: + LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes); + LinearAnnealingPolicy(const vector &arguments); + + double operator()(int32_t genome_number); +}; + +/** + * Calculates p by simply computing `genome_number^(-decay_factor). + **/ +class InvExpAnnealingPolicy : public AnnealingPolicy { + double decay_factor; + + public: + InvExpAnnealingPolicy(double decay_factor); + InvExpAnnealingPolicy(const vector &arguments); + + double operator()(int32_t genome_number); +}; + +/** + * Computes `p` as a value falling on a sinusoidal curve with the supplied period. + * a `min_p` and a `max_p` specify the range of the curve. + **/ +class SinAnnealingPolicy : public AnnealingPolicy { + double period, min_p, max_p; + + public: + SinAnnealingPolicy(double period, double min_p, double max_p); + SinAnnealingPolicy(const vector &arguments); + + double operator()(int32_t genome_number); +}; diff --git a/examm/island.cxx b/examm/island.cxx index 6a26bbcd..1313764a 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -1,10 +1,8 @@ #include -using std::lower_bound; -using std::sort; +#include using std::upper_bound; #include -using std::setw; #include using std::minstd_rand0; @@ -14,9 +12,6 @@ using std::uniform_real_distribution; using std::string; using std::to_string; -#include -using std::unordered_map; - #include using std::vector; @@ -24,17 +19,18 @@ using std::vector; #include "island.hxx" #include "rnn/rnn_genome.hxx" -Island::Island(int32_t _id, int32_t _max_size) - : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false) { +Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy) + : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false), annealing_policy(annealing_policy) { } 
-Island::Island(int32_t _id, vector _genomes) +Island::Island(int32_t _id, vector _genomes, AnnealingPolicy& annealing_policy) : id(_id), max_size((int32_t) _genomes.size()), genomes(_genomes), status(Island::FILLED), erase_again(0), - erased(false) { + erased(false), + annealing_policy(annealing_policy) { } RNN_Genome* Island::get_best_genome() { diff --git a/examm/island.hxx b/examm/island.hxx index 86ffdbbc..d2120bca 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -2,10 +2,9 @@ #define EXAMM_ISLAND_STRATEGY_HXX #include -using std::sort; -using std::upper_bound; #include +#include using std::function; #include @@ -16,12 +15,12 @@ using std::uniform_real_distribution; using std::string; #include -using std::unordered_map; #include using std::unordered_set; #include "rnn/rnn_genome.hxx" +#include "annealing.hxx" class Island { private: @@ -39,6 +38,8 @@ class Island { vector genomes; unordered_set structure_set; + AnnealingPolicy& annealing_policy; + int32_t status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or Island::REPOPULATING */ @@ -55,13 +56,13 @@ class Island { * * \param max_size is the maximum number of genomes in the island. */ - Island(int32_t id, int32_t max_size); + Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy); /** * Initializes an island filled the supplied genomes. The size of the island will be the size * of the supplied genome vector. The island status is set to filled. 
*/ - Island(int32_t id, vector genomes); + Island(int32_t id, vector genomes, AnnealingPolicy& annealing_policy); /** * Returns the fitness of the best genome in the island diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 6fd15bb4..52941f3b 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -26,7 +26,8 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled, - bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs, bool _tl_epigenetic_weights + bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights, + unique_ptr& annealing_policy ) : generation_island(0), number_of_islands(_number_of_islands), @@ -47,8 +48,8 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( start_filled(_start_filled), transfer_learning(_transfer_learning), transfer_learning_version(_transfer_learning_version), - seed_stirs(_seed_stirs), - tl_epigenetic_weights(_tl_epigenetic_weights) { + tl_epigenetic_weights(_tl_epigenetic_weights), + annealing_policy(std::move(annealing_policy)) { double rate_sum = mutation_rate + intra_island_crossover_rate + inter_island_crossover_rate; if (rate_sum != 1.0) { mutation_rate = mutation_rate / rate_sum; @@ -78,15 +79,14 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( if (transfer_learning) { Log::info("Transfer learning version is %s\n", transfer_learning_version.c_str()); - Log::info("Apply seed stirs: %d\n", seed_stirs); } } void IslandSpeciationStrategy::initialize_population(function& mutate) { for (int32_t i = 0; i < number_of_islands; i++) { - Island* new_island = new Island(i, max_island_size); + Island* 
new_island = new Island(i, max_island_size, *annealing_policy); if (start_filled) { - new_island->fill_with_mutated_genomes(seed_genome, seed_stirs, tl_epigenetic_weights, mutate); + new_island->fill_with_mutated_genomes(seed_genome, num_mutations, tl_epigenetic_weights, mutate); } islands.push_back(new_island); } @@ -286,14 +286,6 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_initializing_island( new_genome = seed_genome->copy(); new_genome->initialize_randomly(); - bool stir_seed_genome = false; - if (stir_seed_genome) { - Log::info("Stir the seed genome with %d mutations\n", seed_stirs); - mutate(seed_stirs, new_genome); - if (!tl_epigenetic_weights) { - new_genome->initialize_randomly(); - } - } } else { Log::info("Island %d: island is initializing but not empty, mutating a random genome\n", generation_island); while (new_genome == NULL) { diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index 19eff273..fdc41024 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -72,9 +72,10 @@ class IslandSpeciationStrategy : public SpeciationStrategy { bool transfer_learning; string transfer_learning_version; - int32_t seed_stirs; bool tl_epigenetic_weights; + unique_ptr annealing_policy; + public: // static void register_command_line_arguments(); // static IslandSpeciationStrategy* generate_from_command_line(); @@ -90,8 +91,8 @@ class IslandSpeciationStrategy : public SpeciationStrategy { double _intra_island_crossover_rate, double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, - bool _start_filled, bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs, - bool _tl_epigenetic_weights + bool _start_filled, bool _transfer_learning, string 
_transfer_learning_version, bool _tl_epigenetic_weights, + unique_ptr& annealing_policy ); // /** From 7f9686b31f348cf51458863c115f123a82587f90 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 18 Mar 2024 18:50:21 -0400 Subject: [PATCH 33/42] Clang format --- common/process_arguments.cxx | 4 ++- examm/annealing.cxx | 63 +++++++++++++++++++----------------- examm/annealing.hxx | 18 +++++------ examm/island.cxx | 20 +++++++----- examm/island.hxx | 6 ++-- rnn/rnn_genome.hxx | 1 - 6 files changed, 60 insertions(+), 52 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 202ddd42..9419fdd7 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -1,3 +1,4 @@ +#include #include using std::string; @@ -120,12 +121,13 @@ IslandSpeciationStrategy* generate_island_speciation_strategy_from_arguments( get_argument(arguments, "--seed_stirs", false, seed_stirs); bool start_filled = argument_exists(arguments, "--start_filled"); bool tl_epigenetic_weights = argument_exists(arguments, "--tl_epigenetic_weights"); + unique_ptr annealing_policy = AnnealingPolicy::from_arguments(arguments); IslandSpeciationStrategy* island_strategy = new IslandSpeciationStrategy( number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, seed_genome, island_ranking_method, repopulation_method, extinction_event_generation_number, num_mutations, islands_to_exterminate, max_genomes, repeat_extinction, start_filled, transfer_learning, - transfer_learning_version, seed_stirs, tl_epigenetic_weights + transfer_learning_version, tl_epigenetic_weights, annealing_policy ); return island_strategy; diff --git a/examm/annealing.cxx b/examm/annealing.cxx index d3c9db9c..023e872f 100644 --- a/examm/annealing.cxx +++ b/examm/annealing.cxx @@ -1,37 +1,38 @@ #include "annealing.hxx" -#include "common/arguments.hxx" -#include "common/log.hxx" #include #include -unique_ptr AnnealingPolicy::from_arguments(const vector 
&arguments) { - string type; - get_argument(arguments, "--annealing_policy", false, type); +#include "common/arguments.hxx" +#include "common/log.hxx" - if (type == "linear") { - return unique_ptr(new LinearAnnealingPolicy(arguments)); - } else if (type == "inv_exp") { - return unique_ptr(new InvExpAnnealingPolicy(arguments)); - } else if (type == "sin") { - return unique_ptr(new SinAnnealingPolicy(arguments)); - } else { - Log::info("Using default annealing policy\n"); - return make_unique(); - } +unique_ptr AnnealingPolicy::from_arguments(const vector& arguments) { + string type; + get_argument(arguments, "--annealing_policy", false, type); + + if (type == "linear") { + return unique_ptr(new LinearAnnealingPolicy(arguments)); + } else if (type == "inv_exp") { + return unique_ptr(new InvExpAnnealingPolicy(arguments)); + } else if (type == "sin") { + return unique_ptr(new SinAnnealingPolicy(arguments)); + } else { + Log::info("Using default annealing policy\n"); + return make_unique(); + } } double AnnealingPolicy::operator()(int32_t genome_number) { return 0.0; } -LinearAnnealingPolicy::LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes) - : start_value(start_value), - end_value(end_value), - start_genomes(start_genomes), - interp_genomes(interp_genomes) {} +LinearAnnealingPolicy::LinearAnnealingPolicy( + double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes +) + : start_value(start_value), end_value(end_value), start_genomes(start_genomes), interp_genomes(interp_genomes) { +} -LinearAnnealingPolicy::LinearAnnealingPolicy(const vector &arguments) { +LinearAnnealingPolicy::LinearAnnealingPolicy(const vector& arguments) { get_argument(arguments, "--linear_start_value", true, start_value); get_argument(arguments, "--linear_end_value", true, end_value); get_argument(arguments, "--linear_start_genomes", true, start_genomes); @@ -49,8 +50,9 @@ double 
LinearAnnealingPolicy::operator()(int32_t genome_number) { } } -InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) {} -InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector &arguments) { +InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) { +} +InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector& arguments) { get_argument(arguments, "--exp_decay_factor", true, decay_factor); } @@ -58,17 +60,20 @@ double InvExpAnnealingPolicy::operator()(int32_t genome_number) { return std::pow(1. + genome_number, decay_factor); } -SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) : period(period), min_p(min_p), max_p(max_p) { - if (min_p > max_p) +SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) + : period(period), min_p(min_p), max_p(max_p) { + if (min_p > max_p) { std::swap(min_p, max_p); + } - if (min_p > 1.0 || min_p < 0.0) + if (min_p > 1.0 || min_p < 0.0) { throw "Invalid min_p supplied to SinAnnealingPolicyConstructor"; - if (max_p > 1.0 || max_p < 0.0) + } + if (max_p > 1.0 || max_p < 0.0) { throw "Invalid max_p supplied to SinAnnealingPolicyConstructor"; - + } } -SinAnnealingPolicy::SinAnnealingPolicy(const vector &arguments) { +SinAnnealingPolicy::SinAnnealingPolicy(const vector& arguments) { get_argument(arguments, "--sin_min_p", true, min_p); get_argument(arguments, "--sin_max_p", true, max_p); get_argument(arguments, "--sin_period", true, period); diff --git a/examm/annealing.hxx b/examm/annealing.hxx index 1cfada54..83addc91 100644 --- a/examm/annealing.hxx +++ b/examm/annealing.hxx @@ -1,4 +1,5 @@ #include + #include using std::unique_ptr; @@ -9,8 +10,7 @@ using std::string; using std::vector; struct AnnealingPolicy { - - static unique_ptr from_arguments(const vector &arguments); + static unique_ptr from_arguments(const vector& arguments); /** * Compute the probability to be used during genome insertion. 
@@ -30,9 +30,9 @@ class LinearAnnealingPolicy : public AnnealingPolicy { double start_value, end_value; int32_t start_genomes, interp_genomes; - public: + public: LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes); - LinearAnnealingPolicy(const vector &arguments); + LinearAnnealingPolicy(const vector& arguments); double operator()(int32_t genome_number); }; @@ -42,10 +42,10 @@ class LinearAnnealingPolicy : public AnnealingPolicy { **/ class InvExpAnnealingPolicy : public AnnealingPolicy { double decay_factor; - - public: + + public: InvExpAnnealingPolicy(double decay_factor); - InvExpAnnealingPolicy(const vector &arguments); + InvExpAnnealingPolicy(const vector& arguments); double operator()(int32_t genome_number); }; @@ -57,9 +57,9 @@ class InvExpAnnealingPolicy : public AnnealingPolicy { class SinAnnealingPolicy : public AnnealingPolicy { double period, min_p, max_p; - public: + public: SinAnnealingPolicy(double period, double min_p, double max_p); - SinAnnealingPolicy(const vector &arguments); + SinAnnealingPolicy(const vector& arguments); double operator()(int32_t genome_number); }; diff --git a/examm/island.cxx b/examm/island.cxx index 1313764a..64af839b 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -3,7 +3,6 @@ using std::upper_bound; #include - #include using std::minstd_rand0; using std::uniform_real_distribution; @@ -20,7 +19,12 @@ using std::vector; #include "rnn/rnn_genome.hxx" Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy) - : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false), annealing_policy(annealing_policy) { + : id(_id), + max_size(_max_size), + status(Island::INITIALIZING), + erase_again(0), + erased(false), + annealing_policy(annealing_policy) { } Island::Island(int32_t _id, vector _genomes, AnnealingPolicy& annealing_policy) @@ -166,11 +170,10 @@ int32_t Island::insert_genome(RNN_Genome* genome) { } } - // 
inorder insert the new individual RNN_Genome* copy = genome->copy(); copy->set_generation_id(genome->get_generation_id()); - + vector best = copy->get_best_parameters(); if (best.size() != 0) { copy->set_weights(best); @@ -252,16 +255,17 @@ void Island::print(string indent) { void Island::erase_island() { structure_set.clear(); - - for (int32_t i = 0; i < (int32_t) genomes.size(); i++) + + for (int32_t i = 0; i < (int32_t) genomes.size(); i++) { delete genomes[i]; + } genomes.clear(); - + erased = true; erase_again = 5; erased_generation_id = latest_generation_id; - + Log::debug("Worst island size after erased: %d\n", genomes.size()); } diff --git a/examm/island.hxx b/examm/island.hxx index d2120bca..5c5940e1 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -2,7 +2,6 @@ #define EXAMM_ISLAND_STRATEGY_HXX #include - #include #include using std::function; @@ -15,12 +14,11 @@ using std::uniform_real_distribution; using std::string; #include - #include using std::unordered_set; -#include "rnn/rnn_genome.hxx" #include "annealing.hxx" +#include "rnn/rnn_genome.hxx" class Island { private: @@ -36,7 +34,7 @@ class Island { * The genomes on this island, stored in sorted order best (front) to worst (back). 
*/ vector genomes; - unordered_set structure_set; + unordered_set structure_set; AnnealingPolicy& annealing_policy; diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index 74c174c9..c3584e51 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -307,7 +307,6 @@ class RNN_Genome { size_t operator()(const RNN_Genome* other) const; }; - string get_color(double weight, bool is_recurrent); void write_graphviz(string filename); void print_equations(); From 9c0ec5bb787c644b0e54c7d134be10d88ded1165 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 18 Mar 2024 19:41:11 -0400 Subject: [PATCH 34/42] Initial implementation --- examm/island.cxx | 33 +++++++++++++++++++++++++-------- examm/island.hxx | 2 ++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/examm/island.cxx b/examm/island.cxx index 64af839b..2aa6cc19 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -21,20 +21,26 @@ using std::vector; Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy) : id(_id), max_size(_max_size), + annealing_policy(annealing_policy), status(Island::INITIALIZING), erase_again(0), - erased(false), - annealing_policy(annealing_policy) { + erased(false) { + using namespace std::chrono; + long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); + generator = minstd_rand0(t); } Island::Island(int32_t _id, vector _genomes, AnnealingPolicy& annealing_policy) : id(_id), max_size((int32_t) _genomes.size()), genomes(_genomes), + annealing_policy(annealing_policy), status(Island::FILLED), erase_again(0), - erased(false), - annealing_policy(annealing_policy) { + erased(false) { + using namespace std::chrono; + long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); + generator = minstd_rand0(t); } RNN_Genome* Island::get_best_genome() { @@ -162,7 +168,8 @@ int32_t Island::insert_genome(RNN_Genome* genome) { Log::info("getting structural hash\n"); auto duplicate_it = structure_set.find(genome); - if 
(duplicate_it != structure_set.end()) { + bool duplicate_exists = duplicate_it != structure_set.end(); + if (duplicate_exists) { RNN_Genome* duplicate = *duplicate_it; // TODO: Add annealment here if (duplicate->get_fitness() > genome->get_fitness()) { @@ -179,13 +186,24 @@ int32_t Island::insert_genome(RNN_Genome* genome) { copy->set_weights(best); } - Log::debug("created copy to insert to island: %d\n", copy->get_group_id()); + // Only do simulated annealing if the island is full + // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept + // genomes by deleting a random member of the population./ + if (genomes.size() == max_size && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) { + int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size(); + + RNN_Genome *victim = genomes[index]; + genomes.erase(genomes.begin() + index); + structure_set.erase(victim); + } + auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness()); int32_t insert_index = index_iterator - genomes.begin(); Log::debug("inserting genome at index: %d\n", insert_index); if (insert_index >= max_size) { - // if we're going to insert this at the back of the population + // For simulated annealing: if this is true, then we should remove a random member of the population to insert. + // if we're going to insert this at the back of the population // its just going to get removed anyways, so we can delete // it and report it was not inserted. 
Log::debug("not inserting genome because it is worse than the worst fitness\n"); @@ -199,7 +217,6 @@ int32_t Island::insert_genome(RNN_Genome* genome) { if (insert_index == 0) { // this was a new best genome for this island - Log::info("Island %d: new best fitness found!\n", id); if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { diff --git a/examm/island.hxx b/examm/island.hxx index 5c5940e1..7b977d47 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -36,6 +36,8 @@ class Island { vector genomes; unordered_set structure_set; + minstd_rand0 generator; + AnnealingPolicy& annealing_policy; int32_t From 050e2780859d2bc7e275b934e6e0b26eafa337c1 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 18 Mar 2024 19:41:24 -0400 Subject: [PATCH 35/42] Clang format --- examm/island.cxx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examm/island.cxx b/examm/island.cxx index 2aa6cc19..2681aade 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -189,10 +189,11 @@ int32_t Island::insert_genome(RNN_Genome* genome) { // Only do simulated annealing if the island is full // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept // genomes by deleting a random member of the population./ - if (genomes.size() == max_size && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) { + if (genomes.size() == max_size + && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) { int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size(); - - RNN_Genome *victim = genomes[index]; + + RNN_Genome* victim = genomes[index]; genomes.erase(genomes.begin() + index); structure_set.erase(victim); } @@ -203,7 +204,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) { if (insert_index >= max_size) { // For simulated annealing: if this is true, then we should remove a random member of the population to insert. 
- // if we're going to insert this at the back of the population + // if we're going to insert this at the back of the population // its just going to get removed anyways, so we can delete // it and report it was not inserted. Log::debug("not inserting genome because it is worse than the worst fitness\n"); From e59c649de177b14d518f600b598200000eee56c6 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 20 Mar 2024 15:23:10 -0400 Subject: [PATCH 36/42] Finishing touches --- examm/annealing.cxx | 6 +- examm/annealing.hxx | 8 +-- examm/examm.cxx | 21 +++++-- examm/examm.hxx | 5 +- examm/island.cxx | 83 +++++++++++++--------------- examm/island.hxx | 25 ++++++++- examm/island_speciation_strategy.cxx | 44 ++++++--------- examm/species.cxx | 8 +-- 8 files changed, 104 insertions(+), 96 deletions(-) diff --git a/examm/annealing.cxx b/examm/annealing.cxx index 023e872f..6d0b5868 100644 --- a/examm/annealing.cxx +++ b/examm/annealing.cxx @@ -9,7 +9,7 @@ unique_ptr AnnealingPolicy::from_arguments(const vector& arguments) { string type; get_argument(arguments, "--annealing_policy", false, type); - + Log::info("Annealing policy = %s\n", type.c_str()); if (type == "linear") { return unique_ptr(new LinearAnnealingPolicy(arguments)); } else if (type == "inv_exp") { @@ -57,7 +57,7 @@ InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector& arguments) { } double InvExpAnnealingPolicy::operator()(int32_t genome_number) { - return std::pow(1. + genome_number, decay_factor); + return std::pow(1. + genome_number, -decay_factor); } SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) @@ -82,5 +82,5 @@ SinAnnealingPolicy::SinAnnealingPolicy(const vector& arguments) { double SinAnnealingPolicy::operator()(int32_t genome_number) { double range = max_p - min_p; - return (max_p + min_p) / 2. + range / 2. + std::sin(2. * M_PI * genome_number / period); + return (max_p + min_p) / 2. + range / 2. * std::sin(2. 
* M_PI * genome_number / period); } diff --git a/examm/annealing.hxx b/examm/annealing.hxx index 83addc91..4406610c 100644 --- a/examm/annealing.hxx +++ b/examm/annealing.hxx @@ -17,7 +17,7 @@ struct AnnealingPolicy { * This represents the probability of inserting the genome, even if it * has a fitness value that is worse than the worst member in the population. */ - double operator()(int32_t genome_number); + virtual double operator()(int32_t genome_number); }; /** @@ -34,7 +34,7 @@ class LinearAnnealingPolicy : public AnnealingPolicy { LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes); LinearAnnealingPolicy(const vector& arguments); - double operator()(int32_t genome_number); + virtual double operator()(int32_t genome_number); }; /** @@ -47,7 +47,7 @@ class InvExpAnnealingPolicy : public AnnealingPolicy { InvExpAnnealingPolicy(double decay_factor); InvExpAnnealingPolicy(const vector& arguments); - double operator()(int32_t genome_number); + virtual double operator()(int32_t genome_number); }; /** @@ -61,5 +61,5 @@ class SinAnnealingPolicy : public AnnealingPolicy { SinAnnealingPolicy(double period, double min_p, double max_p); SinAnnealingPolicy(const vector& arguments); - double operator()(int32_t genome_number); + virtual double operator()(int32_t genome_number); }; diff --git a/examm/examm.cxx b/examm/examm.cxx index 0c76c500..164d84d1 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -97,8 +97,8 @@ void EXAMM::generate_log() { mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); (*log_file - ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" - "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id"; + ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled Nodes,Enabled" + "Edges,Enabled Rec. Edges,Val. 
MSE,Pre-Insert MSE,Genome Inserted,Genome Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -194,6 +194,7 @@ void EXAMM::update_log(RNN_Genome* genome) { long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," + << best_genome->get_number_weights() << "," << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << "," << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << "," @@ -263,10 +264,10 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // write this genome to disk if it was a new best found genome if (save_genome_option.compare("all_best_genomes") == 0) { - Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size()); - for (int i = 0; i < 20 && i < save_genome_option.size(); i++) { - cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl; - } + // Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size()); + // for (int i = 0; i < 20 && i < save_genome_option.size(); i++) { + // cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl; + // } if (insert_position == 0) { Log::info("saving genome!"); @@ -288,6 +289,14 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // write function to save genomes to file void EXAMM::save_genome(RNN_Genome* genome, string genome_name = "rnn_genome") { + if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { + // need to set the weights for non-initial genomes so we + // can generate a proper graphviz file + vector best_parameters = 
genome->get_best_parameters(); + genome->set_weights(best_parameters); + Log::info("set genome parameters to best\n"); + } + genome->write_graphviz(output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".gv"); ofstream equations_filestream( output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".txt" diff --git a/examm/examm.hxx b/examm/examm.hxx index a1e7cc59..3ec90bd9 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -70,7 +70,7 @@ class EXAMM { map inserted_counts; map generated_counts; - string output_directory; + const string output_directory; ofstream* log_file; ofstream* op_log_file; double pre_insert_best_mse = 1000000; @@ -78,8 +78,7 @@ class EXAMM { std::chrono::time_point startClock; - string genome_file_name; - string save_genome_option; + const string save_genome_option; public: EXAMM( diff --git a/examm/island.cxx b/examm/island.cxx index 2681aade..dc73e1d5 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -3,9 +3,6 @@ using std::upper_bound; #include -#include -using std::minstd_rand0; -using std::uniform_real_distribution; #include using std::string; @@ -18,31 +15,23 @@ using std::vector; #include "island.hxx" #include "rnn/rnn_genome.hxx" -Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy) - : id(_id), - max_size(_max_size), - annealing_policy(annealing_policy), - status(Island::INITIALIZING), - erase_again(0), - erased(false) { +Island::Island(int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy) : + id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) { using namespace std::chrono; long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); - generator = minstd_rand0(t); -} + generator = mt19937_64(t + 1123 * id + 12334 * max_size); + + for (int i = 0; i < 100; i++) + generate_canonical(generator); -Island::Island(int32_t _id, vector 
_genomes, AnnealingPolicy& annealing_policy) - : id(_id), - max_size((int32_t) _genomes.size()), - genomes(_genomes), - annealing_policy(annealing_policy), - status(Island::FILLED), - erase_again(0), - erased(false) { - using namespace std::chrono; - long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); - generator = minstd_rand0(t); } +Island::Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy) + : Island(id, max_size, vector(), Island::INITIALIZING, annealing_policy) {} + +Island::Island(int32_t id, vector genomes, AnnealingPolicy& annealing_policy) + : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) {} + RNN_Genome* Island::get_best_genome() { if (genomes.size() == 0) { return NULL; @@ -68,6 +57,13 @@ double Island::get_best_fitness() { } } +double Island::get_best_all_time_fitness() { + if (all_time_local_best) + return all_time_local_best->get_fitness(); + else + return EXAMM_MAX_DOUBLE; +} + double Island::get_worst_fitness() { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { @@ -153,6 +149,22 @@ int32_t Island::insert_genome(RNN_Genome* genome) { double new_fitness = genome->get_fitness(); Log::info("inserting genome with fitness: %s to island %d\n", parse_fitness(genome->get_fitness()).c_str(), id); + // Only do simulated annealing if the island is full + // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept + // genomes by deleting a random member of the population. 
+ double p = annealing_policy(genome->get_generation_id()); + Log::info("Annealing policy p = %f\n", p); + + if (is_full() && uniform_real_distribution<>(0.0, 1.0)(generator) < p) { + int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size(); + + Log::info("Simulated annealing triggered - deleting a random genome %d\n", index); + + RNN_Genome* victim = genomes[index]; + genomes.erase(genomes.begin() + index); + structure_set.erase(victim); + } + // discard the genome if the island is full and it's fitness is worse than the worst in thte population if (is_full() && new_fitness > get_worst_fitness()) { Log::debug( @@ -160,7 +172,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) { genomes.back()->get_fitness() ); do_population_check(__LINE__, initial_size); - return false; + return -1; } // check and see if the structural hash of the genome is in the @@ -186,21 +198,9 @@ int32_t Island::insert_genome(RNN_Genome* genome) { copy->set_weights(best); } - // Only do simulated annealing if the island is full - // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept - // genomes by deleting a random member of the population./ - if (genomes.size() == max_size - && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) { - int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size(); - - RNN_Genome* victim = genomes[index]; - genomes.erase(genomes.begin() + index); - structure_set.erase(victim); - } - auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness()); int32_t insert_index = index_iterator - genomes.begin(); - Log::debug("inserting genome at index: %d\n", insert_index); + Log::info("inserting genome at index: %d\n", insert_index); if (insert_index >= max_size) { // For simulated annealing: if this is true, then we should remove a random member of the population to insert. 
@@ -220,13 +220,8 @@ int32_t Island::insert_genome(RNN_Genome* genome) { // this was a new best genome for this island Log::info("Island %d: new best fitness found!\n", id); - if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { - // need to set the weights for non-initial genomes so we - // can generate a proper graphviz file - vector best_parameters = genome->get_best_parameters(); - genome->set_weights(best_parameters); - Log::info("set genome parameters to best\n"); - } + if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness()) + all_time_local_best = unique_ptr(genome->copy()); } if ((int32_t) genomes.size() >= max_size) { diff --git a/examm/island.hxx b/examm/island.hxx index 7b977d47..b6d50104 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -7,6 +7,7 @@ using std::function; #include +using std::mt19937_64; using std::minstd_rand0; using std::uniform_real_distribution; @@ -34,23 +35,36 @@ class Island { * The genomes on this island, stored in sorted order best (front) to worst (back). */ vector genomes; + + /** + * If we are using simulated annealing, then the genomes vector may not contain the best genome we have discovered. + * Keep an additional clone of the best genome here for logging. + **/ + unique_ptr all_time_local_best; + + /** + * A set of the genomes this island contains (one entry per genome in Island::genomes. + * These are hashed by their structure: the nodes, edges, and their innovation numbers. Weights are not considered. 
+ **/ unordered_set structure_set; - minstd_rand0 generator; + mt19937_64 generator; AnnealingPolicy& annealing_policy; int32_t status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or Island::REPOPULATING */ - int32_t erase_again; /**< a flag to track if this islands has been erased */ - bool erased; /**< a flag to track if this islands has been erased */ + int32_t erase_again = 0; /**< a flag to track if this islands has been erased */ + bool erased = false; /**< a flag to track if this islands has been erased */ public: const static int32_t INITIALIZING = 0; /**< status flag for if the island is initializing. */ const static int32_t FILLED = 1; /**< status flag for if the island is filled. */ const static int32_t REPOPULATING = 2; /**< status flag for if the island is repopulating. */ + Island(int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy); + /** * Initializes an island with a given max size. * @@ -71,6 +85,11 @@ class Island { */ double get_best_fitness(); + /** + * Returns the best fitness ever obtains by any genome in this island - even if that genome has been removed. 
+ **/ + double get_best_all_time_fitness(); + /** * Returns the fitness of the worst genome in the island * diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 52941f3b..094cb304 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -1,4 +1,5 @@ #include +#include using std::function; #include @@ -186,7 +187,7 @@ int32_t IslandSpeciationStrategy::insert_genome(RNN_Genome* genome) { Log::fatal("ERROR: island[%d] is null!\n", island); } int32_t insert_position = islands[island]->insert_genome(genome); - Log::info("Island %d: Insert position was: %d\n", insert_position); + Log::info("Island %d: Insert position was: %d\n", island, insert_position); if (insert_position == 0) { if (new_global_best) { @@ -455,49 +456,40 @@ void IslandSpeciationStrategy::print(string indent) const { * Gets speciation strategy information headers for logs */ string IslandSpeciationStrategy::get_strategy_information_headers() const { + stringstream oss; + string info_header = ""; - info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"); + oss << ",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"; for (int32_t i = 0; i < (int32_t) islands.size(); i++) { - info_header.append(","); - info_header.append("Island_"); - info_header.append(to_string(i)); - info_header.append("_best_fitness"); - info_header.append(","); - info_header.append("Island_"); - info_header.append(to_string(i)); - info_header.append("_worst_fitness"); - } - return info_header; + oss << ",Island_" << i << "_best_fitness" + << ",Island_" << i << "_wort_fitness" + << ",Island_" << i << "_all_time_best"; + } + + return oss.str(); } /** * Gets speciation strategy information values for logs */ string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const { - string info_value = ""; + stringstream oss; auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); - 
info_value.append(","); - info_value.append(to_string(min_mse_pre)); - info_value.append(","); - info_value.append(to_string(max_mse_pre)); + oss << "," << min_mse_pre << "," << max_mse_pre; float min_mse_post = this->get_best_fitness(); float max_mse_post = this->get_worst_fitness(); - info_value.append(","); - info_value.append(to_string(min_mse_post)); - info_value.append(","); - info_value.append(to_string(max_mse_post)); + oss << "," << min_mse_post << "," << max_mse_post; for (int32_t i = 0; i < (int32_t) islands.size(); i++) { double best_fitness = islands[i]->get_best_fitness(); double worst_fitness = islands[i]->get_worst_fitness(); - info_value.append(","); - info_value.append(to_string(best_fitness)); - info_value.append(","); - info_value.append(to_string(worst_fitness)); + double all_time_best = islands[i]->get_best_all_time_fitness(); + oss << "," << best_fitness << "," << worst_fitness << "," << all_time_best; } - return info_value; + + return oss.str(); } RNN_Genome* IslandSpeciationStrategy::parents_repopulation( diff --git a/examm/species.cxx b/examm/species.cxx index 1e650ec7..9081b203 100644 --- a/examm/species.cxx +++ b/examm/species.cxx @@ -130,12 +130,6 @@ int32_t Species::insert_genome(RNN_Genome* genome) { if (insert_index == 0) { // this was a new best genome for this island Log::info("new best fitness for island: %d!\n", id); - if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { - // need to set the weights for non-initial genomes so we - // can generate a proper graphviz file - vector best_parameters = genome->get_best_parameters(); - genome->set_weights(best_parameters); - } species_not_improving_count = 0; } else { species_not_improving_count++; @@ -233,4 +227,4 @@ int32_t Species::get_species_not_improving_count() { void Species::set_species_not_improving_count(int32_t count) { species_not_improving_count = count; -} \ No newline at end of file +} From 19f43fe9a1502a4ceb66a9f8dbf5ddcf3075bb66 Mon Sep 17 00:00:00 2001 From: Joshua Karns 
Date: Wed, 20 Mar 2024 15:23:30 -0400 Subject: [PATCH 37/42] Format --- examm/examm.cxx | 15 ++++++++------- examm/island.cxx | 25 +++++++++++++++---------- examm/island.hxx | 8 +++++--- examm/island_speciation_strategy.cxx | 8 +++----- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/examm/examm.cxx b/examm/examm.cxx index 164d84d1..7d9e7cd4 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -97,7 +97,8 @@ void EXAMM::generate_log() { mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); (*log_file - ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled Nodes,Enabled" + ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled " + "Nodes,Enabled" "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Genome Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -194,12 +195,12 @@ void EXAMM::update_log(RNN_Genome* genome) { long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," - << best_genome->get_number_weights() << "," - << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," - << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << "," - << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 
1 : 0) << "," - << genome->get_number_weights() << "," << genome->get_generation_id() - << speciation_strategy->get_strategy_information_values(genome) << endl; + << best_genome->get_number_weights() << "," << best_genome->get_enabled_node_count() << "," + << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() + << "," << genome->best_validation_mse << "," << pre_insert_best_mse << "," + << (int32_t) (last_genome_inserted ? 1 : 0) << "," << genome->get_number_weights() << "," + << genome->get_generation_id() << speciation_strategy->get_strategy_information_values(genome) + << endl; Log::info( "mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), diff --git a/examm/island.cxx b/examm/island.cxx index dc73e1d5..eb39ed98 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -3,7 +3,6 @@ using std::upper_bound; #include - #include using std::string; using std::to_string; @@ -15,22 +14,26 @@ using std::vector; #include "island.hxx" #include "rnn/rnn_genome.hxx" -Island::Island(int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy) : - id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) { +Island::Island( + int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy +) + : id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) { using namespace std::chrono; long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); generator = mt19937_64(t + 1123 * id + 12334 * max_size); - for (int i = 0; i < 100; i++) + for (int i = 0; i < 100; i++) { generate_canonical(generator); - + } } Island::Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy) - : Island(id, max_size, vector(), Island::INITIALIZING, annealing_policy) {} + 
: Island(id, max_size, vector(), Island::INITIALIZING, annealing_policy) { +} Island::Island(int32_t id, vector genomes, AnnealingPolicy& annealing_policy) - : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) {} + : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) { +} RNN_Genome* Island::get_best_genome() { if (genomes.size() == 0) { @@ -58,10 +61,11 @@ double Island::get_best_fitness() { } double Island::get_best_all_time_fitness() { - if (all_time_local_best) + if (all_time_local_best) { return all_time_local_best->get_fitness(); - else + } else { return EXAMM_MAX_DOUBLE; + } } double Island::get_worst_fitness() { @@ -220,8 +224,9 @@ int32_t Island::insert_genome(RNN_Genome* genome) { // this was a new best genome for this island Log::info("Island %d: new best fitness found!\n", id); - if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness()) + if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness()) { all_time_local_best = unique_ptr(genome->copy()); + } } if ((int32_t) genomes.size() >= max_size) { diff --git a/examm/island.hxx b/examm/island.hxx index b6d50104..707bf9db 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -7,8 +7,8 @@ using std::function; #include -using std::mt19937_64; using std::minstd_rand0; +using std::mt19937_64; using std::uniform_real_distribution; #include @@ -35,7 +35,7 @@ class Island { * The genomes on this island, stored in sorted order best (front) to worst (back). */ vector genomes; - + /** * If we are using simulated annealing, then the genomes vector may not contain the best genome we have discovered. * Keep an additional clone of the best genome here for logging. @@ -63,7 +63,9 @@ class Island { const static int32_t FILLED = 1; /**< status flag for if the island is filled. */ const static int32_t REPOPULATING = 2; /**< status flag for if the island is repopulating. 
*/ - Island(int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy); + Island( + int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy + ); /** * Initializes an island with a given max size. diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 094cb304..c8605fbe 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -461,9 +461,8 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const { string info_header = ""; oss << ",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"; for (int32_t i = 0; i < (int32_t) islands.size(); i++) { - oss << ",Island_" << i << "_best_fitness" - << ",Island_" << i << "_wort_fitness" - << ",Island_" << i << "_all_time_best"; + oss << ",Island_" << i << "_best_fitness" << ",Island_" << i << "_wort_fitness" << ",Island_" << i + << "_all_time_best"; } return oss.str(); @@ -473,7 +472,6 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const { * Gets speciation strategy information values for logs */ string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const { - stringstream oss; auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); oss << "," << min_mse_pre << "," << max_mse_pre; @@ -488,7 +486,7 @@ string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* gen double all_time_best = islands[i]->get_best_all_time_fitness(); oss << "," << best_fitness << "," << worst_fitness << "," << all_time_best; } - + return oss.str(); } From 3bfc9fac289773623cc1e85678e4cef6ea3f4712 Mon Sep 17 00:00:00 2001 From: aidanlabella Date: Wed, 3 Apr 2024 19:58:15 -0400 Subject: [PATCH 38/42] add ability to specify delimiter for CSV inputs --- time_series/time_series.cxx | 27 +++++++++++++++++++++++---- time_series/time_series.hxx | 4 +++- 2 files changed, 26 insertions(+), 5 
deletions(-) diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx index b91c13a2..2979e9ba 100644 --- a/time_series/time_series.cxx +++ b/time_series/time_series.cxx @@ -238,7 +238,7 @@ void TimeSeriesSet::add_time_series(string name) { } } -TimeSeriesSet::TimeSeriesSet(string _filename, const vector& _fields) { +TimeSeriesSet::TimeSeriesSet(string _filename, const vector& _fields, char delim) { filename = _filename; fields = _fields; @@ -251,7 +251,7 @@ TimeSeriesSet::TimeSeriesSet(string _filename, const vector& _fields) { } vector file_fields; - string_split(line, ',', file_fields); + string_split(line, delim, file_fields); for (int32_t i = 0; i < (int32_t) file_fields.size(); i++) { // get rid of carriage returns (sometimes windows messes this up) file_fields[i].erase(std::remove(file_fields[i].begin(), file_fields[i].end(), '\r'), file_fields[i].end()); @@ -308,7 +308,7 @@ TimeSeriesSet::TimeSeriesSet(string _filename, const vector& _fields) { } vector parts; - string_split(line, ',', parts); + string_split(line, delim, parts); if (parts.size() != file_fields.size()) { Log::fatal( @@ -734,7 +734,7 @@ void TimeSeriesSets::load_time_series() { for (int32_t i = 0; i < (int32_t) filenames.size(); i++) { Log::info("\t%s\n", filenames[i].c_str()); - TimeSeriesSet* ts = new TimeSeriesSet(filenames[i], all_parameter_names); + TimeSeriesSet* ts = new TimeSeriesSet(filenames[i], all_parameter_names, this->csv_delimiter); time_series.push_back(ts); rows += ts->get_number_rows(); @@ -831,6 +831,25 @@ TimeSeriesSets* TimeSeriesSets::generate_from_arguments(const vector& ar exit(1); } + if (argument_exists(arguments, "--csv_delimiter")) { + vector delim_vec; + get_argument_vector(arguments, "--csv_delimiter", false, delim_vec); + + string delim_str = delim_vec.front(); + + if (delim_vec.size() != 1 || delim_str.size() != 1) { + // Exit if the user specifies more than one delimiter character + Log::fatal( + "The delimeter for CSV files should be a single 
character." + ); + + help_message(); + exit(1); + } + + tss->csv_delimiter = delim_str.at(0); + } + tss->load_time_series(); tss->normalize_type = ""; diff --git a/time_series/time_series.hxx b/time_series/time_series.hxx index fada6f51..2ccc75c0 100644 --- a/time_series/time_series.hxx +++ b/time_series/time_series.hxx @@ -72,7 +72,7 @@ class TimeSeriesSet { TimeSeriesSet(); public: - TimeSeriesSet(string _filename, const vector& _fields); + TimeSeriesSet(string _filename, const vector& _fields, char delim); ~TimeSeriesSet(); void add_time_series(string name); @@ -115,6 +115,8 @@ class TimeSeriesSet { class TimeSeriesSets { private: + char csv_delimiter = ','; + string normalize_type; vector filenames; From d665e842ee1618b0c1e5950c8d168b13de4c184a Mon Sep 17 00:00:00 2001 From: aidanlabella Date: Wed, 3 Apr 2024 20:05:56 -0400 Subject: [PATCH 39/42] add script for new delimiter option --- scripts/air_quality/evolve_aq.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 scripts/air_quality/evolve_aq.sh diff --git a/scripts/air_quality/evolve_aq.sh b/scripts/air_quality/evolve_aq.sh new file mode 100755 index 00000000..e36407fe --- /dev/null +++ b/scripts/air_quality/evolve_aq.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# This is a script for evlolving networks to predict Air Quality data +# This also doubles as an example for the csv delimiter option + +cd build + +INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH" +OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)" + +exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_2" +mkdir -p $exp_name +echo "Running base EXAMM code with UCI Air Quality dataset, results will be saved to: "$exp_name +echo "###-------------------###" + +../../build/multithreaded/examm_mt \ +--training_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \ +--test_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \ +--time_offset 1 \ 
+--input_parameter_names $INPUT_PARAMETERS \ +--output_parameter_names $OUTPUT_PARAMETERS \ +--number_islands 10 \ +--island_size 10 \ +--max_genomes 20000 \ +--number_threads 14 \ +--bp_iterations 15 \ +--normalize min_max \ +--output_directory $exp_name \ +--possible_node_types simple UGRNN MGU GRU delta LSTM \ +--std_message_level INFO \ +--file_message_level NONE \ +--csv_delimiter ";" From a5fdba975f0b917800c956c568e894ecd292faa2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Fri, 5 Apr 2024 13:11:15 -0400 Subject: [PATCH 40/42] Write island best to disk --- common/process_arguments.cxx | 4 +++- examm/island_speciation_strategy.cxx | 15 +++++++++------ examm/island_speciation_strategy.hxx | 3 ++- rnn_examples/train_rnn.cxx | 18 +++++------------- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 9419fdd7..fe2a49f0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -122,9 +122,11 @@ IslandSpeciationStrategy* generate_island_speciation_strategy_from_arguments( bool start_filled = argument_exists(arguments, "--start_filled"); bool tl_epigenetic_weights = argument_exists(arguments, "--tl_epigenetic_weights"); unique_ptr annealing_policy = AnnealingPolicy::from_arguments(arguments); + string output_directory = ""; + get_argument(arguments, "--output_directory", false, output_directory); IslandSpeciationStrategy* island_strategy = new IslandSpeciationStrategy( - number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, seed_genome, + number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, output_directory, seed_genome, island_ranking_method, repopulation_method, extinction_event_generation_number, num_mutations, islands_to_exterminate, max_genomes, repeat_extinction, start_filled, transfer_learning, transfer_learning_version, tl_epigenetic_weights, annealing_policy diff --git 
a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index c8605fbe..e05b95d1 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -5,6 +5,8 @@ using std::function; #include // #include +#include +using std::stringstream; #include @@ -24,7 +26,7 @@ using std::string; */ IslandSpeciationStrategy::IslandSpeciationStrategy( int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate, double _intra_island_crossover_rate, - double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method, + double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled, bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights, @@ -36,6 +38,7 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( mutation_rate(_mutation_rate), intra_island_crossover_rate(_intra_island_crossover_rate), inter_island_crossover_rate(_inter_island_crossover_rate), + output_directory(output_directory), generated_genomes(0), evaluated_genomes(0), seed_genome(_seed_genome), @@ -190,11 +193,11 @@ int32_t IslandSpeciationStrategy::insert_genome(RNN_Genome* genome) { Log::info("Island %d: Insert position was: %d\n", island, insert_position); if (insert_position == 0) { - if (new_global_best) { - return 0; - } else { - return 1; - } + stringstream ss; + ss << output_directory << "/island_" << island << "_best.bin"; + genome->write_to_file(ss.str()); + + return insert_position != 0; } else { return insert_position; // will be -1 if not inserted, or > 0 if not the global best } diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index fdc41024..bd32d507 100644 --- 
a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -37,6 +37,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { RNN_Genome* seed_genome; /**< keep a reference to the seed genome so we can re-use it across islands and not duplicate innovation numbers. */ + string output_directory; string island_ranking_method; /**< The method used to find the worst island in population */ string repopulation_method; /**< The method used to repopulate the island after being erased */ @@ -88,7 +89,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { */ IslandSpeciationStrategy( int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate, - double _intra_island_crossover_rate, double _inter_island_crossover_rate, RNN_Genome* _seed_genome, + double _intra_island_crossover_rate, double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled, bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights, diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index c790b112..a452fe1a 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -116,19 +116,8 @@ int main(int argc, char** argv) { if (genome_file.size() != 0) { genome = new RNN_Genome(genome_file); - Log::info("best weights: { "); - for (double& d : genome->get_best_parameters()) { - Log::info_no_header("%f, ", d); - } - Log::info("}\n"); - - vector params; - genome->get_weights(params); - Log::info("current weights: { "); - for (double& d : params) { - Log::info_no_header("%f, ", d); - } - Log::info("}\n"); + genome->set_weights(genome->get_best_parameters()); + Log::info("Number of weights = %d\n", genome->get_number_weights()); } else { string 
rnn_type; get_argument(arguments, "--rnn_type", true, rnn_type); @@ -222,6 +211,9 @@ int main(int argc, char** argv) { genome->set_log_filename(output_directory + "/" + log_filename); } + string output_genome_name = "output_genome.bin"; + get_argument(arguments, "--output_genome_name", false, output_genome_name); + genome->set_parameter_names( time_series_sets->get_input_parameter_names(), time_series_sets->get_output_parameter_names() ); From 28fe8329fba140c3258e18a19e0c6c4372bf4019 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 9 Apr 2024 15:52:26 -0400 Subject: [PATCH 41/42] Update build for native optimization --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65d1f2a8..f51b9f51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,11 +27,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") # SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 -fsanitize=address") -SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS " -Wall -march=native -O3") #SET (CMAKE_CXX_FLAGS " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG") #SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") #SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") -SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") +SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG") #SET (CMAKE_SHARED_LINKER_FLAGS " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG") From 99fda86862c63e256d78f5c3fcc192ba83fdde6b Mon Sep 17 00:00:00 2001 From: aidanlabella Date: Fri, 3 May 2024 15:44:54 -0400 Subject: [PATCH 42/42] add scripts for training/evolution with NASA merra --- rnn/inverse_node.cxx | 2 +- rnn_examples/train_rnn.cxx | 4 ++-- 
scripts/air_quality/eval_merra.sh | 17 ++++++++++++++ scripts/air_quality/evolve_aq.sh | 13 +++++++---- scripts/air_quality/evolve_merra.sh | 35 +++++++++++++++++++++++++++++ scripts/air_quality/train_aq.sh | 32 ++++++++++++++++++++++++++ scripts/air_quality/train_merra.sh | 32 ++++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 7 deletions(-) create mode 100755 scripts/air_quality/eval_merra.sh create mode 100755 scripts/air_quality/evolve_merra.sh create mode 100755 scripts/air_quality/train_aq.sh create mode 100755 scripts/air_quality/train_merra.sh diff --git a/rnn/inverse_node.cxx b/rnn/inverse_node.cxx index 202dac29..fff9bcd4 100644 --- a/rnn/inverse_node.cxx +++ b/rnn/inverse_node.cxx @@ -19,7 +19,7 @@ double INVERSE_Node::activation_function(double input) { double INVERSE_Node::derivative_function(double input) { double gradient = -1.0 / ((input) * (input)); - if (isnan(gradient) || isinf(gradient)) { + if (std::isnan(gradient) || std::isinf(gradient)) { gradient = -1000.0; } return gradient; diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index a452fe1a..64727f07 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -125,10 +125,10 @@ int main(int argc, char** argv) { Log::info("RNN TYPE = %s\n", rnn_type.c_str()); int32_t num_hidden_layers; - get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); + get_argument(arguments, "--num_hidden_layers", false, num_hidden_layers); int32_t max_recurrent_depth; - get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + get_argument(arguments, "--max_recurrent_depth", false, max_recurrent_depth); int32_t hidden_layer_size = number_inputs; get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); diff --git a/scripts/air_quality/eval_merra.sh b/scripts/air_quality/eval_merra.sh new file mode 100755 index 00000000..d3de8abd --- /dev/null +++ b/scripts/air_quality/eval_merra.sh @@ -0,0 +1,17 @@ 
+INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V" +# OUTPUT_PARAMETERS="CO" +OUTPUT_PARAMETERS="CO SO4 SO2 O3" + +exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/multivar_B/evaluation" +mkdir -p $exp_name + +../../build/rnn_examples/evaluate_rnn \ +--testing_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_eval_1000.csv \ +--time_offset 1 \ +--input_parameter_names $INPUT_PARAMETERS \ +--output_parameter_names $OUTPUT_PARAMETERS \ +--genome_file $1 \ +--output_directory $exp_name \ +--std_message_level INFO \ +--file_message_level ERROR +# --bp_iterations $epochs \ diff --git a/scripts/air_quality/evolve_aq.sh b/scripts/air_quality/evolve_aq.sh index e36407fe..0136307e 100755 --- a/scripts/air_quality/evolve_aq.sh +++ b/scripts/air_quality/evolve_aq.sh @@ -4,10 +4,12 @@ cd build -INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH" -OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)" +# INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH" +INPUT_PARAMETERS="PT08.S5(O3) T RH AH" +# OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)" +OUTPUT_PARAMETERS="CO(GT)" -exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_2" +exp_name="/home/aidan/sandbox/DEEPSPrj/output/univar3" mkdir -p $exp_name echo "Running base EXAMM code with UCI Air Quality dataset, results will be saved to: "$exp_name echo "###-------------------###" @@ -19,10 +21,13 @@ echo "###-------------------###" --input_parameter_names $INPUT_PARAMETERS \ --output_parameter_names $OUTPUT_PARAMETERS \ --number_islands 10 \ +--min_recurrent_depth 10 \ +--max_recurrent_depth 40 \ --island_size 10 \ --max_genomes 20000 \ --number_threads 14 \ ---bp_iterations 15 \ +--num_mutations 20 \ +--bp_iterations 20 \ --normalize min_max \ --output_directory $exp_name \ --possible_node_types simple UGRNN MGU GRU delta LSTM \ diff --git a/scripts/air_quality/evolve_merra.sh 
b/scripts/air_quality/evolve_merra.sh new file mode 100755 index 00000000..d5a71e31 --- /dev/null +++ b/scripts/air_quality/evolve_merra.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# This is a script for evolving networks to predict Air Quality data +# This also doubles as an example for the csv delimiter option + +cd build + +INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V" +# OUTPUT_PARAMETERS="CO" +OUTPUT_PARAMETERS="CO SO4 SO2 O3" + +exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/mv-A" +mkdir -p $exp_name +echo "Running base EXAMM code with MERRA-2 dataset, results will be saved to: "$exp_name +echo "###-------------------###" + +../../build/multithreaded/examm_mt \ +--training_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_100k_23.csv \ +--test_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_100k_23_test.csv \ +--time_offset 1 \ +--input_parameter_names $INPUT_PARAMETERS \ +--output_parameter_names $OUTPUT_PARAMETERS \ +--number_islands 10 \ +--min_recurrent_depth 1 \ +--max_recurrent_depth 100 \ +--island_size 10 \ +--max_genomes 1000 \ +--number_threads 14 \ +--num_mutations 20 \ +--bp_iterations 5 \ +--normalize none \ +--output_directory $exp_name \ +--possible_node_types simple UGRNN MGU GRU delta LSTM \ +--std_message_level INFO \ +--file_message_level NONE \ +--csv_delimiter "," diff --git a/scripts/air_quality/train_aq.sh b/scripts/air_quality/train_aq.sh new file mode 100755 index 00000000..1d323ab8 --- /dev/null +++ b/scripts/air_quality/train_aq.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# This is an example of running EXAMM MPI version on pa28 dataset, output parameters are non engine parameters +# +# The pa28 dataset is not normalized +# To run datasets that are not normalized, make sure to add arguments: +# --normalize min_max for Min Max normalization, or +# --normalize avg_std_dev for Z-score normalization + +cd build + +INPUT_PARAMETERS="Date Time PT08.S5(O3) T RH AH" +# OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)" 
+OUTPUT_PARAMETERS="CO(GT)" + +exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_1" +mkdir -p "${exp_name}/training" +echo "Running base EXAMM rnn training code with UCI Air Quality dataset, results will be saved to: "$exp_name +echo "###-------------------###" + +../../build/rnn_examples/train_rnn \ +--training_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \ +--test_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \ +--time_offset 1 \ +--input_parameter_names $INPUT_PARAMETERS \ +--output_parameter_names $OUTPUT_PARAMETERS \ +--bp_iterations 100000 \ +--output_directory "${exp_name}/training" \ +--std_message_level INFO \ +--file_message_level NONE \ +--genome_file $1 \ +--learning_rate 0.001 \ +--csv_delimiter ";" diff --git a/scripts/air_quality/train_merra.sh b/scripts/air_quality/train_merra.sh new file mode 100755 index 00000000..5fd2931e --- /dev/null +++ b/scripts/air_quality/train_merra.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# This is an example of running EXAMM MPI version on pa28 dataset, output parameters are non engine parameters +# +# The pa28 dataset is not normalized +# To run datasets that are not normalized, make sure to add arguments: +# --normalize min_max for Min Max normalization, or +# --normalize avg_std_dev for Z-score normalization + +cd build + +INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V" +# OUTPUT_PARAMETERS="CO" +OUTPUT_PARAMETERS="CO SO4 SO2 O3" + +exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/multivar_B" +mkdir -p "${exp_name}/training" +echo "Running base EXAMM rnn training code with UCI Air Quality dataset, results will be saved to: "$exp_name"/training" +echo "###-------------------###" + +../../build/rnn_examples/train_rnn \ +--training_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_post.csv \ +--test_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/poc_merra_test.csv \ +--time_offset 1 \ +--input_parameter_names $INPUT_PARAMETERS \ +--output_parameter_names 
$OUTPUT_PARAMETERS \ +--bp_iterations 5 \ +--output_directory "${exp_name}/training" \ +--std_message_level INFO \ +--file_message_level NONE \ +--genome_file $1 \ +--learning_rate 0.01 \ +--csv_delimiter ","