From e69fc712b7cf59ca382743e779a16c09fff31aeb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 14:05:06 -0500 Subject: [PATCH 01/31] Removing c++20 features unsupported by GCC versions < 12.2 --- CMakeLists.txt | 3 ++- rnn/dnas_node.cxx | 4 ++-- rnn/dnas_node.hxx | 7 ++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b34e8f0..9093410b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set (EXACT_VERSION_MINOR 33) #add_definitions( -DEXACT_VERSION="${EXACT_VERSION_MAJOR}.${EXACT_VERSION_MINOR}" ) SET (PLATFORM 64) +set(CMAKE_CXX_STANDARD 20) #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++") #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS") @@ -23,7 +24,7 @@ SET (PLATFORM 64) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "-std=c++20 -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS "-Wall -O3 -funroll-loops -msse3") SET (CMAKE_CXX_FLAGS_DEBUG "-g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -funroll-loops -DNDEBUG") diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 2f040703..957eaba5 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -67,13 +67,13 @@ DNASNode::~DNASNode() { delete node; } -template +template void DNASNode::gumbel_noise(Rng &rng, vector &output) { for (int i = 0; i < output.size(); i++) output[i] = -log(-log(uniform_real_distribution(0.0, 1.0)(rng))); } -template +template void DNASNode::sample_gumbel_softmax(Rng &rng) { z.assign(pi.size(), 0.0); x.assign(pi.size(), 0.0); diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index daa26605..1b63532c 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -7,7 +7,6 @@ using std::string; #include using std::minstd_rand0; using std::uniform_real_distribution; 
-using std::uniform_random_bit_generator; using std::generate_canonical; #include @@ -27,9 +26,8 @@ using std::unique_ptr; #define CRYSTALLIZATION_THRESHOLD 50000 class DNASNode : public RNN_Node_Interface { - private: - template + template static void gumbel_noise(R &rng, vector &output); void calculate_maxi(); @@ -75,12 +73,11 @@ class DNASNode : public RNN_Node_Interface { vector> node_outputs; public: - DNASNode(vector &&nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1); DNASNode(const DNASNode &node); ~DNASNode(); - template + template void sample_gumbel_softmax(Rng &rng); void calculate_z(); From 903dcfb40baf8d06fa770875693a0d198efd95e2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:26:23 -0500 Subject: [PATCH 02/31] Fix minimum c++ requirement in CMakeLists.txt to be compatible with GCC and clang --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9093410b..2ec362ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.6) +cmake_minimum_required (VERSION 2.8) project (EXACT) # The version number. 
@@ -24,10 +24,10 @@ set(CMAKE_CXX_STANDARD 20) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "-Wall -O3 -funroll-loops -msse3") -SET (CMAKE_CXX_FLAGS_DEBUG "-g") -SET (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") -SET (CMAKE_CXX_FLAGS_RELEASE "-O4 -funroll-loops -DNDEBUG") +SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") +SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") +SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib) From 09c5cbcafdb848f4a493c3502baba7fd5db275ca Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:37:33 -0500 Subject: [PATCH 03/31] properly specify minimum CMake version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ec362ca..558fbefe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 3.1) project (EXACT) # The version number. 
From 8ec09ca9826a8e7022c46382c14a7934e02a0731 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 30 Jan 2023 15:41:37 -0500 Subject: [PATCH 04/31] Fixed bug introduced during merge --- rnn/dnas_node.hxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index f56b404b..82ebff65 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -27,7 +27,7 @@ using std::unique_ptr; class DNASNode : public RNN_Node_Interface { private: - template + template static void gumbel_noise(R &rng, vector &output); void calculate_maxi(); @@ -77,7 +77,7 @@ class DNASNode : public RNN_Node_Interface { DNASNode(const DNASNode &node); ~DNASNode(); - template + template void sample_gumbel_softmax(Rng &rng); void calculate_z(); From 1c9bcfff4042964727bc41f944738f4e82c2d5d2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:12:56 -0500 Subject: [PATCH 05/31] Tweaking for clusteR --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 558fbefe..1af3a314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.8) project (EXACT) # The version number. 
@@ -9,6 +9,7 @@ set (EXACT_VERSION_MINOR 33) SET (PLATFORM 64) set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++") #SET (CMAKE_CXX_FLAGS "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS") @@ -24,13 +25,14 @@ set(CMAKE_CXX_STANDARD 20) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3") SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib) +message(STATUS "${CMAKE_CXX_FLAGS}") message(STATUS "project source dir is ${PROJECT_SOURCE_DIR}") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/") From 9c6be46a3141e5cbde600156f9f7b08134600acb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:35:42 -0500 Subject: [PATCH 06/31] Added updated cluster instructions to the README.md --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7fc588a3..449e7194 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Getting Started and Prerequisites EXONA has been developed to compile using CMake, which should be installed before attempting to compile. To use the MPI version, a version of MPI (such as OpenMPI) should be installed. EXACT currently requires libtiff and libpng -The EXACT algorithm can also checkpoint to a database, however this is not required. To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX. 
Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++11 compatible compiler. +The EXACT algorithm can also checkpoint to a database, however this is not required. To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX. Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++20 compatible compiler. If you are using OSX, to set up the environment: @@ -15,6 +15,18 @@ brew install libpng xcode-select --install ``` +On the RIT Cluster Computer, load the following packages using spack: +``` +# CMake +spack load /ux27hbj + +# GCC +spack load gcc@11.2.0 + +# libtiff +spack load /ycf67m3 +``` + To build: ``` @@ -24,6 +36,8 @@ To build: ~/exact/build $ make ``` +You can add `-DCMAKE_BUILD_TYPE=Release` to the invocation of `cmake` for a release build (slower compile times, faster execution). + You may also want to have graphviz installed so you can generate images of the evolved neural networks. EXACT/EXALT/EXAMM will write out evolved genomes in a .gv (graphviz) format for this. For example, can generate a pdf from a gv file (assuming graphviz is installed with): ``` From d7023b64e771dad351ae946f962018de2c4a1acc Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:41:24 -0500 Subject: [PATCH 07/31] Updated format script --- scripts/util/format.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/util/format.sh b/scripts/util/format.sh index 85dddbfd..76fb9efa 100755 --- a/scripts/util/format.sh +++ b/scripts/util/format.sh @@ -1,2 +1,4 @@ #!/bin/bash -find . 
-type f -name "*.*xx" -exec clang-format -style=file -i {} \; +for folder in common examm mpi multithreaded rnn rnn_examples rnn_tests time_series weights word_series; do + find $folder -type f -name "*.*xx" -exec clang-format -style=file -i {} \; +done From 89b3410cebc573430cd244b43ea070ca203fc98f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 11:41:46 -0500 Subject: [PATCH 08/31] Formatting --- rnn/dnas_node.hxx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 3b341195..435e5400 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -8,7 +8,6 @@ using std::string; using std::generate_canonical; using std::minstd_rand0; using std::uniform_real_distribution; -using std::generate_canonical; #include using std::vector; @@ -28,7 +27,7 @@ using std::unique_ptr; class DNASNode : public RNN_Node_Interface { private: template - static void gumbel_noise(R &rng, vector &output); + static void gumbel_noise(R& rng, vector& output); void calculate_maxi(); @@ -81,7 +80,7 @@ class DNASNode : public RNN_Node_Interface { ~DNASNode(); template - void sample_gumbel_softmax(Rng &rng); + void sample_gumbel_softmax(Rng& rng); void calculate_z(); virtual void initialize_lamarckian( From cbf757457e0d56235e6c58a64114d3143f3c0f58 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 31 Jan 2023 12:49:34 -0500 Subject: [PATCH 09/31] Added OpenMPI package to cluster instructions --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 449e7194..c187316a 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@ spack load /ux27hbj # GCC spack load gcc@11.2.0 +# OpenMPI +spack load openmpi@4.1.2 + # libtiff spack load /ycf67m3 ``` From 06f12b588b1bcf1ba4380192eaa7ad2eb18dbf0f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 7 Feb 2023 12:09:54 -0500 Subject: [PATCH 10/31] Adding argument parsing for DNAS --- common/process_arguments.cxx | 9 +++++ examm/examm.cxx 
| 18 +-------- examm/examm.hxx | 2 +- rnn/dnas_node.cxx | 73 +++++++++++++++++++++++++----------- rnn/dnas_node.hxx | 2 +- rnn/generate_nn.cxx | 11 ++++++ rnn/rnn_genome.cxx | 30 ++++----------- rnn/rnn_genome.hxx | 2 + rnn/rnn_node_interface.cxx | 35 ++++++++++++++--- rnn/rnn_node_interface.hxx | 9 ++++- 10 files changed, 124 insertions(+), 67 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index f4bf87a8..885f28a0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -32,6 +32,15 @@ EXAMM* generate_examm_from_arguments( // get_argument(arguments, "--sequence_length_lower_bound", false, sequence_length_lower_bound); // get_argument(arguments, "--sequence_length_upper_bound", false, sequence_length_upper_bound); + vector dnas_node_type_strings; + get_argument_vector(arguments, "--dnas_node_types", false, dnas_node_type_strings); + if (dnas_node_type_strings.size() != 0) { + dnas_node_types.clear(); + for (auto node_type : dnas_node_type_strings) { + dnas_node_types.push_back(node_type_from_string(node_type)); + } + } + GenomeProperty* genome_property = new GenomeProperty(); genome_property->generate_genome_property_from_arguments(arguments); genome_property->get_time_series_parameters(time_series_sets); diff --git a/examm/examm.cxx b/examm/examm.cxx index ce137d6f..f017ab8b 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -205,22 +205,8 @@ void EXAMM::update_log() { void EXAMM::set_possible_node_types(vector possible_node_type_strings) { possible_node_types.clear(); - for (int32_t i = 0; i < (int32_t) possible_node_type_strings.size(); i++) { - string node_type_s = possible_node_type_strings[i]; - - bool found = false; - - for (int32_t j = 0; j < NUMBER_NODE_TYPES; j++) { - if (NODE_TYPES[j].compare(node_type_s) == 0) { - found = true; - possible_node_types.push_back(j); - } - } - - if (!found) { - Log::error("unknown node type: '%s'\n", node_type_s.c_str()); - exit(1); - } + for (auto node_type : 
possible_node_type_strings) { + possible_node_types.push_back(node_type_from_string(node_type)); } } diff --git a/examm/examm.hxx b/examm/examm.hxx index 5ccb545e..ac5d56eb 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -63,7 +63,7 @@ class EXAMM { double split_node_rate; double merge_node_rate; - vector possible_node_types; + vector possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; vector op_log_ordering; map inserted_counts; diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index ba6f3ba3..46a20c69 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -1,4 +1,9 @@ #include +using std::sort; + +#include +using std::pair; + #include #include using std::max; @@ -72,7 +77,7 @@ DNASNode::~DNASNode() { template void DNASNode::gumbel_noise(Rng& rng, vector& output) { - for (int i = 0; i < output.size(); i++) { + for (auto i = 0; i < output.size(); i++) { output[i] = -log(-log(uniform_real_distribution(0.0, 1.0)(rng))); } } @@ -92,18 +97,45 @@ void DNASNode::calculate_z() { xtotal = 0.0; double emax = -10000000; - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { x[i] = g[i] + log(pi[i]); x[i] /= tao; emax = max(emax, x[i]); } - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { x[i] = exp(emax - x[i]); xtotal += x[i]; } - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { z[i] = x[i] / xtotal; } + + if (k > 0) { + pair ps_with_indices[z.size()]; + for (int32_t i = 0; i < (int32_t) z.size(); i++) { + ps_with_indices[i] = pair(i, z[i]); + } + + std::sort(ps_with_indices, ps_with_indices + z.size(), + [](const pair& a, const pair& b) { + // Descending order + return a.second > b.second; + } + ); + + double total = 0.0; + for (int i = 0; i < k; i++) { + total += ps_with_indices[i].second; + } + + for (int i = 0; i < z.size(); i++) { + z[i] = 0.0; + } + + for (int i = 0; i < k; i++) { + z[ps_with_indices[i].first] = 
ps_with_indices[i].second / total; + } + } } void DNASNode::reset(int32_t series_length) { @@ -151,7 +183,7 @@ void DNASNode::input_fired(int32_t time, double incoming_output) { node_outputs[time][maxi] = nodes[maxi]->output_values[time]; output_values[time] = nodes[maxi]->output_values[time]; } else { - for (int i = 0; i < nodes.size(); i++) { + for (auto i = 0; i < nodes.size(); i++) { auto node = nodes[i]; node->input_fired(time, input_values[time]); node_outputs[time][i] = node->output_values[time]; @@ -190,7 +222,7 @@ void DNASNode::try_update_deltas(int32_t time) { d_input[time] += nodes[maxi]->d_input[time]; } else { - for (int i = 0; i < z.size(); i++) { + for (auto i = 0; i < z.size(); i++) { nodes[i]->output_fired(time, delta * z[i]); double p = (x[i] / pi[i]); p *= ((delta * node_outputs[time][i]) / xtotal); @@ -254,7 +286,7 @@ void DNASNode::set_weights(const vector& parameters) { void DNASNode::get_weights(int32_t& offset, vector& parameters) const { // Log::info("pi start %d; ", offset); - for (int i = 0; i < pi.size(); i++) { + for (auto i = 0; i < pi.size(); i++) { parameters[offset++] = pi[i]; } // Log::info_no_header("pi end %d \n", offset); @@ -265,24 +297,23 @@ void DNASNode::get_weights(int32_t& offset, vector& parameters) const { void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; - for (int i = 0; i < pi.size(); i++) { + for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; } // Log::info("Pi indices: %d-%d\n", start, offset); for (auto node : nodes) { node->set_weights(offset, parameters); } - Log::info("Just set weights\n"); calculate_z(); - string s = "Pi = { "; - for (auto p : pi) { - s += std::to_string(p) + ", "; - } - Log::info("%s }\n", s.c_str()); + // string s = "Pi = { "; + // for (auto p : pi) { + // s += std::to_string(p) + ", "; + // } + // Log::info("%s }\n", s.c_str()); } void DNASNode::set_pi(const vector& new_pi) { - for (int i = 0; i < pi.size(); i++) { + for (auto i 
= 0; i < pi.size(); i++) { pi[i] = new_pi[i]; } calculate_maxi(); @@ -293,7 +324,7 @@ void DNASNode::calculate_maxi() { maxi = 0; double max_pi = pi[0]; - for (int i = 1; i < nodes.size(); i++) { + for (auto i = 1; i < nodes.size(); i++) { if (pi[i] > max_pi) { max_pi = pi[i]; maxi = i; @@ -314,11 +345,11 @@ void DNASNode::get_gradients(vector& gradients) { if (counter >= CRYSTALLIZATION_THRESHOLD) { offset += pi.size(); - for (int i = 0; i < nodes.size(); i++) { + for (auto i = 0; i < nodes.size(); i++) { RNN_Node_Interface* node = nodes[i]; if (i == maxi) { node->get_gradients(temp); - for (int j = 0; j < temp.size(); j++) { + for (auto j = 0; j < temp.size(); j++) { gradients[offset++] = temp[j]; } } else { @@ -328,13 +359,13 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; - for (int i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i]; + for (auto i = 0; i < pi.size(); i++) { + gradients[offset++] = d_pi[i] * 0.1; } for (auto node : nodes) { node->get_gradients(temp); - for (int i = 0; i < temp.size(); i++) { + for (auto i = 0; i < temp.size(); i++) { gradients[offset++] = temp[i]; } } diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 435e5400..776119cc 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -22,7 +22,7 @@ using std::unique_ptr; #include "rnn_node.hxx" #include "rnn_node_interface.hxx" -#define CRYSTALLIZATION_THRESHOLD 50000 +#define CRYSTALLIZATION_THRESHOLD 1000 class DNASNode : public RNN_Node_Interface { private: diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index c451a098..f9e3c61a 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -39,12 +39,23 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co case DNAS_NODE: Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n"); exit(1); + default: + Log::fatal("If you are seeing this, an invalid node_kind was used to 
create a node (node_kind = %d\n", node_kind); + exit(1); } + + // Unreachable + return nullptr; } DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types) { vector nodes(node_types.size()); + if (node_types.size() == 0) { + Log::fatal("Node types cannot be empty - failed to create DNAS node!\n"); + exit(1); + } + int i = 0; for (auto node_type : node_types) { nodes[i++] = create_hidden_node(node_type, innovation_counter, depth); diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index c7b60b43..370998ee 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -59,6 +59,9 @@ using std::vector; #include "rnn_node.hxx" #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" +#include "generate_nn.hxx" + +extern vector dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; string parse_fitness(double fitness) { if (fitness == EXAMM_MAX_DOUBLE) { @@ -1628,27 +1631,10 @@ RNN_Node_Interface* RNN_Genome::create_node( WeightType weight_initialize = weight_rules->get_weight_initialize_method(); Log::trace("CREATING NODE, type: '%s'\n", NODE_TYPES[node_type].c_str()); - if (node_type == LSTM_NODE) { - n = new LSTM_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == DELTA_NODE) { - n = new Delta_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == GRU_NODE) { - n = new GRU_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == ENARC_NODE) { - n = new ENARC_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == ENAS_DAG_NODE) { - n = new ENAS_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == RANDOM_DAG_NODE) { - n = new RANDOM_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == MGU_NODE) { - n = new MGU_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == UGRNN_NODE) { - n = new 
UGRNN_Node(++node_innovation_count, HIDDEN_LAYER, depth); - } else if (node_type == SIMPLE_NODE || node_type == JORDAN_NODE || node_type == ELMAN_NODE) { - n = new RNN_Node(++node_innovation_count, HIDDEN_LAYER, depth, node_type); + if (node_type != DNAS_NODE) { + n = create_hidden_node(node_type, node_innovation_count, depth); } else { - Log::fatal("ERROR: attempted to create a node with an unknown node type: %d\n", node_type); - exit(1); + n = create_dnas_node(node_innovation_count, depth, dnas_node_types); } if (mutated_component_weight == WeightType::LAMARCKIAN) { @@ -3213,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { } else if (node_type == DNAS_NODE) { int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); - + int32_t counter; bin_istream.read((char*) &counter, sizeof(int32_t)); vector pi(n_nodes, 0.0); @@ -3224,7 +3210,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { nodes[i] = RNN_Genome::read_node_from_stream(bin_istream); } - DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, node_type, depth, counter); + DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, layer_type, depth, counter); dnas_node->set_pi(pi); node = (RNN_Node_Interface*) dnas_node; } else { diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index acba093c..deaf8bce 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -32,6 +32,8 @@ using std::vector; // mysql can't handle the max float value for some reason #define EXAMM_MAX_DOUBLE 10000000 +extern vector dnas_node_types; + string parse_fitness(double fitness); class RNN_Genome { diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index 2ad8d065..55f5e057 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -1,20 +1,45 @@ #include +using std::max; + +#include + #include using std::ostream; #include using std::string; -#include -using std::max; - #include 
"common/log.hxx" #include "rnn/rnn_genome.hxx" #include "rnn_node_interface.hxx" -extern const int32_t NUMBER_NODE_TYPES = 9; +extern const int32_t NUMBER_NODE_TYPES = 11; extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", - "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG"}; + "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const unordered_map string_to_node_type = { + { "simple", SIMPLE_NODE }, + { "jordan", JORDAN_NODE }, + { "elman", ELMAN_NODE }, + { "ugrnn", UGRNN_NODE }, + { "mgu", MGU_NODE }, + { "gru", GRU_NODE }, + { "delta", DELTA_NODE }, + { "lstm", LSTM_NODE }, + { "enarc", ENARC_NODE }, + { "enas", ENAS_DAG_NODE }, + { "dnas", DNAS_NODE } +}; + +int32_t node_type_from_string(string& node_type) { + std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); }); + + if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) { + return it->second; + } else { + Log::fatal("Invalid node type '%s'\n", node_type.c_str()); + exit(1); + } +} double bound(double value) { if (value < -10.0) { diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx index 26dc0f3c..15ec45cd 100644 --- a/rnn/rnn_node_interface.hxx +++ b/rnn/rnn_node_interface.hxx @@ -12,6 +12,9 @@ using std::uniform_real_distribution; #include using std::string; +#include +using std::unordered_map; + #include using std::vector; @@ -25,6 +28,8 @@ class RNN; extern const int32_t NUMBER_NODE_TYPES; extern const string NODE_TYPES[]; +extern const unordered_map string_to_node_type; +int32_t node_type_from_string(string& node_type); #define SIMPLE_NODE 0 #define JORDAN_NODE 1 @@ -39,6 +44,8 @@ extern const string NODE_TYPES[]; #define RANDOM_DAG_NODE 10 #define DNAS_NODE 11 +int32_t node_type_from_string(string& node_type); + double sigmoid(double value); double sigmoid_derivative(double value); double tanh_derivative(double value); @@ -112,7 +119,7 @@ class 
RNN_Node_Interface { virtual RNN_Node_Interface* copy() const = 0; - void write_to_stream(ostream& out); + virtual void write_to_stream(ostream& out); int32_t get_node_type() const; int32_t get_layer_type() const; From ad5a7a3523fa05a965fb28d3baf4f089b2c67910 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Thu, 9 Feb 2023 13:54:11 -0500 Subject: [PATCH 11/31] Formatting --- common/process_arguments.cxx | 6 ++++++ examm/examm.hxx | 3 ++- rnn/dnas_node.cxx | 3 ++- rnn/dnas_node.hxx | 2 +- rnn/generate_nn.cxx | 4 +++- rnn/rnn_genome.cxx | 6 +++--- rnn/rnn_node_interface.cxx | 31 ++++++++++++++++--------------- 7 files changed, 33 insertions(+), 22 deletions(-) diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 885f28a0..f2e29ac0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -39,6 +39,12 @@ EXAMM* generate_examm_from_arguments( for (auto node_type : dnas_node_type_strings) { dnas_node_types.push_back(node_type_from_string(node_type)); } + + Log::info("Using following node types for dnas: "); + for (auto s : dnas_node_type_strings) { + Log::info_no_header("%s", s.c_str()); + } + Log::info_no_header("\n"); } GenomeProperty* genome_property = new GenomeProperty(); diff --git a/examm/examm.hxx b/examm/examm.hxx index ac5d56eb..c0c0ee03 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -63,7 +63,8 @@ class EXAMM { double split_node_rate; double merge_node_rate; - vector possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; + vector possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, + MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; vector op_log_ordering; map inserted_counts; diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 46a20c69..dcdab7e0 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -116,7 +116,8 @@ void DNASNode::calculate_z() { ps_with_indices[i] = pair(i, z[i]); } - std::sort(ps_with_indices, 
ps_with_indices + z.size(), + std::sort( + ps_with_indices, ps_with_indices + z.size(), [](const pair& a, const pair& b) { // Descending order return a.second > b.second; diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 776119cc..c3d74e6b 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -62,7 +62,7 @@ class DNASNode : public RNN_Node_Interface { int32_t maxi = -1; // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) - int32_t k = -1; + int32_t k = 1; // Whether to re-sample the gumbel softmax distribution when resetting the node. // Can be set externally using DNASNode::set_stochastic diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index f9e3c61a..a84fb36f 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -40,7 +40,9 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n"); exit(1); default: - Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind); + Log::fatal( + "If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind + ); exit(1); } diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 370998ee..7e452ad0 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -50,6 +50,7 @@ using std::vector; #include "dnas_node.hxx" #include "enarc_node.hxx" #include "enas_dag_node.hxx" +#include "generate_nn.hxx" #include "gru_node.hxx" #include "lstm_node.hxx" #include "mgu_node.hxx" @@ -59,9 +60,8 @@ using std::vector; #include "rnn_node.hxx" #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" -#include "generate_nn.hxx" -extern vector dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE }; +extern vector dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; string parse_fitness(double fitness) 
{ if (fitness == EXAMM_MAX_DOUBLE) { @@ -3199,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { } else if (node_type == DNAS_NODE) { int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); - + int32_t counter; bin_istream.read((char*) &counter, sizeof(int32_t)); vector pi(n_nodes, 0.0); diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index 55f5e057..ab5796b2 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -2,7 +2,6 @@ using std::max; #include - #include using std::ostream; @@ -14,24 +13,26 @@ using std::string; #include "rnn_node_interface.hxx" extern const int32_t NUMBER_NODE_TYPES = 11; -extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", - "GRU", "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", + "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; extern const unordered_map string_to_node_type = { - { "simple", SIMPLE_NODE }, - { "jordan", JORDAN_NODE }, - { "elman", ELMAN_NODE }, - { "ugrnn", UGRNN_NODE }, - { "mgu", MGU_NODE }, - { "gru", GRU_NODE }, - { "delta", DELTA_NODE }, - { "lstm", LSTM_NODE }, - { "enarc", ENARC_NODE }, - { "enas", ENAS_DAG_NODE }, - { "dnas", DNAS_NODE } + {"simple", SIMPLE_NODE}, + {"jordan", JORDAN_NODE}, + { "elman", ELMAN_NODE}, + { "ugrnn", UGRNN_NODE}, + { "mgu", MGU_NODE}, + { "gru", GRU_NODE}, + { "delta", DELTA_NODE}, + { "lstm", LSTM_NODE}, + { "enarc", ENARC_NODE}, + { "enas", ENAS_DAG_NODE}, + { "dnas", DNAS_NODE} }; int32_t node_type_from_string(string& node_type) { - std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); }); + std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c) { + return std::tolower(c); + }); if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) { return it->second; 
From eda79a02f422d6b1a5dc600acefcccac5f09f7f9 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 12 Apr 2023 10:54:22 -0400 Subject: [PATCH 12/31] Committing experiment scripts --- ground_truth_experiments/cell_experiments.sh | 40 ++++++++++++++++++++ ground_truth_experiments/source_genomes.sh | 33 ++++++++++++++++ rnn/dnas_node.cxx | 30 +++++++++++++-- rnn/dnas_node.hxx | 5 ++- rnn_examples/train_rnn.cxx | 25 +++++++----- 5 files changed, 118 insertions(+), 15 deletions(-) create mode 100755 ground_truth_experiments/cell_experiments.sh create mode 100755 ground_truth_experiments/source_genomes.sh diff --git a/ground_truth_experiments/cell_experiments.sh b/ground_truth_experiments/cell_experiments.sh new file mode 100755 index 00000000..9c0e29d4 --- /dev/null +++ b/ground_truth_experiments/cell_experiments.sh @@ -0,0 +1,40 @@ +#!/usr/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 +bp_epoch=1000 + +for SIZE in 1 2 4; do + for CELL_TYPE in dnas; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + output_dir=ground_truth_experiments/results/$CELL_TYPE/$SIZE/$fold + mkdir -p $output_dir + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --stochastic \ + --rnn_type $CELL_TYPE \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --random_sequence_length \ + --sequence_length_lower_bound 50 \ + --sequence_length_upper_bound 100 \ + --max_recurrent_depth 1 \ + --weight_update 
adagrad \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level ERROR \ + --file_message_level INFO & + done + done + wait +done + diff --git a/ground_truth_experiments/source_genomes.sh b/ground_truth_experiments/source_genomes.sh new file mode 100755 index 00000000..1c251134 --- /dev/null +++ b/ground_truth_experiments/source_genomes.sh @@ -0,0 +1,33 @@ +#!/usr/bin/zsh +# This is an example of running EXAMM MPI version on c172 dataset +# +# The c172 dataset is not normalized +# To run datasets that's not normalized, make sure to add arguments: +# --normalize min_max for Min Max normalization, or +# --normalize avg_std_dev for Z-score normalization + +INPUT_PARAMETERS="AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd" +OUTPUT_PARAMETERS="Pitch" + +for i in 0 1 2 3 4 5 6 7 8 9; do + exp_name="ground_truth_experiments/results/source_genomes/$i" + mkdir -p $exp_name + echo $exp_name + mpirun -np 5 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --number_islands 8 \ + --island_size 8 \ + --max_genomes 10000 \ + --bp_iterations 5 \ + --num_mutations 2 \ + --normalize min_max \ + --output_directory $exp_name \ + --possible_node_types simple UGRNN MGU GRU delta LSTM \ + --std_message_level ERROR \ + --file_message_level INFO & +done +wait diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index dcdab7e0..f87868f3 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -49,7 +49,6 @@ DNASNode::DNASNode(const DNASNode& src) : RNN_Node_Interface(src.innovation_numb g = src.g; x = src.x; xtotal = src.xtotal; - tao = 
src.tao; stochastic = src.stochastic; counter = src.counter; maxi = src.maxi; @@ -92,8 +91,32 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) { calculate_z(); } +double DNASNode::calculate_pi_lr() { + double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; + if (percentage_done < 0.33) { + return 0.0; + } else if (percentage_done < 0.66) { + double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; + return 0.5 + percentage_done_with_phase * .5; + } else { + return 0.1; + } +} + +double DNASNode::calculate_tao() { + double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; + if (percentage_done < 0.33) { + return 1.33; + } else if (percentage_done < 0.66) { + double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; + return 1.33 - percentage_done_with_phase * 0.66; + } else { + return 0.33; + } +} + void DNASNode::calculate_z() { - tao = max(1.0 / 3.0, 1.0 / (1.0 + (double) counter * 0.05)); + tao = calculate_tao(); xtotal = 0.0; double emax = -10000000; @@ -360,8 +383,9 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; + double pi_lr = calculate_pi_lr(); for (auto i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i] * 0.1; + gradients[offset++] = d_pi[i] * pi_lr; } for (auto node : nodes) { diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index c3d74e6b..76aa6969 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -56,10 +56,9 @@ class DNASNode : public RNN_Node_Interface { // A vector to put gumbel noise into; just to avoid re-allocation vector noise; - // Temperature used when drawing samples from Gumbel-Softmax(pi) - double tao = 1.0; int32_t counter = 0; int32_t maxi = -1; + double tao; // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) int32_t k = 1; @@ -82,6 +81,8 @@ class DNASNode : public RNN_Node_Interface { template void sample_gumbel_softmax(Rng& rng); void 
calculate_z(); + double calculate_tao(); + double calculate_pi_lr(); virtual void initialize_lamarckian( minstd_rand0& generator, NormalDistribution& normal_distribution, double mu, double sigma diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index ffdf8999..8c5b0b1c 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -100,6 +100,9 @@ int main(int argc, char** argv) { int32_t max_recurrent_depth; get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + int32_t hidden_layer_size = number_inputs; + get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); + WeightRules* weight_rules = new WeightRules(arguments); weight_update_method = new WeightUpdate(); @@ -112,59 +115,59 @@ int main(int argc, char** argv) { Log::info("RNN TYPE = %s\n", rnn_type.c_str()); if (rnn_type == "lstm") { genome = create_lstm( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "gru") { genome = create_gru( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "delta") { genome = create_delta( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "mgu") { genome = create_mgu( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == 
"ugrnn") { genome = create_ugrnn( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "ff") { genome = create_ff( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "jordan") { genome = create_jordan( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "elman") { genome = create_elman( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, weight_rules ); } else if (rnn_type == "dnas") { - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE}; + vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; genome = create_dnas_nn( input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, weight_rules ); } else { - Log::fatal("ERROR: incorrect rnn type\n"); + Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); Log::fatal("Possibilities are:\n"); Log::fatal(" lstm\n"); Log::fatal(" gru\n"); @@ -232,6 +235,8 @@ int main(int argc, char** argv) { genome->get_weights(best_parameters); rnn->set_weights(best_parameters); + genome->write_to_file(output_directory + "/output_genome.bin"); + Log::info("TRAINING ERRORS:\n"); Log::info("MSE: %lf\n", genome->get_mse(best_parameters, training_inputs, training_outputs)); Log::info("MAE: 
%lf\n", genome->get_mae(best_parameters, training_inputs, training_outputs)); From cd597a43b03a901a23ed5fdbb4a0f97a964ae997 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 20 Sep 2023 18:01:43 -0400 Subject: [PATCH 13/31] Commit for AISTATS results --- common/files.hxx | 2 + common/log.cxx | 8 +- common/process_arguments.cxx | 17 +- initial_integration_experiments/dnas.zsh | 55 ++++++ .../post_training_dnas.zsh | 31 +++ mpi/examm_mpi.cxx | 7 +- rnn/dnas_node.cxx | 49 +++-- rnn/dnas_node.hxx | 12 +- rnn/rnn_genome.cxx | 76 +++++++- rnn/rnn_genome.hxx | 4 + rnn_examples/CMakeLists.txt | 3 + rnn_examples/dnas_info.cxx | 96 +++++++++ rnn_examples/train_rnn.cxx | 182 ++++++++++-------- 13 files changed, 427 insertions(+), 115 deletions(-) create mode 100755 initial_integration_experiments/dnas.zsh create mode 100755 initial_integration_experiments/post_training_dnas.zsh create mode 100644 rnn_examples/dnas_info.cxx diff --git a/common/files.hxx b/common/files.hxx index ac23ff0d..8c4c8a43 100644 --- a/common/files.hxx +++ b/common/files.hxx @@ -1,6 +1,8 @@ #ifndef EXACT_BOINC_COMMON_HXX #define EXACT_BOINC_COMMON_HXX +#include + #include using std::runtime_error; diff --git a/common/log.cxx b/common/log.cxx index 623475e8..6f82e67f 100644 --- a/common/log.cxx +++ b/common/log.cxx @@ -79,11 +79,11 @@ int8_t Log::parse_level_from_string(string level) { void Log::initialize(const vector& arguments) { // TODO: should read these from the CommandLine (to be created) - string std_message_level_str, file_message_level_str; + string std_message_level_str = "INFO", file_message_level_str = "NONE"; - get_argument(arguments, "--std_message_level", true, std_message_level_str); - get_argument(arguments, "--file_message_level", true, file_message_level_str); - get_argument(arguments, "--output_directory", true, output_directory); + get_argument(arguments, "--std_message_level", false, std_message_level_str); + get_argument(arguments, "--file_message_level", false, 
file_message_level_str); + get_argument(arguments, "--output_directory", false, output_directory); std_message_level = parse_level_from_string(std_message_level_str); file_message_level = parse_level_from_string(file_message_level_str); diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index f2e29ac0..4577d86c 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -10,7 +10,7 @@ using std::vector; EXAMM* generate_examm_from_arguments( const vector& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules, RNN_Genome* seed_genome -) { +) { Log::info("Getting arguments for EXAMM\n"); int32_t island_size; get_argument(arguments, "--island_size", true, island_size); @@ -186,11 +186,18 @@ void get_train_validation_data( time_series_sets->export_training_series(time_offset, train_inputs, train_outputs); time_series_sets->export_test_series(time_offset, validation_inputs, validation_outputs); - int32_t sequence_length = 0; - if (get_argument(arguments, "--sequence_length", false, sequence_length)) { - Log::info("Slicing input training data with time sequence length: %d\n", sequence_length); - slice_input_data(train_inputs, train_outputs, sequence_length); + int32_t train_sequence_length = 0; + if (get_argument(arguments, "--train_sequence_length", false, train_sequence_length)) { + Log::info("Slicing input training data with time sequence length: %d\n", train_sequence_length); + slice_input_data(train_inputs, train_outputs, train_sequence_length); + } + + int32_t validation_sequence_length = 0; + if (get_argument(arguments, "--validation_sequence_length", false, validation_sequence_length)) { + Log::info("Slicing input validation data with time sequence length: %d\n", validation_sequence_length); + slice_input_data(validation_inputs, validation_outputs, validation_sequence_length); } + Log::info("Generating time series data finished! 
\n"); } diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh new file mode 100755 index 00000000..490e0c3b --- /dev/null +++ b/initial_integration_experiments/dnas.zsh @@ -0,0 +1,55 @@ +#!/usr/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --stochastic \ + --possible_node_types DNAS \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --crystalize_iters $crystalize_iters \ + --max_genomes 10000 \ + --island_size 8 \ + --number_islands 8 \ + --dnas_k $k + + best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128 256 512 1024; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 
7; do + run_examm + done + wait + done + done +done diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh new file mode 100755 index 00000000..f3d355f1 --- /dev/null +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -0,0 +1,31 @@ +#!/usr/bin/zsh +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' + +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $OUTPUT_DIRECTORY \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --crystalize_iters $CRYSTALIZE_ITERS \ + --dnas_k $k + +} + +post_training diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index 7886d91d..c1f1dd1c 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -203,13 +203,18 @@ void worker(int32_t rank) { } else if (tag == GENOME_LENGTH_TAG) { Log::debug("received genome!\n"); RNN_Genome* genome = receive_genome_from(0); - + // have each worker write the backproagation to a separate log file string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank); Log::set_id(log_id); + + vector params; + genome->get_weights(params); + genome->backpropagate_stochastic( training_inputs, training_outputs, 
validation_inputs, validation_outputs, weight_update_method ); + Log::release_id(log_id); // go back to the worker's log for MPI communication diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index f87868f3..ac0e0aa1 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -1,6 +1,8 @@ #include using std::sort; +#include + #include using std::pair; @@ -13,13 +15,16 @@ using std::max; #include "common/log.hxx" #include "dnas_node.hxx" +int32_t DNASNode::CRYSTALLIZATION_THRESHOLD = 1000; +int32_t DNASNode::k = -1; + DNASNode::DNASNode( vector&& _nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter ) : RNN_Node_Interface(_innovation_number, _type, _depth), nodes(_nodes), pi(vector(nodes.size(), 1.0)), - z(vector(nodes.size())), + z(vector(nodes.size(), 0.0)), x(vector(nodes.size())), g(vector(nodes.size())), d_pi(vector(nodes.size())), @@ -87,20 +92,11 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) { x.assign(pi.size(), 0.0); gumbel_noise(rng, g); - calculate_z(); } double DNASNode::calculate_pi_lr() { - double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; - if (percentage_done < 0.33) { - return 0.0; - } else if (percentage_done < 0.66) { - double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 0.5 + percentage_done_with_phase * .5; - } else { - return 0.1; - } + return 0.1; } double DNASNode::calculate_tao() { @@ -109,9 +105,9 @@ double DNASNode::calculate_tao() { return 1.33; } else if (percentage_done < 0.66) { double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 1.33 - percentage_done_with_phase * 0.66; + return 1.33 - percentage_done_with_phase * 1.15; } else { - return 0.33; + return 0.18; } } @@ -162,6 +158,23 @@ void DNASNode::calculate_z() { } } +void DNASNode::print_info() { + printf(" "); + int best_pi_idx = 0; + for (int i = 0; i < nodes.size(); i++) { + printf("%-10s & ", std::to_string(pi[i]).c_str()); + if (pi[i] > pi[best_pi_idx]) + best_pi_idx 
= i; + } + printf("\n"); + Log::info("Node types: "); + for (auto node : nodes) { + Log::info_no_header("%d ", node->node_type); + } + Log::info_no_header("\n "); + Log::info("Best node: %i, node type: %d\n", best_pi_idx, nodes[best_pi_idx]->node_type); +} + void DNASNode::reset(int32_t series_length) { d_pi = vector(pi.size(), 0.0); d_input = vector(series_length, 0.0); @@ -309,6 +322,7 @@ void DNASNode::set_weights(const vector& parameters) { } void DNASNode::get_weights(int32_t& offset, vector& parameters) const { + int start = offset; // Log::info("pi start %d; ", offset); for (auto i = 0; i < pi.size(); i++) { parameters[offset++] = pi[i]; @@ -323,17 +337,14 @@ void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; + if (pi[i] < 0.1) + pi[i] = 0.1; } - // Log::info("Pi indices: %d-%d\n", start, offset); + for (auto node : nodes) { node->set_weights(offset, parameters); } calculate_z(); - // string s = "Pi = { "; - // for (auto p : pi) { - // s += std::to_string(p) + ", "; - // } - // Log::info("%s }\n", s.c_str()); } void DNASNode::set_pi(const vector& new_pi) { diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index 76aa6969..00867ffe 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -22,8 +22,6 @@ using std::unique_ptr; #include "rnn_node.hxx" #include "rnn_node_interface.hxx" -#define CRYSTALLIZATION_THRESHOLD 1000 - class DNASNode : public RNN_Node_Interface { private: template @@ -60,9 +58,6 @@ class DNASNode : public RNN_Node_Interface { int32_t maxi = -1; double tao; - // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) - int32_t k = 1; - // Whether to re-sample the gumbel softmax distribution when resetting the node. 
// Can be set externally using DNASNode::set_stochastic bool stochastic = true; @@ -71,6 +66,11 @@ class DNASNode : public RNN_Node_Interface { vector> node_outputs; public: + static int32_t CRYSTALLIZATION_THRESHOLD; + + // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) + static int32_t k; + DNASNode( vector&& nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1 @@ -110,6 +110,8 @@ class DNASNode : public RNN_Node_Interface { virtual void reset(int32_t _series_length); virtual void write_to_stream(ostream& out); + void print_info(); + virtual RNN_Node_Interface* copy() const; void set_stochastic(bool stochastic); diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 7e452ad0..72868d5e 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -3221,16 +3221,35 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { node->enabled = enabled; return node; } + +#define MAGIC 0xFA + +#define read_magic(place) \ + { \ + uint8_t boo = MAGIC;\ + bin_istream.read((char *) &boo, sizeof(uint8_t)); \ + if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \ + } + +#define write_magic() \ + {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));} + void RNN_Genome::read_from_stream(istream& bin_istream) { Log::debug("READING GENOME FROM STREAM\n"); + + read_magic(__LINE__); bin_istream.read((char*) &generation_id, sizeof(int32_t)); bin_istream.read((char*) &group_id, sizeof(int32_t)); bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); + read_magic(__LINE__); + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); + read_magic(__LINE__); + WeightType weight_initialize = WeightType::NONE; WeightType weight_inheritance = WeightType::NONE; WeightType mutated_component_weight = WeightType::NONE; @@ -3239,6 
+3258,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &weight_inheritance, sizeof(int32_t)); bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t)); + read_magic(__LINE__); + weight_rules = new WeightRules(); weight_rules->set_weight_initialize_method(weight_initialize); weight_rules->set_weight_inheritance_method(weight_inheritance); @@ -3260,8 +3281,10 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generator_iss(generator_str); generator_iss >> generator; - string rng_0_1_str; - read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); + read_magic(__LINE__); + + // string rng_0_1_str; + // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); // So for some reason this was serialized incorrectly for some genomes, // but the value should always be the same so we really don't need to de-serialize it anways and can just // assign it a constant value @@ -3275,6 +3298,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generated_by_map_iss(generated_by_map_str); read_map(generated_by_map_iss, generated_by_map); + read_magic(__LINE__); + bin_istream.read((char*) &best_validation_mse, sizeof(double)); bin_istream.read((char*) &best_validation_mae, sizeof(double)); @@ -3286,6 +3311,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { initial_parameters.assign(initial_parameters_v, initial_parameters_v + n_initial_parameters); delete[] initial_parameters_v; + read_magic(__LINE__); + int32_t n_best_parameters; bin_istream.read((char*) &n_best_parameters, sizeof(int32_t)); Log::debug("reading %d best parameters.\n", n_best_parameters); @@ -3294,6 +3321,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { best_parameters.assign(best_parameters_v, best_parameters_v + n_best_parameters); delete[] best_parameters_v; + read_magic(__LINE__); + input_parameter_names.clear(); int32_t n_input_parameter_names; bin_istream.read((char*) 
&n_input_parameter_names, sizeof(int32_t)); @@ -3304,6 +3333,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { input_parameter_names.push_back(input_parameter_name); } + read_magic(__LINE__); + output_parameter_names.clear(); int32_t n_output_parameter_names; bin_istream.read((char*) &n_output_parameter_names, sizeof(int32_t)); @@ -3314,6 +3345,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { output_parameter_names.push_back(output_parameter_name); } + read_magic(__LINE__); + int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); Log::debug("reading %d nodes.\n", n_nodes); @@ -3321,6 +3354,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { nodes.clear(); for (int32_t i = 0; i < n_nodes; i++) { nodes.push_back(RNN_Genome::read_node_from_stream(bin_istream)); + read_magic(__LINE__); } int32_t n_edges; @@ -3347,6 +3381,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); edge->enabled = enabled; edges.push_back(edge); + read_magic(__LINE__); } int32_t n_recurrent_edges; @@ -3378,6 +3413,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); recurrent_edge->enabled = enabled; recurrent_edges.push_back(recurrent_edge); + read_magic(__LINE__); } read_binary_string(bin_istream, normalize_type, "normalize_type"); @@ -3402,6 +3438,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream normalize_std_devs_iss(normalize_std_devs_str); read_map(normalize_std_devs_iss, normalize_std_devs); + read_magic(__LINE__); + assign_reachability(); } @@ -3425,13 +3463,20 @@ void RNN_Genome::write_to_file(string bin_filename) { void RNN_Genome::write_to_stream(ostream& bin_ostream) { Log::debug("WRITING GENOME TO STREAM\n"); + + write_magic(); + bin_ostream.write((char*) &generation_id, sizeof(int32_t)); bin_ostream.write((char*) &group_id, sizeof(int32_t)); bin_ostream.write((char*) &bp_iterations, 
sizeof(int32_t)); + write_magic(); + bin_ostream.write((char*) &use_dropout, sizeof(bool)); bin_ostream.write((char*) &dropout_probability, sizeof(double)); + write_magic(); + WeightType weight_initialize = weight_rules->get_weight_initialize_method(); WeightType weight_inheritance = weight_rules->get_weight_inheritance_method(); WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -3439,6 +3484,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &weight_inheritance, sizeof(int32_t)); bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t)); + write_magic(); + Log::debug("generation_id: %d\n", generation_id); Log::debug("bp_iterations: %d\n", bp_iterations); @@ -3456,16 +3503,20 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { string generator_str = generator_oss.str(); write_binary_string(bin_ostream, generator_str, "generator"); - ostringstream rng_0_1_oss; - rng_0_1_oss << rng_0_1; - string rng_0_1_str = rng_0_1_oss.str(); - write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); + write_magic(); + + // ostringstream rng_0_1_oss; + // rng_0_1_oss << rng_0_1; + // string rng_0_1_str = rng_0_1_oss.str(); + // write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); ostringstream generated_by_map_oss; write_map(generated_by_map_oss, generated_by_map); string generated_by_map_str = generated_by_map_oss.str(); write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map"); + write_magic(); + bin_ostream.write((char*) &best_validation_mse, sizeof(double)); bin_ostream.write((char*) &best_validation_mae, sizeof(double)); @@ -3474,18 +3525,24 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &n_initial_parameters, sizeof(int32_t)); bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size()); + write_magic(); + int32_t n_best_parameters = (int32_t) best_parameters.size(); 
bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t)); if (n_best_parameters) { bin_ostream.write((char*) &best_parameters[0], sizeof(double) * best_parameters.size()); } + write_magic(); + int32_t n_input_parameter_names = (int32_t) input_parameter_names.size(); bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) { write_binary_string(bin_ostream, input_parameter_names[i], "input_parameter_names[" + std::to_string(i) + "]"); } + write_magic(); + int32_t n_output_parameter_names = (int32_t) output_parameter_names.size(); bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) { @@ -3494,6 +3551,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); } + write_magic(); + int32_t n_nodes = (int32_t) nodes.size(); bin_ostream.write((char*) &n_nodes, sizeof(int32_t)); Log::debug("writing %d nodes.\n", n_nodes); @@ -3504,6 +3563,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { nodes[i]->depth, nodes[i]->parameter_name.c_str() ); nodes[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_edges = (int32_t) edges.size(); @@ -3516,6 +3576,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { edges[i]->output_innovation_number ); edges[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_recurrent_edges = (int32_t) recurrent_edges.size(); @@ -3529,6 +3590,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); recurrent_edges[i]->write_to_stream(bin_ostream); + write_magic(); } write_binary_string(bin_ostream, normalize_type, "normalize_type"); @@ -3552,6 +3614,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { write_map(normalize_std_devs_oss, normalize_std_devs); string normalize_std_devs_str = normalize_std_devs_oss.str(); write_binary_string(bin_ostream, normalize_std_devs_str, "normalize_std_devs"); + + write_magic(); } 
void RNN_Genome::update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count) { diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index deaf8bce..d6330512 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -325,6 +325,10 @@ class RNN_Genome { ); vector pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type); + const vector &get_nodes() { + return this->nodes; + } + void update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count); vector get_innovation_list(); diff --git a/rnn_examples/CMakeLists.txt b/rnn_examples/CMakeLists.txt index 2bfda532..f5e294c6 100644 --- a/rnn_examples/CMakeLists.txt +++ b/rnn_examples/CMakeLists.txt @@ -16,3 +16,6 @@ target_link_libraries(evaluate_rnns_multi_offset examm_strategy exact_common exa add_executable(rnn_statistics rnn_statistics.cxx) target_link_libraries(rnn_statistics examm_strategy exact_common exact_time_series exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) +add_executable(dnas_info dnas_info.cxx) +target_link_libraries(dnas_info examm_strategy exact_common exact_time_series exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) + diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx new file mode 100644 index 00000000..74fd6519 --- /dev/null +++ b/rnn_examples/dnas_info.cxx @@ -0,0 +1,96 @@ +#include +#include +using std::getline; +using std::ifstream; +using std::ofstream; + +#include +using std::minstd_rand0; +using std::uniform_real_distribution; + +#include +using std::string; + +#include +using std::vector; + +#include "common/arguments.hxx" +#include "common/files.hxx" +#include "common/log.hxx" +#include "rnn/generate_nn.hxx" +#include "rnn/gru_node.hxx" +#include "rnn/lstm_node.hxx" +#include "rnn/rnn_edge.hxx" +#include "rnn/rnn_genome.hxx" +#include "rnn/rnn_node.hxx" +#include "rnn/rnn_node_interface.hxx" +#include "time_series/time_series.hxx" 
+#include "weights/weight_rules.hxx" +#include "weights/weight_update.hxx" + +vector > > training_inputs; +vector > > training_outputs; +vector > > test_inputs; +vector > > test_outputs; + +bool random_sequence_length; +int32_t sequence_length_lower_bound = 30; +int32_t sequence_length_upper_bound = 100; + +RNN_Genome* genome; +RNN* rnn; +WeightUpdate* weight_update_method; +int32_t bp_iterations; +bool using_dropout; +double dropout_probability; + +ofstream* log_file; +string output_directory; + +double objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double error = 0.0; + + for (int32_t i = 0; i < (int32_t) training_inputs.size(); i++) { + error += rnn->prediction_mae(training_inputs[i], training_outputs[i], false, true, 0.0); + } + + return -error; +} + +double test_objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double total_error = 0.0; + + for (int32_t i = 0; i < (int32_t) test_inputs.size(); i++) { + double error = rnn->prediction_mse(test_inputs[i], test_outputs[i], false, true, 0.0); + total_error += error; + + Log::info("output for series[%d]: %lf\n", i, error); + } + + return -total_error; +} + +int main(int argc, char** argv) { + vector arguments = vector(argv, argv + argc); + + Log::initialize(arguments); + Log::set_id("main"); + + string filename; + get_argument(arguments, "--filename", true, filename); + + RNN_Genome genome(filename); + + for (auto node : genome.get_nodes()) { + if (DNASNode *d = dynamic_cast(node)) { + std::cout << "'" << filename << "': "; + d->print_info(); + } + } + + Log::release_id("main"); +} diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 8c5b0b1c..02d7db80 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -85,24 +85,20 @@ int main(int argc, char** argv) { int32_t time_offset = 1; get_argument(arguments, "--time_offset", true, time_offset); + int32_t crystallization_threshold = 1000; + 
get_argument(arguments, "--crystalize_iters", false, crystallization_threshold); + DNASNode::CRYSTALLIZATION_THRESHOLD = crystallization_threshold; + + int32_t k = -1; + get_argument(arguments, "--dnas_k", false, k); + DNASNode::k = k; + time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); int number_inputs = time_series_sets->get_number_inputs(); // int number_outputs = time_series_sets->get_number_outputs(); - string rnn_type; - get_argument(arguments, "--rnn_type", true, rnn_type); - - int32_t num_hidden_layers; - get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); - - int32_t max_recurrent_depth; - get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); - - int32_t hidden_layer_size = number_inputs; - get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); - WeightRules* weight_rules = new WeightRules(arguments); weight_update_method = new WeightUpdate(); @@ -111,74 +107,110 @@ int main(int argc, char** argv) { vector input_parameter_names = time_series_sets->get_input_parameter_names(); vector output_parameter_names = time_series_sets->get_output_parameter_names(); - RNN_Genome* genome; - Log::info("RNN TYPE = %s\n", rnn_type.c_str()); - if (rnn_type == "lstm") { - genome = create_lstm( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "gru") { - genome = create_gru( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "delta") { - genome = create_delta( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); + string genome_file; + get_argument(arguments, "--genome_file", false, genome_file); + 
Log::info("RNN_GENOME = <%s> \n", genome_file.c_str()); - } else if (rnn_type == "mgu") { - genome = create_mgu( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ugrnn") { - genome = create_ugrnn( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ff") { - genome = create_ff( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "jordan") { - genome = create_jordan( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); + RNN_Genome* genome; - } else if (rnn_type == "elman") { - genome = create_elman( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules - ); - } else if (rnn_type == "dnas") { - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; - genome = create_dnas_nn( - input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, - weight_rules - ); + if (genome_file.size() != 0) { + genome = new RNN_Genome(genome_file); + Log::info("best weights: { "); + for (double &d : genome->get_best_parameters()) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); + + vector params; + genome->get_weights(params); + Log::info("current weights: { "); + for (double &d : params) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); } else { - Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); - Log::fatal("Possibilities are:\n"); - Log::fatal(" lstm\n"); - Log::fatal(" gru\n"); - Log::fatal(" ff\n"); - Log::fatal(" jordan\n"); - Log::fatal(" elman\n"); - exit(1); + + string rnn_type; + get_argument(arguments, "--rnn_type", 
true, rnn_type); + + Log::info("RNN TYPE = %s\n", rnn_type.c_str()); + + int32_t num_hidden_layers; + get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); + + int32_t max_recurrent_depth; + get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + + int32_t hidden_layer_size = number_inputs; + get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); + + if (rnn_type == "lstm") { + genome = create_lstm( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "gru") { + genome = create_gru( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "delta") { + genome = create_delta( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "mgu") { + genome = create_mgu( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "ugrnn") { + genome = create_ugrnn( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "ff") { + genome = create_ff( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "jordan") { + genome = create_jordan( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + + } else if (rnn_type == "elman") { + genome = create_elman( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, + weight_rules + ); + } else if (rnn_type == "dnas") { + vector node_types = {SIMPLE_NODE, LSTM_NODE, 
GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; + genome = create_dnas_nn( + input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, + weight_rules + ); + } else { + Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); + Log::fatal("Possibilities are:\n"); + Log::fatal(" lstm\n"); + Log::fatal(" gru\n"); + Log::fatal(" ff\n"); + Log::fatal(" jordan\n"); + Log::fatal(" elman\n"); + exit(1); + } } get_argument(arguments, "--bp_iterations", true, bp_iterations); - genome->set_bp_iterations(bp_iterations); + genome->set_bp_iterations(bp_iterations + genome->get_bp_iterations()); get_argument(arguments, "--output_directory", true, output_directory); if (output_directory != "") { @@ -211,7 +243,7 @@ int main(int argc, char** argv) { using_dropout = false; - genome->initialize_randomly(); + genome->set_weights(genome->get_best_parameters()); double learning_rate = 0.001; get_argument(arguments, "--learning_rate", false, learning_rate); From fa03e78c21bd98f20b5e4301a80d569eae9b8c67 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 12 Dec 2023 19:18:05 -0500 Subject: [PATCH 14/31] Preparing for gecco 2024 experiments --- common/color_table.cxx | 3 +- common/process_arguments.cxx | 2 +- initial_integration_experiments/dnas.zsh | 4 +- .../post_training_dnas.zsh | 2 +- mpi/examm_mpi.cxx | 2 +- rnn/dnas_node.cxx | 9 +-- rnn/rnn_genome.cxx | 60 ++++++++++--------- rnn/rnn_genome.hxx | 2 +- rnn_examples/dnas_info.cxx | 6 +- rnn_examples/train_rnn.cxx | 39 ++++++------ 10 files changed, 68 insertions(+), 61 deletions(-) diff --git a/common/color_table.cxx b/common/color_table.cxx index d9e743b0..d0c42a21 100644 --- a/common/color_table.cxx +++ b/common/color_table.cxx @@ -1026,7 +1026,8 @@ const static double bent_cool_warm[] = { 1.0, 177, 1, - 39}; + 39, +}; Color get_colormap(double value) { Color c; diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 4577d86c..65efa7b6 100644 --- 
a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -10,7 +10,7 @@ using std::vector; EXAMM* generate_examm_from_arguments( const vector& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules, RNN_Genome* seed_genome -) { +) { Log::info("Getting arguments for EXAMM\n"); int32_t island_size; get_argument(arguments, "--island_size", true, island_size); diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 490e0c3b..9ef90cca 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -1,12 +1,12 @@ #!/usr/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' +OUTPUT_PARAMETERS='E1_EGT1' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index f3d355f1..8117dadb 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -1,6 +1,6 @@ #!/usr/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM' +OUTPUT_PARAMETERS='E1_EGT1' offset=1 
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index c1f1dd1c..227c3a85 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -203,7 +203,7 @@ void worker(int32_t rank) { } else if (tag == GENOME_LENGTH_TAG) { Log::debug("received genome!\n"); RNN_Genome* genome = receive_genome_from(0); - + // have each worker write the backproagation to a separate log file string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank); Log::set_id(log_id); diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index ac0e0aa1..465c024c 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -2,7 +2,6 @@ using std::sort; #include - #include using std::pair; @@ -162,9 +161,10 @@ void DNASNode::print_info() { printf(" "); int best_pi_idx = 0; for (int i = 0; i < nodes.size(); i++) { - printf("%-10s & ", std::to_string(pi[i]).c_str()); - if (pi[i] > pi[best_pi_idx]) + printf("%-10s & ", std::to_string(pi[i]).c_str()); + if (pi[i] > pi[best_pi_idx]) { best_pi_idx = i; + } } printf("\n"); Log::info("Node types: "); @@ -337,8 +337,9 @@ void DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; - if (pi[i] < 0.1) + if (pi[i] < 0.1) { pi[i] = 0.1; + } } for (auto node : nodes) { diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 72868d5e..833feee6 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -3224,19 +3224,25 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { #define MAGIC 0xFA -#define read_magic(place) \ - { \ - uint8_t boo = MAGIC;\ - bin_istream.read((char *) &boo, sizeof(uint8_t)); \ - if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \ - } +#define read_magic(place) \ + { \ + uint8_t boo = MAGIC; \ + bin_istream.read((char*) &boo, sizeof(uint8_t)); \ + if (boo != MAGIC) { \ + Log::error("ERROR IN SERIALIZING - 
FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); \ + exit(-1); \ + } \ + } -#define write_magic() \ - {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));} +#define write_magic() \ + { \ + uint8_t xxmagic = MAGIC; \ + bin_ostream.write((char*) &xxmagic, sizeof(uint8_t)); \ + } void RNN_Genome::read_from_stream(istream& bin_istream) { Log::debug("READING GENOME FROM STREAM\n"); - + read_magic(__LINE__); bin_istream.read((char*) &generation_id, sizeof(int32_t)); @@ -3244,12 +3250,12 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); read_magic(__LINE__); - + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); read_magic(__LINE__); - + WeightType weight_initialize = WeightType::NONE; WeightType weight_inheritance = WeightType::NONE; WeightType mutated_component_weight = WeightType::NONE; @@ -3259,7 +3265,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t)); read_magic(__LINE__); - + weight_rules = new WeightRules(); weight_rules->set_weight_initialize_method(weight_initialize); weight_rules->set_weight_inheritance_method(weight_inheritance); @@ -3282,7 +3288,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { generator_iss >> generator; read_magic(__LINE__); - + // string rng_0_1_str; // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); // So for some reason this was serialized incorrectly for some genomes, @@ -3299,7 +3305,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { read_map(generated_by_map_iss, generated_by_map); read_magic(__LINE__); - + bin_istream.read((char*) &best_validation_mse, sizeof(double)); bin_istream.read((char*) &best_validation_mae, sizeof(double)); @@ -3312,7 +3318,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { delete[] initial_parameters_v; 
read_magic(__LINE__); - + int32_t n_best_parameters; bin_istream.read((char*) &n_best_parameters, sizeof(int32_t)); Log::debug("reading %d best parameters.\n", n_best_parameters); @@ -3322,7 +3328,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { delete[] best_parameters_v; read_magic(__LINE__); - + input_parameter_names.clear(); int32_t n_input_parameter_names; bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t)); @@ -3346,7 +3352,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { } read_magic(__LINE__); - + int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); Log::debug("reading %d nodes.\n", n_nodes); @@ -3439,7 +3445,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { read_map(normalize_std_devs_iss, normalize_std_devs); read_magic(__LINE__); - + assign_reachability(); } @@ -3471,12 +3477,12 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &bp_iterations, sizeof(int32_t)); write_magic(); - + bin_ostream.write((char*) &use_dropout, sizeof(bool)); bin_ostream.write((char*) &dropout_probability, sizeof(double)); write_magic(); - + WeightType weight_initialize = weight_rules->get_weight_initialize_method(); WeightType weight_inheritance = weight_rules->get_weight_inheritance_method(); WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -3485,7 +3491,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t)); write_magic(); - + Log::debug("generation_id: %d\n", generation_id); Log::debug("bp_iterations: %d\n", bp_iterations); @@ -3504,7 +3510,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { write_binary_string(bin_ostream, generator_str, "generator"); write_magic(); - + // ostringstream rng_0_1_oss; // rng_0_1_oss << rng_0_1; // string rng_0_1_str = rng_0_1_oss.str(); @@ -3516,7 +3522,7 @@ void RNN_Genome::write_to_stream(ostream& 
bin_ostream) { write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map"); write_magic(); - + bin_ostream.write((char*) &best_validation_mse, sizeof(double)); bin_ostream.write((char*) &best_validation_mae, sizeof(double)); @@ -3526,7 +3532,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size()); write_magic(); - + int32_t n_best_parameters = (int32_t) best_parameters.size(); bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t)); if (n_best_parameters) { @@ -3534,7 +3540,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_input_parameter_names = (int32_t) input_parameter_names.size(); bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) { @@ -3542,7 +3548,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_output_parameter_names = (int32_t) output_parameter_names.size(); bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) { @@ -3552,7 +3558,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { } write_magic(); - + int32_t n_nodes = (int32_t) nodes.size(); bin_ostream.write((char*) &n_nodes, sizeof(int32_t)); Log::debug("writing %d nodes.\n", n_nodes); diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index d6330512..01c7e9e3 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -325,7 +325,7 @@ class RNN_Genome { ); vector pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type); - const vector &get_nodes() { + const vector& get_nodes() { return this->nodes; } diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx index 74fd6519..fac60c84 100644 --- a/rnn_examples/dnas_info.cxx +++ b/rnn_examples/dnas_info.cxx @@ -86,9 +86,9 @@ int 
main(int argc, char** argv) { RNN_Genome genome(filename); for (auto node : genome.get_nodes()) { - if (DNASNode *d = dynamic_cast(node)) { - std::cout << "'" << filename << "': "; - d->print_info(); + if (DNASNode* d = dynamic_cast(node)) { + std::cout << "'" << filename << "': "; + d->print_info(); } } diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 02d7db80..7bd5647c 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -116,7 +116,7 @@ int main(int argc, char** argv) { if (genome_file.size() != 0) { genome = new RNN_Genome(genome_file); Log::info("best weights: { "); - for (double &d : genome->get_best_parameters()) { + for (double& d : genome->get_best_parameters()) { Log::info_no_header("%f, ", d); } Log::info("}\n"); @@ -124,15 +124,14 @@ int main(int argc, char** argv) { vector params; genome->get_weights(params); Log::info("current weights: { "); - for (double &d : params) { + for (double& d : params) { Log::info_no_header("%f, ", d); } Log::info("}\n"); } else { - string rnn_type; get_argument(arguments, "--rnn_type", true, rnn_type); - + Log::info("RNN TYPE = %s\n", rnn_type.c_str()); int32_t num_hidden_layers; @@ -146,50 +145,50 @@ int main(int argc, char** argv) { if (rnn_type == "lstm") { genome = create_lstm( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "gru") { genome = create_gru( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "delta") { genome = create_delta( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, 
max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "mgu") { genome = create_mgu( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "ugrnn") { genome = create_ugrnn( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "ff") { genome = create_ff( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "jordan") { genome = create_jordan( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "elman") { genome = create_elman( - input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth, - weight_rules + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules ); } else if (rnn_type == "dnas") { vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; From c0264d9ad2f82ae86d3369f7cd578e77d0a074eb Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Tue, 12 Dec 2023 19:36:16 -0500 Subject: [PATCH 15/31] Tweak experimental parameters --- 
initial_integration_experiments/dnas.zsh | 8 ++++---- rnn/dnas_node.cxx | 17 ++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 9ef90cca..0c2a615f 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -8,7 +8,7 @@ offset=1 run_examm() { output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \ + mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ @@ -29,7 +29,7 @@ run_examm() { --std_message_level INFO \ --file_message_level INFO \ --crystalize_iters $crystalize_iters \ - --max_genomes 10000 \ + --max_genomes 4000 \ --island_size 8 \ --number_islands 8 \ --dnas_k $k @@ -39,7 +39,7 @@ run_examm() { } CELL_TYPE='dnas' -for crystalize_iters in 128 256 512 1024; do +for crystalize_iters in 64 128 256 512; do for bp_epoch in 8 16 32 64 128; do for k in 1; do for fold in 0 1 2 3; do @@ -47,7 +47,7 @@ for crystalize_iters in 128 256 512 1024; do done wait for fold in 4 5 6 7; do - run_examm + run_examm & done wait done diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 465c024c..87e05d9b 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -99,15 +99,7 @@ double DNASNode::calculate_pi_lr() { } double DNASNode::calculate_tao() { - double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD; - if (percentage_done < 0.33) { - return 1.33; - } else if (percentage_done < 0.66) { - double percentage_done_with_phase = (0.66 - percentage_done) / 0.33; - return 1.33 - percentage_done_with_phase * 1.15; - } else { - return 0.18; - } + return 6.0; } void DNASNode::calculate_z() { @@ -337,8 +329,8 @@ void 
DNASNode::set_weights(int32_t& offset, const vector& parameters) { // int start = offset; for (auto i = 0; i < pi.size(); i++) { pi[i] = parameters[offset++]; - if (pi[i] < 0.1) { - pi[i] = 0.1; + if (pi[i] < 0.01) { + pi[i] = 0.01; } } @@ -395,9 +387,8 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; - double pi_lr = calculate_pi_lr(); for (auto i = 0; i < pi.size(); i++) { - gradients[offset++] = d_pi[i] * pi_lr; + gradients[offset++] = d_pi[i]; } for (auto node : nodes) { From 1607c268ee922a8b042e555a0cf32f96fdae94bb Mon Sep 17 00:00:00 2001 From: Josh Karns Date: Tue, 26 Dec 2023 13:27:35 -0500 Subject: [PATCH 16/31] Tweaking experiments --- examm/examm.cxx | 3 + initial_integration_experiments/analyze.py | 77 +++++++++++++++++++ initial_integration_experiments/analyze.zsh | 12 +++ initial_integration_experiments/dnas.zsh | 31 ++++---- .../post_training_dnas.zsh | 2 +- rnn/generate_nn.cxx | 1 + rnn/rnn_node_interface.cxx | 3 +- 7 files changed, 110 insertions(+), 19 deletions(-) create mode 100644 initial_integration_experiments/analyze.py create mode 100644 initial_integration_experiments/analyze.zsh diff --git a/examm/examm.cxx b/examm/examm.cxx index f017ab8b..e0be2d07 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -182,10 +182,12 @@ void EXAMM::update_log() { } (*op_log_file) << endl; } + RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { best_genome = speciation_strategy->get_global_best_genome(); } + std::chrono::time_point currentClock = std::chrono::system_clock::now(); long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds @@ -193,6 +195,7 @@ void EXAMM::update_log() { << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() << 
speciation_strategy->get_strategy_information_values() << endl; + Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } } diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py new file mode 100644 index 00000000..58ba95b9 --- /dev/null +++ b/initial_integration_experiments/analyze.py @@ -0,0 +1,77 @@ +import pandas + +import numpy as np + +import matplotlib.pyplot as plt + +fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1) + +plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128} + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + if k == 8: + continue + v.sharey(a8) + v.sharex(a8) + +results = {} +for ci in [64, 128, 256, 512]: + results[ci] = {} + for bpe in [8, 16, 32, 64, 128]: + results[ci][bpe] = {} + for k in [1]: + x = [] + results[ci][bpe][k] = x + + for fold in range(8): + f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") + results[ci][bpe][k].append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0] + plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd, + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + v.legend(fontsize=12, loc="upper right") + +plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh new file mode 100644 index 00000000..5c2876f3 --- /dev/null +++ b/initial_integration_experiments/analyze.zsh @@ -0,0 +1,12 @@ +#!/usr/bin/zsh +# +for crystalize_iters in 64 128 256 512; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3 4 5 6 7; do + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + tail -1 $output_dir/fitness_log.csv + done + done + done +done diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 0c2a615f..995d072a 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -1,26 +1,24 @@ -#!/usr/bin/zsh +#!/bin/zsh 
INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \ + mpirun -np 8 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ --input_parameter_names ${=INPUT_PARAMETERS} \ --output_parameter_names ${=OUTPUT_PARAMETERS} \ --bp_iterations $bp_epoch \ - --stochastic \ - --possible_node_types DNAS \ --normalize min_max \ --num_hidden_layers $SIZE \ --hidden_layer_size $SIZE \ - --train_sequence_length 100 \ + --train_sequence_length 1000 \ --validation_sequence_length 100 \ --max_recurrent_depth 1 \ --output_directory $output_dir \ @@ -30,7 +28,7 @@ run_examm() { --file_message_level INFO \ --crystalize_iters $crystalize_iters \ --max_genomes 4000 \ - --island_size 8 \ + --island_size 32 \ --number_islands 8 \ --dnas_k $k @@ -40,16 +38,17 @@ run_examm() { CELL_TYPE='dnas' for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do + for bp_epoch in 1 2 4 8 16 32 64 128; do for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait + fold=1 run_examm +# for fold in 0 1 2 3; do +# run_examm & +# done +# wait +# for fold in 4 5 6 7; do +# run_examm & +# done +# wait done done done diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index 8117dadb..b25171a1 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ 
b/initial_integration_experiments/post_training_dnas.zsh @@ -1,4 +1,4 @@ -#!/usr/bin/zsh +#!/bin/zsh INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' OUTPUT_PARAMETERS='E1_EGT1' diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index a84fb36f..c4068495 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -190,6 +190,7 @@ RNN_Genome* get_seed_genome( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); + // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); } diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index ab5796b2..f86eddd7 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -1,7 +1,6 @@ -#include +#include using std::max; -#include #include using std::ostream; From 778d24aa8d695394203cc7d86b8d6d835725a1f2 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 8 Jan 2024 13:34:21 -0500 Subject: [PATCH 17/31] Modified scripts --- initial_integration_experiments/analyze.py | 17 ++++++++------- initial_integration_experiments/dnas.zsh | 24 ++++++++++------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py index 58ba95b9..cee900d6 100644 --- a/initial_integration_experiments/analyze.py +++ b/initial_integration_experiments/analyze.py @@ -4,28 +4,31 @@ import matplotlib.pyplot as plt -fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1) +fig, subplts = plt.subplots(6, 1) -plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128} +bprange = [1, 2, 4, 8, 16, 
32] +plts = {k:v for k, v in zip(bprange, subplts)} +print(plts) +base = plts[bprange[0]] for k, v in plts.items(): v.set_title(f"{k} BPI") - if k == 8: + if k == bprange[0]: continue - v.sharey(a8) - v.sharex(a8) + v.sharey(base) + v.sharex(base) results = {} for ci in [64, 128, 256, 512]: results[ci] = {} - for bpe in [8, 16, 32, 64, 128]: + for bpe in bprange: results[ci][bpe] = {} for k in [1]: x = [] results[ci][bpe][k] = x for fold in range(8): - f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") + f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") results[ci][bpe][k].append(f) diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh index 995d072a..5acc8b06 100755 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -6,7 +6,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir mpirun -np 8 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ @@ -29,7 +29,7 @@ run_examm() { --crystalize_iters $crystalize_iters \ --max_genomes 4000 \ --island_size 32 \ - --number_islands 8 \ + --number_islands 4 \ --dnas_k $k best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) @@ -38,17 +38,15 @@ run_examm() { CELL_TYPE='dnas' for crystalize_iters in 64 128 256 512; do - for bp_epoch in 1 2 4 8 16 32 64 128; do - for k in 1; do - fold=1 run_examm -# for fold in 0 1 2 3; do -# run_examm & -# done -# wait -# for fold in 4 5 6 7; do -# run_examm & -# done -# wait + for bp_epoch in 1 2 4 8 16 32; do + for k in 1 2; do + for fold in 0 1 2 3; do + run_examm & + done + for fold in 4 5 6 7; do + run_examm & + done + wait 
done done done From ffa684a5b3bcbd1d2533efe61fcd3ca18c6f600c Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 29 Jan 2024 19:44:50 -0500 Subject: [PATCH 18/31] Prepping for cluster --- CMakeLists.txt | 3 +- examm/examm.cxx | 1 + initial_integration_experiments/analyze.py | 122 +++++++++++------- initial_integration_experiments/aviation.zsh | 37 ++++++ initial_integration_experiments/control.zsh | 50 +++++++ initial_integration_experiments/debug.zsh | 55 ++++++++ initial_integration_experiments/dnas.zsh | 25 ++-- .../gp_control.zsh | 59 +++++++++ .../post_training_dnas.zsh | 8 +- initial_integration_experiments/posttrain.zsh | 3 + initial_integration_experiments/run_examm.zsh | 25 ++++ .../run_experiments.zsh | 4 + initial_integration_experiments/wind.zsh | 39 ++++++ rnn/dnas_node.cxx | 21 +-- rnn/generate_nn.cxx | 1 - rnn/rnn_edge.cxx | 3 +- rnn/rnn_node.cxx | 4 +- rnn/rnn_node_interface.cxx | 12 +- rnn/rnn_node_interface.hxx | 5 +- rnn_examples/train_rnn.cxx | 11 +- time_series/time_series.cxx | 8 +- 21 files changed, 404 insertions(+), 92 deletions(-) create mode 100644 initial_integration_experiments/aviation.zsh create mode 100644 initial_integration_experiments/control.zsh create mode 100755 initial_integration_experiments/debug.zsh mode change 100755 => 100644 initial_integration_experiments/dnas.zsh create mode 100644 initial_integration_experiments/gp_control.zsh create mode 100644 initial_integration_experiments/posttrain.zsh create mode 100644 initial_integration_experiments/run_examm.zsh create mode 100755 initial_integration_experiments/run_experiments.zsh create mode 100644 initial_integration_experiments/wind.zsh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1af3a314..5d62df91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") -SET (CMAKE_CXX_FLAGS " 
-Wall -O3 -funroll-loops -msse3") +# SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 -fsanitize=address") +SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 ") SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") diff --git a/examm/examm.cxx b/examm/examm.cxx index e0be2d07..1e1c2314 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -324,6 +324,7 @@ void EXAMM::mutate(int32_t max_mutations, RNN_Genome* g) { g->assign_reachability(); double rng = rng_0_1(generator) * total; int32_t new_node_type = get_random_node_type(); + Log::info("%d %d\n", new_node_type, NODE_TYPES.size()); string node_type_str = NODE_TYPES[new_node_type]; Log::debug("rng: %lf, total: %lf, new node type: %d (%s)\n", rng, total, new_node_type, node_type_str.c_str()); diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py index cee900d6..78d51466 100644 --- a/initial_integration_experiments/analyze.py +++ b/initial_integration_experiments/analyze.py @@ -6,7 +6,7 @@ fig, subplts = plt.subplots(6, 1) -bprange = [1, 2, 4, 8, 16, 32] +bprange = [8, 16] plts = {k:v for k, v in zip(bprange, subplts)} print(plts) base = plts[bprange[0]] @@ -18,63 +18,93 @@ v.sharey(base) v.sharex(base) +def avg(files, slice_at=-1): + r = {} + for file in files: + x = [] + + for fold in range(8): + f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] + print(f"{file}/{fold} -> {len(f)}") + x.append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + r[file] = { + 'mean_nodes': nodesmean, + 'mean_edges': edgesmean, + 'mean_rec_edges':redgesmean, + 'bpi': bpimean, + 'mean_mse': msemean, + 'std_mse': msestd, + } + return r + results = {} -for ci in [64, 128, 256, 512]: +for ci in [64]: results[ci] = {} for bpe in bprange: results[ci][bpe] = {} for k in [1]: - x = [] + f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" + x = avg([f])[f] results[ci][bpe][k] = x + print(x) - for fold in range(8): - f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv") - results[ci][bpe][k].append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - + print(x['mean_mse'] - x['std_mse']) + g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] + plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) +control_results = {} +for bp in [8, 16]: + key = f"initial_integration_experiments/results/control_v7/{bp}" + r = avg([key])[key] + control_results[bp] = r + print(list(r.keys())) + g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] + plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0] - plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd, - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) for k, v in plts.items(): v.set_title(f"{k} BPI") v.legend(fontsize=12, loc="upper right") + plt.show() diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh new file mode 100644 index 
00000000..7059da3e --- /dev/null +++ b/initial_integration_experiments/aviation.zsh @@ -0,0 +1,37 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + +for output_params in "E1_CHT1" "Pitch"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/initial_integration_experiments/control.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + 
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh new file mode 100755 index 00000000..ce159c01 --- /dev/null +++ b/initial_integration_experiments/debug.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames 
datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes 8192 \ + --island_size 32 \ + --number_islands 4 \ + --stochastic \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128; do + for bp_epoch in 8; do + for k in 1; do + for fold in 0; do + run_examm + done + # wait + # for fold in 4 5 6 7; do + # run_examm & + # done + # wait + done + done +done diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh old mode 100755 new mode 100644 index 5acc8b06..8b525b09 --- a/initial_integration_experiments/dnas.zsh +++ b/initial_integration_experiments/dnas.zsh @@ -6,43 +6,46 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold + output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ + mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ + 
--possible_node_types dnas \ + --stochastic 1 \ --input_parameter_names ${=INPUT_PARAMETERS} \ --output_parameter_names ${=OUTPUT_PARAMETERS} \ --bp_iterations $bp_epoch \ --normalize min_max \ --num_hidden_layers $SIZE \ --hidden_layer_size $SIZE \ - --train_sequence_length 1000 \ --validation_sequence_length 100 \ --max_recurrent_depth 1 \ --output_directory $output_dir \ --log_filename fitness.csv \ --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ + --std_message_level WARNING \ + --file_message_level WARNING \ --crystalize_iters $crystalize_iters \ - --max_genomes 4000 \ + --max_genomes $max_genomes \ --island_size 32 \ --number_islands 4 \ --dnas_k $k - best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh } CELL_TYPE='dnas' -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 1 2 4 8 16 32; do - for k in 1 2; do +bp_ge=(8 8192 16 4096 32 2048) +for crystalize_iters in 256; do + for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for k in 1; do for fold in 0 1 2 3; do run_examm & done + wait for fold in 4 5 6 7; do run_examm & done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh new file mode 100644 index 00000000..049e9750 --- /dev/null +++ b/initial_integration_experiments/gp_control.zsh @@ -0,0 +1,59 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=test_results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + 
--test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done + done +} + +INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" +training_filenames=(datasets/2018_coal/burner_[0-9].csv) +test_filenames=(datasets/2018_coal/burner_1[0-1].csv) +OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") +run_group + + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUTS=("E1_CHT1" "Pitch") +training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) +test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) +run_group + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" +OUTPUTS=("Cm_avg" "P_avg") +training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) 
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) +run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh index b25171a1..1c226178 100755 --- a/initial_integration_experiments/post_training_dnas.zsh +++ b/initial_integration_experiments/post_training_dnas.zsh @@ -1,7 +1,4 @@ #!/bin/zsh -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_EGT1' - offset=1 post_training() { @@ -21,11 +18,12 @@ post_training() { --log_filename post_training.csv \ --learning_rate 0.01 \ --weight_update adagrad \ - --train_sequence_length 100 \ + --train_sequence_length 1000 \ --validation_sequence_length 100 \ --crystalize_iters $CRYSTALIZE_ITERS \ --dnas_k $k - + + tail -1 $OUTPUT_DIRECTORY/post_training.csv } post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh new file mode 100644 index 00000000..cc54a2eb --- /dev/null +++ b/initial_integration_experiments/posttrain.zsh @@ -0,0 +1,3 @@ +#!/bin/zsh + + diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh new file mode 100644 index 00000000..77d2893f --- /dev/null +++ b/initial_integration_experiments/run_examm.zsh @@ -0,0 +1,25 @@ +#!/bin/zsh + +output_dir=results/v0/$bp_epoch/$fold +mkdir -p $output_dir + +mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize 
min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 4000 \ + --island_size 32 \ + --number_islands 4 + +touch $output_dir/completed + diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh new file mode 100755 index 00000000..7dd8e956 --- /dev/null +++ b/initial_integration_experiments/run_experiments.zsh @@ -0,0 +1,4 @@ +#!/bin/zsh + +initial_integration_experiments/control.zsh +initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh new file mode 100644 index 00000000..7e68f482 --- /dev/null +++ b/initial_integration_experiments/wind.zsh @@ -0,0 +1,39 @@ +#!/bin/zsh + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ + --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch 
$output_dir/completed +} + + +for output_params in "Cm_avg" "P_avg"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index 87e05d9b..f6d42bfe 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -168,14 +168,15 @@ void DNASNode::print_info() { } void DNASNode::reset(int32_t series_length) { - d_pi = vector(pi.size(), 0.0); - d_input = vector(series_length, 0.0); - node_outputs = vector>(series_length, vector(pi.size(), 0.0)); - output_values = vector(series_length, 0.0); - error_values = vector(series_length, 0.0); - inputs_fired = vector(series_length, 0); - outputs_fired = vector(series_length, 0); - input_values = vector(series_length, 0.0); + d_pi.assign(pi.size(), 0.0); + d_input.assign(series_length, 0.0); + node_outputs.clear(); + for (int i = 0; i < series_length; i++) node_outputs.emplace_back(pi.size(), 0.0); + output_values.assign(series_length, 0.0); + error_values.assign(series_length, 0.0); + inputs_fired.assign(series_length, 0); + outputs_fired.assign(series_length, 0); + input_values.assign(series_length, 0.0); if (counter >= CRYSTALLIZATION_THRESHOLD) { nodes[maxi]->reset(series_length); @@ -206,8 +207,10 @@ void DNASNode::input_fired(int32_t time, double incoming_output) { } if (counter >= CRYSTALLIZATION_THRESHOLD) { + Log::info("%d hmm\n", maxi >= 0); assert(maxi >= 0); - + + Log::info("%d %d %p\n", maxi, time, nodes[maxi]); nodes[maxi]->input_fired(time, input_values[time]); node_outputs[time][maxi] = nodes[maxi]->output_values[time]; output_values[time] = nodes[maxi]->output_values[time]; diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index c4068495..a84fb36f 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -190,7 +190,6 @@ RNN_Genome* get_seed_genome( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); - // seed_genome = 
create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); } diff --git a/rnn/rnn_edge.cxx b/rnn/rnn_edge.cxx index 3227e961..babb2552 100644 --- a/rnn/rnn_edge.cxx +++ b/rnn/rnn_edge.cxx @@ -92,7 +92,7 @@ RNN_Edge* RNN_Edge::copy(const vector new_nodes) { } void RNN_Edge::propagate_forward(int32_t time) { - if (input_node->inputs_fired[time] != input_node->total_inputs) { + if (input_node->inputs_fired[time] != input_node->total_inputs || time < 0 || time >= input_node->output_values.size()) { Log::fatal( "ERROR! propagate forward called on edge %d where input_node->inputs_fired[%d] (%d) != total_inputs (%d)\n", innovation_number, time, input_node->inputs_fired[time], input_node->total_inputs @@ -104,7 +104,6 @@ void RNN_Edge::propagate_forward(int32_t time) { exit(1); } - // Log::debug("input_node %p %d\n", input_node, input_node->output_values.size()); double output = input_node->output_values[time] * weight; // Log::debug("propagating forward at time %d from %d to %d, value: %lf, input: %lf, weight: %lf\n", time, diff --git a/rnn/rnn_node.cxx b/rnn/rnn_node.cxx index 075c11ed..3e79a1df 100644 --- a/rnn/rnn_node.cxx +++ b/rnn/rnn_node.cxx @@ -57,8 +57,6 @@ void RNN_Node::input_fired(int32_t time, double incoming_output) { exit(1); } - Log::debug("node %d - input value[%d]: %lf\n", innovation_number, time, input_values[time]); - output_values[time] = tanh(input_values[time] + bias); ld_output[time] = tanh_derivative(output_values[time]); @@ -86,6 +84,8 @@ void RNN_Node::try_update_deltas(int32_t time) { outputs_fired[time], total_outputs ); exit(1); + } else if (time >= d_input.size() || time < 0) { + Log::fatal("invalid time %d\n", time); } d_input[time] *= ld_output[time]; diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index f86eddd7..210706a2 100644 
--- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -7,13 +7,16 @@ using std::ostream; #include using std::string; +#include +using std::vector; + #include "common/log.hxx" #include "rnn/rnn_genome.hxx" #include "rnn_node_interface.hxx" -extern const int32_t NUMBER_NODE_TYPES = 11; -extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", - "delta", "LSTM", "ENARC", "ENAS_DAG", "dnas"}; +extern const vector NODE_TYPES = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", + "delta", "LSTM", "ENARC", "ENAS_DAG", "random_dag", "dnas"}; + extern const unordered_map string_to_node_type = { {"simple", SIMPLE_NODE}, {"jordan", JORDAN_NODE}, @@ -25,7 +28,8 @@ extern const unordered_map string_to_node_type = { { "lstm", LSTM_NODE}, { "enarc", ENARC_NODE}, { "enas", ENAS_DAG_NODE}, - { "dnas", DNAS_NODE} + { "dnas", DNAS_NODE}, + { "random_dag", RANDOM_DAG_NODE}, }; int32_t node_type_from_string(string& node_type) { diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx index 15ec45cd..d1b56fa3 100644 --- a/rnn/rnn_node_interface.hxx +++ b/rnn/rnn_node_interface.hxx @@ -26,8 +26,9 @@ class RNN; #define HIDDEN_LAYER 1 #define OUTPUT_LAYER 2 -extern const int32_t NUMBER_NODE_TYPES; -extern const string NODE_TYPES[]; +extern const vector NODE_TYPES; +#define NUMBER_NODE_TYPES NODE_TYPES.size() + extern const unordered_map string_to_node_type; int32_t node_type_from_string(string& node_type); diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index 7bd5647c..265b9669 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -15,6 +15,7 @@ using std::string; using std::vector; #include "common/arguments.hxx" +#include "common/process_arguments.hxx" #include "common/files.hxx" #include "common/log.hxx" #include "rnn/generate_nn.hxx" @@ -81,9 +82,9 @@ int main(int argc, char** argv) { Log::set_id("main"); TimeSeriesSets* time_series_sets = 
TimeSeriesSets::generate_from_arguments(arguments); - - int32_t time_offset = 1; - get_argument(arguments, "--time_offset", true, time_offset); + get_train_validation_data( + arguments, time_series_sets, training_inputs, training_outputs, test_inputs, test_outputs + ); int32_t crystallization_threshold = 1000; get_argument(arguments, "--crystalize_iters", false, crystallization_threshold); @@ -93,8 +94,8 @@ int main(int argc, char** argv) { get_argument(arguments, "--dnas_k", false, k); DNASNode::k = k; - time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); - time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); + // time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); + // time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); int number_inputs = time_series_sets->get_number_inputs(); // int number_outputs = time_series_sets->get_number_outputs(); diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx index de143147..e315164e 100644 --- a/time_series/time_series.cxx +++ b/time_series/time_series.cxx @@ -472,7 +472,7 @@ void TimeSeriesSet::export_time_series( if (time_offset == 0) { for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = 0; j < number_rows; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } @@ -480,7 +480,7 @@ void TimeSeriesSet::export_time_series( // output data, ignore the first N values for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = time_offset; j < number_rows; j++) { - data[i][j - time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j - time_offset] = time_series.at(requested_fields[i])->get_value(j); } } @@ -492,13 +492,13 @@ void TimeSeriesSet::export_time_series( Log::debug("doing shift for field: '%s'\n", requested_fields[i].c_str()); 
// shift the shifted fields to the same as the output, not the input for (int32_t j = -time_offset; j < number_rows; j++) { - data[i][j + time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j + time_offset] = time_series.at(requested_fields[i])->get_value(j); // Log::info("\tdata[%d][%d]: %lf\n", i, j + time_offset, data[i][j + time_offset]); } } else { Log::debug("not doing shift for field: '%s'\n", requested_fields[i].c_str()); for (int32_t j = 0; j < number_rows + time_offset; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } } From 60acb2c8c08d60844fdc161843f0a94771aa5158 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 31 Jan 2024 04:28:06 -0500 Subject: [PATCH 19/31] gecco 2024 related experiment files + hacky changes --- dnas_cluster.zsh | 69 ++++++++++++++++++++++++++++ dnas_control.zsh | 60 ++++++++++++++++++++++++ examm/island_speciation_strategy.cxx | 5 +- key | 7 +++ rnn/generate_nn.cxx | 16 ++++++- rnn/genome_property.cxx | 31 +++++++++++-- rnn/genome_property.hxx | 12 ++++- 7 files changed, 191 insertions(+), 9 deletions(-) create mode 100644 dnas_cluster.zsh create mode 100644 dnas_control.zsh create mode 100644 key diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh new file mode 100644 index 00000000..55823c0c --- /dev/null +++ b/dnas_cluster.zsh @@ -0,0 +1,69 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' 
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --use_dnas_seed true \ + --use_burn_in_bp_epoch \ + --burn_in_period 1024 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 512; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/dnas_control.zsh b/dnas_control.zsh new file mode 100644 index 00000000..5e6982c8 --- /dev/null +++ b/dnas_control.zsh @@ -0,0 +1,60 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o 
/home/jak5763/exact/results_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --burn_in_period 1024 \ + --use_burn_in_bp_epoch + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done +} + +run_group diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 920eb203..d8eaabab 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ 
-347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island( Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str()); exit(1); } - return new_genome; + return new_genome; } RNN_Genome* IslandSpeciationStrategy::generate_genome( @@ -370,6 +370,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( Log::info("Island %d: new genome is still null, regenerating\n", generation_island); new_genome = generate_genome(rng_0_1, generator, mutate, crossover); } + generated_genomes++; new_genome->set_generation_id(generated_genomes); islands[generation_island]->set_latest_generation_id(generated_genomes); @@ -577,4 +578,4 @@ void IslandSpeciationStrategy::set_erased_islands_status() { RNN_Genome* IslandSpeciationStrategy::get_seed_genome() { return seed_genome; -} \ No newline at end of file +} diff --git a/key b/key new file mode 100644 index 00000000..391a7405 --- /dev/null +++ b/key @@ -0,0 +1,7 @@ +v11 -> burn in schedule with 0.001 lr 4 mut +v12 -> burn in schedule with 0.01 lr 4 mut +v13 -> burn in schedule with 0.01 lr and period of 1024 and total genomes 8k 4 mut +v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut +v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut +v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut + diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index a84fb36f..d9fd2eac 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -185,13 +185,27 @@ RNN_Genome* get_seed_genome( ); Log::info("Finished transfering seed genome\n"); } else { - if (seed_genome == NULL) { + bool use_dnas_seed = argument_exists(arguments, "--use_dnas_seed"); + + if (!use_dnas_seed) { seed_genome = create_ff( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); + }
else { + vector node_types = { + SIMPLE_NODE, + UGRNN_NODE, + MGU_NODE, + GRU_NODE, + DELTA_NODE, + LSTM_NODE + }; + seed_genome = create_dnas_nn( + time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, node_types, weight_rules + ); } } diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx index 6bf061b9..09ea1ae8 100644 --- a/rnn/genome_property.cxx +++ b/rnn/genome_property.cxx @@ -10,6 +10,20 @@ GenomeProperty::GenomeProperty() { max_recurrent_depth = 10; } +int32_t GenomeProperty::compute_bp_iterations(RNN_Genome* genome) { + if (use_burn_in_bp_epoch) { + int32_t n = genome->generation_id / burn_in_period; + n = n > max_burn_in_cycles ? max_burn_in_cycles : n; + + float epochs = bp_epochs_start; + for (int i = 0; i < n; i++) epochs *= burn_in_ratio; + + return (int32_t) epochs; + } else { + return bp_iterations; + } +} + void GenomeProperty::generate_genome_property_from_arguments(const vector& arguments) { get_argument(arguments, "--bp_iterations", true, bp_iterations); use_dropout = get_argument(arguments, "--dropout_probability", false, dropout_probability); @@ -17,6 +31,13 @@ void GenomeProperty::generate_genome_property_from_arguments(const vectorset_bp_iterations(bp_iterations); - if (use_dropout) { - genome->enable_dropout(dropout_probability); - } + genome->set_bp_iterations(compute_bp_iterations(genome)); + + if (use_dropout) genome->enable_dropout(dropout_probability); + genome->normalize_type = normalize_type; genome->set_parameter_names(input_parameter_names, output_parameter_names); genome->set_normalize_bounds(normalize_type, normalize_mins, normalize_maxs, normalize_avgs, normalize_std_devs); @@ -48,4 +69,4 @@ void GenomeProperty::get_time_series_parameters(TimeSeriesSets* time_series_sets uniform_int_distribution GenomeProperty::get_recurrent_depth_dist() { return uniform_int_distribution(this->min_recurrent_depth, this->max_recurrent_depth); -} \ No newline at end of file +} diff --git 
a/rnn/genome_property.hxx b/rnn/genome_property.hxx index 7d220ff6..130b26de 100644 --- a/rnn/genome_property.hxx +++ b/rnn/genome_property.hxx @@ -18,6 +18,12 @@ class GenomeProperty { int32_t min_recurrent_depth; int32_t max_recurrent_depth; + bool use_burn_in_bp_epoch; + int32_t burn_in_period = 2048; + int32_t max_burn_in_cycles = 4; + double bp_epochs_start = 0.5; + double burn_in_ratio = 2.0; + // TimeSeriesSets *time_series_sets; int32_t number_inputs; int32_t number_outputs; @@ -30,12 +36,16 @@ class GenomeProperty { map normalize_avgs; map normalize_std_devs; + int32_t compute_bp_iterations(RNN_Genome* genome); + public: GenomeProperty(); + void generate_genome_property_from_arguments(const vector& arguments); void set_genome_properties(RNN_Genome* genome); void get_time_series_parameters(TimeSeriesSets* time_series_sets); + uniform_int_distribution get_recurrent_depth_dist(); }; -#endif \ No newline at end of file +#endif From 5730472f918a1ebf459e68c88092bf2e37ea1ba1 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Fri, 2 Feb 2024 03:16:10 -0500 Subject: [PATCH 20/31] BP schedule --- dnas_control.zsh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dnas_control.zsh b/dnas_control.zsh index 5e6982c8..88a7c882 100644 --- a/dnas_control.zsh +++ b/dnas_control.zsh @@ -17,7 +17,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold + output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold mkdir -p $output_dir srun -n 36 Release/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ @@ -41,7 +41,7 @@ run_examm() { --max_genomes $max_genomes \ --island_size 32 \ --number_islands 8 \ - --num_mutations 2 \ + --num_mutations 4 \ --burn_in_period 1024 \ --use_burn_in_bp_epoch From 70e79d442b1d4fea6c2c752739778f096a9b70ff Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 
2024 14:30:54 -0500 Subject: [PATCH 21/31] moving scripts --- scripts/dnas/analyze.py | 110 ++++++++++++++++++++++++++++ scripts/dnas/analyze.zsh | 12 +++ scripts/dnas/aviation.zsh | 37 ++++++++++ scripts/dnas/coal_dnas_control.zsh | 22 ++++++ scripts/dnas/coal_gp.zsh | 22 ++++++ scripts/dnas/control.zsh | 50 +++++++++++++ scripts/dnas/control_cluster.zsh | 50 +++++++++++++ scripts/dnas/debug.zsh | 55 ++++++++++++++ scripts/dnas/dnas.zsh | 55 ++++++++++++++ scripts/dnas/dnas_cluster.zsh | 69 +++++++++++++++++ scripts/dnas/dnas_control.zsh | 60 +++++++++++++++ scripts/dnas/dnas_r2_cluster.zsh | 67 +++++++++++++++++ scripts/dnas/experiment.zsh | 34 +++++++++ scripts/dnas/gp_control.zsh | 59 +++++++++++++++ scripts/dnas/lib.zsh | 65 ++++++++++++++++ scripts/dnas/mk_jobs.zsh | 6 ++ scripts/dnas/populate_queue.zsh | 29 ++++++++ scripts/dnas/post_training.zsh | 28 +++++++ scripts/dnas/post_training_dnas.zsh | 29 ++++++++ scripts/dnas/posttrain.zsh | 3 + scripts/dnas/run_examm.zsh | 25 +++++++ scripts/dnas/run_experiments.zsh | 4 + scripts/dnas/wind.zsh | 39 ++++++++++ 23 files changed, 930 insertions(+) create mode 100644 scripts/dnas/analyze.py create mode 100644 scripts/dnas/analyze.zsh create mode 100644 scripts/dnas/aviation.zsh create mode 100644 scripts/dnas/coal_dnas_control.zsh create mode 100644 scripts/dnas/coal_gp.zsh create mode 100644 scripts/dnas/control.zsh create mode 100644 scripts/dnas/control_cluster.zsh create mode 100755 scripts/dnas/debug.zsh create mode 100644 scripts/dnas/dnas.zsh create mode 100644 scripts/dnas/dnas_cluster.zsh create mode 100644 scripts/dnas/dnas_control.zsh create mode 100644 scripts/dnas/dnas_r2_cluster.zsh create mode 100755 scripts/dnas/experiment.zsh create mode 100644 scripts/dnas/gp_control.zsh create mode 100644 scripts/dnas/lib.zsh create mode 100644 scripts/dnas/mk_jobs.zsh create mode 100755 scripts/dnas/populate_queue.zsh create mode 100755 scripts/dnas/post_training.zsh create mode 100755 
scripts/dnas/post_training_dnas.zsh create mode 100644 scripts/dnas/posttrain.zsh create mode 100644 scripts/dnas/run_examm.zsh create mode 100755 scripts/dnas/run_experiments.zsh create mode 100644 scripts/dnas/wind.zsh diff --git a/scripts/dnas/analyze.py b/scripts/dnas/analyze.py new file mode 100644 index 00000000..78d51466 --- /dev/null +++ b/scripts/dnas/analyze.py @@ -0,0 +1,110 @@ +import pandas + +import numpy as np + +import matplotlib.pyplot as plt + +fig, subplts = plt.subplots(6, 1) + +bprange = [8, 16] +plts = {k:v for k, v in zip(bprange, subplts)} +print(plts) +base = plts[bprange[0]] + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + if k == bprange[0]: + continue + v.sharey(base) + v.sharex(base) + +def avg(files, slice_at=-1): + r = {} + for file in files: + x = [] + + for fold in range(8): + f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] + print(f"{file}/{fold} -> {len(f)}") + x.append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + r[file] = { + 'mean_nodes': nodesmean, + 'mean_edges': edgesmean, + 'mean_rec_edges':redgesmean, + 'bpi': bpimean, + 'mean_mse': msemean, + 'std_mse': msestd, + } + return r + +results = {} +for ci in [64]: + results[ci] = {} + for bpe in bprange: + results[ci][bpe] = {} + for k in [1]: + f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" + x = avg([f])[f] + results[ci][bpe][k] = x + print(x) + + print(x['mean_mse'] - x['std_mse']) + g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] + plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + +control_results = {} +for bp in [8, 16]: + key = f"initial_integration_experiments/results/control_v7/{bp}" + r = avg([key])[key] + control_results[bp] = r + print(list(r.keys())) + g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] + plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + + +for k, v in 
plts.items(): + v.set_title(f"{k} BPI") + v.legend(fontsize=12, loc="upper right") + + +plt.show() diff --git a/scripts/dnas/analyze.zsh b/scripts/dnas/analyze.zsh new file mode 100644 index 00000000..5c2876f3 --- /dev/null +++ b/scripts/dnas/analyze.zsh @@ -0,0 +1,12 @@ +#!/usr/bin/zsh +# +for crystalize_iters in 64 128 256 512; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3 4 5 6 7; do + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + tail -1 $output_dir/fitness_log.csv + done + done + done +done diff --git a/scripts/dnas/aviation.zsh b/scripts/dnas/aviation.zsh new file mode 100644 index 00000000..7059da3e --- /dev/null +++ b/scripts/dnas/aviation.zsh @@ -0,0 +1,37 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + +for output_params in "E1_CHT1" "Pitch"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/scripts/dnas/coal_dnas_control.zsh b/scripts/dnas/coal_dnas_control.zsh new file mode 100644 index 
00000000..9543cc09 --- /dev/null +++ b/scripts/dnas/coal_dnas_control.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB +let np=8 + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8 16 32 64 128) +nfolds=20 +MAX_GENOMES=4000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/coal_gp.zsh b/scripts/dnas/coal_gp.zsh new file mode 100644 index 00000000..c1318793 --- /dev/null +++ b/scripts/dnas/coal_gp.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB +let np=8 + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8) +nfolds=20 +MAX_GENOMES=10000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/scripts/dnas/control.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ +
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/scripts/dnas/control_cluster.zsh b/scripts/dnas/control_cluster.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/scripts/dnas/control_cluster.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset 
\ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/scripts/dnas/debug.zsh b/scripts/dnas/debug.zsh new file mode 100755 index 00000000..ce159c01 --- /dev/null +++ b/scripts/dnas/debug.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names 
${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes 8192 \ + --island_size 32 \ + --number_islands 4 \ + --stochastic \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128; do + for bp_epoch in 8; do + for k in 1; do + for fold in 0; do + run_examm + done + # wait + # for fold in 4 5 6 7; do + # run_examm & + # done + # wait + done + done +done diff --git a/scripts/dnas/dnas.zsh b/scripts/dnas/dnas.zsh new file mode 100644 index 00000000..8b525b09 --- /dev/null +++ b/scripts/dnas/dnas.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} 
\ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +bp_ge=(8 8192 16 4096 32 2048) +for crystalize_iters in 256; do + for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for k in 1; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait + done + done +done diff --git a/scripts/dnas/dnas_cluster.zsh b/scripts/dnas/dnas_cluster.zsh new file mode 100644 index 00000000..55823c0c --- /dev/null +++ b/scripts/dnas/dnas_cluster.zsh @@ -0,0 +1,69 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + 
--training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --use_dnas_seed true \ + --use_burn_in_bp_epoch \ + --burn_in_period 1024 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 512; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/dnas_control.zsh b/scripts/dnas/dnas_control.zsh new file mode 100644 index 00000000..88a7c882 --- /dev/null +++ b/scripts/dnas/dnas_control.zsh @@ -0,0 +1,60 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 
E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 4 \ + --burn_in_period 1024 \ + --use_burn_in_bp_epoch + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done +} + +run_group diff --git a/scripts/dnas/dnas_r2_cluster.zsh b/scripts/dnas/dnas_r2_cluster.zsh new file mode 100644 index 00000000..a8bce387 --- /dev/null +++ b/scripts/dnas/dnas_r2_cluster.zsh @@ -0,0 +1,67 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o 
/home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v9/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.001 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 16 \ + --number_islands 8 \ + --num_mutations 4 \ + --use_dnas_seed true \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 1000000; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/experiment.zsh 
b/scripts/dnas/experiment.zsh new file mode 100755 index 00000000..32a1db55 --- /dev/null +++ b/scripts/dnas/experiment.zsh @@ -0,0 +1,34 @@ +#!/bin/zsh +#SBATCH -n 1 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -o /home/jak5763/exact/aistats/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/aistats/slurm_out/%x.%j.err +#SBATCH --mem=10G + +spack load gcc +spack load openmpi +spack load /5aoa7oi +spack load /dd7nzzh + +for i in $(seq 0 19); do + export i=$i + export output_dir=/home/jak5763/exact/aistats/$control/maxt$maxt/crystal$crystal/bp$bp/$i + + if [ "$control" = "control" ]; then + node_types="simple UGRNN MGU GRU delta LSTM" + else + node_types="DNAS" + fi + + echo $node_types $control + + export node_types=$node_types + + # ./run_examm.zsh + + best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + export BP_ITERS=1 + export GENOME=$best_genome_file + ./post_training.zsh +done diff --git a/scripts/dnas/gp_control.zsh b/scripts/dnas/gp_control.zsh new file mode 100644 index 00000000..049e9750 --- /dev/null +++ b/scripts/dnas/gp_control.zsh @@ -0,0 +1,59 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=test_results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + 
done + done +} + +INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" +training_filenames=(datasets/2018_coal/burner_[0-9].csv) +test_filenames=(datasets/2018_coal/burner_1[0-1].csv) +OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") +run_group + + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUTS=("E1_CHT1" "Pitch") +training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) +test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) +run_group + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" +OUTPUTS=("Cm_avg" "P_avg") +training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) +test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) +run_group diff --git a/scripts/dnas/lib.zsh b/scripts/dnas/lib.zsh new file mode 100644 index 00000000..49ebc581 --- /dev/null +++ b/scripts/dnas/lib.zsh @@ -0,0 +1,65 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=$output_dir_prefix/bp_$bp_epoch/output_$output_params/$fold + mkdir -p $output_dir + echo srun -n $np Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + 
--output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in $bp_epoch_set; do + for fold in $(seq 1 $nfolds); do + run_examm + done + done + done +} + +coal() { + INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" + training_filenames=(datasets/2018_coal/burner_[0-9].csv) + test_filenames=(datasets/2018_coal/burner_1[0-1].csv) + OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") + run_group +} + +aviation() { + INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + OUTPUTS=("E1_CHT1" "Pitch") + training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) + test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) + run_group +} + +wind() { + INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + OUTPUTS=("Cm_avg" "P_avg") + training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) + test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) + run_group +} + diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh new file mode 100644 index 00000000..38a5526c --- /dev/null +++ b/scripts/dnas/mk_jobs.zsh @@ -0,0 +1,6 @@ +bp_ge=(8 8192 16 4096 32 2048 64 1024) +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh + bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh +done diff --git a/scripts/dnas/populate_queue.zsh b/scripts/dnas/populate_queue.zsh new file mode 100755 index 00000000..43a09dbb --- /dev/null +++ b/scripts/dnas/populate_queue.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +export INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +export OUTPUT_PARAMETERS='E1_EGT1' + +export offset=1 +export k=1 + +push_job() { + export maxt=$maxt + export crystal=$crystal + export bp=$bp + export control=$control + sbatch -J $control.maxt$maxt.cr$crystal.bp$bp ./experiment.zsh + +} + +export control="exp" +for maxt in 1.66 1.33 1.0; do + for crystal in 64 128 256; do + for bp in 4 8 16; do + push_job + done + done +done + +export control="control" +for bp in 4 8 16; do + push_job +done diff --git a/scripts/dnas/post_training.zsh b/scripts/dnas/post_training.zsh new file mode 100755 index 00000000..38c2d39d --- /dev/null +++ b/scripts/dnas/post_training.zsh @@ -0,0 +1,28 @@ +#!/usr/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names 
${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $output_dir \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --crystalize_iters $crystal \ + --dnas_k $k + +} + +post_training diff --git a/scripts/dnas/post_training_dnas.zsh b/scripts/dnas/post_training_dnas.zsh new file mode 100755 index 00000000..1c226178 --- /dev/null +++ b/scripts/dnas/post_training_dnas.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $OUTPUT_DIRECTORY \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 1000 \ + --validation_sequence_length 100 \ + --crystalize_iters $CRYSTALIZE_ITERS \ + --dnas_k $k + + tail -1 $OUTPUT_DIRECTORY/post_training.csv +} + +post_training diff --git a/scripts/dnas/posttrain.zsh b/scripts/dnas/posttrain.zsh new file mode 100644 index 00000000..cc54a2eb --- /dev/null +++ b/scripts/dnas/posttrain.zsh @@ -0,0 +1,3 @@ +#!/bin/zsh + + diff --git a/scripts/dnas/run_examm.zsh b/scripts/dnas/run_examm.zsh new file mode 100644 index 00000000..77d2893f --- /dev/null +++ b/scripts/dnas/run_examm.zsh @@ -0,0 +1,25 @@ +#!/bin/zsh + +output_dir=results/v0/$bp_epoch/$fold +mkdir -p $output_dir + +mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames 
datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 4000 \ + --island_size 32 \ + --number_islands 4 + +touch $output_dir/completed + diff --git a/scripts/dnas/run_experiments.zsh b/scripts/dnas/run_experiments.zsh new file mode 100755 index 00000000..7dd8e956 --- /dev/null +++ b/scripts/dnas/run_experiments.zsh @@ -0,0 +1,4 @@ +#!/bin/zsh + +initial_integration_experiments/control.zsh +initial_integration_experiments/dnas.zsh diff --git a/scripts/dnas/wind.zsh b/scripts/dnas/wind.zsh new file mode 100644 index 00000000..7e68f482 --- /dev/null +++ b/scripts/dnas/wind.zsh @@ -0,0 +1,39 @@ +#!/bin/zsh + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ + --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + 
--file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + + +for output_params in "Cm_avg" "P_avg"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done From 4c3ebfc64a020a4ec0ae343a1f328dfc14715c64 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:35:20 -0500 Subject: [PATCH 22/31] removed old fileS --- initial_integration_experiments/analyze.py | 110 ------------------ initial_integration_experiments/analyze.zsh | 12 -- initial_integration_experiments/aviation.zsh | 37 ------ initial_integration_experiments/control.zsh | 50 -------- initial_integration_experiments/debug.zsh | 55 --------- initial_integration_experiments/dnas.zsh | 55 --------- .../gp_control.zsh | 59 ---------- .../post_training_dnas.zsh | 29 ----- initial_integration_experiments/posttrain.zsh | 3 - initial_integration_experiments/run_examm.zsh | 25 ---- .../run_experiments.zsh | 4 - initial_integration_experiments/wind.zsh | 39 ------- 12 files changed, 478 deletions(-) delete mode 100644 initial_integration_experiments/analyze.py delete mode 100644 initial_integration_experiments/analyze.zsh delete mode 100644 initial_integration_experiments/aviation.zsh delete mode 100644 initial_integration_experiments/control.zsh delete mode 100755 initial_integration_experiments/debug.zsh delete mode 100644 initial_integration_experiments/dnas.zsh delete mode 100644 initial_integration_experiments/gp_control.zsh delete mode 100755 initial_integration_experiments/post_training_dnas.zsh delete mode 100644 initial_integration_experiments/posttrain.zsh delete mode 100644 initial_integration_experiments/run_examm.zsh delete mode 100755 initial_integration_experiments/run_experiments.zsh delete mode 100644 initial_integration_experiments/wind.zsh diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py deleted file mode 
100644 index 78d51466..00000000 --- a/initial_integration_experiments/analyze.py +++ /dev/null @@ -1,110 +0,0 @@ -import pandas - -import numpy as np - -import matplotlib.pyplot as plt - -fig, subplts = plt.subplots(6, 1) - -bprange = [8, 16] -plts = {k:v for k, v in zip(bprange, subplts)} -print(plts) -base = plts[bprange[0]] - -for k, v in plts.items(): - v.set_title(f"{k} BPI") - if k == bprange[0]: - continue - v.sharey(base) - v.sharex(base) - -def avg(files, slice_at=-1): - r = {} - for file in files: - x = [] - - for fold in range(8): - f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] - print(f"{file}/{fold} -> {len(f)}") - x.append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - - - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) - - r[file] = { - 'mean_nodes': nodesmean, - 'mean_edges': edgesmean, - 'mean_rec_edges':redgesmean, - 'bpi': bpimean, - 'mean_mse': msemean, - 'std_mse': msestd, - } - return r - -results = {} -for ci in [64]: - results[ci] = {} - for bpe in bprange: - results[ci][bpe] = {} - for k in [1]: - f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" - x = avg([f])[f] - results[ci][bpe][k] = x - print(x) - - print(x['mean_mse'] - x['std_mse']) - g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] - plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - -control_results = {} -for bp in [8, 16]: - key = f"initial_integration_experiments/results/control_v7/{bp}" - r = avg([key])[key] - control_results[bp] = r - print(list(r.keys())) - g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] - plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - - -for k, v in 
plts.items(): - v.set_title(f"{k} BPI") - v.legend(fontsize=12, loc="upper right") - - -plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh deleted file mode 100644 index 5c2876f3..00000000 --- a/initial_integration_experiments/analyze.zsh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/zsh -# -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do - for k in 1; do - for fold in 0 1 2 3 4 5 6 7; do - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold - tail -1 $output_dir/fitness_log.csv - done - done - done -done diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh deleted file mode 100644 index 7059da3e..00000000 --- a/initial_integration_experiments/aviation.zsh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - -for output_params in "E1_CHT1" "Pitch"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done 
-done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh deleted file mode 100644 index a848302b..00000000 --- a/initial_integration_experiments/control.zsh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh -} - -bp_ge=(8 8192 16 4096 32 2048) - -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait -done diff --git 
a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh deleted file mode 100755 index ce159c01..00000000 --- a/initial_integration_experiments/debug.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes 8192 \ - --island_size 32 \ - --number_islands 4 \ - --stochastic \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -for crystalize_iters in 128; do - for bp_epoch in 8; do - for k in 1; do - for fold in 0; do - run_examm - done - # wait - # for fold in 4 5 6 7; do - # run_examm & - # done - # wait - done - done -done diff --git 
a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh deleted file mode 100644 index 8b525b09..00000000 --- a/initial_integration_experiments/dnas.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -bp_ge=(8 8192 16 4096 32 2048) -for crystalize_iters in 256; do - for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done 
- wait - done - done -done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh deleted file mode 100644 index 049e9750..00000000 --- a/initial_integration_experiments/gp_control.zsh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/zsh - -offset=1 -MAX_GENOMES=10 -N_ISLANDS=4 -ISLAND_SIZE=32 - -run_examm() { - output_dir=test_results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames ${=training_filenames} \ - --test_filenames ${=test_filenames} \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names $output_params \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes $MAX_GENOMES \ - --island_size $ISLAND_SIZE \ - --number_islands $N_ISLANDS - - touch $output_dir/completed -} - -run_group() { - for output_params in $OUTPUTS; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done - done -} - -INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" -training_filenames=(datasets/2018_coal/burner_[0-9].csv) -test_filenames=(datasets/2018_coal/burner_1[0-1].csv) -OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") -run_group - - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUTS=("E1_CHT1" "Pitch") -training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) 
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) -run_group - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" -OUTPUTS=("Cm_avg" "P_avg") -training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) -test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) -run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh deleted file mode 100755 index 1c226178..00000000 --- a/initial_integration_experiments/post_training_dnas.zsh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/zsh -offset=1 - -post_training() { - - echo "genome = $GENOME" - Release/rnn_examples/train_rnn \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $BP_ITERS \ - --stochastic \ - --normalize min_max \ - --genome_file $GENOME \ - --output_directory $OUTPUT_DIRECTORY \ - --log_filename post_training.csv \ - --learning_rate 0.01 \ - --weight_update adagrad \ - --train_sequence_length 1000 \ - --validation_sequence_length 100 \ - --crystalize_iters $CRYSTALIZE_ITERS \ - --dnas_k $k - - tail -1 $OUTPUT_DIRECTORY/post_training.csv -} - -post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh deleted file mode 100644 index cc54a2eb..00000000 --- a/initial_integration_experiments/posttrain.zsh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/zsh - - diff --git 
a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh deleted file mode 100644 index 77d2893f..00000000 --- a/initial_integration_experiments/run_examm.zsh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/zsh - -output_dir=results/v0/$bp_epoch/$fold -mkdir -p $output_dir - -mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 4000 \ - --island_size 32 \ - --number_islands 4 - -touch $output_dir/completed - diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh deleted file mode 100755 index 7dd8e956..00000000 --- a/initial_integration_experiments/run_experiments.zsh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh - -initial_integration_experiments/control.zsh -initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh deleted file mode 100644 index 7e68f482..00000000 --- a/initial_integration_experiments/wind.zsh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" - - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ - --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - - -for output_params in "Cm_avg" "P_avg"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done -done From c0b9e41e02ecdcaa98a501032f466d73ddedd42a Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:36:23 -0500 Subject: [PATCH 23/31] removed old fileS --- initial_integration_experiments/analyze.py | 110 ------------------ initial_integration_experiments/analyze.zsh | 12 -- initial_integration_experiments/aviation.zsh | 37 ------ initial_integration_experiments/control.zsh | 50 -------- initial_integration_experiments/debug.zsh | 55 --------- initial_integration_experiments/dnas.zsh | 55 --------- .../gp_control.zsh | 59 ---------- .../post_training_dnas.zsh | 29 ----- initial_integration_experiments/posttrain.zsh | 3 - initial_integration_experiments/run_examm.zsh | 25 ---- .../run_experiments.zsh | 4 - initial_integration_experiments/wind.zsh | 39 ------- 12 files changed, 478 deletions(-) delete mode 100644 initial_integration_experiments/analyze.py delete mode 100644 initial_integration_experiments/analyze.zsh delete mode 100644 initial_integration_experiments/aviation.zsh delete mode 100644 initial_integration_experiments/control.zsh delete mode 100755 
initial_integration_experiments/debug.zsh delete mode 100644 initial_integration_experiments/dnas.zsh delete mode 100644 initial_integration_experiments/gp_control.zsh delete mode 100755 initial_integration_experiments/post_training_dnas.zsh delete mode 100644 initial_integration_experiments/posttrain.zsh delete mode 100644 initial_integration_experiments/run_examm.zsh delete mode 100755 initial_integration_experiments/run_experiments.zsh delete mode 100644 initial_integration_experiments/wind.zsh diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py deleted file mode 100644 index 78d51466..00000000 --- a/initial_integration_experiments/analyze.py +++ /dev/null @@ -1,110 +0,0 @@ -import pandas - -import numpy as np - -import matplotlib.pyplot as plt - -fig, subplts = plt.subplots(6, 1) - -bprange = [8, 16] -plts = {k:v for k, v in zip(bprange, subplts)} -print(plts) -base = plts[bprange[0]] - -for k, v in plts.items(): - v.set_title(f"{k} BPI") - if k == bprange[0]: - continue - v.sharey(base) - v.sharex(base) - -def avg(files, slice_at=-1): - r = {} - for file in files: - x = [] - - for fold in range(8): - f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] - print(f"{file}/{fold} -> {len(f)}") - x.append(f) - - - enabled_nodes = [] - enabled_edges = [] - enabled_rec_edges = [] - - bpi_columns = [] - mse_columns = [] - - minlen = 100000000 - - for f in x: - bpi_columns.append(f[' Total BP Epochs'].to_numpy()) - mse_columns.append(f[' Best Val. MSE'].to_numpy()) - enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) - enabled_edges.append(f[' Enabled Edges'].to_numpy()) - enabled_rec_edges.append(f[' Enabled Rec. 
Edges'].to_numpy()) - - minlen = min(minlen, len(bpi_columns[-1])) - - enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) - enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) - enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) - bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) - mse_columns = list(map(lambda x: x[:minlen], mse_columns)) - - nodesmean = np.mean(np.array(enabled_nodes), axis=0) - edgesmean = np.mean(np.array(enabled_edges), axis=0) - redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) - print(f"Nodes at end mean: {nodesmean[-1]}") - print(f"edges at end mean: {edgesmean[-1]}") - print(f"redges at end mean: {redgesmean[-1]}") - - - bpimean = np.mean(np.array(bpi_columns), axis=0) - msemean = np.mean(np.array(mse_columns), axis=0) - msestd = np.std(np.array(mse_columns), axis=0) - - r[file] = { - 'mean_nodes': nodesmean, - 'mean_edges': edgesmean, - 'mean_rec_edges':redgesmean, - 'bpi': bpimean, - 'mean_mse': msemean, - 'std_mse': msestd, - } - return r - -results = {} -for ci in [64]: - results[ci] = {} - for bpe in bprange: - results[ci][bpe] = {} - for k in [1]: - f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" - x = avg([f])[f] - results[ci][bpe][k] = x - print(x) - - print(x['mean_mse'] - x['std_mse']) - g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] - plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - -control_results = {} -for bp in [8, 16]: - key = f"initial_integration_experiments/results/control_v7/{bp}" - r = avg([key])[key] - control_results[bp] = r - print(list(r.keys())) - g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] - plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], - alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) - - -for k, v in 
plts.items(): - v.set_title(f"{k} BPI") - v.legend(fontsize=12, loc="upper right") - - -plt.show() diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh deleted file mode 100644 index 5c2876f3..00000000 --- a/initial_integration_experiments/analyze.zsh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/zsh -# -for crystalize_iters in 64 128 256 512; do - for bp_epoch in 8 16 32 64 128; do - for k in 1; do - for fold in 0 1 2 3 4 5 6 7; do - output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold - tail -1 $output_dir/fitness_log.csv - done - done - done -done diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh deleted file mode 100644 index 7059da3e..00000000 --- a/initial_integration_experiments/aviation.zsh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - -for output_params in "E1_CHT1" "Pitch"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done 
-done diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh deleted file mode 100644 index a848302b..00000000 --- a/initial_integration_experiments/control.zsh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh -} - -bp_ge=(8 8192 16 4096 32 2048) - -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait -done diff --git 
a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh deleted file mode 100755 index ce159c01..00000000 --- a/initial_integration_experiments/debug.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes 8192 \ - --island_size 32 \ - --number_islands 4 \ - --stochastic \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -for crystalize_iters in 128; do - for bp_epoch in 8; do - for k in 1; do - for fold in 0; do - run_examm - done - # wait - # for fold in 4 5 6 7; do - # run_examm & - # done - # wait - done - done -done diff --git 
a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh deleted file mode 100644 index 8b525b09..00000000 --- a/initial_integration_experiments/dnas.zsh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 4 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -CELL_TYPE='dnas' -bp_ge=(8 8192 16 4096 32 2048) -for crystalize_iters in 256; do - for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for k in 1; do - for fold in 0 1 2 3; do - run_examm & - done - wait - for fold in 4 5 6 7; do - run_examm & - done 
- wait - done - done -done diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh deleted file mode 100644 index 049e9750..00000000 --- a/initial_integration_experiments/gp_control.zsh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/zsh - -offset=1 -MAX_GENOMES=10 -N_ISLANDS=4 -ISLAND_SIZE=32 - -run_examm() { - output_dir=test_results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames ${=training_filenames} \ - --test_filenames ${=test_filenames} \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names $output_params \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes $MAX_GENOMES \ - --island_size $ISLAND_SIZE \ - --number_islands $N_ISLANDS - - touch $output_dir/completed -} - -run_group() { - for output_params in $OUTPUTS; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done - done -} - -INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" -training_filenames=(datasets/2018_coal/burner_[0-9].csv) -test_filenames=(datasets/2018_coal/burner_1[0-1].csv) -OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") -run_group - - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUTS=("E1_CHT1" "Pitch") -training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) 
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) -run_group - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" -OUTPUTS=("Cm_avg" "P_avg") -training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) -test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) -run_group diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh deleted file mode 100755 index 1c226178..00000000 --- a/initial_integration_experiments/post_training_dnas.zsh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/zsh -offset=1 - -post_training() { - - echo "genome = $GENOME" - Release/rnn_examples/train_rnn \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $BP_ITERS \ - --stochastic \ - --normalize min_max \ - --genome_file $GENOME \ - --output_directory $OUTPUT_DIRECTORY \ - --log_filename post_training.csv \ - --learning_rate 0.01 \ - --weight_update adagrad \ - --train_sequence_length 1000 \ - --validation_sequence_length 100 \ - --crystalize_iters $CRYSTALIZE_ITERS \ - --dnas_k $k - - tail -1 $OUTPUT_DIRECTORY/post_training.csv -} - -post_training diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh deleted file mode 100644 index cc54a2eb..00000000 --- a/initial_integration_experiments/posttrain.zsh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/zsh - - diff --git 
a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh deleted file mode 100644 index 77d2893f..00000000 --- a/initial_integration_experiments/run_examm.zsh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/zsh - -output_dir=results/v0/$bp_epoch/$fold -mkdir -p $output_dir - -mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 4000 \ - --island_size 32 \ - --number_islands 4 - -touch $output_dir/completed - diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh deleted file mode 100755 index 7dd8e956..00000000 --- a/initial_integration_experiments/run_experiments.zsh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/zsh - -initial_integration_experiments/control.zsh -initial_integration_experiments/dnas.zsh diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh deleted file mode 100644 index 7e68f482..00000000 --- a/initial_integration_experiments/wind.zsh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/zsh - -INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" - - -offset=1 - -run_examm() { - output_dir=results/v0/$bp_epoch/$fold - mkdir -p $output_dir - mpirun -np 32 Release/mpi/examm_mpi \ - --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ - --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ - --time_offset $offset \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=output_params} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --max_recurrent_depth 1 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level INFO \ - --file_message_level INFO \ - --max_genomes 10000 \ - --island_size 32 \ - --number_islands 4 - - touch $output_dir/completed -} - - -for output_params in "Cm_avg" "P_avg"; do - for bp_epoch in 2 4 8 16 32; do - for fold in 0 1 2 3 4 5 6 7 8 9; do - run_examm - done - done -done From 79df69ab941e0d6783d20ba31d1929624d9601b7 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 14:37:52 -0500 Subject: [PATCH 24/31] Fixed bug caused by accidental paste --- examm/island_speciation_strategy.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index d8eaabab..b0a7b5e0 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island( Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str()); exit(1); } - return new_genome17731515; + return new_genome; } RNN_Genome* IslandSpeciationStrategy::generate_genome( From 68752460cb9a79e554212b3f0c9a97305defdfc4 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Mon, 19 Feb 2024 15:59:40 -0500 Subject: [PATCH 25/31] Synchronous EXAMM flag added --sychronous --- mpi/examm_mpi.cxx | 75 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git 
a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index 227c3a85..e350be0a 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -114,10 +114,62 @@ void receive_terminate_message(int32_t source) { MPI_Recv(terminate_message, 1, MPI_INT, source, TERMINATE_TAG, MPI_COMM_WORLD, &status); } -void master(int32_t max_rank) { - // the "main" id will have already been set by the main function so we do not need to re-set it here - Log::debug("MAX int32_t: %d\n", numeric_limits::max()); +void master_sync(int32_t max_rank) { + max_rank -= 1; + int32_t generation = 0; + while (true) { + + // Wait for N work requests + int32_t nreqs = 0; + while (nreqs < max_rank) { + MPI_Status status; + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + + int32_t source = status.MPI_SOURCE; + int32_t tag = status.MPI_TAG; + // Log::info("probe returned message from: %d with tag: %d\n", source, tag); + + if (tag == WORK_REQUEST_TAG) { + receive_work_request(source); + nreqs++; + } else if (tag == GENOME_LENGTH_TAG) { + Log::debug("received genome from: %d\n", source); + RNN_Genome* genome = receive_genome_from(source); + + examm->insert_genome(genome); + + // delete the genome as it won't be used again, a copy was inserted + delete genome; + } else { + Log::fatal("ERROR: received message from %d with unknown tag: %d", source, tag); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + vector genomes(max_rank); + for (int32_t i = 1; i <= max_rank; i++) { + RNN_Genome* genome = examm->generate_genome(); + if (genome == NULL) + break; + genomes[i - 1] = genome; + } + + if (genomes.size() != max_rank) { + break; + } + + for (int i = 1; i <= max_rank; i++) { + send_genome_to(i, genomes[i - 1]); + delete genomes[i - 1]; + } + } + + for (int i = 1; i <= max_rank; i++) { + send_terminate_message(i); + } +} + +void master(int32_t max_rank) { int32_t terminates_sent = 0; while (true) { @@ -134,12 +186,7 @@ void master(int32_t max_rank) { if (tag == WORK_REQUEST_TAG) { receive_work_request(source); - // if 
(transfer_learning_version.compare("v3") == 0 || transfer_learning_version.compare("v1+v3") == 0) { - // seed_stirs = 3; - // } - examm_mutex.lock(); RNN_Genome* genome = examm->generate_genome(); - examm_mutex.unlock(); if (genome == NULL) { // search was completed if it returns NULL for an individual // send terminate message @@ -167,9 +214,7 @@ void master(int32_t max_rank) { Log::debug("received genome from: %d\n", source); RNN_Genome* genome = receive_genome_from(source); - examm_mutex.lock(); examm->insert_genome(genome); - examm_mutex.unlock(); // delete the genome as it won't be used again, a copy was inserted delete genome; @@ -264,12 +309,20 @@ int main(int argc, char** argv) { RNN_Genome* seed_genome = get_seed_genome(arguments, time_series_sets, weight_rules); + bool synchronous = argument_exists(arguments, "--synchronous"); + Log::warning("synchronous? %d\n", synchronous); + Log::clear_rank_restriction(); if (rank == 0) { write_time_series_to_file(arguments, time_series_sets); examm = generate_examm_from_arguments(arguments, time_series_sets, weight_rules, seed_genome); - master(max_rank); + + if (synchronous) { + master_sync(max_rank); + } else { + master(max_rank); + } } else { worker(rank); } From a6006064aaeddb6160bd665d5208a4e135b2f5af Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Fri, 23 Feb 2024 21:47:51 -0500 Subject: [PATCH 26/31] Adding additional log information --- examm/examm.cxx | 26 ++++++++++++++++++------- examm/examm.hxx | 4 +++- examm/island_speciation_strategy.cxx | 29 +++++++++++++++++++++++----- examm/island_speciation_strategy.hxx | 16 ++++++++++----- examm/neat_speciation_strategy.cxx | 10 +++++----- examm/neat_speciation_strategy.hxx | 10 +++++----- examm/speciation_strategy.hxx | 12 +++++++----- scripts/dnas/control.zsh | 21 +++++++++----------- 8 files changed, 83 insertions(+), 45 deletions(-) diff --git a/examm/examm.cxx b/examm/examm.cxx index 1e1c2314..a90034f2 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ 
-95,8 +95,8 @@ void EXAMM::generate_log() { Log::info("Generating fitness log\n"); mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); - (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled " - "Edges, Enabled Rec. Edges"; + (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" + "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -151,7 +151,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position } } -void EXAMM::update_log() { +void EXAMM::update_log(RNN_Genome *genome) { if (log_file != NULL) { // make sure the log file is still good if (!log_file->good()) { @@ -193,8 +193,12 @@ void EXAMM::update_log() { (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," - << best_genome->get_enabled_recurrent_edge_count() - << speciation_strategy->get_strategy_information_values() << endl; + << best_genome->get_enabled_recurrent_edge_count() << "," + << genome->best_validation_mse << "," + << pre_insert_best_mse << "," + << (int32_t) (last_genome_inserted ? 
1 : 0) << "," + << genome->get_number_weights() + << speciation_strategy->get_strategy_information_values(genome) << endl; Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } } @@ -243,17 +247,25 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // updates EXAMM's mapping of which genomes have been generated by what genome->update_generation_map(generated_from_map); + pre_insert_best_mse = this->get_best_fitness(); + int32_t insert_position = speciation_strategy->insert_genome(genome); + // write this genome to disk if it was a new best found genome if (insert_position == 0) { // genome->normalize_type = normalize_type; genome->write_graphviz(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".gv"); genome->write_to_file(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".bin"); } + + last_genome_inserted = insert_position >= 0; + speciation_strategy->print(); + update_op_log_statistics(genome, insert_position); - update_log(); - return insert_position >= 0; + update_log(genome); + + return last_genome_inserted; } RNN_Genome* EXAMM::generate_genome() { diff --git a/examm/examm.hxx b/examm/examm.hxx index c0c0ee03..a95d8af4 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -73,6 +73,8 @@ class EXAMM { string output_directory; ofstream* log_file; ofstream* op_log_file; + double pre_insert_best_mse = 1000000; + bool last_genome_inserted = false; std::chrono::time_point startClock; @@ -87,7 +89,7 @@ class EXAMM { ~EXAMM(); void print(); - void update_log(); + void update_log(RNN_Genome *genome); void set_possible_node_types(vector possible_node_type_strings); diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index b0a7b5e0..2e7a91be 100644 --- a/examm/island_speciation_strategy.cxx +++ 
b/examm/island_speciation_strategy.cxx @@ -100,12 +100,12 @@ int32_t IslandSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* IslandSpeciationStrategy::get_best_genome() { +RNN_Genome* IslandSpeciationStrategy::get_best_genome() const { // the global_best_genome is updated every time a genome is inserted return global_best_genome; } -RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { +RNN_Genome* IslandSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_island = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -126,7 +126,7 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { } } -double IslandSpeciationStrategy::get_best_fitness() { +double IslandSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -135,7 +135,7 @@ double IslandSpeciationStrategy::get_best_fitness() { } } -double IslandSpeciationStrategy::get_worst_fitness() { +double IslandSpeciationStrategy::get_worst_fitness() const { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -376,6 +376,9 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( islands[generation_island]->set_latest_generation_id(generated_genomes); new_genome->set_group_id(generation_island); + pair perf = {this->get_best_fitness(), this->get_worst_fitness()}; + genome_performance.emplace(new_genome->generation_id, perf); + if (current_island->is_initializing()) { RNN_Genome* genome_copy = new_genome->copy(); Log::debug("inserting genome copy!\n"); @@ -386,6 +389,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( generation_island = 0; } + return new_genome; } @@ -456,6 +460,7 @@ void IslandSpeciationStrategy::print(string indent) const { */ string IslandSpeciationStrategy::get_strategy_information_headers() const { string info_header = ""; + 
info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"); for (int32_t i = 0; i < (int32_t) islands.size(); i++) { info_header.append(","); info_header.append("Island_"); @@ -472,8 +477,22 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string IslandSpeciationStrategy::get_strategy_information_values() const { +string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const { string info_value = ""; + + auto &[min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); + info_value.append(","); + info_value.append(to_string(min_mse_pre)); + info_value.append(","); + info_value.append(to_string(max_mse_pre)); + + float min_mse_post = this->get_best_fitness(); + float max_mse_post = this->get_worst_fitness(); + info_value.append(","); + info_value.append(to_string(min_mse_post)); + info_value.append(","); + info_value.append(to_string(max_mse_post)); + for (int32_t i = 0; i < (int32_t) islands.size(); i++) { double best_fitness = islands[i]->get_best_fitness(); double worst_fitness = islands[i]->get_worst_fitness(); diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index 0eed891c..683e2a39 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -62,6 +62,12 @@ class IslandSpeciationStrategy : public SpeciationStrategy { vector islands; RNN_Genome* global_best_genome; + ofstream *island_log_file; + + // Maps genome number to a pair representing (worst island mse, best island mse) at + // the time of genome generation. 
+ unordered_map> genome_performance; + // Transfer learning class properties: bool transfer_learning; @@ -114,25 +120,25 @@ class IslandSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * \return true if all the islands are full @@ -207,7 +213,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome *genome) const; /** * Island repopulation through two random parents from two seperate islands, diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx index 8d5f18ac..e71470e1 100644 --- a/examm/neat_speciation_strategy.cxx +++ b/examm/neat_speciation_strategy.cxx @@ -74,7 +74,7 @@ int32_t NeatSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* NeatSpeciationStrategy::get_best_genome() { +RNN_Genome* NeatSpeciationStrategy::get_best_genome() const { int32_t best_genome_species = -1; double best_fitness = EXAMM_MAX_DOUBLE; @@ -95,7 +95,7 @@ RNN_Genome* NeatSpeciationStrategy::get_best_genome() { } } -RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { +RNN_Genome* 
NeatSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_species = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -116,7 +116,7 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { } } -double NeatSpeciationStrategy::get_best_fitness() { +double NeatSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -125,7 +125,7 @@ double NeatSpeciationStrategy::get_best_fitness() { } } -double NeatSpeciationStrategy::get_worst_fitness() { +double NeatSpeciationStrategy::get_worst_fitness() const { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -399,7 +399,7 @@ string NeatSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string NeatSpeciationStrategy::get_strategy_information_values() const { +string NeatSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const { string info_value = ""; for (int32_t i = 0; i < (int32_t) Neat_Species.size(); i++) { double best_fitness = Neat_Species[i]->get_best_fitness(); diff --git a/examm/neat_speciation_strategy.hxx b/examm/neat_speciation_strategy.hxx index 3416de03..645aabdd 100644 --- a/examm/neat_speciation_strategy.hxx +++ b/examm/neat_speciation_strategy.hxx @@ -64,25 +64,25 @@ class NeatSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all 
the islands * \return the worst genome of all islands */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * Inserts a copy of the genome into this speciation strategy. @@ -130,7 +130,7 @@ class NeatSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome *genome) const; RNN_Genome* get_global_best_genome(); diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx index 9d790ab0..2d66f990 100644 --- a/examm/speciation_strategy.hxx +++ b/examm/speciation_strategy.hxx @@ -9,6 +9,8 @@ using std::string; using std::minstd_rand0; using std::uniform_real_distribution; +#include "rnn/rnn_genome.hxx" + class SpeciationStrategy { public: /** @@ -25,25 +27,25 @@ class SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - virtual double get_best_fitness() = 0; + virtual double get_best_fitness() const = 0; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - virtual double get_worst_fitness() = 0; + virtual double get_worst_fitness() const = 0; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - virtual RNN_Genome* get_best_genome() = 0; + virtual RNN_Genome* get_best_genome() const = 0; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands */ - virtual RNN_Genome* get_worst_genome() = 0; + virtual RNN_Genome* get_worst_genome() const = 0; /** * Inserts a copy of the genome into this speciation strategy. 
@@ -86,7 +88,7 @@ class SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - virtual string get_strategy_information_values() const = 0; + virtual string get_strategy_information_values(RNN_Genome *genome) const = 0; virtual RNN_Genome* get_global_best_genome() = 0; virtual void initialize_population(function& mutate) = 0; diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh index a848302b..f3532525 100644 --- a/scripts/dnas/control.zsh +++ b/scripts/dnas/control.zsh @@ -6,9 +6,9 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' offset=1 run_examm() { - output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + output_dir=results/control_v8/$bp_epoch/$fold mkdir -p $output_dir - mpirun -np 8 Release/mpi/examm_mpi \ + mpirun -np 14 build/mpi/examm_mpi \ --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ --time_offset $offset \ @@ -25,26 +25,23 @@ run_examm() { --output_directory $output_dir \ --log_filename fitness.csv \ --learning_rate 0.01 \ - --std_message_level WARNING \ + --std_message_level INFO \ --file_message_level WARNING \ --crystalize_iters $crystalize_iters \ --max_genomes $max_genomes \ --island_size 32 \ - --number_islands 4 + --number_islands 4 \ + --synchronous # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh } -bp_ge=(8 8192 16 4096 32 2048) +# bp_ge=(8 8192 16 4096 32 2048) +bp_ge=(8 8192) for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - for fold in 0 1 2 3; do - run_examm & + for fold in $(seq 0 1); do + run_examm done - wait - for fold in 4 5 6 7; do - run_examm & - done - wait done From 72ce5d4a42e6435cff23e5c0984527235f786d7f Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Thu, 29 Feb 2024 15:20:33 
-0500 Subject: [PATCH 27/31] Additional log data --- examm/examm.cxx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examm/examm.cxx b/examm/examm.cxx index a90034f2..d0c784b9 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -96,7 +96,7 @@ void EXAMM::generate_log() { mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" - "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters"; + "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -197,7 +197,8 @@ void EXAMM::update_log(RNN_Genome *genome) { << genome->best_validation_mse << "," << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << "," - << genome->get_number_weights() + << genome->get_number_weights() << "," + << genome->get_generation_id() << speciation_strategy->get_strategy_information_values(genome) << endl; Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count()); } From 5949736f7bc61d174a7b0465dd104769bcd73781 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 6 Mar 2024 15:17:26 -0500 Subject: [PATCH 28/31] Cluster script updates --- scripts/dnas/examm_bias_exp.zsh | 58 ++++++++++++++++++++++++++++++ scripts/dnas/examm_synchronous.zsh | 55 ++++++++++++++++++++++++++++ scripts/dnas/mk_jobs.zsh | 12 ++++--- 3 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 scripts/dnas/examm_bias_exp.zsh create mode 100644 scripts/dnas/examm_synchronous.zsh diff --git a/scripts/dnas/examm_bias_exp.zsh b/scripts/dnas/examm_bias_exp.zsh new file mode 100644 
index 00000000..52816f00 --- /dev/null +++ b/scripts/dnas/examm_bias_exp.zsh @@ -0,0 +1,58 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=18 +#SBATCH --exclude theocho +#SBATCH --time=48:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_bias_ablation +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +cd /home/jak5763/exact + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/$synchronous/$scramble_weights/$max_genomes/$fold + mkdir -p $output_dir + srun -n 18 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --$synchronous \ + --$scramble_weights +} + +run_group() { + for fold in $(seq 0 19); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/examm_synchronous.zsh b/scripts/dnas/examm_synchronous.zsh new file mode 100644 index 00000000..1d970272 --- /dev/null +++ 
b/scripts/dnas/examm_synchronous.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/synchronous/$max_genomes/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --synchronous +} + +run_group() { + for fold in $(seq 0 9); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh index 38a5526c..b996883e 100644 --- a/scripts/dnas/mk_jobs.zsh +++ b/scripts/dnas/mk_jobs.zsh @@ -1,6 +1,8 @@ -bp_ge=(8 8192 16 4096 32 2048 64 1024) -for bp_epoch max_genomes in "${(@kv)bp_ge}"; do - 
bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh - bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh - bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh +bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200) +for bp_epoch in $bp; do + for synchronous in "async" "synchronous"; do + for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do + bp_epoch=$bp_epoch synchronous="$synchronous" scramble_weights="$scramble_weights" sbatch examm_bias_exp.zsh + done + done done From 95277159201b2e0f7b5469223d76794820cd8ccd Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 13 Mar 2024 13:56:00 -0400 Subject: [PATCH 29/31] Remove scripts in root directory --- dnas_cluster.zsh | 69 ------------------------------------------------ dnas_control.zsh | 60 ----------------------------------------- 2 files changed, 129 deletions(-) delete mode 100644 dnas_cluster.zsh delete mode 100644 dnas_control.zsh diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh deleted file mode 100644 index 55823c0c..00000000 --- a/dnas_cluster.zsh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - 
--test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 2 \ - --use_dnas_seed true \ - --use_burn_in_bp_epoch \ - --burn_in_period 1024 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for crystalize_iters in 512; do - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done - done -} - -CELL_TYPE='dnas' -# bp_ge=(8 8192 16 4096 32 2048 64 1024) -# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do -run_group -# done diff --git a/dnas_control.zsh b/dnas_control.zsh deleted file mode 100644 index 88a7c882..00000000 --- a/dnas_control.zsh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS 
LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 4 \ - --burn_in_period 1024 \ - --use_burn_in_bp_epoch - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done -} - -run_group From fbb32b2aa3b2adb282e87022b2394169971eb159 Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 13 Mar 2024 13:56:17 -0400 Subject: [PATCH 30/31] Remove junk file --- key | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 key diff --git a/key b/key deleted file mode 100644 index 391a7405..00000000 --- a/key +++ /dev/null @@ -1,7 +0,0 @@ -v11 -> burn in schedule with 0.001 lr 4 mut -v12 -> burn in schedule with 0.01 lr 4 mut -v13 -> burn in schedule with 0.01 lr and period of 1024 and total genoms 8k 4 mut 
-v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut -v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut -v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut - From efcbf5d348f703b02d244ae83fbd656f3c1efecd Mon Sep 17 00:00:00 2001 From: Joshua Karns Date: Wed, 20 Mar 2024 15:39:43 -0400 Subject: [PATCH 31/31] Add flag to disable epigenetic weights --- dnas_cluster.zsh | 69 ---------------------------------------- dnas_control.zsh | 60 ---------------------------------- examm/examm.cxx | 1 - rnn/genome_property.cxx | 4 +++ rnn/genome_property.hxx | 1 + scripts/dnas/mk_jobs.zsh | 2 +- 6 files changed, 6 insertions(+), 131 deletions(-) delete mode 100644 dnas_cluster.zsh delete mode 100644 dnas_control.zsh diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh deleted file mode 100644 index 55823c0c..00000000 --- a/dnas_cluster.zsh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types dnas \ - --stochastic 1 \ - --input_parameter_names 
${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --validation_sequence_length 100 \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 2 \ - --use_dnas_seed true \ - --use_burn_in_bp_epoch \ - --burn_in_period 1024 \ - --dnas_k $k - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for crystalize_iters in 512; do - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done - done -} - -CELL_TYPE='dnas' -# bp_ge=(8 8192 16 4096 32 2048 64 1024) -# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do -run_group -# done diff --git a/dnas_control.zsh b/dnas_control.zsh deleted file mode 100644 index 88a7c882..00000000 --- a/dnas_control.zsh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/zsh - -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=36 -#SBATCH --exclude theocho -#SBATCH --time=23:00:00 -#SBATCH -A examm -#SBATCH --partition=tier3 -#SBATCH -J examm_dnas_experimental -#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out -#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err -#SBATCH --mem=0 - -INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' -OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' - -offset=1 - -run_examm() { - 
output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold - mkdir -p $output_dir - srun -n 36 Release/mpi/examm_mpi \ - --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ - --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ - --time_offset $offset \ - --possible_node_types lstm mgu gru ugrnn delta simple \ - --stochastic 0 \ - --input_parameter_names ${=INPUT_PARAMETERS} \ - --output_parameter_names ${=OUTPUT_PARAMETERS} \ - --bp_iterations $bp_epoch \ - --normalize min_max \ - --num_hidden_layers $SIZE \ - --hidden_layer_size $SIZE \ - --max_recurrent_depth 10 \ - --output_directory $output_dir \ - --log_filename fitness.csv \ - --learning_rate 0.01 \ - --std_message_level WARNING \ - --file_message_level WARNING \ - --crystalize_iters $crystalize_iters \ - --max_genomes $max_genomes \ - --island_size 32 \ - --number_islands 8 \ - --num_mutations 4 \ - --burn_in_period 1024 \ - --use_burn_in_bp_epoch - - # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) - # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh -} - -run_group() { - for k in 1; do - for fold in $(seq 0 19); do - run_examm - done - done -} - -run_group diff --git a/examm/examm.cxx b/examm/examm.cxx index d0c784b9..6f51fd02 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -285,7 +285,6 @@ RNN_Genome* EXAMM::generate_genome() { RNN_Genome* genome = speciation_strategy->generate_genome(rng_0_1, generator, mutate_function, crossover_function); genome_property->set_genome_properties(genome); - // if (!epigenetic_weights) genome->initialize_randomly(); // this is just a sanity check, can most likely comment out (checking to see // if all the paramemters are sane) diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx index 09ea1ae8..95b80df1 100644 --- a/rnn/genome_property.cxx +++ 
b/rnn/genome_property.cxx @@ -31,6 +31,9 @@ void GenomeProperty::generate_genome_property_from_arguments(const vectorset_bp_iterations(compute_bp_iterations(genome)); if (use_dropout) genome->enable_dropout(dropout_probability); + if (!use_epigenetic_weights) genome->initialize_randomly(); genome->normalize_type = normalize_type; genome->set_parameter_names(input_parameter_names, output_parameter_names); diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx index 130b26de..b70fbf1e 100644 --- a/rnn/genome_property.hxx +++ b/rnn/genome_property.hxx @@ -17,6 +17,7 @@ class GenomeProperty { double dropout_probability; int32_t min_recurrent_depth; int32_t max_recurrent_depth; + bool use_epigenetic_weights = true; bool use_burn_in_bp_epoch; int32_t burn_in_period = 2048; diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh index b996883e..6adfff8f 100644 --- a/scripts/dnas/mk_jobs.zsh +++ b/scripts/dnas/mk_jobs.zsh @@ -1,4 +1,4 @@ -bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200) +bp=(1 2 3 4 5 6 7 8 9 10 15 20 25 30 35 40 45 50 100) for bp_epoch in $bp; do for synchronous in "async" "synchronous"; do for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do