From e69fc712b7cf59ca382743e779a16c09fff31aeb Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 30 Jan 2023 14:05:06 -0500
Subject: [PATCH 01/42] Removing c++20 features unsupported by GCC versions <
 12.2

---
 CMakeLists.txt    | 3 ++-
 rnn/dnas_node.cxx | 4 ++--
 rnn/dnas_node.hxx | 7 ++-----
 3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b34e8f0..9093410b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,7 @@ set (EXACT_VERSION_MINOR 33)
 #add_definitions( -DEXACT_VERSION="${EXACT_VERSION_MAJOR}.${EXACT_VERSION_MINOR}" )
 
 SET (PLATFORM 64)
+set(CMAKE_CXX_STANDARD 20)
 
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++")
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS")
@@ -23,7 +24,7 @@ SET (PLATFORM 64)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "-std=c++20 -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS                "-Wall -O3 -funroll-loops -msse3")
 SET (CMAKE_CXX_FLAGS_DEBUG          "-g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "-O4 -funroll-loops -DNDEBUG")
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 2f040703..957eaba5 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -67,13 +67,13 @@ DNASNode::~DNASNode() {
     delete node;
 }
 
-template <uniform_random_bit_generator Rng>
+template <typename Rng>
 void DNASNode::gumbel_noise(Rng &rng, vector<double> &output) {
   for (int i = 0; i < output.size(); i++)
     output[i] = -log(-log(uniform_real_distribution<double>(0.0, 1.0)(rng)));
 }
 
-template <uniform_random_bit_generator Rng>
+template <typename Rng>
 void DNASNode::sample_gumbel_softmax(Rng &rng) {
   z.assign(pi.size(), 0.0);
   x.assign(pi.size(), 0.0);
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index daa26605..1b63532c 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -7,7 +7,6 @@ using std::string;
 #include <random>
 using std::minstd_rand0;
 using std::uniform_real_distribution;
-using std::uniform_random_bit_generator;
 using std::generate_canonical;
 
 #include <vector>
@@ -27,9 +26,8 @@ using std::unique_ptr;
 #define CRYSTALLIZATION_THRESHOLD 50000
 
 class DNASNode : public RNN_Node_Interface {
-
  private:
-  template <uniform_random_bit_generator R>
+  template <typename R>
   static void gumbel_noise(R &rng, vector<double> &output);
   void calculate_maxi();
 
@@ -75,12 +73,11 @@ class DNASNode : public RNN_Node_Interface {
   vector<vector<double>> node_outputs;
 
  public:
-
   DNASNode(vector<RNN_Node_Interface *> &&nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1);
   DNASNode(const DNASNode &node);
   ~DNASNode();
 
-  template <uniform_random_bit_generator Rng>
+  template <typename Rng>
   void sample_gumbel_softmax(Rng &rng);
   void calculate_z();
 

From 903dcfb40baf8d06fa770875693a0d198efd95e2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:26:23 -0500
Subject: [PATCH 02/42] Fix minimum c++ requirement in CMakeLists.txt to be
 compatible with GCC and clang

---
 CMakeLists.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9093410b..2ec362ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.6)
+cmake_minimum_required (VERSION 2.8)
 project (EXACT)
 
 # The version number.
@@ -24,10 +24,10 @@ set(CMAKE_CXX_STANDARD 20)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "-Wall -O3 -funroll-loops -msse3")
-SET (CMAKE_CXX_FLAGS_DEBUG          "-g")
-SET (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG")
-SET (CMAKE_CXX_FLAGS_RELEASE        "-O4 -funroll-loops -DNDEBUG")
+SET (CMAKE_CXX_FLAGS                "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
+SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
+SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
 
 set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib)
 

From 09c5cbcafdb848f4a493c3502baba7fd5db275ca Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:37:33 -0500
Subject: [PATCH 03/42] properly specify minimum CMake version

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2ec362ca..558fbefe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.8)
+cmake_minimum_required (VERSION 3.1)
 project (EXACT)
 
 # The version number.

From 8ec09ca9826a8e7022c46382c14a7934e02a0731 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:41:37 -0500
Subject: [PATCH 04/42] Fixed bug introduced during merge

---
 rnn/dnas_node.hxx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index f56b404b..82ebff65 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -27,7 +27,7 @@ using std::unique_ptr;
 
 class DNASNode : public RNN_Node_Interface {
    private:
-    template <uniform_random_bit_generator R>
+    template <typename R>
     static void gumbel_noise(R &rng, vector<double> &output);
     void calculate_maxi();
 
@@ -77,7 +77,7 @@ class DNASNode : public RNN_Node_Interface {
     DNASNode(const DNASNode &node);
     ~DNASNode();
 
-    template <uniform_random_bit_generator Rng>
+    template <typename Rng>
     void sample_gumbel_softmax(Rng &rng);
     void calculate_z();
 

From 1c9bcfff4042964727bc41f944738f4e82c2d5d2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 11:12:56 -0500
Subject: [PATCH 05/42] Tweaking for cluster

---
 CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 558fbefe..1af3a314 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 3.1)
+cmake_minimum_required (VERSION 3.8)
 project (EXACT)
 
 # The version number.
@@ -9,6 +9,7 @@ set (EXACT_VERSION_MINOR 33)
 
 SET (PLATFORM 64)
 set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++")
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS")
@@ -24,13 +25,14 @@ set(CMAKE_CXX_STANDARD 20)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3")
 SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
 
 set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib)
 
+message(STATUS "${CMAKE_CXX_FLAGS}")
 message(STATUS "project source dir is ${PROJECT_SOURCE_DIR}")
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/")

From 9c6be46a3141e5cbde600156f9f7b08134600acb Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 11:35:42 -0500
Subject: [PATCH 06/42] Added updated cluster instructions to the README.md

---
 README.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7fc588a3..449e7194 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # Getting Started and Prerequisites
 
 EXONA has been developed to compile using CMake, which should be installed before attempting to compile. To use the MPI version, a version of MPI (such as OpenMPI) should be installed. EXACT currently requires libtiff and libpng
-The EXACT algorithm can also checkpoint to a database, however this is not required.  To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX.  Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++11 compatible compiler.
+The EXACT algorithm can also checkpoint to a database, however this is not required.  To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX.  Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++20 compatible compiler.
 
 If you are using OSX, to set up the environment:
 
@@ -15,6 +15,18 @@ brew install libpng
 xcode-select --install
 ```
 
+On the RIT Cluster Computer, load the following packages using spack:
+```
+# CMake
+spack load /ux27hbj
+
+# GCC
+spack load gcc@11.2.0 
+
+# libtiff
+spack load /ycf67m3
+```
+
 To build:
 
 ```
@@ -24,6 +36,8 @@ To build:
 ~/exact/build $ make
 ```
 
+You can add `-DCMAKE_BUILD_TYPE=Release` to the invocation of `cmake` for a release build (slower compile times, faster execution).
+
 You may also want to have graphviz installed so you can generate images of the evolved neural networks.  EXACT/EXALT/EXAMM will write out evolved genomes in a .gv (graphviz) format for this. For example, can generate a pdf from a gv file (assuming graphviz is installed with):
 
 ```

From d7023b64e771dad351ae946f962018de2c4a1acc Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 31 Jan 2023 11:41:24 -0500
Subject: [PATCH 07/42] Updated format script

---
 scripts/util/format.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/util/format.sh b/scripts/util/format.sh
index 85dddbfd..76fb9efa 100755
--- a/scripts/util/format.sh
+++ b/scripts/util/format.sh
@@ -1,2 +1,4 @@
 #!/bin/bash
-find . -type f -name "*.*xx" -exec clang-format -style=file -i {} \;
+for folder in common examm mpi multithreaded rnn rnn_examples rnn_tests time_series weights word_series; do
+  find $folder -type f -name "*.*xx" -exec clang-format -style=file -i {} \;
+done

From 89b3410cebc573430cd244b43ea070ca203fc98f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 31 Jan 2023 11:41:46 -0500
Subject: [PATCH 08/42] Formatting

---
 rnn/dnas_node.hxx | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 3b341195..435e5400 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -8,7 +8,6 @@ using std::string;
 using std::generate_canonical;
 using std::minstd_rand0;
 using std::uniform_real_distribution;
-using std::generate_canonical;
 
 #include <vector>
 using std::vector;
@@ -28,7 +27,7 @@ using std::unique_ptr;
 class DNASNode : public RNN_Node_Interface {
    private:
     template <typename R>
-    static void gumbel_noise(R &rng, vector<double> &output);
+    static void gumbel_noise(R& rng, vector<double>& output);
 
     void calculate_maxi();
 
@@ -81,7 +80,7 @@ class DNASNode : public RNN_Node_Interface {
     ~DNASNode();
 
     template <typename Rng>
-    void sample_gumbel_softmax(Rng &rng);
+    void sample_gumbel_softmax(Rng& rng);
     void calculate_z();
 
     virtual void initialize_lamarckian(

From cbf757457e0d56235e6c58a64114d3143f3c0f58 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 12:49:34 -0500
Subject: [PATCH 09/42] Added OpenMPI package to cluster instructions

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 449e7194..c187316a 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,9 @@ spack load /ux27hbj
 # GCC
 spack load gcc@11.2.0 
 
+# OpenMPI
+spack load openmpi@4.1.2
+
 # libtiff
 spack load /ycf67m3
 ```

From 06f12b588b1bcf1ba4380192eaa7ad2eb18dbf0f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 7 Feb 2023 12:09:54 -0500
Subject: [PATCH 10/42] Adding argument parsing for DNAS

---
 common/process_arguments.cxx |  9 +++++
 examm/examm.cxx              | 18 +--------
 examm/examm.hxx              |  2 +-
 rnn/dnas_node.cxx            | 73 +++++++++++++++++++++++++-----------
 rnn/dnas_node.hxx            |  2 +-
 rnn/generate_nn.cxx          | 11 ++++++
 rnn/rnn_genome.cxx           | 30 ++++-----------
 rnn/rnn_genome.hxx           |  2 +
 rnn/rnn_node_interface.cxx   | 35 ++++++++++++++---
 rnn/rnn_node_interface.hxx   |  9 ++++-
 10 files changed, 124 insertions(+), 67 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index f4bf87a8..885f28a0 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -32,6 +32,15 @@ EXAMM* generate_examm_from_arguments(
     // get_argument(arguments, "--sequence_length_lower_bound", false, sequence_length_lower_bound);
     // get_argument(arguments, "--sequence_length_upper_bound", false, sequence_length_upper_bound);
 
+    vector<string> dnas_node_type_strings;
+    get_argument_vector(arguments, "--dnas_node_types", false, dnas_node_type_strings);
+    if (dnas_node_type_strings.size() != 0) {
+        dnas_node_types.clear();
+        for (auto node_type : dnas_node_type_strings) {
+            dnas_node_types.push_back(node_type_from_string(node_type));
+        }
+    }
+
     GenomeProperty* genome_property = new GenomeProperty();
     genome_property->generate_genome_property_from_arguments(arguments);
     genome_property->get_time_series_parameters(time_series_sets);
diff --git a/examm/examm.cxx b/examm/examm.cxx
index ce137d6f..f017ab8b 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -205,22 +205,8 @@ void EXAMM::update_log() {
 void EXAMM::set_possible_node_types(vector<string> possible_node_type_strings) {
     possible_node_types.clear();
 
-    for (int32_t i = 0; i < (int32_t) possible_node_type_strings.size(); i++) {
-        string node_type_s = possible_node_type_strings[i];
-
-        bool found = false;
-
-        for (int32_t j = 0; j < NUMBER_NODE_TYPES; j++) {
-            if (NODE_TYPES[j].compare(node_type_s) == 0) {
-                found = true;
-                possible_node_types.push_back(j);
-            }
-        }
-
-        if (!found) {
-            Log::error("unknown node type: '%s'\n", node_type_s.c_str());
-            exit(1);
-        }
+    for (auto node_type : possible_node_type_strings) {
+        possible_node_types.push_back(node_type_from_string(node_type));
     }
 }
 
diff --git a/examm/examm.hxx b/examm/examm.hxx
index 5ccb545e..ac5d56eb 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -63,7 +63,7 @@ class EXAMM {
     double split_node_rate;
     double merge_node_rate;
 
-    vector<int32_t> possible_node_types;
+    vector<int32_t> possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
 
     vector<string> op_log_ordering;
     map<string, int32_t> inserted_counts;
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index ba6f3ba3..46a20c69 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -1,4 +1,9 @@
 #include <algorithm>
+using std::sort;
+
+#include <utility>
+using std::pair;
+
 #include <cassert>
 #include <cmath>
 using std::max;
@@ -72,7 +77,7 @@ DNASNode::~DNASNode() {
 
 template <typename Rng>
 void DNASNode::gumbel_noise(Rng& rng, vector<double>& output) {
-    for (int i = 0; i < output.size(); i++) {
+    for (auto i = 0; i < output.size(); i++) {
         output[i] = -log(-log(uniform_real_distribution<double>(0.0, 1.0)(rng)));
     }
 }
@@ -92,18 +97,45 @@ void DNASNode::calculate_z() {
 
     xtotal = 0.0;
     double emax = -10000000;
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         x[i] = g[i] + log(pi[i]);
         x[i] /= tao;
         emax = max(emax, x[i]);
     }
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         x[i] = exp(emax - x[i]);
         xtotal += x[i];
     }
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         z[i] = x[i] / xtotal;
     }
+
+    if (k > 0) {
+        pair<int32_t, double> ps_with_indices[z.size()];
+        for (int32_t i = 0; i < (int32_t) z.size(); i++) {
+            ps_with_indices[i] = pair(i, z[i]);
+        }
+
+        std::sort(ps_with_indices, ps_with_indices + z.size(),
+            [](const pair<int32_t, double>& a, const pair<int32_t, double>& b) {
+                // Descending order
+                return a.second > b.second;
+            }
+        );
+
+        double total = 0.0;
+        for (int i = 0; i < k; i++) {
+            total += ps_with_indices[i].second;
+        }
+
+        for (int i = 0; i < z.size(); i++) {
+            z[i] = 0.0;
+        }
+
+        for (int i = 0; i < k; i++) {
+            z[ps_with_indices[i].first] = ps_with_indices[i].second / total;
+        }
+    }
 }
 
 void DNASNode::reset(int32_t series_length) {
@@ -151,7 +183,7 @@ void DNASNode::input_fired(int32_t time, double incoming_output) {
         node_outputs[time][maxi] = nodes[maxi]->output_values[time];
         output_values[time] = nodes[maxi]->output_values[time];
     } else {
-        for (int i = 0; i < nodes.size(); i++) {
+        for (auto i = 0; i < nodes.size(); i++) {
             auto node = nodes[i];
             node->input_fired(time, input_values[time]);
             node_outputs[time][i] = node->output_values[time];
@@ -190,7 +222,7 @@ void DNASNode::try_update_deltas(int32_t time) {
         d_input[time] += nodes[maxi]->d_input[time];
 
     } else {
-        for (int i = 0; i < z.size(); i++) {
+        for (auto i = 0; i < z.size(); i++) {
             nodes[i]->output_fired(time, delta * z[i]);
             double p = (x[i] / pi[i]);
             p *= ((delta * node_outputs[time][i]) / xtotal);
@@ -254,7 +286,7 @@ void DNASNode::set_weights(const vector<double>& parameters) {
 
 void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
     // Log::info("pi start %d; ", offset);
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         parameters[offset++] = pi[i];
     }
     // Log::info_no_header("pi end %d \n", offset);
@@ -265,24 +297,23 @@ void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
 
 void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
     }
     // Log::info("Pi indices: %d-%d\n", start, offset);
     for (auto node : nodes) {
         node->set_weights(offset, parameters);
     }
-    Log::info("Just set weights\n");
     calculate_z();
-    string s = "Pi = { ";
-    for (auto p : pi) {
-        s += std::to_string(p) + ", ";
-    }
-    Log::info("%s }\n", s.c_str());
+    // string s = "Pi = { ";
+    // for (auto p : pi) {
+    //     s += std::to_string(p) + ", ";
+    // }
+    // Log::info("%s }\n", s.c_str());
 }
 
 void DNASNode::set_pi(const vector<double>& new_pi) {
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         pi[i] = new_pi[i];
     }
     calculate_maxi();
@@ -293,7 +324,7 @@ void DNASNode::calculate_maxi() {
         maxi = 0;
         double max_pi = pi[0];
 
-        for (int i = 1; i < nodes.size(); i++) {
+        for (auto i = 1; i < nodes.size(); i++) {
             if (pi[i] > max_pi) {
                 max_pi = pi[i];
                 maxi = i;
@@ -314,11 +345,11 @@ void DNASNode::get_gradients(vector<double>& gradients) {
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
         offset += pi.size();
-        for (int i = 0; i < nodes.size(); i++) {
+        for (auto i = 0; i < nodes.size(); i++) {
             RNN_Node_Interface* node = nodes[i];
             if (i == maxi) {
                 node->get_gradients(temp);
-                for (int j = 0; j < temp.size(); j++) {
+                for (auto j = 0; j < temp.size(); j++) {
                     gradients[offset++] = temp[j];
                 }
             } else {
@@ -328,13 +359,13 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
-        for (int i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i];
+        for (auto i = 0; i < pi.size(); i++) {
+            gradients[offset++] = d_pi[i] * 0.1;
         }
 
         for (auto node : nodes) {
             node->get_gradients(temp);
-            for (int i = 0; i < temp.size(); i++) {
+            for (auto i = 0; i < temp.size(); i++) {
                 gradients[offset++] = temp[i];
             }
         }
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 435e5400..776119cc 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -22,7 +22,7 @@ using std::unique_ptr;
 #include "rnn_node.hxx"
 #include "rnn_node_interface.hxx"
 
-#define CRYSTALLIZATION_THRESHOLD 50000
+#define CRYSTALLIZATION_THRESHOLD 1000
 
 class DNASNode : public RNN_Node_Interface {
    private:
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index c451a098..f9e3c61a 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -39,12 +39,23 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co
         case DNAS_NODE:
             Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n");
             exit(1);
+        default:
+            Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind);
+            exit(1);
     }
+
+    // Unreachable
+    return nullptr;
 }
 
 DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector<int32_t>& node_types) {
     vector<RNN_Node_Interface*> nodes(node_types.size());
 
+    if (node_types.size() == 0) {
+        Log::fatal("Node types cannot be empty - failed to create DNAS node!\n");
+        exit(1);
+    }
+
     int i = 0;
     for (auto node_type : node_types) {
         nodes[i++] = create_hidden_node(node_type, innovation_counter, depth);
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index c7b60b43..370998ee 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -59,6 +59,9 @@ using std::vector;
 #include "rnn_node.hxx"
 #include "time_series/time_series.hxx"
 #include "ugrnn_node.hxx"
+#include "generate_nn.hxx"
+
+extern vector<int32_t> dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
 
 string parse_fitness(double fitness) {
     if (fitness == EXAMM_MAX_DOUBLE) {
@@ -1628,27 +1631,10 @@ RNN_Node_Interface* RNN_Genome::create_node(
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
 
     Log::trace("CREATING NODE, type: '%s'\n", NODE_TYPES[node_type].c_str());
-    if (node_type == LSTM_NODE) {
-        n = new LSTM_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == DELTA_NODE) {
-        n = new Delta_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == GRU_NODE) {
-        n = new GRU_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == ENARC_NODE) {
-        n = new ENARC_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == ENAS_DAG_NODE) {
-        n = new ENAS_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == RANDOM_DAG_NODE) {
-        n = new RANDOM_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == MGU_NODE) {
-        n = new MGU_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == UGRNN_NODE) {
-        n = new UGRNN_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == SIMPLE_NODE || node_type == JORDAN_NODE || node_type == ELMAN_NODE) {
-        n = new RNN_Node(++node_innovation_count, HIDDEN_LAYER, depth, node_type);
+    if (node_type != DNAS_NODE) {
+        n = create_hidden_node(node_type, node_innovation_count, depth);
     } else {
-        Log::fatal("ERROR: attempted to create a node with an unknown node type: %d\n", node_type);
-        exit(1);
+        n = create_dnas_node(node_innovation_count, depth, dnas_node_types);
     }
 
     if (mutated_component_weight == WeightType::LAMARCKIAN) {
@@ -3213,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     } else if (node_type == DNAS_NODE) {
         int32_t n_nodes;
         bin_istream.read((char*) &n_nodes, sizeof(int32_t));
-
+        
         int32_t counter;
         bin_istream.read((char*) &counter, sizeof(int32_t));
         vector<double> pi(n_nodes, 0.0);
@@ -3224,7 +3210,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
             nodes[i] = RNN_Genome::read_node_from_stream(bin_istream);
         }
 
-        DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, node_type, depth, counter);
+        DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, layer_type, depth, counter);
         dnas_node->set_pi(pi);
         node = (RNN_Node_Interface*) dnas_node;
     } else {
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index acba093c..deaf8bce 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -32,6 +32,8 @@ using std::vector;
 // mysql can't handle the max float value for some reason
 #define EXAMM_MAX_DOUBLE 10000000
 
+extern vector<int32_t> dnas_node_types;
+
 string parse_fitness(double fitness);
 
 class RNN_Genome {
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index 2ad8d065..55f5e057 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -1,20 +1,45 @@
 #include <cmath>
+using std::max;
+
+#include <algorithm>
+
 #include <fstream>
 using std::ostream;
 
 #include <string>
 using std::string;
 
-#include <cmath>
-using std::max;
-
 #include "common/log.hxx"
 #include "rnn/rnn_genome.hxx"
 #include "rnn_node_interface.hxx"
 
-extern const int32_t NUMBER_NODE_TYPES = 9;
+extern const int32_t NUMBER_NODE_TYPES = 11;
 extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU",
-                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG"};
+                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG", "dnas"};
+extern const unordered_map<string, int32_t> string_to_node_type = {
+  { "simple", SIMPLE_NODE     },
+  { "jordan", JORDAN_NODE     },
+  { "elman", ELMAN_NODE      },
+  { "ugrnn", UGRNN_NODE      },
+  { "mgu", MGU_NODE        },
+  { "gru", GRU_NODE        },
+  { "delta", DELTA_NODE   },
+  { "lstm", LSTM_NODE },
+  { "enarc", ENARC_NODE      },
+  { "enas", ENAS_DAG_NODE   },
+  { "dnas", DNAS_NODE       }
+};
+
+int32_t node_type_from_string(string& node_type) {
+    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); });
+
+    if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) {
+        return it->second;
+    } else {
+        Log::fatal("Invalid node type '%s'\n", node_type.c_str());
+        exit(1);
+    }
+}
 
 double bound(double value) {
     if (value < -10.0) {
diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx
index 26dc0f3c..15ec45cd 100644
--- a/rnn/rnn_node_interface.hxx
+++ b/rnn/rnn_node_interface.hxx
@@ -12,6 +12,9 @@ using std::uniform_real_distribution;
 #include <string>
 using std::string;
 
+#include <unordered_map>
+using std::unordered_map;
+
 #include <vector>
 using std::vector;
 
@@ -25,6 +28,8 @@ class RNN;
 
 extern const int32_t NUMBER_NODE_TYPES;
 extern const string NODE_TYPES[];
+extern const unordered_map<string, int32_t> string_to_node_type;
+int32_t node_type_from_string(string& node_type);
 
 #define SIMPLE_NODE     0
 #define JORDAN_NODE     1
@@ -39,6 +44,8 @@ extern const string NODE_TYPES[];
 #define RANDOM_DAG_NODE 10
 #define DNAS_NODE       11
 
+int32_t node_type_from_string(string& node_type);
+
 double sigmoid(double value);
 double sigmoid_derivative(double value);
 double tanh_derivative(double value);
@@ -112,7 +119,7 @@ class RNN_Node_Interface {
 
     virtual RNN_Node_Interface* copy() const = 0;
 
-    void write_to_stream(ostream& out);
+    virtual void write_to_stream(ostream& out);
 
     int32_t get_node_type() const;
     int32_t get_layer_type() const;

From ad5a7a3523fa05a965fb28d3baf4f089b2c67910 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Thu, 9 Feb 2023 13:54:11 -0500
Subject: [PATCH 11/42] Formatting

---
 common/process_arguments.cxx |  6 ++++++
 examm/examm.hxx              |  3 ++-
 rnn/dnas_node.cxx            |  3 ++-
 rnn/dnas_node.hxx            |  2 +-
 rnn/generate_nn.cxx          |  4 +++-
 rnn/rnn_genome.cxx           |  6 +++---
 rnn/rnn_node_interface.cxx   | 31 ++++++++++++++++---------------
 7 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 885f28a0..f2e29ac0 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -39,6 +39,12 @@ EXAMM* generate_examm_from_arguments(
         for (auto node_type : dnas_node_type_strings) {
             dnas_node_types.push_back(node_type_from_string(node_type));
         }
+
+        Log::info("Using following node types for dnas: ");
+        for (auto s : dnas_node_type_strings) {
+            Log::info_no_header("%s", s.c_str());
+        }
+        Log::info_no_header("\n");
     }
 
     GenomeProperty* genome_property = new GenomeProperty();
diff --git a/examm/examm.hxx b/examm/examm.hxx
index ac5d56eb..c0c0ee03 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -63,7 +63,8 @@ class EXAMM {
     double split_node_rate;
     double merge_node_rate;
 
-    vector<int32_t> possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
+    vector<int32_t> possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE,
+                                           MGU_NODE,    GRU_NODE,    DELTA_NODE, LSTM_NODE};
 
     vector<string> op_log_ordering;
     map<string, int32_t> inserted_counts;
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 46a20c69..dcdab7e0 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -116,7 +116,8 @@ void DNASNode::calculate_z() {
             ps_with_indices[i] = pair(i, z[i]);
         }
 
-        std::sort(ps_with_indices, ps_with_indices + z.size(),
+        std::sort(
+            ps_with_indices, ps_with_indices + z.size(),
             [](const pair<int32_t, double>& a, const pair<int32_t, double>& b) {
                 // Descending order
                 return a.second > b.second;
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 776119cc..c3d74e6b 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -62,7 +62,7 @@ class DNASNode : public RNN_Node_Interface {
     int32_t maxi = -1;
 
     // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
-    int32_t k = -1;
+    int32_t k = 1;
 
     // Whether to re-sample the gumbel softmax distribution when resetting the node.
     // Can be set externally using DNASNode::set_stochastic
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index f9e3c61a..a84fb36f 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -40,7 +40,9 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co
             Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n");
             exit(1);
         default:
-            Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind);
+            Log::fatal(
+                "If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind
+            );
             exit(1);
     }
 
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 370998ee..7e452ad0 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -50,6 +50,7 @@ using std::vector;
 #include "dnas_node.hxx"
 #include "enarc_node.hxx"
 #include "enas_dag_node.hxx"
+#include "generate_nn.hxx"
 #include "gru_node.hxx"
 #include "lstm_node.hxx"
 #include "mgu_node.hxx"
@@ -59,9 +60,8 @@ using std::vector;
 #include "rnn_node.hxx"
 #include "time_series/time_series.hxx"
 #include "ugrnn_node.hxx"
-#include "generate_nn.hxx"
 
-extern vector<int32_t> dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
+extern vector<int32_t> dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE};
 
 string parse_fitness(double fitness) {
     if (fitness == EXAMM_MAX_DOUBLE) {
@@ -3199,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     } else if (node_type == DNAS_NODE) {
         int32_t n_nodes;
         bin_istream.read((char*) &n_nodes, sizeof(int32_t));
-        
+
         int32_t counter;
         bin_istream.read((char*) &counter, sizeof(int32_t));
         vector<double> pi(n_nodes, 0.0);
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index 55f5e057..ab5796b2 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -2,7 +2,6 @@
 using std::max;
 
 #include <algorithm>
-
 #include <fstream>
 using std::ostream;
 
@@ -14,24 +13,26 @@ using std::string;
 #include "rnn_node_interface.hxx"
 
 extern const int32_t NUMBER_NODE_TYPES = 11;
-extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU",
-                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG", "dnas"};
+extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN",    "MGU", "GRU",
+                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "dnas"};
 extern const unordered_map<string, int32_t> string_to_node_type = {
-  { "simple", SIMPLE_NODE     },
-  { "jordan", JORDAN_NODE     },
-  { "elman", ELMAN_NODE      },
-  { "ugrnn", UGRNN_NODE      },
-  { "mgu", MGU_NODE        },
-  { "gru", GRU_NODE        },
-  { "delta", DELTA_NODE   },
-  { "lstm", LSTM_NODE },
-  { "enarc", ENARC_NODE      },
-  { "enas", ENAS_DAG_NODE   },
-  { "dnas", DNAS_NODE       }
+    {"simple",   SIMPLE_NODE},
+    {"jordan",   JORDAN_NODE},
+    { "elman",    ELMAN_NODE},
+    { "ugrnn",    UGRNN_NODE},
+    {   "mgu",      MGU_NODE},
+    {   "gru",      GRU_NODE},
+    { "delta",    DELTA_NODE},
+    {  "lstm",     LSTM_NODE},
+    { "enarc",    ENARC_NODE},
+    {  "enas", ENAS_DAG_NODE},
+    {  "dnas",     DNAS_NODE}
 };
 
 int32_t node_type_from_string(string& node_type) {
-    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); });
+    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c) {
+        return std::tolower(c);
+    });
 
     if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) {
         return it->second;

From eda79a02f422d6b1a5dc600acefcccac5f09f7f9 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 12 Apr 2023 10:54:22 -0400
Subject: [PATCH 12/42] Committing experiment scripts

---
 ground_truth_experiments/cell_experiments.sh | 40 ++++++++++++++++++++
 ground_truth_experiments/source_genomes.sh   | 33 ++++++++++++++++
 rnn/dnas_node.cxx                            | 30 +++++++++++++--
 rnn/dnas_node.hxx                            |  5 ++-
 rnn_examples/train_rnn.cxx                   | 25 +++++++-----
 5 files changed, 118 insertions(+), 15 deletions(-)
 create mode 100755 ground_truth_experiments/cell_experiments.sh
 create mode 100755 ground_truth_experiments/source_genomes.sh

diff --git a/ground_truth_experiments/cell_experiments.sh b/ground_truth_experiments/cell_experiments.sh
new file mode 100755
index 00000000..9c0e29d4
--- /dev/null
+++ b/ground_truth_experiments/cell_experiments.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/zsh
+# Trains a DNAS network with train_rnn for each SIZE and fold; the ten folds of a SIZE run as background jobs.
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+# ${=NAME} below is zsh word splitting: each space-separated name above becomes its own argument.
+offset=1       # value passed to --time_offset
+bp_epoch=1000  # value passed to --bp_iterations
+
+for SIZE in 1 2 4; do  # SIZE is used for both --num_hidden_layers and --hidden_layer_size
+  for CELL_TYPE in dnas; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do  # fold only changes the output directory
+      output_dir=ground_truth_experiments/results/$CELL_TYPE/$SIZE/$fold
+      mkdir -p $output_dir
+      Release/rnn_examples/train_rnn \
+          --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+          --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+          --time_offset $offset \
+          --input_parameter_names ${=INPUT_PARAMETERS} \
+          --output_parameter_names ${=OUTPUT_PARAMETERS} \
+          --bp_iterations $bp_epoch \
+          --stochastic \
+          --rnn_type $CELL_TYPE \
+          --normalize min_max \
+          --num_hidden_layers $SIZE \
+          --hidden_layer_size $SIZE \
+          --random_sequence_length \
+          --sequence_length_lower_bound 50 \
+          --sequence_length_upper_bound 100 \
+          --max_recurrent_depth 1 \
+          --weight_update adagrad \
+          --output_directory $output_dir \
+          --log_filename fitness.csv \
+          --learning_rate 0.01 \
+          --std_message_level ERROR \
+          --file_message_level INFO &
+    done
+  done
+  wait  # let every fold of this SIZE finish before starting the next SIZE
+done
+
diff --git a/ground_truth_experiments/source_genomes.sh b/ground_truth_experiments/source_genomes.sh
new file mode 100755
index 00000000..1c251134
--- /dev/null
+++ b/ground_truth_experiments/source_genomes.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/zsh
+# Runs the MPI version of EXAMM on the c172 dataset: ten independent runs, one output directory each.
+#
+# The c172 dataset is not normalized.
+# To run a dataset that is not normalized, make sure to add one of these arguments:
+#    --normalize min_max for Min Max normalization, or
+#    --normalize avg_std_dev for Z-score normalization
+
+INPUT_PARAMETERS="AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd"
+OUTPUT_PARAMETERS="Pitch"
+
+for i in 0 1 2 3 4 5 6 7 8 9; do  # i only selects the output directory
+  exp_name="ground_truth_experiments/results/source_genomes/$i"
+  mkdir -p $exp_name
+  echo $exp_name
+  mpirun -np 5 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset 1 \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=OUTPUT_PARAMETERS} \
+    --number_islands 8 \
+    --island_size 8 \
+    --max_genomes 10000 \
+    --bp_iterations 5 \
+    --num_mutations 2 \
+    --normalize min_max \
+    --output_directory $exp_name \
+    --possible_node_types simple UGRNN MGU GRU delta LSTM \
+    --std_message_level ERROR \
+    --file_message_level INFO &
+done
+wait  # block until all ten background mpirun jobs have exited
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index dcdab7e0..f87868f3 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -49,7 +49,6 @@ DNASNode::DNASNode(const DNASNode& src) : RNN_Node_Interface(src.innovation_numb
     g = src.g;
     x = src.x;
     xtotal = src.xtotal;
-    tao = src.tao;
     stochastic = src.stochastic;
     counter = src.counter;
     maxi = src.maxi;
@@ -92,8 +91,32 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) {
     calculate_z();
 }
 
+double DNASNode::calculate_pi_lr() {
+    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
+    if (percentage_done < 0.33) {
+        return 0.0;
+    } else if (percentage_done < 0.66) {
+        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
+        return 0.5 + percentage_done_with_phase * .5;
+    } else {
+        return 0.1;
+    }
+}
+
+double DNASNode::calculate_tao() {
+    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
+    if (percentage_done < 0.33) {
+        return 1.33;
+    } else if (percentage_done < 0.66) {
+        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
+        return 1.33 - percentage_done_with_phase * 0.66;
+    } else {
+        return 0.33;
+    }
+}
+
 void DNASNode::calculate_z() {
-    tao = max(1.0 / 3.0, 1.0 / (1.0 + (double) counter * 0.05));
+    tao = calculate_tao();
 
     xtotal = 0.0;
     double emax = -10000000;
@@ -360,8 +383,9 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
+        double pi_lr = calculate_pi_lr();
         for (auto i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i] * 0.1;
+            gradients[offset++] = d_pi[i] * pi_lr;
         }
 
         for (auto node : nodes) {
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index c3d74e6b..76aa6969 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -56,10 +56,9 @@ class DNASNode : public RNN_Node_Interface {
     // A vector to put gumbel noise into; just to avoid re-allocation
     vector<double> noise;
 
-    // Temperature used when drawing samples from Gumbel-Softmax(pi)
-    double tao = 1.0;
     int32_t counter = 0;
     int32_t maxi = -1;
+    double tao;
 
     // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
     int32_t k = 1;
@@ -82,6 +81,8 @@ class DNASNode : public RNN_Node_Interface {
     template <typename Rng>
     void sample_gumbel_softmax(Rng& rng);
     void calculate_z();
+    double calculate_tao();
+    double calculate_pi_lr();
 
     virtual void initialize_lamarckian(
         minstd_rand0& generator, NormalDistribution& normal_distribution, double mu, double sigma
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index ffdf8999..8c5b0b1c 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -100,6 +100,9 @@ int main(int argc, char** argv) {
     int32_t max_recurrent_depth;
     get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
 
+    int32_t hidden_layer_size = number_inputs;
+    get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
+
     WeightRules* weight_rules = new WeightRules(arguments);
 
     weight_update_method = new WeightUpdate();
@@ -112,59 +115,59 @@ int main(int argc, char** argv) {
     Log::info("RNN TYPE = %s\n", rnn_type.c_str());
     if (rnn_type == "lstm") {
         genome = create_lstm(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "gru") {
         genome = create_gru(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "delta") {
         genome = create_delta(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "mgu") {
         genome = create_mgu(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "ugrnn") {
         genome = create_ugrnn(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "ff") {
         genome = create_ff(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "jordan") {
         genome = create_jordan(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "elman") {
         genome = create_elman(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
     } else if (rnn_type == "dnas") {
-        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE};
+        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
         genome = create_dnas_nn(
             input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
             weight_rules
         );
     } else {
-        Log::fatal("ERROR: incorrect rnn type\n");
+        Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
         Log::fatal("Possibilities are:\n");
         Log::fatal("    lstm\n");
         Log::fatal("    gru\n");
@@ -232,6 +235,8 @@ int main(int argc, char** argv) {
     genome->get_weights(best_parameters);
     rnn->set_weights(best_parameters);
 
+    genome->write_to_file(output_directory + "/output_genome.bin");
+
     Log::info("TRAINING ERRORS:\n");
     Log::info("MSE: %lf\n", genome->get_mse(best_parameters, training_inputs, training_outputs));
     Log::info("MAE: %lf\n", genome->get_mae(best_parameters, training_inputs, training_outputs));

From cd597a43b03a901a23ed5fdbb4a0f97a964ae997 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 20 Sep 2023 18:01:43 -0400
Subject: [PATCH 13/42] Commit for AISTATS results

---
 common/files.hxx                              |   2 +
 common/log.cxx                                |   8 +-
 common/process_arguments.cxx                  |  17 +-
 initial_integration_experiments/dnas.zsh      |  55 ++++++
 .../post_training_dnas.zsh                    |  31 +++
 mpi/examm_mpi.cxx                             |   7 +-
 rnn/dnas_node.cxx                             |  49 +++--
 rnn/dnas_node.hxx                             |  12 +-
 rnn/rnn_genome.cxx                            |  76 +++++++-
 rnn/rnn_genome.hxx                            |   4 +
 rnn_examples/CMakeLists.txt                   |   3 +
 rnn_examples/dnas_info.cxx                    |  96 +++++++++
 rnn_examples/train_rnn.cxx                    | 182 ++++++++++--------
 13 files changed, 427 insertions(+), 115 deletions(-)
 create mode 100755 initial_integration_experiments/dnas.zsh
 create mode 100755 initial_integration_experiments/post_training_dnas.zsh
 create mode 100644 rnn_examples/dnas_info.cxx

diff --git a/common/files.hxx b/common/files.hxx
index ac23ff0d..8c4c8a43 100644
--- a/common/files.hxx
+++ b/common/files.hxx
@@ -1,6 +1,8 @@
 #ifndef EXACT_BOINC_COMMON_HXX
 #define EXACT_BOINC_COMMON_HXX
 
+#include <stdint.h>
+
 #include <stdexcept>
 using std::runtime_error;
 
diff --git a/common/log.cxx b/common/log.cxx
index 623475e8..6f82e67f 100644
--- a/common/log.cxx
+++ b/common/log.cxx
@@ -79,11 +79,11 @@ int8_t Log::parse_level_from_string(string level) {
 void Log::initialize(const vector<string>& arguments) {
     // TODO: should read these from the CommandLine (to be created)
 
-    string std_message_level_str, file_message_level_str;
+    string std_message_level_str = "INFO", file_message_level_str = "NONE";
 
-    get_argument(arguments, "--std_message_level", true, std_message_level_str);
-    get_argument(arguments, "--file_message_level", true, file_message_level_str);
-    get_argument(arguments, "--output_directory", true, output_directory);
+    get_argument(arguments, "--std_message_level", false, std_message_level_str);
+    get_argument(arguments, "--file_message_level", false, file_message_level_str);
+    get_argument(arguments, "--output_directory", false, output_directory);
 
     std_message_level = parse_level_from_string(std_message_level_str);
     file_message_level = parse_level_from_string(file_message_level_str);
diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index f2e29ac0..4577d86c 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -10,7 +10,7 @@ using std::vector;
 EXAMM* generate_examm_from_arguments(
     const vector<string>& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules,
     RNN_Genome* seed_genome
-) {
+) { 
     Log::info("Getting arguments for EXAMM\n");
     int32_t island_size;
     get_argument(arguments, "--island_size", true, island_size);
@@ -186,11 +186,18 @@ void get_train_validation_data(
     time_series_sets->export_training_series(time_offset, train_inputs, train_outputs);
     time_series_sets->export_test_series(time_offset, validation_inputs, validation_outputs);
 
-    int32_t sequence_length = 0;
-    if (get_argument(arguments, "--sequence_length", false, sequence_length)) {
-        Log::info("Slicing input training data with time sequence length: %d\n", sequence_length);
-        slice_input_data(train_inputs, train_outputs, sequence_length);
+    int32_t train_sequence_length = 0;
+    if (get_argument(arguments, "--train_sequence_length", false, train_sequence_length)) {
+        Log::info("Slicing input training data with time sequence length: %d\n", train_sequence_length);
+        slice_input_data(train_inputs, train_outputs, train_sequence_length);
+    }
+
+    int32_t validation_sequence_length = 0;
+    if (get_argument(arguments, "--validation_sequence_length", false, validation_sequence_length)) {
+        Log::info("Slicing input validation data with time sequence length: %d\n", validation_sequence_length);
+        slice_input_data(validation_inputs, validation_outputs, validation_sequence_length);
     }
+
     Log::info("Generating time series data finished! \n");
 }
 
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
new file mode 100755
index 00000000..490e0c3b
--- /dev/null
+++ b/initial_integration_experiments/dnas.zsh
@@ -0,0 +1,55 @@
+#!/usr/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+
+offset=1
+
+run_examm() {  # One examm_mpi run for the caller's crystalize_iters/bp_epoch/k/fold, followed by post-training.
+  output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir  # NOTE(review): $SIZE below is never assigned in this script -- confirm the caller exports it.
+  mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --stochastic \
+      --possible_node_types DNAS \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --train_sequence_length 100 \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 10000 \
+      --island_size 8 \
+      --number_islands 8 \
+      --dnas_k $k
+  # Take the last rnn_genome_*.bin match (glob qualifier [-1]) and pass it to post-training via environment variables.
+  best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+for crystalize_iters in 128 256 512 1024; do  # sweep over crystalize_iters x bp_epoch x k, eight folds each
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3; do  # first batch: four folds run concurrently
+        run_examm &
+      done
+      wait
+      for fold in 4 5 6 7; do  # second batch: backgrounded like the first so the wait below has jobs to wait on
+        run_examm &
+      done
+      wait
+    done
+  done
+done
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
new file mode 100755
index 00000000..f3d355f1
--- /dev/null
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -0,0 +1,31 @@
+#!/usr/bin/zsh
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+# Expects GENOME, BP_ITERS, CRYSTALIZE_ITERS, OUTPUT_DIRECTORY and k to be set in the environment.
+offset=1
+
+post_training() {
+    # Runs train_rnn on the genome file $GENOME; output and post_training.csv go to $OUTPUT_DIRECTORY.
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset $offset \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $OUTPUT_DIRECTORY \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 100 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $CRYSTALIZE_ITERS \
+        --dnas_k $k
+
+}
+
+post_training
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index 7886d91d..c1f1dd1c 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -203,13 +203,18 @@ void worker(int32_t rank) {
         } else if (tag == GENOME_LENGTH_TAG) {
             Log::debug("received genome!\n");
             RNN_Genome* genome = receive_genome_from(0);
-
+            
             // have each worker write the backproagation to a separate log file
             string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank);
             Log::set_id(log_id);
+
+            vector<double> params;
+            genome->get_weights(params);
+
             genome->backpropagate_stochastic(
                 training_inputs, training_outputs, validation_inputs, validation_outputs, weight_update_method
             );
+
             Log::release_id(log_id);
 
             // go back to the worker's log for MPI communication
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index f87868f3..ac0e0aa1 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -1,6 +1,8 @@
 #include <algorithm>
 using std::sort;
 
+#include <iomanip>
+
 #include <utility>
 using std::pair;
 
@@ -13,13 +15,16 @@ using std::max;
 #include "common/log.hxx"
 #include "dnas_node.hxx"
 
+int32_t DNASNode::CRYSTALLIZATION_THRESHOLD = 1000;
+int32_t DNASNode::k = -1;
+
 DNASNode::DNASNode(
     vector<RNN_Node_Interface*>&& _nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter
 )
     : RNN_Node_Interface(_innovation_number, _type, _depth),
       nodes(_nodes),
       pi(vector<double>(nodes.size(), 1.0)),
-      z(vector<double>(nodes.size())),
+      z(vector<double>(nodes.size(), 0.0)),
       x(vector<double>(nodes.size())),
       g(vector<double>(nodes.size())),
       d_pi(vector<double>(nodes.size())),
@@ -87,20 +92,11 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) {
     x.assign(pi.size(), 0.0);
 
     gumbel_noise(rng, g);
-
     calculate_z();
 }
 
 double DNASNode::calculate_pi_lr() {
-    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
-    if (percentage_done < 0.33) {
-        return 0.0;
-    } else if (percentage_done < 0.66) {
-        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 0.5 + percentage_done_with_phase * .5;
-    } else {
-        return 0.1;
-    }
+    return 0.1;
 }
 
 double DNASNode::calculate_tao() {
@@ -109,9 +105,9 @@ double DNASNode::calculate_tao() {
         return 1.33;
     } else if (percentage_done < 0.66) {
-        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 1.33 - percentage_done_with_phase * 0.66;
+        double percentage_done_with_phase = (percentage_done - 0.33) / 0.33;  // 0 at 33% done, 1 at 66% done
+        return 1.33 - percentage_done_with_phase * 1.15;  // anneals 1.33 -> 0.18, continuous with both other phases
     } else {
-        return 0.33;
+        return 0.18;
     }
 }
 
@@ -162,6 +158,23 @@ void DNASNode::calculate_z() {
     }
 }
 
+// Prints every pi value to stdout separated by " & ", then logs the sub-node types and the index of the largest pi.
+void DNASNode::print_info() {
+    printf(" ");
+    int best_pi_idx = 0;
+    for (int idx = 0; idx < (int) nodes.size(); idx++) {
+        printf("%-10s & ", std::to_string(pi[idx]).c_str());
+        best_pi_idx = (pi[idx] > pi[best_pi_idx]) ? idx : best_pi_idx;
+    }
+    printf("\n");
+    Log::info("Node types: ");
+    for (auto sub_node : nodes) {
+        Log::info_no_header("%d ", sub_node->node_type);
+    }
+    Log::info_no_header("\n ");
+    Log::info("Best node: %i, node type: %d\n", best_pi_idx, nodes[best_pi_idx]->node_type);
+}
+
 void DNASNode::reset(int32_t series_length) {
     d_pi = vector<double>(pi.size(), 0.0);
     d_input = vector<double>(series_length, 0.0);
@@ -309,6 +322,7 @@ void DNASNode::set_weights(const vector<double>& parameters) {
 }
 
 void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
+    int start = offset;
     // Log::info("pi start %d; ", offset);
     for (auto i = 0; i < pi.size(); i++) {
         parameters[offset++] = pi[i];
@@ -323,17 +337,14 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
+        if (pi[i] < 0.1)
+            pi[i] = 0.1;
     }
-    // Log::info("Pi indices: %d-%d\n", start, offset);
+
     for (auto node : nodes) {
         node->set_weights(offset, parameters);
     }
     calculate_z();
-    // string s = "Pi = { ";
-    // for (auto p : pi) {
-    //     s += std::to_string(p) + ", ";
-    // }
-    // Log::info("%s }\n", s.c_str());
 }
 
 void DNASNode::set_pi(const vector<double>& new_pi) {
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 76aa6969..00867ffe 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -22,8 +22,6 @@ using std::unique_ptr;
 #include "rnn_node.hxx"
 #include "rnn_node_interface.hxx"
 
-#define CRYSTALLIZATION_THRESHOLD 1000
-
 class DNASNode : public RNN_Node_Interface {
    private:
     template <typename R>
@@ -60,9 +58,6 @@ class DNASNode : public RNN_Node_Interface {
     int32_t maxi = -1;
     double tao;
 
-    // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
-    int32_t k = 1;
-
     // Whether to re-sample the gumbel softmax distribution when resetting the node.
     // Can be set externally using DNASNode::set_stochastic
     bool stochastic = true;
@@ -71,6 +66,11 @@ class DNASNode : public RNN_Node_Interface {
     vector<vector<double>> node_outputs;
 
    public:
+    static int32_t CRYSTALLIZATION_THRESHOLD;
+
+    // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
+    static int32_t k;
+
     DNASNode(
         vector<RNN_Node_Interface*>&& nodes, int32_t _innovation_number, int32_t _type, double _depth,
         int32_t counter = -1
@@ -110,6 +110,8 @@ class DNASNode : public RNN_Node_Interface {
     virtual void reset(int32_t _series_length);
     virtual void write_to_stream(ostream& out);
 
+    void print_info();
+
     virtual RNN_Node_Interface* copy() const;
 
     void set_stochastic(bool stochastic);
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 7e452ad0..72868d5e 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -3221,16 +3221,35 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     node->enabled = enabled;
     return node;
 }
+
+#define MAGIC 0xFA  // Sentinel byte placed between serialized genome fields to detect a misaligned stream.
+// Reads one byte from `bin_istream` and exits unless it is MAGIC; starts from 0 so a failed read cannot pass.
+#define read_magic(place) \
+      { \
+      uint8_t boo = 0;\
+      bin_istream.read((char *) &boo, sizeof(uint8_t)); \
+      if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \
+      }
+// Writes the MAGIC sentinel byte to `bin_ostream`.
+#define write_magic() \
+  {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));}
+
 void RNN_Genome::read_from_stream(istream& bin_istream) {
     Log::debug("READING GENOME FROM STREAM\n");
+    
+    read_magic(__LINE__);
 
     bin_istream.read((char*) &generation_id, sizeof(int32_t));
     bin_istream.read((char*) &group_id, sizeof(int32_t));
     bin_istream.read((char*) &bp_iterations, sizeof(int32_t));
 
+    read_magic(__LINE__);
+    
     bin_istream.read((char*) &use_dropout, sizeof(bool));
     bin_istream.read((char*) &dropout_probability, sizeof(double));
 
+    read_magic(__LINE__);
+    
     WeightType weight_initialize = WeightType::NONE;
     WeightType weight_inheritance = WeightType::NONE;
     WeightType mutated_component_weight = WeightType::NONE;
@@ -3239,6 +3258,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &weight_inheritance, sizeof(int32_t));
     bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t));
 
+    read_magic(__LINE__);
+    
     weight_rules = new WeightRules();
     weight_rules->set_weight_initialize_method(weight_initialize);
     weight_rules->set_weight_inheritance_method(weight_inheritance);
@@ -3260,8 +3281,10 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream generator_iss(generator_str);
     generator_iss >> generator;
 
-    string rng_0_1_str;
-    read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
+    read_magic(__LINE__);
+    
+    // string rng_0_1_str;
+    // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
     // So for some reason this was serialized incorrectly for some genomes,
     // but the value should always be the same so we really don't need to de-serialize it anways and can just
     // assign it a constant value
@@ -3275,6 +3298,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream generated_by_map_iss(generated_by_map_str);
     read_map(generated_by_map_iss, generated_by_map);
 
+    read_magic(__LINE__);
+    
     bin_istream.read((char*) &best_validation_mse, sizeof(double));
     bin_istream.read((char*) &best_validation_mae, sizeof(double));
 
@@ -3286,6 +3311,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     initial_parameters.assign(initial_parameters_v, initial_parameters_v + n_initial_parameters);
     delete[] initial_parameters_v;
 
+    read_magic(__LINE__);
+    
     int32_t n_best_parameters;
     bin_istream.read((char*) &n_best_parameters, sizeof(int32_t));
     Log::debug("reading %d best parameters.\n", n_best_parameters);
@@ -3294,6 +3321,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     best_parameters.assign(best_parameters_v, best_parameters_v + n_best_parameters);
     delete[] best_parameters_v;
 
+    read_magic(__LINE__);
+    
     input_parameter_names.clear();
     int32_t n_input_parameter_names;
     bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t));
@@ -3304,6 +3333,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         input_parameter_names.push_back(input_parameter_name);
     }
 
+    read_magic(__LINE__);
+
     output_parameter_names.clear();
     int32_t n_output_parameter_names;
     bin_istream.read((char*) &n_output_parameter_names, sizeof(int32_t));
@@ -3314,6 +3345,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         output_parameter_names.push_back(output_parameter_name);
     }
 
+    read_magic(__LINE__);
+    
     int32_t n_nodes;
     bin_istream.read((char*) &n_nodes, sizeof(int32_t));
     Log::debug("reading %d nodes.\n", n_nodes);
@@ -3321,6 +3354,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     nodes.clear();
     for (int32_t i = 0; i < n_nodes; i++) {
         nodes.push_back(RNN_Genome::read_node_from_stream(bin_istream));
+        read_magic(__LINE__);
     }
 
     int32_t n_edges;
@@ -3347,6 +3381,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         // innovation_list.push_back(innovation_number);
         edge->enabled = enabled;
         edges.push_back(edge);
+        read_magic(__LINE__);
     }
 
     int32_t n_recurrent_edges;
@@ -3378,6 +3413,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         // innovation_list.push_back(innovation_number);
         recurrent_edge->enabled = enabled;
         recurrent_edges.push_back(recurrent_edge);
+        read_magic(__LINE__);
     }
 
     read_binary_string(bin_istream, normalize_type, "normalize_type");
@@ -3402,6 +3438,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream normalize_std_devs_iss(normalize_std_devs_str);
     read_map(normalize_std_devs_iss, normalize_std_devs);
 
+    read_magic(__LINE__);
+    
     assign_reachability();
 }
 
@@ -3425,13 +3463,20 @@ void RNN_Genome::write_to_file(string bin_filename) {
 
 void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     Log::debug("WRITING GENOME TO STREAM\n");
+
+    write_magic();
+
     bin_ostream.write((char*) &generation_id, sizeof(int32_t));
     bin_ostream.write((char*) &group_id, sizeof(int32_t));
     bin_ostream.write((char*) &bp_iterations, sizeof(int32_t));
 
+    write_magic();
+    
     bin_ostream.write((char*) &use_dropout, sizeof(bool));
     bin_ostream.write((char*) &dropout_probability, sizeof(double));
 
+    write_magic();
+    
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
     WeightType weight_inheritance = weight_rules->get_weight_inheritance_method();
     WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method();
@@ -3439,6 +3484,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &weight_inheritance, sizeof(int32_t));
     bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t));
 
+    write_magic();
+    
     Log::debug("generation_id: %d\n", generation_id);
     Log::debug("bp_iterations: %d\n", bp_iterations);
 
@@ -3456,16 +3503,20 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     string generator_str = generator_oss.str();
     write_binary_string(bin_ostream, generator_str, "generator");
 
-    ostringstream rng_0_1_oss;
-    rng_0_1_oss << rng_0_1;
-    string rng_0_1_str = rng_0_1_oss.str();
-    write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1");
+    write_magic();
+    
+    // ostringstream rng_0_1_oss;
+    // rng_0_1_oss << rng_0_1;
+    // string rng_0_1_str = rng_0_1_oss.str();
+    // write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1");
 
     ostringstream generated_by_map_oss;
     write_map(generated_by_map_oss, generated_by_map);
     string generated_by_map_str = generated_by_map_oss.str();
     write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map");
 
+    write_magic();
+    
     bin_ostream.write((char*) &best_validation_mse, sizeof(double));
     bin_ostream.write((char*) &best_validation_mae, sizeof(double));
 
@@ -3474,18 +3525,24 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &n_initial_parameters, sizeof(int32_t));
     bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size());
 
+    write_magic();
+    
     int32_t n_best_parameters = (int32_t) best_parameters.size();
     bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t));
     if (n_best_parameters) {
         bin_ostream.write((char*) &best_parameters[0], sizeof(double) * best_parameters.size());
     }
 
+    write_magic();
+    
     int32_t n_input_parameter_names = (int32_t) input_parameter_names.size();
     bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) {
         write_binary_string(bin_ostream, input_parameter_names[i], "input_parameter_names[" + std::to_string(i) + "]");
     }
 
+    write_magic();
+    
     int32_t n_output_parameter_names = (int32_t) output_parameter_names.size();
     bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) {
@@ -3494,6 +3551,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
         );
     }
 
+    write_magic();
+    
     int32_t n_nodes = (int32_t) nodes.size();
     bin_ostream.write((char*) &n_nodes, sizeof(int32_t));
     Log::debug("writing %d nodes.\n", n_nodes);
@@ -3504,6 +3563,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
             nodes[i]->depth, nodes[i]->parameter_name.c_str()
         );
         nodes[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     int32_t n_edges = (int32_t) edges.size();
@@ -3516,6 +3576,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
             edges[i]->output_innovation_number
         );
         edges[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     int32_t n_recurrent_edges = (int32_t) recurrent_edges.size();
@@ -3529,6 +3590,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
         );
 
         recurrent_edges[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     write_binary_string(bin_ostream, normalize_type, "normalize_type");
@@ -3552,6 +3614,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_map(normalize_std_devs_oss, normalize_std_devs);
     string normalize_std_devs_str = normalize_std_devs_oss.str();
     write_binary_string(bin_ostream, normalize_std_devs_str, "normalize_std_devs");
+
+    write_magic();
 }
 
 void RNN_Genome::update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count) {
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index deaf8bce..d6330512 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -325,6 +325,10 @@ class RNN_Genome {
     );
     vector<RNN_Node_Interface*> pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type);
 
+    const vector<RNN_Node_Interface *> &get_nodes() {
+        return this->nodes;
+    }
+
     void update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count);
 
     vector<int32_t> get_innovation_list();
diff --git a/rnn_examples/CMakeLists.txt b/rnn_examples/CMakeLists.txt
index 2bfda532..f5e294c6 100644
--- a/rnn_examples/CMakeLists.txt
+++ b/rnn_examples/CMakeLists.txt
@@ -16,3 +16,6 @@ target_link_libraries(evaluate_rnns_multi_offset examm_strategy exact_common exa
 add_executable(rnn_statistics rnn_statistics.cxx)
 target_link_libraries(rnn_statistics examm_strategy exact_common exact_time_series exact_weights examm_nn  ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread)
 
+add_executable(dnas_info dnas_info.cxx)
+target_link_libraries(dnas_info examm_strategy exact_common exact_time_series exact_weights examm_nn  ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread)
+
diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx
new file mode 100644
index 00000000..74fd6519
--- /dev/null
+++ b/rnn_examples/dnas_info.cxx
@@ -0,0 +1,96 @@
+#include <chrono>
+#include <fstream>
+using std::getline;
+using std::ifstream;
+using std::ofstream;
+
+#include <random>
+using std::minstd_rand0;
+using std::uniform_real_distribution;
+
+#include <string>
+using std::string;
+
+#include <vector>
+using std::vector;
+
+#include "common/arguments.hxx"
+#include "common/files.hxx"
+#include "common/log.hxx"
+#include "rnn/generate_nn.hxx"
+#include "rnn/gru_node.hxx"
+#include "rnn/lstm_node.hxx"
+#include "rnn/rnn_edge.hxx"
+#include "rnn/rnn_genome.hxx"
+#include "rnn/rnn_node.hxx"
+#include "rnn/rnn_node_interface.hxx"
+#include "time_series/time_series.hxx"
+#include "weights/weight_rules.hxx"
+#include "weights/weight_update.hxx"
+
+vector<vector<vector<double> > > training_inputs;
+vector<vector<vector<double> > > training_outputs;
+vector<vector<vector<double> > > test_inputs;
+vector<vector<vector<double> > > test_outputs;
+
+bool random_sequence_length;
+int32_t sequence_length_lower_bound = 30;
+int32_t sequence_length_upper_bound = 100;
+
+RNN_Genome* genome;
+RNN* rnn;
+WeightUpdate* weight_update_method;
+int32_t bp_iterations;
+bool using_dropout;
+double dropout_probability;
+
+ofstream* log_file;
+string output_directory;
+
+double objective_function(const vector<double>& parameters) {
+    rnn->set_weights(parameters);
+    // Objective value: the negated sum of rnn->prediction_mae over every training series, using the weights set above.
+    double error = 0.0;
+    // NOTE(review): the file-scope `rnn` pointer is not assigned anywhere in this file -- confirm this function is never called here.
+    for (int32_t i = 0; i < (int32_t) training_inputs.size(); i++) {
+        error += rnn->prediction_mae(training_inputs[i], training_outputs[i], false, true, 0.0);
+    }
+
+    return -error;
+}
+
+double test_objective_function(const vector<double>& parameters) {
+    rnn->set_weights(parameters);
+    // Returns the negated sum of rnn->prediction_mse over every test series; each series' error is also reported via Log::info.
+    double total_error = 0.0;
+    // NOTE(review): the file-scope `rnn` pointer is not assigned anywhere in this file -- confirm this function is never called here.
+    for (int32_t i = 0; i < (int32_t) test_inputs.size(); i++) {
+        double error = rnn->prediction_mse(test_inputs[i], test_outputs[i], false, true, 0.0);
+        total_error += error;
+
+        Log::info("output for series[%d]: %lf\n", i, error);
+    }
+
+    return -total_error;
+}
+
+int main(int argc, char** argv) {
+    vector<string> arguments = vector<string>(argv, argv + argc);
+    // Entry point: initialize logging from the command-line arguments and register the log id "main".
+    Log::initialize(arguments);
+    Log::set_id("main");
+    // Read the genome path from --filename (NOTE(review): the `true` argument presumably marks it as required -- confirm).
+    string filename;
+    get_argument(arguments, "--filename", true, filename);
+    // Build the genome from that file, then print info for each node that is a DNASNode. NOTE(review): <iostream> and rnn/dnas_node.hxx are not included directly here -- confirm they arrive transitively.
+    RNN_Genome genome(filename);
+
+    for (auto node : genome.get_nodes()) {
+        if (DNASNode *d = dynamic_cast<DNASNode*>(node)) {
+          std::cout << "'" << filename << "': ";
+          d->print_info();
+        }
+    }
+
+    Log::release_id("main");
+}
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 8c5b0b1c..02d7db80 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -85,24 +85,20 @@ int main(int argc, char** argv) {
     int32_t time_offset = 1;
     get_argument(arguments, "--time_offset", true, time_offset);
 
+    int32_t crystallization_threshold = 1000;
+    get_argument(arguments, "--crystalize_iters", false, crystallization_threshold);
+    DNASNode::CRYSTALLIZATION_THRESHOLD = crystallization_threshold;
+
+    int32_t k = -1;
+    get_argument(arguments, "--dnas_k", false, k);
+    DNASNode::k = k;
+
     time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
     time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
 
     int number_inputs = time_series_sets->get_number_inputs();
     // int number_outputs = time_series_sets->get_number_outputs();
 
-    string rnn_type;
-    get_argument(arguments, "--rnn_type", true, rnn_type);
-
-    int32_t num_hidden_layers;
-    get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers);
-
-    int32_t max_recurrent_depth;
-    get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
-
-    int32_t hidden_layer_size = number_inputs;
-    get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
-
     WeightRules* weight_rules = new WeightRules(arguments);
 
     weight_update_method = new WeightUpdate();
@@ -111,74 +107,110 @@ int main(int argc, char** argv) {
     vector<string> input_parameter_names = time_series_sets->get_input_parameter_names();
     vector<string> output_parameter_names = time_series_sets->get_output_parameter_names();
 
-    RNN_Genome* genome;
-    Log::info("RNN TYPE = %s\n", rnn_type.c_str());
-    if (rnn_type == "lstm") {
-        genome = create_lstm(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "gru") {
-        genome = create_gru(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "delta") {
-        genome = create_delta(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
+    string genome_file;
+    get_argument(arguments, "--genome_file", false, genome_file);
+    Log::info("RNN_GENOME = <%s> \n", genome_file.c_str());
 
-    } else if (rnn_type == "mgu") {
-        genome = create_mgu(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "ugrnn") {
-        genome = create_ugrnn(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "ff") {
-        genome = create_ff(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "jordan") {
-        genome = create_jordan(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
+    RNN_Genome* genome;
 
-    } else if (rnn_type == "elman") {
-        genome = create_elman(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-    } else if (rnn_type == "dnas") {
-        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
-        genome = create_dnas_nn(
-            input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
-            weight_rules
-        );
+    if (genome_file.size() != 0) {
+        genome = new RNN_Genome(genome_file);
+        Log::info("best weights: { ");
+        for (double &d : genome->get_best_parameters()) {
+            Log::info_no_header("%f, ", d);
+        }
+        Log::info("}\n");
+
+        vector<double> params;
+        genome->get_weights(params);
+        Log::info("current weights: { ");
+        for (double &d : params) {
+            Log::info_no_header("%f, ", d);
+        }
+        Log::info("}\n");
     } else {
-        Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
-        Log::fatal("Possibilities are:\n");
-        Log::fatal("    lstm\n");
-        Log::fatal("    gru\n");
-        Log::fatal("    ff\n");
-        Log::fatal("    jordan\n");
-        Log::fatal("    elman\n");
-        exit(1);
+
+        string rnn_type;
+        get_argument(arguments, "--rnn_type", true, rnn_type);
+        
+        Log::info("RNN TYPE = %s\n", rnn_type.c_str());
+
+        int32_t num_hidden_layers;
+        get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers);
+
+        int32_t max_recurrent_depth;
+        get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
+
+        int32_t hidden_layer_size = number_inputs;
+        get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
+
+        if (rnn_type == "lstm") {
+            genome = create_lstm(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "gru") {
+            genome = create_gru(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "delta") {
+            genome = create_delta(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "mgu") {
+            genome = create_mgu(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "ugrnn") {
+            genome = create_ugrnn(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "ff") {
+            genome = create_ff(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "jordan") {
+            genome = create_jordan(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "elman") {
+            genome = create_elman(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+        } else if (rnn_type == "dnas") {
+            vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
+            genome = create_dnas_nn(
+                input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
+                weight_rules
+            );
+        } else {
+            Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
+            Log::fatal("Possibilities are:\n");
+            Log::fatal("    lstm\n");
+            Log::fatal("    gru\n");
+            Log::fatal("    ff\n");
+            Log::fatal("    jordan\n");
+            Log::fatal("    elman\n");
+            exit(1);
+        }
     }
 
     get_argument(arguments, "--bp_iterations", true, bp_iterations);
-    genome->set_bp_iterations(bp_iterations);
+    genome->set_bp_iterations(bp_iterations + genome->get_bp_iterations());
 
     get_argument(arguments, "--output_directory", true, output_directory);
     if (output_directory != "") {
@@ -211,7 +243,7 @@ int main(int argc, char** argv) {
 
     using_dropout = false;
 
-    genome->initialize_randomly();
+    genome->set_weights(genome->get_best_parameters());
 
     double learning_rate = 0.001;
     get_argument(arguments, "--learning_rate", false, learning_rate);

From fa03e78c21bd98f20b5e4301a80d569eae9b8c67 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 12 Dec 2023 19:18:05 -0500
Subject: [PATCH 14/42] Preparing for gecco 2024 experiments

---
 common/color_table.cxx                        |  3 +-
 common/process_arguments.cxx                  |  2 +-
 initial_integration_experiments/dnas.zsh      |  4 +-
 .../post_training_dnas.zsh                    |  2 +-
 mpi/examm_mpi.cxx                             |  2 +-
 rnn/dnas_node.cxx                             |  9 +--
 rnn/rnn_genome.cxx                            | 60 ++++++++++---------
 rnn/rnn_genome.hxx                            |  2 +-
 rnn_examples/dnas_info.cxx                    |  6 +-
 rnn_examples/train_rnn.cxx                    | 39 ++++++------
 10 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/common/color_table.cxx b/common/color_table.cxx
index d9e743b0..d0c42a21 100644
--- a/common/color_table.cxx
+++ b/common/color_table.cxx
@@ -1026,7 +1026,8 @@ const static double bent_cool_warm[] = {
     1.0,
     177,
     1,
-    39};
+    39,
+};
 
 Color get_colormap(double value) {
     Color c;
diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 4577d86c..65efa7b6 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -10,7 +10,7 @@ using std::vector;
 EXAMM* generate_examm_from_arguments(
     const vector<string>& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules,
     RNN_Genome* seed_genome
-) { 
+) {
     Log::info("Getting arguments for EXAMM\n");
     int32_t island_size;
     get_argument(arguments, "--island_size", true, island_size);
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 490e0c3b..9ef90cca 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -1,12 +1,12 @@
 #!/usr/bin/zsh
 
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+OUTPUT_PARAMETERS='E1_EGT1'
 
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
   mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index f3d355f1..8117dadb 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+OUTPUT_PARAMETERS='E1_EGT1'
 
 offset=1
 
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index c1f1dd1c..227c3a85 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -203,7 +203,7 @@ void worker(int32_t rank) {
         } else if (tag == GENOME_LENGTH_TAG) {
             Log::debug("received genome!\n");
             RNN_Genome* genome = receive_genome_from(0);
-            
+
             // have each worker write the backproagation to a separate log file
             string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank);
             Log::set_id(log_id);
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index ac0e0aa1..465c024c 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -2,7 +2,6 @@
 using std::sort;
 
 #include <iomanip>
-
 #include <utility>
 using std::pair;
 
@@ -162,9 +161,10 @@ void DNASNode::print_info() {
     printf(" ");
     int best_pi_idx = 0;
     for (int i = 0; i < nodes.size(); i++) {
-       printf("%-10s & ", std::to_string(pi[i]).c_str());
-        if (pi[i] > pi[best_pi_idx])
+        printf("%-10s & ", std::to_string(pi[i]).c_str());
+        if (pi[i] > pi[best_pi_idx]) {
             best_pi_idx = i;
+        }
     }
     printf("\n");
     Log::info("Node types: ");
@@ -337,8 +337,9 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
-        if (pi[i] < 0.1)
+        if (pi[i] < 0.1) {
             pi[i] = 0.1;
+        }
     }
 
     for (auto node : nodes) {
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 72868d5e..833feee6 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -3224,19 +3224,25 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
 
 #define MAGIC 0xFA
 
-#define read_magic(place) \
-      { \
-      uint8_t boo = MAGIC;\
-      bin_istream.read((char *) &boo, sizeof(uint8_t)); \
-      if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \
-      }
+#define read_magic(place)                                                                                   \
+    {                                                                                                       \
+        uint8_t boo = MAGIC;                                                                                \
+        bin_istream.read((char*) &boo, sizeof(uint8_t));                                                    \
+        if (boo != MAGIC) {                                                                                 \
+            Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); \
+            exit(-1);                                                                                       \
+        }                                                                                                   \
+    }
 
-#define write_magic() \
-  {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));}
+#define write_magic()                                         \
+    {                                                         \
+        uint8_t xxmagic = MAGIC;                              \
+        bin_ostream.write((char*) &xxmagic, sizeof(uint8_t)); \
+    }
 
 void RNN_Genome::read_from_stream(istream& bin_istream) {
     Log::debug("READING GENOME FROM STREAM\n");
-    
+
     read_magic(__LINE__);
 
     bin_istream.read((char*) &generation_id, sizeof(int32_t));
@@ -3244,12 +3250,12 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &bp_iterations, sizeof(int32_t));
 
     read_magic(__LINE__);
-    
+
     bin_istream.read((char*) &use_dropout, sizeof(bool));
     bin_istream.read((char*) &dropout_probability, sizeof(double));
 
     read_magic(__LINE__);
-    
+
     WeightType weight_initialize = WeightType::NONE;
     WeightType weight_inheritance = WeightType::NONE;
     WeightType mutated_component_weight = WeightType::NONE;
@@ -3259,7 +3265,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t));
 
     read_magic(__LINE__);
-    
+
     weight_rules = new WeightRules();
     weight_rules->set_weight_initialize_method(weight_initialize);
     weight_rules->set_weight_inheritance_method(weight_inheritance);
@@ -3282,7 +3288,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     generator_iss >> generator;
 
     read_magic(__LINE__);
-    
+
     // string rng_0_1_str;
     // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
     // So for some reason this was serialized incorrectly for some genomes,
@@ -3299,7 +3305,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     read_map(generated_by_map_iss, generated_by_map);
 
     read_magic(__LINE__);
-    
+
     bin_istream.read((char*) &best_validation_mse, sizeof(double));
     bin_istream.read((char*) &best_validation_mae, sizeof(double));
 
@@ -3312,7 +3318,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     delete[] initial_parameters_v;
 
     read_magic(__LINE__);
-    
+
     int32_t n_best_parameters;
     bin_istream.read((char*) &n_best_parameters, sizeof(int32_t));
     Log::debug("reading %d best parameters.\n", n_best_parameters);
@@ -3322,7 +3328,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     delete[] best_parameters_v;
 
     read_magic(__LINE__);
-    
+
     input_parameter_names.clear();
     int32_t n_input_parameter_names;
     bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t));
@@ -3346,7 +3352,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     }
 
     read_magic(__LINE__);
-    
+
     int32_t n_nodes;
     bin_istream.read((char*) &n_nodes, sizeof(int32_t));
     Log::debug("reading %d nodes.\n", n_nodes);
@@ -3439,7 +3445,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     read_map(normalize_std_devs_iss, normalize_std_devs);
 
     read_magic(__LINE__);
-    
+
     assign_reachability();
 }
 
@@ -3471,12 +3477,12 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &bp_iterations, sizeof(int32_t));
 
     write_magic();
-    
+
     bin_ostream.write((char*) &use_dropout, sizeof(bool));
     bin_ostream.write((char*) &dropout_probability, sizeof(double));
 
     write_magic();
-    
+
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
     WeightType weight_inheritance = weight_rules->get_weight_inheritance_method();
     WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method();
@@ -3485,7 +3491,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t));
 
     write_magic();
-    
+
     Log::debug("generation_id: %d\n", generation_id);
     Log::debug("bp_iterations: %d\n", bp_iterations);
 
@@ -3504,7 +3510,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_binary_string(bin_ostream, generator_str, "generator");
 
     write_magic();
-    
+
     // ostringstream rng_0_1_oss;
     // rng_0_1_oss << rng_0_1;
     // string rng_0_1_str = rng_0_1_oss.str();
@@ -3516,7 +3522,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map");
 
     write_magic();
-    
+
     bin_ostream.write((char*) &best_validation_mse, sizeof(double));
     bin_ostream.write((char*) &best_validation_mae, sizeof(double));
 
@@ -3526,7 +3532,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size());
 
     write_magic();
-    
+
     int32_t n_best_parameters = (int32_t) best_parameters.size();
     bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t));
     if (n_best_parameters) {
@@ -3534,7 +3540,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_input_parameter_names = (int32_t) input_parameter_names.size();
     bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) {
@@ -3542,7 +3548,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_output_parameter_names = (int32_t) output_parameter_names.size();
     bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) {
@@ -3552,7 +3558,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_nodes = (int32_t) nodes.size();
     bin_ostream.write((char*) &n_nodes, sizeof(int32_t));
     Log::debug("writing %d nodes.\n", n_nodes);
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index d6330512..01c7e9e3 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -325,7 +325,7 @@ class RNN_Genome {
     );
     vector<RNN_Node_Interface*> pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type);
 
-    const vector<RNN_Node_Interface *> &get_nodes() {
+    const vector<RNN_Node_Interface*>& get_nodes() {
         return this->nodes;
     }
 
diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx
index 74fd6519..fac60c84 100644
--- a/rnn_examples/dnas_info.cxx
+++ b/rnn_examples/dnas_info.cxx
@@ -86,9 +86,9 @@ int main(int argc, char** argv) {
     RNN_Genome genome(filename);
 
     for (auto node : genome.get_nodes()) {
-        if (DNASNode *d = dynamic_cast<DNASNode*>(node)) {
-          std::cout << "'" << filename << "': ";
-          d->print_info();
+        if (DNASNode* d = dynamic_cast<DNASNode*>(node)) {
+            std::cout << "'" << filename << "': ";
+            d->print_info();
         }
     }
 
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 02d7db80..7bd5647c 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -116,7 +116,7 @@ int main(int argc, char** argv) {
     if (genome_file.size() != 0) {
         genome = new RNN_Genome(genome_file);
         Log::info("best weights: { ");
-        for (double &d : genome->get_best_parameters()) {
+        for (double& d : genome->get_best_parameters()) {
             Log::info_no_header("%f, ", d);
         }
         Log::info("}\n");
@@ -124,15 +124,14 @@ int main(int argc, char** argv) {
         vector<double> params;
         genome->get_weights(params);
         Log::info("current weights: { ");
-        for (double &d : params) {
+        for (double& d : params) {
             Log::info_no_header("%f, ", d);
         }
         Log::info("}\n");
     } else {
-
         string rnn_type;
         get_argument(arguments, "--rnn_type", true, rnn_type);
-        
+
         Log::info("RNN TYPE = %s\n", rnn_type.c_str());
 
         int32_t num_hidden_layers;
@@ -146,50 +145,50 @@ int main(int argc, char** argv) {
 
         if (rnn_type == "lstm") {
             genome = create_lstm(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "gru") {
             genome = create_gru(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "delta") {
             genome = create_delta(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "mgu") {
             genome = create_mgu(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "ugrnn") {
             genome = create_ugrnn(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "ff") {
             genome = create_ff(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "jordan") {
             genome = create_jordan(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "elman") {
             genome = create_elman(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
         } else if (rnn_type == "dnas") {
             vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};

From c0264d9ad2f82ae86d3369f7cd578e77d0a074eb Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 12 Dec 2023 19:36:16 -0500
Subject: [PATCH 15/42] Tweak experimental parameters

---
 initial_integration_experiments/dnas.zsh |  8 ++++----
 rnn/dnas_node.cxx                        | 17 ++++-------------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 9ef90cca..0c2a615f 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -8,7 +8,7 @@ offset=1
 run_examm() {
   output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
+  mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
@@ -29,7 +29,7 @@ run_examm() {
       --std_message_level INFO \
       --file_message_level INFO \
       --crystalize_iters $crystalize_iters \
-      --max_genomes 10000 \
+      --max_genomes 4000 \
       --island_size 8 \
       --number_islands 8 \
       --dnas_k $k
@@ -39,7 +39,7 @@ run_examm() {
 }
 
 CELL_TYPE='dnas'
-for crystalize_iters in 128 256 512 1024; do
+for crystalize_iters in 64 128 256 512; do
   for bp_epoch in 8 16 32 64 128; do
     for k in 1; do
       for fold in 0 1 2 3; do
@@ -47,7 +47,7 @@ for crystalize_iters in 128 256 512 1024; do
       done
       wait
       for fold in 4 5 6 7; do
-        run_examm
+        run_examm &
       done
       wait
     done
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 465c024c..87e05d9b 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -99,15 +99,7 @@ double DNASNode::calculate_pi_lr() {
 }
 
 double DNASNode::calculate_tao() {
-    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
-    if (percentage_done < 0.33) {
-        return 1.33;
-    } else if (percentage_done < 0.66) {
-        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 1.33 - percentage_done_with_phase * 1.15;
-    } else {
-        return 0.18;
-    }
+    return 6.0;
 }
 
 void DNASNode::calculate_z() {
@@ -337,8 +329,8 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
-        if (pi[i] < 0.1) {
-            pi[i] = 0.1;
+        if (pi[i] < 0.01) {
+            pi[i] = 0.01;
         }
     }
 
@@ -395,9 +387,8 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
-        double pi_lr = calculate_pi_lr();
         for (auto i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i] * pi_lr;
+            gradients[offset++] = d_pi[i];
         }
 
         for (auto node : nodes) {

From 1607c268ee922a8b042e555a0cf32f96fdae94bb Mon Sep 17 00:00:00 2001
From: Josh Karns <karns@meta.com>
Date: Tue, 26 Dec 2023 13:27:35 -0500
Subject: [PATCH 16/42] Tweaking experiments

---
 examm/examm.cxx                               |  3 +
 initial_integration_experiments/analyze.py    | 77 +++++++++++++++++++
 initial_integration_experiments/analyze.zsh   | 12 +++
 initial_integration_experiments/dnas.zsh      | 31 ++++----
 .../post_training_dnas.zsh                    |  2 +-
 rnn/generate_nn.cxx                           |  1 +
 rnn/rnn_node_interface.cxx                    |  3 +-
 7 files changed, 110 insertions(+), 19 deletions(-)
 create mode 100644 initial_integration_experiments/analyze.py
 create mode 100644 initial_integration_experiments/analyze.zsh

diff --git a/examm/examm.cxx b/examm/examm.cxx
index f017ab8b..e0be2d07 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -182,10 +182,12 @@ void EXAMM::update_log() {
             }
             (*op_log_file) << endl;
         }
+
         RNN_Genome* best_genome = get_best_genome();
         if (best_genome == NULL) {
             best_genome = speciation_strategy->get_global_best_genome();
         }
+
         std::chrono::time_point<std::chrono::system_clock> currentClock = std::chrono::system_clock::now();
         long milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(currentClock - startClock).count();
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
@@ -193,6 +195,7 @@ void EXAMM::update_log() {
                     << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
                     << best_genome->get_enabled_recurrent_edge_count()
                     << speciation_strategy->get_strategy_information_values() << endl;
+        Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }
 }
 
diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
new file mode 100644
index 00000000..58ba95b9
--- /dev/null
+++ b/initial_integration_experiments/analyze.py
@@ -0,0 +1,77 @@
+import pandas
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1)
+
+plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128}
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    if k == 8:
+        continue
+    v.sharey(a8)
+    v.sharex(a8)
+
+results = {}
+for ci in [64, 128, 256, 512]:
+    results[ci] = {}
+    for bpe in [8, 16, 32, 64, 128]:
+        results[ci][bpe] = {}
+        for k in [1]:
+            x = []
+            results[ci][bpe][k] = x
+
+            for fold in range(8):
+                f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
+                results[ci][bpe][k].append(f)
+
+
+            enabled_nodes = []
+            enabled_edges = []
+            enabled_rec_edges = []
+
+            bpi_columns = []
+            mse_columns = []
+
+            minlen = 100000000
+
+            for f in x:
+                bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+                mse_columns.append(f[' Best Val. MSE'].to_numpy())
+                enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+                enabled_edges.append(f[' Enabled Edges'].to_numpy())
+                enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+                minlen = min(minlen, len(bpi_columns[-1]))
+
+            enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+            enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+            enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+            bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+            mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+
+
+            nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+            edgesmean = np.mean(np.array(enabled_edges), axis=0)
+            redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+            print(f"Nodes at end mean: {nodesmean[-1]}")
+            print(f"edges at end mean: {edgesmean[-1]}")
+            print(f"redges at end mean: {redgesmean[-1]}")
+
+
+            bpimean = np.mean(np.array(bpi_columns), axis=0)
+            msemean = np.mean(np.array(mse_columns), axis=0)
+            msestd = np.std(np.array(mse_columns), axis=0)
+
+            g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0]
+            plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd,
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    v.legend(fontsize=12, loc="upper right")
+
+plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
new file mode 100644
index 00000000..5c2876f3
--- /dev/null
+++ b/initial_integration_experiments/analyze.zsh
@@ -0,0 +1,12 @@
+#!/usr/bin/zsh
+#
+for crystalize_iters in 64 128 256 512; do
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3 4 5 6 7; do
+        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+        tail -1 $output_dir/fitness_log.csv
+      done
+    done
+  done
+done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 0c2a615f..995d072a 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -1,26 +1,24 @@
-#!/usr/bin/zsh
+#!/bin/zsh
 
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 
 offset=1
 
 run_examm() {
   output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \
+  mpirun -np 8 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
       --input_parameter_names ${=INPUT_PARAMETERS} \
       --output_parameter_names ${=OUTPUT_PARAMETERS} \
       --bp_iterations $bp_epoch \
-      --stochastic \
-      --possible_node_types DNAS \
       --normalize min_max \
       --num_hidden_layers $SIZE \
       --hidden_layer_size $SIZE \
-      --train_sequence_length 100 \
+      --train_sequence_length 1000 \
       --validation_sequence_length 100 \
       --max_recurrent_depth 1 \
       --output_directory $output_dir \
@@ -30,7 +28,7 @@ run_examm() {
       --file_message_level INFO \
       --crystalize_iters $crystalize_iters \
       --max_genomes 4000 \
-      --island_size 8 \
+      --island_size 32 \
       --number_islands 8 \
       --dnas_k $k
 
@@ -40,16 +38,17 @@ run_examm() {
 
 CELL_TYPE='dnas'
 for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
+  for bp_epoch in 1 2 4 8 16 32 64 128; do
     for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
+        fold=1 run_examm
+#       for fold in 0 1 2 3; do
+#         run_examm &
+#       done
+#       wait
+#       for fold in 4 5 6 7; do
+#         run_examm &
+#       done
+#       wait
     done
   done
 done
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index 8117dadb..b25171a1 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,4 +1,4 @@
-#!/usr/bin/zsh
+#!/bin/zsh
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
 OUTPUT_PARAMETERS='E1_EGT1'
 
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index a84fb36f..c4068495 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -190,6 +190,7 @@ RNN_Genome* get_seed_genome(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
+            // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules);
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
         }
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index ab5796b2..f86eddd7 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -1,7 +1,6 @@
-#include <cmath>
+#include <algorithm>
 using std::max;
 
-#include <algorithm>
 #include <fstream>
 using std::ostream;
 

From 778d24aa8d695394203cc7d86b8d6d835725a1f2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 8 Jan 2024 13:34:21 -0500
Subject: [PATCH 17/42] Modified scripts

---
 initial_integration_experiments/analyze.py | 17 ++++++++-------
 initial_integration_experiments/dnas.zsh   | 24 ++++++++++------------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
index 58ba95b9..cee900d6 100644
--- a/initial_integration_experiments/analyze.py
+++ b/initial_integration_experiments/analyze.py
@@ -4,28 +4,31 @@
 
 import matplotlib.pyplot as plt
 
-fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1)
+fig, subplts = plt.subplots(6, 1)
 
-plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128}
+bprange = [1, 2, 4, 8, 16, 32]
+plts = {k:v for k, v in zip(bprange, subplts)}
+print(plts)
+base = plts[bprange[0]]
 
 for k, v in plts.items():
     v.set_title(f"{k} BPI")
-    if k == 8:
+    if k == bprange[0]:
         continue
-    v.sharey(a8)
-    v.sharex(a8)
+    v.sharey(base)
+    v.sharex(base)
 
 results = {}
 for ci in [64, 128, 256, 512]:
     results[ci] = {}
-    for bpe in [8, 16, 32, 64, 128]:
+    for bpe in bprange:
         results[ci][bpe] = {}
         for k in [1]:
             x = []
             results[ci][bpe][k] = x
 
             for fold in range(8):
-                f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
+                f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
                 results[ci][bpe][k].append(f)
 
 
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 995d072a..5acc8b06 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -6,7 +6,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
   mpirun -np 8 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
@@ -29,7 +29,7 @@ run_examm() {
       --crystalize_iters $crystalize_iters \
       --max_genomes 4000 \
       --island_size 32 \
-      --number_islands 8 \
+      --number_islands 4 \
       --dnas_k $k
 
   best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
@@ -38,17 +38,15 @@ run_examm() {
 
 CELL_TYPE='dnas'
 for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 1 2 4 8 16 32 64 128; do
-    for k in 1; do
-        fold=1 run_examm
-#       for fold in 0 1 2 3; do
-#         run_examm &
-#       done
-#       wait
-#       for fold in 4 5 6 7; do
-#         run_examm &
-#       done
-#       wait
+  for bp_epoch in 1 2 4 8 16 32; do
+    for k in 1 2; do
+      for fold in 0 1 2 3; do
+        run_examm &
+      done
+      for fold in 4 5 6 7; do
+        run_examm &
+      done
+      wait
     done
   done
 done

From ffa684a5b3bcbd1d2533efe61fcd3ca18c6f600c Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 29 Jan 2024 19:44:50 -0500
Subject: [PATCH 18/42] Prepping for cluster

---
 CMakeLists.txt                                |   3 +-
 examm/examm.cxx                               |   1 +
 initial_integration_experiments/analyze.py    | 122 +++++++++++-------
 initial_integration_experiments/aviation.zsh  |  37 ++++++
 initial_integration_experiments/control.zsh   |  50 +++++++
 initial_integration_experiments/debug.zsh     |  55 ++++++++
 initial_integration_experiments/dnas.zsh      |  25 ++--
 .../gp_control.zsh                            |  59 +++++++++
 .../post_training_dnas.zsh                    |   8 +-
 initial_integration_experiments/posttrain.zsh |   3 +
 initial_integration_experiments/run_examm.zsh |  25 ++++
 .../run_experiments.zsh                       |   4 +
 initial_integration_experiments/wind.zsh      |  39 ++++++
 rnn/dnas_node.cxx                             |  21 +--
 rnn/generate_nn.cxx                           |   1 -
 rnn/rnn_edge.cxx                              |   3 +-
 rnn/rnn_node.cxx                              |   4 +-
 rnn/rnn_node_interface.cxx                    |  12 +-
 rnn/rnn_node_interface.hxx                    |   5 +-
 rnn_examples/train_rnn.cxx                    |  11 +-
 time_series/time_series.cxx                   |   8 +-
 21 files changed, 404 insertions(+), 92 deletions(-)
 create mode 100644 initial_integration_experiments/aviation.zsh
 create mode 100644 initial_integration_experiments/control.zsh
 create mode 100755 initial_integration_experiments/debug.zsh
 mode change 100755 => 100644 initial_integration_experiments/dnas.zsh
 create mode 100644 initial_integration_experiments/gp_control.zsh
 create mode 100644 initial_integration_experiments/posttrain.zsh
 create mode 100644 initial_integration_experiments/run_examm.zsh
 create mode 100755 initial_integration_experiments/run_experiments.zsh
 create mode 100644 initial_integration_experiments/wind.zsh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1af3a314..5d62df91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3")
+# SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3 -fsanitize=address")
+SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3 ")
 SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
diff --git a/examm/examm.cxx b/examm/examm.cxx
index e0be2d07..1e1c2314 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -324,6 +324,7 @@ void EXAMM::mutate(int32_t max_mutations, RNN_Genome* g) {
         g->assign_reachability();
         double rng = rng_0_1(generator) * total;
         int32_t new_node_type = get_random_node_type();
+        Log::info("%d %d\n", new_node_type, NODE_TYPES.size());
         string node_type_str = NODE_TYPES[new_node_type];
         Log::debug("rng: %lf, total: %lf, new node type: %d (%s)\n", rng, total, new_node_type, node_type_str.c_str());
 
diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
index cee900d6..78d51466 100644
--- a/initial_integration_experiments/analyze.py
+++ b/initial_integration_experiments/analyze.py
@@ -6,7 +6,7 @@
 
 fig, subplts = plt.subplots(6, 1)
 
-bprange = [1, 2, 4, 8, 16, 32]
+bprange = [8, 16]
 plts = {k:v for k, v in zip(bprange, subplts)}
 print(plts)
 base = plts[bprange[0]]
@@ -18,63 +18,93 @@
     v.sharey(base)
     v.sharex(base)
 
+def avg(files, slice_at=-1):
+    r = {}
+    for file in files:
+        x = []
+
+        for fold in range(8):
+            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
+            print(f"{file}/{fold} -> {len(f)}")
+            x.append(f)
+
+
+        enabled_nodes = []
+        enabled_edges = []
+        enabled_rec_edges = []
+
+        bpi_columns = []
+        mse_columns = []
+
+        minlen = 100000000
+
+        for f in x:
+            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+            mse_columns.append(f[' Best Val. MSE'].to_numpy())
+            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+            enabled_edges.append(f[' Enabled Edges'].to_numpy())
+            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+            minlen = min(minlen, len(bpi_columns[-1]))
+
+        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+
+        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+        edgesmean = np.mean(np.array(enabled_edges), axis=0)
+        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+        print(f"Nodes at end mean: {nodesmean[-1]}")
+        print(f"edges at end mean: {edgesmean[-1]}")
+        print(f"redges at end mean: {redgesmean[-1]}")
+
+
+        bpimean = np.mean(np.array(bpi_columns), axis=0)
+        msemean = np.mean(np.array(mse_columns), axis=0)
+        msestd = np.std(np.array(mse_columns), axis=0)
+        
+        r[file] = {
+            'mean_nodes': nodesmean,
+            'mean_edges': edgesmean,
+            'mean_rec_edges':redgesmean,
+            'bpi': bpimean,
+            'mean_mse': msemean,
+            'std_mse': msestd,
+        }
+    return r
+
 results = {}
-for ci in [64, 128, 256, 512]:
+for ci in [64]:
     results[ci] = {}
     for bpe in bprange:
         results[ci][bpe] = {}
         for k in [1]:
-            x = []
+            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
+            x = avg([f])[f]
             results[ci][bpe][k] = x
+            print(x)
 
-            for fold in range(8):
-                f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
-                results[ci][bpe][k].append(f)
-
-
-            enabled_nodes = []
-            enabled_edges = []
-            enabled_rec_edges = []
-
-            bpi_columns = []
-            mse_columns = []
-
-            minlen = 100000000
-
-            for f in x:
-                bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-                mse_columns.append(f[' Best Val. MSE'].to_numpy())
-                enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-                enabled_edges.append(f[' Enabled Edges'].to_numpy())
-                enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-                minlen = min(minlen, len(bpi_columns[-1]))
-
-            enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-            enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-            enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-            bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-            mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-
-            nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-            edgesmean = np.mean(np.array(enabled_edges), axis=0)
-            redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-            print(f"Nodes at end mean: {nodesmean[-1]}")
-            print(f"edges at end mean: {edgesmean[-1]}")
-            print(f"redges at end mean: {redgesmean[-1]}")
-
+            print(x['mean_mse'] - x['std_mse'])
+            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
+            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
-            bpimean = np.mean(np.array(bpi_columns), axis=0)
-            msemean = np.mean(np.array(mse_columns), axis=0)
-            msestd = np.std(np.array(mse_columns), axis=0)
+control_results = {}
+for bp in [8, 16]:
+    key = f"initial_integration_experiments/results/control_v7/{bp}"
+    r = avg([key])[key]
+    control_results[bp] = r
+    print(list(r.keys()))
+    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
+    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
+        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
-            g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0]
-            plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd,
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
 for k, v in plts.items():
     v.set_title(f"{k} BPI")
     v.legend(fontsize=12, loc="upper right")
 
+
 plt.show()
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
new file mode 100644
index 00000000..7059da3e
--- /dev/null
+++ b/initial_integration_experiments/aviation.zsh
@@ -0,0 +1,37 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+
+offset=1
+
+run_examm() {  # One 32-process EXAMM run for the current $output_params / $bp_epoch / $fold loop values.
+  output_dir=results/v0/$output_params/$bp_epoch/$fold  # keyed by target so the E1_CHT1 and Pitch sweeps do not overwrite each other
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+  # Marker file written once mpirun returns; the exit status is not checked.
+  touch $output_dir/completed
+}
+
+for output_params in E1_CHT1 Pitch; do  # each target, bp_epoch and fold combination, run sequentially
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in {0..9}; do
+      run_examm
+    done
+  done
+done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/initial_integration_experiments/control.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {  # One control EXAMM run (mpirun -np 8, fixed node types, --stochastic 0) for the current $bp_epoch/$fold.
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+  # NOTE(review): $SIZE and $crystalize_iters are never assigned in this script; presumably inherited from the environment - confirm.
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)  # flat list read as (bp_epoch, max_genomes) pairs by the loop below
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do  # two loop variables take two words per iteration; presumably (kv) leaves a plain array unchanged - confirm
+   for fold in 0 1 2 3; do  # first batch: four runs started in the background
+     run_examm &
+   done
+   wait  # block until the whole batch has finished before starting the next one
+   for fold in 4 5 6 7; do  # second batch of four background runs
+     run_examm &
+   done
+   wait
+done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
new file mode 100755
index 00000000..ce159c01
--- /dev/null
+++ b/initial_integration_experiments/debug.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {  # One DNAS debug run (mpirun -np 63 --use-hwthread-cpus) for the current $crystalize_iters/$bp_epoch/$k/$fold.
+  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 8192 \
+      --island_size 32 \
+      --number_islands 4 \
+      --stochastic \
+      --dnas_k $k
+  # NOTE(review): --stochastic is passed twice above ("--stochastic 1" and a bare "--stochastic") and $SIZE is never assigned in this script - confirm both are intended.
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'  # NOTE(review): not referenced anywhere else in this script
+for crystalize_iters in 128; do  # single-value loops: exactly one foreground run (128 / 8 / k=1 / fold 0)
+  for bp_epoch in 8; do
+    for k in 1; do
+      for fold in 0; do
+        run_examm
+      done
+ #      wait
+ #      for fold in 4 5 6 7; do
+ #        run_examm &
+ #      done
+ #      wait
+    done
+  done
+done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
old mode 100755
new mode 100644
index 5acc8b06..8b525b09
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -6,43 +6,46 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
+  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
       --input_parameter_names ${=INPUT_PARAMETERS} \
       --output_parameter_names ${=OUTPUT_PARAMETERS} \
       --bp_iterations $bp_epoch \
       --normalize min_max \
       --num_hidden_layers $SIZE \
       --hidden_layer_size $SIZE \
-      --train_sequence_length 1000 \
       --validation_sequence_length 100 \
       --max_recurrent_depth 1 \
       --output_directory $output_dir \
       --log_filename fitness.csv \
       --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
       --crystalize_iters $crystalize_iters \
-      --max_genomes 4000 \
+      --max_genomes $max_genomes \
       --island_size 32 \
       --number_islands 4 \
       --dnas_k $k
 
-  best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
 }
 
 CELL_TYPE='dnas'
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 1 2 4 8 16 32; do
-    for k in 1 2; do
+bp_ge=(8 8192 16 4096 32 2048)
+for crystalize_iters in 256; do
+  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+    for k in 1; do
       for fold in 0 1 2 3; do
         run_examm &
       done
+      wait
       for fold in 4 5 6 7; do
         run_examm &
       done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
new file mode 100644
index 00000000..049e9750
--- /dev/null
+++ b/initial_integration_experiments/gp_control.zsh
@@ -0,0 +1,59 @@
+#!/bin/zsh
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+run_examm() {  # One 32-process EXAMM run for the current dataset globals and $output_params / $bp_epoch / $fold.
+  output_dir=test_results/v0/$output_params/$bp_epoch/$fold  # keyed by output so successive outputs and datasets do not overwrite each other
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames ${=training_filenames} \
+      --test_filenames ${=test_filenames} \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names $output_params \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes $MAX_GENOMES \
+      --island_size $ISLAND_SIZE \
+      --number_islands $N_ISLANDS
+  # Marker file written once mpirun returns; the exit status is not checked.
+  touch $output_dir/completed
+}
+
+run_group() {  # Sweep every entry of $OUTPUTS over all bp_epoch values and folds 0-9, one run at a time.
+  for output_params in $OUTPUTS; do
+    for bp_epoch in 2 4 8 16 32; do
+      for fold in {0..9}; do
+        run_examm
+      done
+    done
+  done
+}
+
+INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
+training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+run_group
+
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUTS=("E1_CHT1" "Pitch")
+training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+run_group
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+OUTPUTS=("Cm_avg" "P_avg")
+training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index b25171a1..1c226178 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,7 +1,4 @@
 #!/bin/zsh
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1'
-
 offset=1
 
 post_training() {
@@ -21,11 +18,12 @@ post_training() {
         --log_filename post_training.csv \
         --learning_rate 0.01 \
         --weight_update adagrad \
-        --train_sequence_length 100 \
+        --train_sequence_length 1000 \
         --validation_sequence_length 100 \
         --crystalize_iters $CRYSTALIZE_ITERS \
         --dnas_k $k
- 
+
+      tail -1 $OUTPUT_DIRECTORY/post_training.csv
 }
 
 post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
new file mode 100644
index 00000000..cc54a2eb
--- /dev/null
+++ b/initial_integration_experiments/posttrain.zsh
@@ -0,0 +1,3 @@
+#!/bin/zsh
+
+
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
new file mode 100644
index 00000000..77d2893f
--- /dev/null
+++ b/initial_integration_experiments/run_examm.zsh
@@ -0,0 +1,25 @@
+#!/bin/zsh
+# Single EXAMM run. Reads bp_epoch, fold, offset, INPUT_PARAMETERS and output_params, none of which are assigned here - presumably exported by the caller (confirm).
+output_dir=results/v0/$bp_epoch/$fold
+mkdir -p $output_dir
+
+mpirun -np 32 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset $offset \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=output_params} \
+    --bp_iterations $bp_epoch \
+    --normalize min_max \
+    --max_recurrent_depth 1 \
+    --output_directory $output_dir \
+    --log_filename fitness.csv \
+    --learning_rate 0.01 \
+    --std_message_level INFO \
+    --file_message_level INFO \
+    --max_genomes 4000 \
+    --island_size 32 \
+    --number_islands 4
+# Marker file written once mpirun returns; the exit status is not checked.
+touch $output_dir/completed
+
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
new file mode 100755
index 00000000..7dd8e956
--- /dev/null
+++ b/initial_integration_experiments/run_experiments.zsh
@@ -0,0 +1,4 @@
+#!/bin/zsh
+# Runs the control sweep, then the DNAS sweep. Both are started through zsh because the scripts are not marked executable.
+zsh initial_integration_experiments/control.zsh
+zsh initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
new file mode 100644
index 00000000..7e68f482
--- /dev/null
+++ b/initial_integration_experiments/wind.zsh
@@ -0,0 +1,39 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+
+
+offset=1
+
+run_examm() {  # One 32-process EXAMM run on the wind-turbine data for the current $output_params / $bp_epoch / $fold.
+  output_dir=results/v0/$output_params/$bp_epoch/$fold  # keyed by target so the Cm_avg and P_avg sweeps do not overwrite each other
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
+      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+  # Marker file written once mpirun returns; the exit status is not checked.
+  touch $output_dir/completed
+}
+
+
+for output_params in Cm_avg P_avg; do  # each target, bp_epoch and fold combination, run sequentially
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in {0..9}; do
+      run_examm
+    done
+  done
+done
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 87e05d9b..f6d42bfe 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -168,14 +168,15 @@ void DNASNode::print_info() {
 }
 
 void DNASNode::reset(int32_t series_length) {
-    d_pi = vector<double>(pi.size(), 0.0);
-    d_input = vector<double>(series_length, 0.0);
-    node_outputs = vector<vector<double>>(series_length, vector<double>(pi.size(), 0.0));
-    output_values = vector<double>(series_length, 0.0);
-    error_values = vector<double>(series_length, 0.0);
-    inputs_fired = vector<int>(series_length, 0);
-    outputs_fired = vector<int>(series_length, 0);
-    input_values = vector<double>(series_length, 0.0);
+    d_pi.assign(pi.size(), 0.0);
+    d_input.assign(series_length, 0.0);
+    node_outputs.clear();
+    for (int i = 0; i < series_length; i++) node_outputs.emplace_back(pi.size(), 0.0);
+    output_values.assign(series_length, 0.0);
+    error_values.assign(series_length, 0.0);
+    inputs_fired.assign(series_length, 0);
+    outputs_fired.assign(series_length, 0);
+    input_values.assign(series_length, 0.0);
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
         nodes[maxi]->reset(series_length);
@@ -206,8 +207,10 @@ void DNASNode::input_fired(int32_t time, double incoming_output) {
     }
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
+        Log::info("%d hmm\n", maxi >= 0);
         assert(maxi >= 0);
-
+        
+        Log::info("%d %d %p\n", maxi, time, nodes[maxi]);
         nodes[maxi]->input_fired(time, input_values[time]);
         node_outputs[time][maxi] = nodes[maxi]->output_values[time];
         output_values[time] = nodes[maxi]->output_values[time];
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index c4068495..a84fb36f 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -190,7 +190,6 @@ RNN_Genome* get_seed_genome(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
-            // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules);
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
         }
diff --git a/rnn/rnn_edge.cxx b/rnn/rnn_edge.cxx
index 3227e961..babb2552 100644
--- a/rnn/rnn_edge.cxx
+++ b/rnn/rnn_edge.cxx
@@ -92,7 +92,7 @@ RNN_Edge* RNN_Edge::copy(const vector<RNN_Node_Interface*> new_nodes) {
 }
 
 void RNN_Edge::propagate_forward(int32_t time) {
-    if (input_node->inputs_fired[time] != input_node->total_inputs) {
+    if (time < 0 || time >= (int32_t) input_node->output_values.size() || input_node->inputs_fired[time] != input_node->total_inputs) {  // range-check time before indexing
         Log::fatal(
             "ERROR! propagate forward called on edge %d where input_node->inputs_fired[%d] (%d) != total_inputs (%d)\n",
             innovation_number, time, input_node->inputs_fired[time], input_node->total_inputs
@@ -104,7 +104,6 @@ void RNN_Edge::propagate_forward(int32_t time) {
         exit(1);
     }
 
-    // Log::debug("input_node %p %d\n", input_node, input_node->output_values.size());
     double output = input_node->output_values[time] * weight;
 
     // Log::debug("propagating forward at time %d from %d to %d, value: %lf, input: %lf, weight: %lf\n", time,
diff --git a/rnn/rnn_node.cxx b/rnn/rnn_node.cxx
index 075c11ed..3e79a1df 100644
--- a/rnn/rnn_node.cxx
+++ b/rnn/rnn_node.cxx
@@ -57,8 +57,6 @@ void RNN_Node::input_fired(int32_t time, double incoming_output) {
         exit(1);
     }
 
-    Log::debug("node %d - input value[%d]: %lf\n", innovation_number, time, input_values[time]);
-
     output_values[time] = tanh(input_values[time] + bias);
     ld_output[time] = tanh_derivative(output_values[time]);
 
@@ -86,6 +84,8 @@ void RNN_Node::try_update_deltas(int32_t time) {
             outputs_fired[time], total_outputs
         );
         exit(1);
+    } else if (time >= d_input.size() || time < 0) {
+        Log::fatal("invalid time %d\n", time);
     }
 
     d_input[time] *= ld_output[time];
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index f86eddd7..210706a2 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -7,13 +7,16 @@ using std::ostream;
 #include <string>
 using std::string;
 
+#include <vector>
+using std::vector;
+
 #include "common/log.hxx"
 #include "rnn/rnn_genome.hxx"
 #include "rnn_node_interface.hxx"
 
-extern const int32_t NUMBER_NODE_TYPES = 11;
-extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN",    "MGU", "GRU",
-                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "dnas"};
+extern const vector<string> NODE_TYPES = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU",
+                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "random_dag", "dnas"};
+
 extern const unordered_map<string, int32_t> string_to_node_type = {
     {"simple",   SIMPLE_NODE},
     {"jordan",   JORDAN_NODE},
@@ -25,7 +28,8 @@ extern const unordered_map<string, int32_t> string_to_node_type = {
     {  "lstm",     LSTM_NODE},
     { "enarc",    ENARC_NODE},
     {  "enas", ENAS_DAG_NODE},
-    {  "dnas",     DNAS_NODE}
+    {  "dnas",     DNAS_NODE},
+    {  "random_dag", RANDOM_DAG_NODE},
 };
 
 int32_t node_type_from_string(string& node_type) {
diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx
index 15ec45cd..d1b56fa3 100644
--- a/rnn/rnn_node_interface.hxx
+++ b/rnn/rnn_node_interface.hxx
@@ -26,8 +26,9 @@ class RNN;
 #define HIDDEN_LAYER 1
 #define OUTPUT_LAYER 2
 
-extern const int32_t NUMBER_NODE_TYPES;
-extern const string NODE_TYPES[];
+extern const vector<string> NODE_TYPES;
+#define NUMBER_NODE_TYPES NODE_TYPES.size()
+
 extern const unordered_map<string, int32_t> string_to_node_type;
 int32_t node_type_from_string(string& node_type);
 
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 7bd5647c..265b9669 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -15,6 +15,7 @@ using std::string;
 using std::vector;
 
 #include "common/arguments.hxx"
+#include "common/process_arguments.hxx"
 #include "common/files.hxx"
 #include "common/log.hxx"
 #include "rnn/generate_nn.hxx"
@@ -81,9 +82,9 @@ int main(int argc, char** argv) {
     Log::set_id("main");
 
     TimeSeriesSets* time_series_sets = TimeSeriesSets::generate_from_arguments(arguments);
-
-    int32_t time_offset = 1;
-    get_argument(arguments, "--time_offset", true, time_offset);
+    get_train_validation_data(
+        arguments, time_series_sets, training_inputs, training_outputs, test_inputs, test_outputs
+    );
 
     int32_t crystallization_threshold = 1000;
     get_argument(arguments, "--crystalize_iters", false, crystallization_threshold);
@@ -93,8 +94,8 @@ int main(int argc, char** argv) {
     get_argument(arguments, "--dnas_k", false, k);
     DNASNode::k = k;
 
-    time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
-    time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
+    // time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
+    // time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
 
     int number_inputs = time_series_sets->get_number_inputs();
     // int number_outputs = time_series_sets->get_number_outputs();
diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx
index de143147..e315164e 100644
--- a/time_series/time_series.cxx
+++ b/time_series/time_series.cxx
@@ -472,7 +472,7 @@ void TimeSeriesSet::export_time_series(
     if (time_offset == 0) {
         for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) {
             for (int32_t j = 0; j < number_rows; j++) {
-                data[i][j] = time_series[requested_fields[i]]->get_value(j);
+                data[i][j] = time_series.at(requested_fields[i])->get_value(j);
             }
         }
 
@@ -480,7 +480,7 @@ void TimeSeriesSet::export_time_series(
         // output data, ignore the first N values
         for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) {
             for (int32_t j = time_offset; j < number_rows; j++) {
-                data[i][j - time_offset] = time_series[requested_fields[i]]->get_value(j);
+                data[i][j - time_offset] = time_series.at(requested_fields[i])->get_value(j);
             }
         }
 
@@ -492,13 +492,13 @@ void TimeSeriesSet::export_time_series(
                 Log::debug("doing shift for field: '%s'\n", requested_fields[i].c_str());
                 // shift the shifted fields to the same as the output, not the input
                 for (int32_t j = -time_offset; j < number_rows; j++) {
-                    data[i][j + time_offset] = time_series[requested_fields[i]]->get_value(j);
+                    data[i][j + time_offset] = time_series.at(requested_fields[i])->get_value(j);
                     // Log::info("\tdata[%d][%d]: %lf\n", i, j + time_offset, data[i][j + time_offset]);
                 }
             } else {
                 Log::debug("not doing shift for field: '%s'\n", requested_fields[i].c_str());
                 for (int32_t j = 0; j < number_rows + time_offset; j++) {
-                    data[i][j] = time_series[requested_fields[i]]->get_value(j);
+                    data[i][j] = time_series.at(requested_fields[i])->get_value(j);
                 }
             }
         }

From 60acb2c8c08d60844fdc161843f0a94771aa5158 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Wed, 31 Jan 2024 04:28:06 -0500
Subject: [PATCH 19/42] gecco 2024 related experiment files + hacky changes

---
 dnas_cluster.zsh                     | 69 ++++++++++++++++++++++++++++
 dnas_control.zsh                     | 60 ++++++++++++++++++++++++
 examm/island_speciation_strategy.cxx |  5 +-
 key                                  |  7 +++
 rnn/generate_nn.cxx                  | 16 ++++++-
 rnn/genome_property.cxx              | 31 +++++++++++--
 rnn/genome_property.hxx              | 12 ++++-
 7 files changed, 191 insertions(+), 9 deletions(-)
 create mode 100644 dnas_cluster.zsh
 create mode 100644 dnas_control.zsh
 create mode 100644 key

diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh
new file mode 100644
index 00000000..55823c0c
--- /dev/null
+++ b/dnas_cluster.zsh
@@ -0,0 +1,69 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {  # One DNAS EXAMM run (srun -n 36, DNAS seed genome, burn-in epoch schedule) for the current $crystalize_iters/$k/$fold.
+  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --use_dnas_seed true \
+      --use_burn_in_bp_epoch \
+      --burn_in_period 1024 \
+      --dnas_k $k
+  # NOTE(review): $bp_epoch, $max_genomes and $SIZE are not assigned in this script (the bp_ge loop at the bottom is commented out); presumably exported by the submitting environment - confirm.
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {  # Folds 0-19, one after another, with crystalize_iters=512 and k=1.
+  for crystalize_iters in 512; do
+    for k in 1; do
+      for fold in {0..19}; do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/dnas_control.zsh b/dnas_control.zsh
new file mode 100644
index 00000000..5e6982c8
--- /dev/null
+++ b/dnas_control.zsh
@@ -0,0 +1,60 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {  # One control EXAMM run (srun -n 36, fixed node types, --stochastic 0, burn-in epoch schedule) for the current $fold.
+  output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --burn_in_period 1024 \
+      --use_burn_in_bp_epoch
+  # NOTE(review): $bp_epoch, $max_genomes, $crystalize_iters and $SIZE are never assigned in this script; presumably exported by the submitting environment - confirm.
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {  # Folds 0-19, one after another, with k=1.
+  for k in 1; do
+    for fold in {0..19}; do
+      run_examm
+    done
+  done
+}
+
+run_group
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index 920eb203..d8eaabab 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island(
         Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str());
         exit(1);
     }
-    return new_genome;
+    return new_genome;
 }
 
 RNN_Genome* IslandSpeciationStrategy::generate_genome(
@@ -370,6 +370,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
         Log::info("Island %d: new genome is still null, regenerating\n", generation_island);
         new_genome = generate_genome(rng_0_1, generator, mutate, crossover);
     }
+
     generated_genomes++;
     new_genome->set_generation_id(generated_genomes);
     islands[generation_island]->set_latest_generation_id(generated_genomes);
@@ -577,4 +578,4 @@ void IslandSpeciationStrategy::set_erased_islands_status() {
 
 RNN_Genome* IslandSpeciationStrategy::get_seed_genome() {
     return seed_genome;
-}
\ No newline at end of file
+}
diff --git a/key b/key
new file mode 100644
index 00000000..391a7405
--- /dev/null
+++ b/key
@@ -0,0 +1,7 @@
+v11 -> burn in schedule with 0.001 lr 4 mut
+v12 -> burn in schedule with 0.01 lr 4 mut
+v13 -> burn in schedule with 0.01 lr and period of 1024 and total genomes 8k 4 mut
+v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut
+v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut
+v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut
+
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index a84fb36f..d9fd2eac 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -185,13 +185,27 @@ RNN_Genome* get_seed_genome(
         );
         Log::info("Finished transfering seed genome\n");
     } else {
-        if (seed_genome == NULL) {
+        bool use_dnas_seed = argument_exists(arguments, "--use_dnas_seed");
+
+        if (!use_dnas_seed) {
             seed_genome = create_ff(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
+        } else {
+            vector<int32_t> node_types = {
+                SIMPLE_NODE,
+                UGRNN_NODE,
+                MGU_NODE,
+                GRU_NODE,
+                DELTA_NODE,
+                LSTM_NODE
+            };
+            seed_genome = create_dnas_nn(
+                time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, node_types, weight_rules
+            );
         }
     }
 
diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx
index 6bf061b9..09ea1ae8 100644
--- a/rnn/genome_property.cxx
+++ b/rnn/genome_property.cxx
@@ -10,6 +10,20 @@ GenomeProperty::GenomeProperty() {
     max_recurrent_depth = 10;
 }
 
+int32_t GenomeProperty::compute_bp_iterations(RNN_Genome* genome) {
+    // Without burn-in, or with a non-positive period (which would divide by zero below), use the fixed budget.
+    if (!use_burn_in_bp_epoch || burn_in_period <= 0) return bp_iterations;
+
+    // Completed burn-in periods for this genome, capped at max_burn_in_cycles.
+    int32_t n = genome->generation_id / burn_in_period;
+    n = n > max_burn_in_cycles ? max_burn_in_cycles : n;
+
+    // Budget is bp_epochs_start * burn_in_ratio^n, truncated toward zero.
+    float epochs = bp_epochs_start;
+    for (int i = 0; i < n; i++) epochs *= burn_in_ratio;
+    return (int32_t) epochs;
+}
+
 void GenomeProperty::generate_genome_property_from_arguments(const vector<string>& arguments) {
     get_argument(arguments, "--bp_iterations", true, bp_iterations);
     use_dropout = get_argument(arguments, "--dropout_probability", false, dropout_probability);
@@ -17,6 +31,13 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector<string
     get_argument(arguments, "--min_recurrent_depth", false, min_recurrent_depth);
     get_argument(arguments, "--max_recurrent_depth", false, max_recurrent_depth);
 
+    use_burn_in_bp_epoch = argument_exists(arguments, "--use_burn_in_bp_epoch");
+    get_argument(arguments, "--burn_in_period", false, burn_in_period);
+    get_argument(arguments, "--burn_in_cycles", false, max_burn_in_cycles);
+    get_argument(arguments, "--bp_epochs_start", false, bp_epochs_start);
+    get_argument(arguments, "--burn_in_ratio", false, burn_in_ratio);
+
+
     Log::info("Each generated genome is trained for %d epochs\n", bp_iterations);
     Log::info(
         "Use dropout is set to %s, dropout probability is %f\n", use_dropout ? "True" : "False", dropout_probability
@@ -25,10 +46,10 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector<string
 }
 
 void GenomeProperty::set_genome_properties(RNN_Genome* genome) {
-    genome->set_bp_iterations(bp_iterations);
-    if (use_dropout) {
-        genome->enable_dropout(dropout_probability);
-    }
+    genome->set_bp_iterations(compute_bp_iterations(genome));
+    
+    if (use_dropout) genome->enable_dropout(dropout_probability);
+    
     genome->normalize_type = normalize_type;
     genome->set_parameter_names(input_parameter_names, output_parameter_names);
     genome->set_normalize_bounds(normalize_type, normalize_mins, normalize_maxs, normalize_avgs, normalize_std_devs);
@@ -48,4 +69,4 @@ void GenomeProperty::get_time_series_parameters(TimeSeriesSets* time_series_sets
 
 uniform_int_distribution<int32_t> GenomeProperty::get_recurrent_depth_dist() {
     return uniform_int_distribution<int32_t>(this->min_recurrent_depth, this->max_recurrent_depth);
-}
\ No newline at end of file
+}
diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx
index 7d220ff6..130b26de 100644
--- a/rnn/genome_property.hxx
+++ b/rnn/genome_property.hxx
@@ -18,6 +18,12 @@ class GenomeProperty {
     int32_t min_recurrent_depth;
     int32_t max_recurrent_depth;
 
+    bool use_burn_in_bp_epoch = false;  // enabled by --use_burn_in_bp_epoch
+    int32_t burn_in_period = 2048;      // generation ids per burn-in cycle (--burn_in_period)
+    int32_t max_burn_in_cycles = 4;     // cap on the cycle count (--burn_in_cycles)
+    double bp_epochs_start = 0.5;       // starting epoch budget (--bp_epochs_start)
+    double burn_in_ratio = 2.0;         // per-cycle budget multiplier (--burn_in_ratio)
+
     // TimeSeriesSets *time_series_sets;
     int32_t number_inputs;
     int32_t number_outputs;
@@ -30,12 +36,16 @@ class GenomeProperty {
     map<string, double> normalize_avgs;
     map<string, double> normalize_std_devs;
 
+    int32_t compute_bp_iterations(RNN_Genome* genome);
+
    public:
     GenomeProperty();
+
     void generate_genome_property_from_arguments(const vector<string>& arguments);
     void set_genome_properties(RNN_Genome* genome);
     void get_time_series_parameters(TimeSeriesSets* time_series_sets);
+    
     uniform_int_distribution<int32_t> get_recurrent_depth_dist();
 };
 
-#endif
\ No newline at end of file
+#endif

From 5730472f918a1ebf459e68c88092bf2e37ea1ba1 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Fri, 2 Feb 2024 03:16:10 -0500
Subject: [PATCH 20/42] BP schedule

---
 dnas_control.zsh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dnas_control.zsh b/dnas_control.zsh
index 5e6982c8..88a7c882 100644
--- a/dnas_control.zsh
+++ b/dnas_control.zsh
@@ -17,7 +17,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold
+  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
   mkdir -p $output_dir
   srun -n 36 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
@@ -41,7 +41,7 @@ run_examm() {
       --max_genomes $max_genomes \
       --island_size 32 \
       --number_islands 8 \
-      --num_mutations 2 \
+      --num_mutations 4 \
       --burn_in_period 1024 \
       --use_burn_in_bp_epoch
 

From 70e79d442b1d4fea6c2c752739778f096a9b70ff Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 19 Feb 2024 14:30:54 -0500
Subject: [PATCH 21/42] moving scripts

---
 scripts/dnas/analyze.py             | 110 ++++++++++++++++++++++++++++
 scripts/dnas/analyze.zsh            |  12 +++
 scripts/dnas/aviation.zsh           |  37 ++++++++++
 scripts/dnas/coal_dnas_control.zsh  |  22 ++++++
 scripts/dnas/coal_gp.zsh            |  22 ++++++
 scripts/dnas/control.zsh            |  50 +++++++++++++
 scripts/dnas/control_cluster.zsh    |  50 +++++++++++++
 scripts/dnas/debug.zsh              |  55 ++++++++++++++
 scripts/dnas/dnas.zsh               |  55 ++++++++++++++
 scripts/dnas/dnas_cluster.zsh       |  69 +++++++++++++++++
 scripts/dnas/dnas_control.zsh       |  60 +++++++++++++++
 scripts/dnas/dnas_r2_cluster.zsh    |  67 +++++++++++++++++
 scripts/dnas/experiment.zsh         |  34 +++++++++
 scripts/dnas/gp_control.zsh         |  59 +++++++++++++++
 scripts/dnas/lib.zsh                |  65 ++++++++++++++++
 scripts/dnas/mk_jobs.zsh            |   6 ++
 scripts/dnas/populate_queue.zsh     |  29 ++++++++
 scripts/dnas/post_training.zsh      |  28 +++++++
 scripts/dnas/post_training_dnas.zsh |  29 ++++++++
 scripts/dnas/posttrain.zsh          |   3 +
 scripts/dnas/run_examm.zsh          |  25 +++++++
 scripts/dnas/run_experiments.zsh    |   4 +
 scripts/dnas/wind.zsh               |  39 ++++++++++
 23 files changed, 930 insertions(+)
 create mode 100644 scripts/dnas/analyze.py
 create mode 100644 scripts/dnas/analyze.zsh
 create mode 100644 scripts/dnas/aviation.zsh
 create mode 100644 scripts/dnas/coal_dnas_control.zsh
 create mode 100644 scripts/dnas/coal_gp.zsh
 create mode 100644 scripts/dnas/control.zsh
 create mode 100644 scripts/dnas/control_cluster.zsh
 create mode 100755 scripts/dnas/debug.zsh
 create mode 100644 scripts/dnas/dnas.zsh
 create mode 100644 scripts/dnas/dnas_cluster.zsh
 create mode 100644 scripts/dnas/dnas_control.zsh
 create mode 100644 scripts/dnas/dnas_r2_cluster.zsh
 create mode 100755 scripts/dnas/experiment.zsh
 create mode 100644 scripts/dnas/gp_control.zsh
 create mode 100644 scripts/dnas/lib.zsh
 create mode 100644 scripts/dnas/mk_jobs.zsh
 create mode 100755 scripts/dnas/populate_queue.zsh
 create mode 100755 scripts/dnas/post_training.zsh
 create mode 100755 scripts/dnas/post_training_dnas.zsh
 create mode 100644 scripts/dnas/posttrain.zsh
 create mode 100644 scripts/dnas/run_examm.zsh
 create mode 100755 scripts/dnas/run_experiments.zsh
 create mode 100644 scripts/dnas/wind.zsh

diff --git a/scripts/dnas/analyze.py b/scripts/dnas/analyze.py
new file mode 100644
index 00000000..78d51466
--- /dev/null
+++ b/scripts/dnas/analyze.py
@@ -0,0 +1,110 @@
+import pandas
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+fig, subplts = plt.subplots(6, 1)
+
+bprange = [8, 16]
+plts = {k:v for k, v in zip(bprange, subplts)}
+print(plts)
+base = plts[bprange[0]]
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    if k == bprange[0]:
+        continue
+    v.sharey(base)
+    v.sharex(base)
+
+def avg(files, slice_at=-1):
+    r = {}
+    for file in files:
+        x = []
+
+        for fold in range(8):
+            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
+            print(f"{file}/{fold} -> {len(f)}")
+            x.append(f)
+
+
+        enabled_nodes = []
+        enabled_edges = []
+        enabled_rec_edges = []
+
+        bpi_columns = []
+        mse_columns = []
+
+        minlen = 100000000
+
+        for f in x:
+            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+            mse_columns.append(f[' Best Val. MSE'].to_numpy())
+            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+            enabled_edges.append(f[' Enabled Edges'].to_numpy())
+            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+            minlen = min(minlen, len(bpi_columns[-1]))
+
+        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+
+        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+        edgesmean = np.mean(np.array(enabled_edges), axis=0)
+        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+        print(f"Nodes at end mean: {nodesmean[-1]}")
+        print(f"edges at end mean: {edgesmean[-1]}")
+        print(f"redges at end mean: {redgesmean[-1]}")
+
+
+        bpimean = np.mean(np.array(bpi_columns), axis=0)
+        msemean = np.mean(np.array(mse_columns), axis=0)
+        msestd = np.std(np.array(mse_columns), axis=0)
+        
+        r[file] = {
+            'mean_nodes': nodesmean,
+            'mean_edges': edgesmean,
+            'mean_rec_edges':redgesmean,
+            'bpi': bpimean,
+            'mean_mse': msemean,
+            'std_mse': msestd,
+        }
+    return r
+
+results = {}
+for ci in [64]:
+    results[ci] = {}
+    for bpe in bprange:
+        results[ci][bpe] = {}
+        for k in [1]:
+            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
+            x = avg([f])[f]
+            results[ci][bpe][k] = x
+            print(x)
+
+            print(x['mean_mse'] - x['std_mse'])
+            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
+            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+control_results = {}
+for bp in [8, 16]:
+    key = f"initial_integration_experiments/results/control_v7/{bp}"
+    r = avg([key])[key]
+    control_results[bp] = r
+    print(list(r.keys()))
+    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
+    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
+        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    v.legend(fontsize=12, loc="upper right")
+
+
+plt.show()
diff --git a/scripts/dnas/analyze.zsh b/scripts/dnas/analyze.zsh
new file mode 100644
index 00000000..5c2876f3
--- /dev/null
+++ b/scripts/dnas/analyze.zsh
@@ -0,0 +1,12 @@
+#!/usr/bin/zsh
+#
+for crystalize_iters in 64 128 256 512; do
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3 4 5 6 7; do
+        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+        tail -1 $output_dir/fitness_log.csv
+      done
+    done
+  done
+done
diff --git a/scripts/dnas/aviation.zsh b/scripts/dnas/aviation.zsh
new file mode 100644
index 00000000..7059da3e
--- /dev/null
+++ b/scripts/dnas/aviation.zsh
@@ -0,0 +1,37 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+
+offset=1
+
+run_examm() {
+  output_dir=results/v0/$output_params/$bp_epoch/$fold  # one tree per output parameter so runs do not overwrite each other
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+
+  touch $output_dir/completed
+}
+
+for output_params in "E1_CHT1" "Pitch"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done
diff --git a/scripts/dnas/coal_dnas_control.zsh b/scripts/dnas/coal_dnas_control.zsh
new file mode 100644
index 00000000..9543cc09
--- /dev/null
+++ b/scripts/dnas/coal_dnas_control.zsh
@@ -0,0 +1,22 @@
+#!/bin/zsh
+# sbatch ignores #SBATCH lines that follow the first command, so directives come before any code.
+#SBATCH  --ntasks=8
+#SBATCH  --exclude theocho
+#SBATCH  --time=8-00:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=TIER
+#SBATCH  -J examm_coal_gp_control
+#SBATCH  -o /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+let np=8
+source lib.zsh
+
+output_dir_prefix=/home/jak5763/exact/results/gp_control
+bp_epoch_set=(8 16 32 64 128)
+nfolds=20
+MAX_GENOMES=4000
+ISLAND_SIZE=10
+N_ISLANDS=10
+coal
diff --git a/scripts/dnas/coal_gp.zsh b/scripts/dnas/coal_gp.zsh
new file mode 100644
index 00000000..c1318793
--- /dev/null
+++ b/scripts/dnas/coal_gp.zsh
@@ -0,0 +1,22 @@
+#!/bin/zsh
+# sbatch ignores #SBATCH lines that follow the first command, so directives come before any code.
+#SBATCH  --ntasks=8
+#SBATCH  --exclude theocho
+#SBATCH  --time=8-00:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=TIER
+#SBATCH  -J examm_coal_gp_control
+#SBATCH  -o /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+let np=8
+source lib.zsh
+
+output_dir_prefix=/home/jak5763/exact/results/gp_control
+bp_epoch_set=(8)
+nfolds=20
+MAX_GENOMES=10000
+ISLAND_SIZE=10
+N_ISLANDS=10
+coal
diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/scripts/dnas/control.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+   for fold in 0 1 2 3; do
+     run_examm &
+   done
+   wait
+   for fold in 4 5 6 7; do
+     run_examm &
+   done
+   wait
+done
diff --git a/scripts/dnas/control_cluster.zsh b/scripts/dnas/control_cluster.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/scripts/dnas/control_cluster.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+   for fold in 0 1 2 3; do
+     run_examm &
+   done
+   wait
+   for fold in 4 5 6 7; do
+     run_examm &
+   done
+   wait
+done
diff --git a/scripts/dnas/debug.zsh b/scripts/dnas/debug.zsh
new file mode 100755
index 00000000..ce159c01
--- /dev/null
+++ b/scripts/dnas/debug.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 8192 \
+      --island_size 32 \
+      --number_islands 4 \
+      --stochastic \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+for crystalize_iters in 128; do
+  for bp_epoch in 8; do
+    for k in 1; do
+      for fold in 0; do
+        run_examm
+      done
+ #      wait
+ #      for fold in 4 5 6 7; do
+ #        run_examm &
+ #      done
+ #      wait
+    done
+  done
+done
diff --git a/scripts/dnas/dnas.zsh b/scripts/dnas/dnas.zsh
new file mode 100644
index 00000000..8b525b09
--- /dev/null
+++ b/scripts/dnas/dnas.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4 \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+bp_ge=(8 8192 16 4096 32 2048)
+for crystalize_iters in 256; do
+  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+    for k in 1; do
+      for fold in 0 1 2 3; do
+        run_examm &
+      done
+      wait
+      for fold in 4 5 6 7; do
+        run_examm &
+      done
+      wait
+    done
+  done
+done
diff --git a/scripts/dnas/dnas_cluster.zsh b/scripts/dnas/dnas_cluster.zsh
new file mode 100644
index 00000000..55823c0c
--- /dev/null
+++ b/scripts/dnas/dnas_cluster.zsh
@@ -0,0 +1,69 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --use_dnas_seed true \
+      --use_burn_in_bp_epoch \
+      --burn_in_period 1024 \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for crystalize_iters in 512; do
+    for k in 1; do
+      for fold in $(seq 0 19); do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/scripts/dnas/dnas_control.zsh b/scripts/dnas/dnas_control.zsh
new file mode 100644
index 00000000..88a7c882
--- /dev/null
+++ b/scripts/dnas/dnas_control.zsh
@@ -0,0 +1,60 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 4 \
+      --burn_in_period 1024 \
+      --use_burn_in_bp_epoch
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for k in 1; do
+    for fold in $(seq 0 19); do
+      run_examm
+    done
+  done
+}
+
+run_group
diff --git a/scripts/dnas/dnas_r2_cluster.zsh b/scripts/dnas/dnas_r2_cluster.zsh
new file mode 100644
index 00000000..a8bce387
--- /dev/null
+++ b/scripts/dnas/dnas_r2_cluster.zsh
@@ -0,0 +1,67 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v9/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.001 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 16 \
+      --number_islands 8 \
+      --num_mutations 4 \
+      --use_dnas_seed true \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for crystalize_iters in 1000000; do
+    for k in 1; do
+      for fold in $(seq 0 19); do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/scripts/dnas/experiment.zsh b/scripts/dnas/experiment.zsh
new file mode 100755
index 00000000..32a1db55
--- /dev/null
+++ b/scripts/dnas/experiment.zsh
@@ -0,0 +1,34 @@
+#!/bin/zsh
+#SBATCH -n 1
+#SBATCH -A examm
+#SBATCH --partition=tier3
+#SBATCH -o /home/jak5763/exact/aistats/slurm_out/%x.%j.out
+#SBATCH -e /home/jak5763/exact/aistats/slurm_out/%x.%j.err
+#SBATCH --mem=10G
+
+spack load gcc
+spack load openmpi
+spack load /5aoa7oi
+spack load /dd7nzzh
+
+# Post-train the last-sorting genome of runs 0..19; control, maxt, crystal and bp come from the environment.
+for i in $(seq 0 19); do
+  export i=$i
+  export output_dir=/home/jak5763/exact/aistats/$control/maxt$maxt/crystal$crystal/bp$bp/$i
+
+  if [ "$control" = "control" ]; then
+      node_types="simple UGRNN MGU GRU delta LSTM"
+  else
+      node_types="DNAS"
+  fi
+  echo $node_types $control
+  export node_types=$node_types
+
+  # ./run_examm.zsh
+  # (N) lets a run without genome files expand to nothing instead of failing the glob.
+  best_genome_file=( $output_dir/rnn_genome_*.bin(N[-1]) )
+  (( ${#best_genome_file} )) || { echo "no rnn_genome_*.bin in $output_dir; skipping run $i" >&2; continue; }
+  export BP_ITERS=1
+  export GENOME=$best_genome_file
+  ./post_training.zsh
+done
diff --git a/scripts/dnas/gp_control.zsh b/scripts/dnas/gp_control.zsh
new file mode 100644
index 00000000..049e9750
--- /dev/null
+++ b/scripts/dnas/gp_control.zsh
@@ -0,0 +1,59 @@
+#!/bin/zsh
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+run_examm() { # One 32-process mpirun of examm_mpi for the current output_params / bp_epoch / fold.
+  output_dir=test_results/v0/bp_$bp_epoch/output_$output_params/$fold  # keyed by output as well, so groups never share a directory
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames ${=training_filenames} \
+      --test_filenames ${=test_filenames} \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names $output_params \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes $MAX_GENOMES \
+      --island_size $ISLAND_SIZE \
+      --number_islands $N_ISLANDS
+  # Marker file touched unconditionally once mpirun returns (its exit status is not checked).
+  touch $output_dir/completed
+}
+
+run_group() {
+  for output_params in $OUTPUTS; do
+    for bp_epoch in 2 4 8 16 32; do
+      for fold in 0 1 2 3 4 5 6 7 8 9; do
+        run_examm
+      done
+    done
+  done
+}
+
+INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
+training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+run_group
+
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUTS=("E1_CHT1" "Pitch")
+training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+run_group
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+OUTPUTS=("Cm_avg" "P_avg")
+training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+run_group
diff --git a/scripts/dnas/lib.zsh b/scripts/dnas/lib.zsh
new file mode 100644
index 00000000..49ebc581
--- /dev/null
+++ b/scripts/dnas/lib.zsh
@@ -0,0 +1,65 @@
+#!/bin/zsh
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+run_examm() { # Create the output directory for one (output_params, bp_epoch, fold) run and print its srun command line.
+  output_dir=$output_dir_prefix/bp_$bp_epoch/output_$output_params/$fold
+  mkdir -p $output_dir
+  echo srun -n $np Release/mpi/examm_mpi \
+      --training_filenames ${=training_filenames} \
+      --test_filenames ${=test_filenames} \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names $output_params \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes $MAX_GENOMES \
+      --island_size $ISLAND_SIZE \
+      --number_islands $N_ISLANDS
+  # NOTE(review): the leading "echo" only prints the srun command, yet "completed" is still touched below; confirm the dry run is intended.
+  touch $output_dir/completed
+}
+
+run_group() {
+  for output_params in $OUTPUTS; do
+    for bp_epoch in $bp_epoch_set; do
+      for fold in $(seq 1 $nfolds); do
+        run_examm
+      done
+    done
+  done
+}
+
+coal() {
+    INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
+    training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+    test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+    OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+    run_group
+}
+
+aviation() {
+    INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+    OUTPUTS=("E1_CHT1" "Pitch")
+    training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+    test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+    run_group
+}
+
+wind() {
+    INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+    OUTPUTS=("Cm_avg" "P_avg")
+    training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+    test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+    run_group
+}
+
diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh
new file mode 100644
index 00000000..38a5526c
--- /dev/null
+++ b/scripts/dnas/mk_jobs.zsh
@@ -0,0 +1,6 @@
+bp_ge=(8 8192 16 4096 32 2048 64 1024)
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh
+  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh
+  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh
+done
diff --git a/scripts/dnas/populate_queue.zsh b/scripts/dnas/populate_queue.zsh
new file mode 100755
index 00000000..43a09dbb
--- /dev/null
+++ b/scripts/dnas/populate_queue.zsh
@@ -0,0 +1,29 @@
+#!/bin/zsh
+export INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+export OUTPUT_PARAMETERS='E1_EGT1'
+
+export offset=1
+export k=1
+
+push_job() {
+  export maxt=$maxt
+  export crystal=$crystal
+  export bp=$bp
+  export control=$control
+  sbatch -J $control.maxt$maxt.cr$crystal.bp$bp ./experiment.zsh
+
+}
+
+export control="exp"
+for maxt in 1.66 1.33 1.0; do
+  for crystal in 64 128 256; do
+    for bp in 4 8 16; do
+      push_job
+    done
+  done
+done
+
+export control="control"
+for bp in 4 8 16; do
+  push_job
+done
diff --git a/scripts/dnas/post_training.zsh b/scripts/dnas/post_training.zsh
new file mode 100755
index 00000000..38c2d39d
--- /dev/null
+++ b/scripts/dnas/post_training.zsh
@@ -0,0 +1,28 @@
+#!/bin/zsh
+offset=1
+
+post_training() {
+    # Run train_rnn on the genome in $GENOME; INPUT_PARAMETERS, OUTPUT_PARAMETERS, BP_ITERS, output_dir, crystal and k come from the environment.
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset $offset \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $output_dir \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 100 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $crystal \
+        --dnas_k $k
+
+}
+
+post_training
diff --git a/scripts/dnas/post_training_dnas.zsh b/scripts/dnas/post_training_dnas.zsh
new file mode 100755
index 00000000..1c226178
--- /dev/null
+++ b/scripts/dnas/post_training_dnas.zsh
@@ -0,0 +1,29 @@
+#!/bin/zsh
+offset=1
+
+post_training() {
+
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset 1 \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $OUTPUT_DIRECTORY \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 1000 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $CRYSTALIZE_ITERS \
+        --dnas_k $k
+
+      tail -1 $OUTPUT_DIRECTORY/post_training.csv
+}
+
+post_training
diff --git a/scripts/dnas/posttrain.zsh b/scripts/dnas/posttrain.zsh
new file mode 100644
index 00000000..cc54a2eb
--- /dev/null
+++ b/scripts/dnas/posttrain.zsh
@@ -0,0 +1,3 @@
+#!/bin/zsh
+
+
diff --git a/scripts/dnas/run_examm.zsh b/scripts/dnas/run_examm.zsh
new file mode 100644
index 00000000..77d2893f
--- /dev/null
+++ b/scripts/dnas/run_examm.zsh
@@ -0,0 +1,25 @@
+#!/bin/zsh
+
+output_dir=results/v0/$bp_epoch/$fold
+mkdir -p $output_dir
+
+mpirun -np 32 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset $offset \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=output_params} \
+    --bp_iterations $bp_epoch \
+    --normalize min_max \
+    --max_recurrent_depth 1 \
+    --output_directory $output_dir \
+    --log_filename fitness.csv \
+    --learning_rate 0.01 \
+    --std_message_level INFO \
+    --file_message_level INFO \
+    --max_genomes 4000 \
+    --island_size 32 \
+    --number_islands 4
+
+touch $output_dir/completed
+
diff --git a/scripts/dnas/run_experiments.zsh b/scripts/dnas/run_experiments.zsh
new file mode 100755
index 00000000..7dd8e956
--- /dev/null
+++ b/scripts/dnas/run_experiments.zsh
@@ -0,0 +1,4 @@
+#!/bin/zsh
+# NOTE(review): both paths below point into initial_integration_experiments/, not scripts/dnas/ where this script lives; confirm they still exist.
+initial_integration_experiments/control.zsh
+initial_integration_experiments/dnas.zsh
diff --git a/scripts/dnas/wind.zsh b/scripts/dnas/wind.zsh
new file mode 100644
index 00000000..7e68f482
--- /dev/null
+++ b/scripts/dnas/wind.zsh
@@ -0,0 +1,39 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+
+
+offset=1
+
+run_examm() { # One 32-process mpirun of examm_mpi on the wind-turbine files for the current output_params / bp_epoch / fold.
+  output_dir=results/v0/bp_$bp_epoch/output_$output_params/$fold  # keyed by output as well, so Cm_avg and P_avg runs do not share a directory
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
+      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+  # Marker file touched unconditionally once mpirun returns (its exit status is not checked).
+  touch $output_dir/completed
+}
+
+
+for output_params in "Cm_avg" "P_avg"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done

From 4c3ebfc64a020a4ec0ae343a1f328dfc14715c64 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@Joshuas-MacBook-Pro.local>
Date: Mon, 19 Feb 2024 14:35:20 -0500
Subject: [PATCH 22/42] removed old fileS

---
 initial_integration_experiments/analyze.py    | 110 ------------------
 initial_integration_experiments/analyze.zsh   |  12 --
 initial_integration_experiments/aviation.zsh  |  37 ------
 initial_integration_experiments/control.zsh   |  50 --------
 initial_integration_experiments/debug.zsh     |  55 ---------
 initial_integration_experiments/dnas.zsh      |  55 ---------
 .../gp_control.zsh                            |  59 ----------
 .../post_training_dnas.zsh                    |  29 -----
 initial_integration_experiments/posttrain.zsh |   3 -
 initial_integration_experiments/run_examm.zsh |  25 ----
 .../run_experiments.zsh                       |   4 -
 initial_integration_experiments/wind.zsh      |  39 -------
 12 files changed, 478 deletions(-)
 delete mode 100644 initial_integration_experiments/analyze.py
 delete mode 100644 initial_integration_experiments/analyze.zsh
 delete mode 100644 initial_integration_experiments/aviation.zsh
 delete mode 100644 initial_integration_experiments/control.zsh
 delete mode 100755 initial_integration_experiments/debug.zsh
 delete mode 100644 initial_integration_experiments/dnas.zsh
 delete mode 100644 initial_integration_experiments/gp_control.zsh
 delete mode 100755 initial_integration_experiments/post_training_dnas.zsh
 delete mode 100644 initial_integration_experiments/posttrain.zsh
 delete mode 100644 initial_integration_experiments/run_examm.zsh
 delete mode 100755 initial_integration_experiments/run_experiments.zsh
 delete mode 100644 initial_integration_experiments/wind.zsh

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
deleted file mode 100644
index 78d51466..00000000
--- a/initial_integration_experiments/analyze.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas
-
-import numpy as np
-
-import matplotlib.pyplot as plt
-
-fig, subplts = plt.subplots(6, 1)
-
-bprange = [8, 16]
-plts = {k:v for k, v in zip(bprange, subplts)}
-print(plts)
-base = plts[bprange[0]]
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    if k == bprange[0]:
-        continue
-    v.sharey(base)
-    v.sharex(base)
-
-def avg(files, slice_at=-1):
-    r = {}
-    for file in files:
-        x = []
-
-        for fold in range(8):
-            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
-            print(f"{file}/{fold} -> {len(f)}")
-            x.append(f)
-
-
-        enabled_nodes = []
-        enabled_edges = []
-        enabled_rec_edges = []
-
-        bpi_columns = []
-        mse_columns = []
-
-        minlen = 100000000
-
-        for f in x:
-            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-            mse_columns.append(f[' Best Val. MSE'].to_numpy())
-            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-            enabled_edges.append(f[' Enabled Edges'].to_numpy())
-            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-            minlen = min(minlen, len(bpi_columns[-1]))
-
-        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-        edgesmean = np.mean(np.array(enabled_edges), axis=0)
-        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-        print(f"Nodes at end mean: {nodesmean[-1]}")
-        print(f"edges at end mean: {edgesmean[-1]}")
-        print(f"redges at end mean: {redgesmean[-1]}")
-
-
-        bpimean = np.mean(np.array(bpi_columns), axis=0)
-        msemean = np.mean(np.array(mse_columns), axis=0)
-        msestd = np.std(np.array(mse_columns), axis=0)
-        
-        r[file] = {
-            'mean_nodes': nodesmean,
-            'mean_edges': edgesmean,
-            'mean_rec_edges':redgesmean,
-            'bpi': bpimean,
-            'mean_mse': msemean,
-            'std_mse': msestd,
-        }
-    return r
-
-results = {}
-for ci in [64]:
-    results[ci] = {}
-    for bpe in bprange:
-        results[ci][bpe] = {}
-        for k in [1]:
-            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
-            x = avg([f])[f]
-            results[ci][bpe][k] = x
-            print(x)
-
-            print(x['mean_mse'] - x['std_mse'])
-            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
-            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-control_results = {}
-for bp in [8, 16]:
-    key = f"initial_integration_experiments/results/control_v7/{bp}"
-    r = avg([key])[key]
-    control_results[bp] = r
-    print(list(r.keys()))
-    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
-    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
-        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    v.legend(fontsize=12, loc="upper right")
-
-
-plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
deleted file mode 100644
index 5c2876f3..00000000
--- a/initial_integration_experiments/analyze.zsh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/zsh
-#
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
-    for k in 1; do
-      for fold in 0 1 2 3 4 5 6 7; do
-        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
-        tail -1 $output_dir/fitness_log.csv
-      done
-    done
-  done
-done
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
deleted file mode 100644
index 7059da3e..00000000
--- a/initial_integration_experiments/aviation.zsh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-for output_params in "E1_CHT1" "Pitch"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
deleted file mode 100644
index a848302b..00000000
--- a/initial_integration_experiments/control.zsh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
-}
-
-bp_ge=(8 8192 16 4096 32 2048)
-
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
-   done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
-done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
deleted file mode 100755
index ce159c01..00000000
--- a/initial_integration_experiments/debug.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes 8192 \
-      --island_size 32 \
-      --number_islands 4 \
-      --stochastic \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-for crystalize_iters in 128; do
-  for bp_epoch in 8; do
-    for k in 1; do
-      for fold in 0; do
-        run_examm
-      done
- #      wait
- #      for fold in 4 5 6 7; do
- #        run_examm &
- #      done
- #      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
deleted file mode 100644
index 8b525b09..00000000
--- a/initial_integration_experiments/dnas.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-bp_ge=(8 8192 16 4096 32 2048)
-for crystalize_iters in 256; do
-  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-    for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
deleted file mode 100644
index 049e9750..00000000
--- a/initial_integration_experiments/gp_control.zsh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/zsh
-
-offset=1
-MAX_GENOMES=10
-N_ISLANDS=4
-ISLAND_SIZE=32
-
-run_examm() {
-  output_dir=test_results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames ${=training_filenames} \
-      --test_filenames ${=test_filenames} \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names $output_params \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes $MAX_GENOMES \
-      --island_size $ISLAND_SIZE \
-      --number_islands $N_ISLANDS
-
-  touch $output_dir/completed
-}
-
-run_group() {
-  for output_params in $OUTPUTS; do
-    for bp_epoch in 2 4 8 16 32; do
-      for fold in 0 1 2 3 4 5 6 7 8 9; do
-        run_examm
-      done
-    done
-  done
-}
-
-INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
-training_filenames=(datasets/2018_coal/burner_[0-9].csv)
-test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
-OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
-run_group
-
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUTS=("E1_CHT1" "Pitch")
-training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
-run_group
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-OUTPUTS=("Cm_avg" "P_avg")
-training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
-test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
-run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
deleted file mode 100755
index 1c226178..00000000
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/zsh
-offset=1
-
-post_training() {
-
-    echo "genome = $GENOME"
-    Release/rnn_examples/train_rnn \
-        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-        --time_offset 1 \
-        --input_parameter_names ${=INPUT_PARAMETERS} \
-        --output_parameter_names ${=OUTPUT_PARAMETERS} \
-        --bp_iterations $BP_ITERS \
-        --stochastic \
-        --normalize min_max \
-        --genome_file $GENOME \
-        --output_directory $OUTPUT_DIRECTORY \
-        --log_filename post_training.csv \
-        --learning_rate 0.01 \
-        --weight_update adagrad \
-        --train_sequence_length 1000 \
-        --validation_sequence_length 100 \
-        --crystalize_iters $CRYSTALIZE_ITERS \
-        --dnas_k $k
-
-      tail -1 $OUTPUT_DIRECTORY/post_training.csv
-}
-
-post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
deleted file mode 100644
index cc54a2eb..00000000
--- a/initial_integration_experiments/posttrain.zsh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/zsh
-
-
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
deleted file mode 100644
index 77d2893f..00000000
--- a/initial_integration_experiments/run_examm.zsh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/zsh
-
-output_dir=results/v0/$bp_epoch/$fold
-mkdir -p $output_dir
-
-mpirun -np 32 Release/mpi/examm_mpi \
-    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-    --time_offset $offset \
-    --input_parameter_names ${=INPUT_PARAMETERS} \
-    --output_parameter_names ${=output_params} \
-    --bp_iterations $bp_epoch \
-    --normalize min_max \
-    --max_recurrent_depth 1 \
-    --output_directory $output_dir \
-    --log_filename fitness.csv \
-    --learning_rate 0.01 \
-    --std_message_level INFO \
-    --file_message_level INFO \
-    --max_genomes 4000 \
-    --island_size 32 \
-    --number_islands 4
-
-touch $output_dir/completed
-
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
deleted file mode 100755
index 7dd8e956..00000000
--- a/initial_integration_experiments/run_experiments.zsh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh
-
-initial_integration_experiments/control.zsh
-initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
deleted file mode 100644
index 7e68f482..00000000
--- a/initial_integration_experiments/wind.zsh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
-      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-
-for output_params in "Cm_avg" "P_avg"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done

From c0b9e41e02ecdcaa98a501032f466d73ddedd42a Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 14:36:23 -0500
Subject: [PATCH 23/42] removed old files

---
 initial_integration_experiments/analyze.py    | 110 ------------------
 initial_integration_experiments/analyze.zsh   |  12 --
 initial_integration_experiments/aviation.zsh  |  37 ------
 initial_integration_experiments/control.zsh   |  50 --------
 initial_integration_experiments/debug.zsh     |  55 ---------
 initial_integration_experiments/dnas.zsh      |  55 ---------
 .../gp_control.zsh                            |  59 ----------
 .../post_training_dnas.zsh                    |  29 -----
 initial_integration_experiments/posttrain.zsh |   3 -
 initial_integration_experiments/run_examm.zsh |  25 ----
 .../run_experiments.zsh                       |   4 -
 initial_integration_experiments/wind.zsh      |  39 -------
 12 files changed, 478 deletions(-)
 delete mode 100644 initial_integration_experiments/analyze.py
 delete mode 100644 initial_integration_experiments/analyze.zsh
 delete mode 100644 initial_integration_experiments/aviation.zsh
 delete mode 100644 initial_integration_experiments/control.zsh
 delete mode 100755 initial_integration_experiments/debug.zsh
 delete mode 100644 initial_integration_experiments/dnas.zsh
 delete mode 100644 initial_integration_experiments/gp_control.zsh
 delete mode 100755 initial_integration_experiments/post_training_dnas.zsh
 delete mode 100644 initial_integration_experiments/posttrain.zsh
 delete mode 100644 initial_integration_experiments/run_examm.zsh
 delete mode 100755 initial_integration_experiments/run_experiments.zsh
 delete mode 100644 initial_integration_experiments/wind.zsh

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
deleted file mode 100644
index 78d51466..00000000
--- a/initial_integration_experiments/analyze.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas
-
-import numpy as np
-
-import matplotlib.pyplot as plt
-
-fig, subplts = plt.subplots(6, 1)
-
-bprange = [8, 16]
-plts = {k:v for k, v in zip(bprange, subplts)}
-print(plts)
-base = plts[bprange[0]]
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    if k == bprange[0]:
-        continue
-    v.sharey(base)
-    v.sharex(base)
-
-def avg(files, slice_at=-1):
-    r = {}
-    for file in files:
-        x = []
-
-        for fold in range(8):
-            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
-            print(f"{file}/{fold} -> {len(f)}")
-            x.append(f)
-
-
-        enabled_nodes = []
-        enabled_edges = []
-        enabled_rec_edges = []
-
-        bpi_columns = []
-        mse_columns = []
-
-        minlen = 100000000
-
-        for f in x:
-            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-            mse_columns.append(f[' Best Val. MSE'].to_numpy())
-            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-            enabled_edges.append(f[' Enabled Edges'].to_numpy())
-            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-            minlen = min(minlen, len(bpi_columns[-1]))
-
-        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-        edgesmean = np.mean(np.array(enabled_edges), axis=0)
-        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-        print(f"Nodes at end mean: {nodesmean[-1]}")
-        print(f"edges at end mean: {edgesmean[-1]}")
-        print(f"redges at end mean: {redgesmean[-1]}")
-
-
-        bpimean = np.mean(np.array(bpi_columns), axis=0)
-        msemean = np.mean(np.array(mse_columns), axis=0)
-        msestd = np.std(np.array(mse_columns), axis=0)
-        
-        r[file] = {
-            'mean_nodes': nodesmean,
-            'mean_edges': edgesmean,
-            'mean_rec_edges':redgesmean,
-            'bpi': bpimean,
-            'mean_mse': msemean,
-            'std_mse': msestd,
-        }
-    return r
-
-results = {}
-for ci in [64]:
-    results[ci] = {}
-    for bpe in bprange:
-        results[ci][bpe] = {}
-        for k in [1]:
-            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
-            x = avg([f])[f]
-            results[ci][bpe][k] = x
-            print(x)
-
-            print(x['mean_mse'] - x['std_mse'])
-            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
-            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-control_results = {}
-for bp in [8, 16]:
-    key = f"initial_integration_experiments/results/control_v7/{bp}"
-    r = avg([key])[key]
-    control_results[bp] = r
-    print(list(r.keys()))
-    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
-    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
-        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    v.legend(fontsize=12, loc="upper right")
-
-
-plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
deleted file mode 100644
index 5c2876f3..00000000
--- a/initial_integration_experiments/analyze.zsh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/zsh
-#
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
-    for k in 1; do
-      for fold in 0 1 2 3 4 5 6 7; do
-        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
-        tail -1 $output_dir/fitness_log.csv
-      done
-    done
-  done
-done
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
deleted file mode 100644
index 7059da3e..00000000
--- a/initial_integration_experiments/aviation.zsh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-for output_params in "E1_CHT1" "Pitch"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
deleted file mode 100644
index a848302b..00000000
--- a/initial_integration_experiments/control.zsh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
-}
-
-bp_ge=(8 8192 16 4096 32 2048)
-
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
-   done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
-done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
deleted file mode 100755
index ce159c01..00000000
--- a/initial_integration_experiments/debug.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes 8192 \
-      --island_size 32 \
-      --number_islands 4 \
-      --stochastic \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-for crystalize_iters in 128; do
-  for bp_epoch in 8; do
-    for k in 1; do
-      for fold in 0; do
-        run_examm
-      done
- #      wait
- #      for fold in 4 5 6 7; do
- #        run_examm &
- #      done
- #      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
deleted file mode 100644
index 8b525b09..00000000
--- a/initial_integration_experiments/dnas.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-bp_ge=(8 8192 16 4096 32 2048)
-for crystalize_iters in 256; do
-  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-    for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
deleted file mode 100644
index 049e9750..00000000
--- a/initial_integration_experiments/gp_control.zsh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/zsh
-
-offset=1
-MAX_GENOMES=10
-N_ISLANDS=4
-ISLAND_SIZE=32
-
-run_examm() {
-  output_dir=test_results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames ${=training_filenames} \
-      --test_filenames ${=test_filenames} \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names $output_params \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes $MAX_GENOMES \
-      --island_size $ISLAND_SIZE \
-      --number_islands $N_ISLANDS
-
-  touch $output_dir/completed
-}
-
-run_group() {
-  for output_params in $OUTPUTS; do
-    for bp_epoch in 2 4 8 16 32; do
-      for fold in 0 1 2 3 4 5 6 7 8 9; do
-        run_examm
-      done
-    done
-  done
-}
-
-INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
-training_filenames=(datasets/2018_coal/burner_[0-9].csv)
-test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
-OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
-run_group
-
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUTS=("E1_CHT1" "Pitch")
-training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
-run_group
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-OUTPUTS=("Cm_avg" "P_avg")
-training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
-test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
-run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
deleted file mode 100755
index 1c226178..00000000
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/zsh
-offset=1
-
-post_training() {
-
-    echo "genome = $GENOME"
-    Release/rnn_examples/train_rnn \
-        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-        --time_offset 1 \
-        --input_parameter_names ${=INPUT_PARAMETERS} \
-        --output_parameter_names ${=OUTPUT_PARAMETERS} \
-        --bp_iterations $BP_ITERS \
-        --stochastic \
-        --normalize min_max \
-        --genome_file $GENOME \
-        --output_directory $OUTPUT_DIRECTORY \
-        --log_filename post_training.csv \
-        --learning_rate 0.01 \
-        --weight_update adagrad \
-        --train_sequence_length 1000 \
-        --validation_sequence_length 100 \
-        --crystalize_iters $CRYSTALIZE_ITERS \
-        --dnas_k $k
-
-      tail -1 $OUTPUT_DIRECTORY/post_training.csv
-}
-
-post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
deleted file mode 100644
index cc54a2eb..00000000
--- a/initial_integration_experiments/posttrain.zsh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/zsh
-
-
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
deleted file mode 100644
index 77d2893f..00000000
--- a/initial_integration_experiments/run_examm.zsh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/zsh
-
-output_dir=results/v0/$bp_epoch/$fold
-mkdir -p $output_dir
-
-mpirun -np 32 Release/mpi/examm_mpi \
-    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-    --time_offset $offset \
-    --input_parameter_names ${=INPUT_PARAMETERS} \
-    --output_parameter_names ${=output_params} \
-    --bp_iterations $bp_epoch \
-    --normalize min_max \
-    --max_recurrent_depth 1 \
-    --output_directory $output_dir \
-    --log_filename fitness.csv \
-    --learning_rate 0.01 \
-    --std_message_level INFO \
-    --file_message_level INFO \
-    --max_genomes 4000 \
-    --island_size 32 \
-    --number_islands 4
-
-touch $output_dir/completed
-
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
deleted file mode 100755
index 7dd8e956..00000000
--- a/initial_integration_experiments/run_experiments.zsh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh
-
-initial_integration_experiments/control.zsh
-initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
deleted file mode 100644
index 7e68f482..00000000
--- a/initial_integration_experiments/wind.zsh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
-      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-
-for output_params in "Cm_avg" "P_avg"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done

From 79df69ab941e0d6783d20ba31d1929624d9601b7 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 14:37:52 -0500
Subject: [PATCH 24/42] Fixed bug caused by accidental paste

---
 examm/island_speciation_strategy.cxx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index d8eaabab..b0a7b5e0 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island(
         Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str());
         exit(1);
     }
-    return new_genome17731515;
+    return new_genome;
 }
 
 RNN_Genome* IslandSpeciationStrategy::generate_genome(

From 68752460cb9a79e554212b3f0c9a97305defdfc4 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 15:59:40 -0500
Subject: [PATCH 25/42] Synchronous EXAMM flag added --synchronous

---
 mpi/examm_mpi.cxx | 75 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index 227c3a85..e350be0a 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -114,10 +114,62 @@ void receive_terminate_message(int32_t source) {
     MPI_Recv(terminate_message, 1, MPI_INT, source, TERMINATE_TAG, MPI_COMM_WORLD, &status);
 }
 
-void master(int32_t max_rank) {
-    // the "main" id will have already been set by the main function so we do not need to re-set it here
-    Log::debug("MAX int32_t: %d\n", numeric_limits<int32_t>::max());
+void master_sync(int32_t max_rank) {
+    max_rank -= 1;
+    int32_t generation = 0;
+    while (true) {
+    
+        // Wait for N work requests
+        int32_t nreqs = 0;
+        while (nreqs < max_rank) {
+            MPI_Status status;
+            MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
+
+            int32_t source = status.MPI_SOURCE;
+            int32_t tag = status.MPI_TAG;
+            // Log::info("probe returned message from: %d with tag: %d\n", source, tag);
+            
+            if (tag == WORK_REQUEST_TAG) {
+                receive_work_request(source);
+                nreqs++;
+            } else if (tag == GENOME_LENGTH_TAG) {
+                Log::debug("received genome from: %d\n", source);
+                RNN_Genome* genome = receive_genome_from(source);
+                
+                examm->insert_genome(genome);
+
+                // delete the genome as it won't be used again, a copy was inserted
+                delete genome;
+            } else {
+                Log::fatal("ERROR: received message from %d with unknown tag: %d", source, tag);
+                MPI_Abort(MPI_COMM_WORLD, 1);
+            }
+        }
 
+        vector<RNN_Genome *> genomes(max_rank);
+        for (int32_t i = 1; i <= max_rank; i++) {
+            RNN_Genome* genome = examm->generate_genome();
+            if (genome == NULL)
+                break;
+            genomes[i - 1] = genome;
+        }
+
+        if (genomes.size() != max_rank) {
+            break;
+        }
+
+        for (int i = 1; i <= max_rank; i++) {
+            send_genome_to(i, genomes[i - 1]);
+            delete genomes[i - 1];
+        }
+    }
+  
+    for (int i = 1; i <= max_rank; i++) {
+        send_terminate_message(i);
+    }
+}
+
+void master(int32_t max_rank) {
     int32_t terminates_sent = 0;
 
     while (true) {
@@ -134,12 +186,7 @@ void master(int32_t max_rank) {
         if (tag == WORK_REQUEST_TAG) {
             receive_work_request(source);
 
-            // if (transfer_learning_version.compare("v3") == 0 || transfer_learning_version.compare("v1+v3") == 0) {
-            //     seed_stirs = 3;
-            // }
-            examm_mutex.lock();
             RNN_Genome* genome = examm->generate_genome();
-            examm_mutex.unlock();
 
             if (genome == NULL) {  // search was completed if it returns NULL for an individual
                 // send terminate message
@@ -167,9 +214,7 @@ void master(int32_t max_rank) {
             Log::debug("received genome from: %d\n", source);
             RNN_Genome* genome = receive_genome_from(source);
 
-            examm_mutex.lock();
             examm->insert_genome(genome);
-            examm_mutex.unlock();
 
             // delete the genome as it won't be used again, a copy was inserted
             delete genome;
@@ -264,12 +309,20 @@ int main(int argc, char** argv) {
 
     RNN_Genome* seed_genome = get_seed_genome(arguments, time_series_sets, weight_rules);
 
+    bool synchronous = argument_exists(arguments, "--synchronous");
+    Log::warning("synchronous? %d\n", synchronous); 
+
     Log::clear_rank_restriction();
 
     if (rank == 0) {
         write_time_series_to_file(arguments, time_series_sets);
         examm = generate_examm_from_arguments(arguments, time_series_sets, weight_rules, seed_genome);
-        master(max_rank);
+        
+        if (synchronous) {
+            master_sync(max_rank);
+        } else {
+            master(max_rank);
+        }
     } else {
         worker(rank);
     }

From a6006064aaeddb6160bd665d5208a4e135b2f5af Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Fri, 23 Feb 2024 21:47:51 -0500
Subject: [PATCH 26/42] Adding additional log information

---
 examm/examm.cxx                      | 26 ++++++++++++++++++-------
 examm/examm.hxx                      |  4 +++-
 examm/island_speciation_strategy.cxx | 29 +++++++++++++++++++++++-----
 examm/island_speciation_strategy.hxx | 16 ++++++++++-----
 examm/neat_speciation_strategy.cxx   | 10 +++++-----
 examm/neat_speciation_strategy.hxx   | 10 +++++-----
 examm/speciation_strategy.hxx        | 12 +++++++-----
 scripts/dnas/control.zsh             | 21 +++++++++-----------
 8 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/examm/examm.cxx b/examm/examm.cxx
index 1e1c2314..a90034f2 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -95,8 +95,8 @@ void EXAMM::generate_log() {
         Log::info("Generating fitness log\n");
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
-        (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled "
-                       "Edges, Enabled Rec. Edges";
+        (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled"
+                       "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
 
@@ -151,7 +151,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position
     }
 }
 
-void EXAMM::update_log() {
+void EXAMM::update_log(RNN_Genome *genome) {
     if (log_file != NULL) {
         // make sure the log file is still good
         if (!log_file->good()) {
@@ -193,8 +193,12 @@ void EXAMM::update_log() {
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
                     << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << ","
                     << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
-                    << best_genome->get_enabled_recurrent_edge_count()
-                    << speciation_strategy->get_strategy_information_values() << endl;
+                    << best_genome->get_enabled_recurrent_edge_count() << ","
+                    << genome->best_validation_mse << ","
+                    << pre_insert_best_mse << ","
+                    << (int32_t) (last_genome_inserted ? 1 : 0) << ","
+                    << genome->get_number_weights()
+                    << speciation_strategy->get_strategy_information_values(genome) << endl;
         Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }
 }
@@ -243,17 +247,25 @@ bool EXAMM::insert_genome(RNN_Genome* genome) {
 
     // updates EXAMM's mapping of which genomes have been generated by what
     genome->update_generation_map(generated_from_map);
+    pre_insert_best_mse = this->get_best_fitness();
+    
     int32_t insert_position = speciation_strategy->insert_genome(genome);
+    
     // write this genome to disk if it was a new best found genome
     if (insert_position == 0) {
         // genome->normalize_type = normalize_type;
         genome->write_graphviz(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".gv");
         genome->write_to_file(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".bin");
     }
+    
+    last_genome_inserted = insert_position >= 0;
+    
     speciation_strategy->print();
+    
     update_op_log_statistics(genome, insert_position);
-    update_log();
-    return insert_position >= 0;
+    update_log(genome);
+
+    return last_genome_inserted;
 }
 
 RNN_Genome* EXAMM::generate_genome() {
diff --git a/examm/examm.hxx b/examm/examm.hxx
index c0c0ee03..a95d8af4 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -73,6 +73,8 @@ class EXAMM {
     string output_directory;
     ofstream* log_file;
     ofstream* op_log_file;
+    double pre_insert_best_mse = 1000000;
+    bool last_genome_inserted = false;
 
     std::chrono::time_point<std::chrono::system_clock> startClock;
 
@@ -87,7 +89,7 @@ class EXAMM {
     ~EXAMM();
 
     void print();
-    void update_log();
+    void update_log(RNN_Genome *genome);
 
     void set_possible_node_types(vector<string> possible_node_type_strings);
 
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index b0a7b5e0..2e7a91be 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -100,12 +100,12 @@ int32_t IslandSpeciationStrategy::get_evaluated_genomes() const {
     return evaluated_genomes;
 }
 
-RNN_Genome* IslandSpeciationStrategy::get_best_genome() {
+RNN_Genome* IslandSpeciationStrategy::get_best_genome() const {
     // the global_best_genome is updated every time a genome is inserted
     return global_best_genome;
 }
 
-RNN_Genome* IslandSpeciationStrategy::get_worst_genome() {
+RNN_Genome* IslandSpeciationStrategy::get_worst_genome() const {
     int32_t worst_genome_island = -1;
     double worst_fitness = -EXAMM_MAX_DOUBLE;
 
@@ -126,7 +126,7 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() {
     }
 }
 
-double IslandSpeciationStrategy::get_best_fitness() {
+double IslandSpeciationStrategy::get_best_fitness() const {
     RNN_Genome* best_genome = get_best_genome();
     if (best_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -135,7 +135,7 @@ double IslandSpeciationStrategy::get_best_fitness() {
     }
 }
 
-double IslandSpeciationStrategy::get_worst_fitness() {
+double IslandSpeciationStrategy::get_worst_fitness() const {
     RNN_Genome* worst_genome = get_worst_genome();
     if (worst_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -376,6 +376,9 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
     islands[generation_island]->set_latest_generation_id(generated_genomes);
     new_genome->set_group_id(generation_island);
 
+    pair<double, double> perf = {this->get_best_fitness(), this->get_worst_fitness()};
+    genome_performance.emplace(new_genome->generation_id, perf);
+    
     if (current_island->is_initializing()) {
         RNN_Genome* genome_copy = new_genome->copy();
         Log::debug("inserting genome copy!\n");
@@ -386,6 +389,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
         generation_island = 0;
     }
 
+
     return new_genome;
 }
 
@@ -456,6 +460,7 @@ void IslandSpeciationStrategy::print(string indent) const {
  */
 string IslandSpeciationStrategy::get_strategy_information_headers() const {
     string info_header = "";
+    info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post");
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
         info_header.append(",");
         info_header.append("Island_");
@@ -472,8 +477,22 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const {
 /**
  * Gets speciation strategy information values for logs
  */
-string IslandSpeciationStrategy::get_strategy_information_values() const {
+string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const {
     string info_value = "";
+    
+    auto &[min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id);
+    info_value.append(",");
+    info_value.append(to_string(min_mse_pre));
+    info_value.append(",");
+    info_value.append(to_string(max_mse_pre));
+    
+    double min_mse_post = this->get_best_fitness();   // keep double: same precision as the pre-insert columns
+    double max_mse_post = this->get_worst_fitness();  // (float would narrow the value returned by the getter)
+    info_value.append(",");
+    info_value.append(to_string(min_mse_post));
+    info_value.append(",");
+    info_value.append(to_string(max_mse_post));
+
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
         double best_fitness = islands[i]->get_best_fitness();
         double worst_fitness = islands[i]->get_worst_fitness();
diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx
index 0eed891c..683e2a39 100644
--- a/examm/island_speciation_strategy.hxx
+++ b/examm/island_speciation_strategy.hxx
@@ -62,6 +62,12 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
     vector<Island*> islands;
     RNN_Genome* global_best_genome;
 
+    ofstream *island_log_file;
+    
+    // Maps a genome's generation id to the pair (best fitness, worst fitness) over all
+    // islands, recorded at the time the genome was generated.
+    unordered_map<int32_t, pair<double, double>> genome_performance;
+
     // Transfer learning class properties:
 
     bool transfer_learning;
@@ -114,25 +120,25 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    double get_best_fitness();
+    double get_best_fitness() const;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    double get_worst_fitness();
+    double get_worst_fitness() const;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands or NULL if no genomes have yet been inserted
      */
-    RNN_Genome* get_best_genome();
+    RNN_Genome* get_best_genome() const;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands or NULL if no genomes have yet been inserted
      */
-    RNN_Genome* get_worst_genome();
+    RNN_Genome* get_worst_genome() const;
 
     /**
      *  \return true if all the islands are full
@@ -207,7 +213,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    string get_strategy_information_values() const;
+    string get_strategy_information_values(RNN_Genome *genome) const;
 
     /**
      * Island repopulation through two random parents from two seperate islands,
diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx
index 8d5f18ac..e71470e1 100644
--- a/examm/neat_speciation_strategy.cxx
+++ b/examm/neat_speciation_strategy.cxx
@@ -74,7 +74,7 @@ int32_t NeatSpeciationStrategy::get_evaluated_genomes() const {
     return evaluated_genomes;
 }
 
-RNN_Genome* NeatSpeciationStrategy::get_best_genome() {
+RNN_Genome* NeatSpeciationStrategy::get_best_genome() const {
     int32_t best_genome_species = -1;
     double best_fitness = EXAMM_MAX_DOUBLE;
 
@@ -95,7 +95,7 @@ RNN_Genome* NeatSpeciationStrategy::get_best_genome() {
     }
 }
 
-RNN_Genome* NeatSpeciationStrategy::get_worst_genome() {
+RNN_Genome* NeatSpeciationStrategy::get_worst_genome() const {
     int32_t worst_genome_species = -1;
     double worst_fitness = -EXAMM_MAX_DOUBLE;
 
@@ -116,7 +116,7 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() {
     }
 }
 
-double NeatSpeciationStrategy::get_best_fitness() {
+double NeatSpeciationStrategy::get_best_fitness() const {
     RNN_Genome* best_genome = get_best_genome();
     if (best_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -125,7 +125,7 @@ double NeatSpeciationStrategy::get_best_fitness() {
     }
 }
 
-double NeatSpeciationStrategy::get_worst_fitness() {
+double NeatSpeciationStrategy::get_worst_fitness() const {
     RNN_Genome* worst_genome = get_worst_genome();
     if (worst_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -399,7 +399,7 @@ string NeatSpeciationStrategy::get_strategy_information_headers() const {
 /**
  * Gets speciation strategy information values for logs
  */
-string NeatSpeciationStrategy::get_strategy_information_values() const {
+string NeatSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const {
     string info_value = "";
     for (int32_t i = 0; i < (int32_t) Neat_Species.size(); i++) {
         double best_fitness = Neat_Species[i]->get_best_fitness();
diff --git a/examm/neat_speciation_strategy.hxx b/examm/neat_speciation_strategy.hxx
index 3416de03..645aabdd 100644
--- a/examm/neat_speciation_strategy.hxx
+++ b/examm/neat_speciation_strategy.hxx
@@ -64,25 +64,25 @@ class NeatSpeciationStrategy : public SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    double get_best_fitness();
+    double get_best_fitness() const;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    double get_worst_fitness();
+    double get_worst_fitness() const;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands
      */
-    RNN_Genome* get_best_genome();
+    RNN_Genome* get_best_genome() const;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands
      */
-    RNN_Genome* get_worst_genome();
+    RNN_Genome* get_worst_genome() const;
 
     /**
      * Inserts a <b>copy</b> of the genome into this speciation strategy.
@@ -130,7 +130,7 @@ class NeatSpeciationStrategy : public SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    string get_strategy_information_values() const;
+    string get_strategy_information_values(RNN_Genome *genome) const;
 
     RNN_Genome* get_global_best_genome();
 
diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx
index 9d790ab0..2d66f990 100644
--- a/examm/speciation_strategy.hxx
+++ b/examm/speciation_strategy.hxx
@@ -9,6 +9,8 @@ using std::string;
 using std::minstd_rand0;
 using std::uniform_real_distribution;
 
+#include "rnn/rnn_genome.hxx"
+
 class SpeciationStrategy {
    public:
     /**
@@ -25,25 +27,25 @@ class SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    virtual double get_best_fitness() = 0;
+    virtual double get_best_fitness() const = 0;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    virtual double get_worst_fitness() = 0;
+    virtual double get_worst_fitness() const = 0;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands
      */
-    virtual RNN_Genome* get_best_genome() = 0;
+    virtual RNN_Genome* get_best_genome() const = 0;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands
      */
-    virtual RNN_Genome* get_worst_genome() = 0;
+    virtual RNN_Genome* get_worst_genome() const = 0;
 
     /**
      * Inserts a <b>copy</b> of the genome into this speciation strategy.
@@ -86,7 +88,7 @@ class SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    virtual string get_strategy_information_values() const = 0;
+    virtual string get_strategy_information_values(RNN_Genome *genome) const = 0;
 
     virtual RNN_Genome* get_global_best_genome() = 0;
     virtual void initialize_population(function<void(int32_t, RNN_Genome*)>& mutate) = 0;
diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh
index a848302b..f3532525 100644
--- a/scripts/dnas/control.zsh
+++ b/scripts/dnas/control.zsh
@@ -6,9 +6,9 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  output_dir=results/control_v8/$bp_epoch/$fold
   mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
+  mpirun -np 14 build/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
@@ -25,26 +25,23 @@ run_examm() {
       --output_directory $output_dir \
       --log_filename fitness.csv \
       --learning_rate 0.01 \
-      --std_message_level WARNING \
+      --std_message_level INFO \
       --file_message_level WARNING \
       --crystalize_iters $crystalize_iters \
       --max_genomes $max_genomes \
       --island_size 32 \
-      --number_islands 4
+      --number_islands 4 \
+      --synchronous
 
   # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
   # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
 }
 
-bp_ge=(8 8192 16 4096 32 2048)
+# bp_ge=(8 8192 16 4096 32 2048)
+bp_ge=(8 8192)
 
 for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
+  for fold in $(seq 0 1); do
+     run_examm
    done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
 done

From 72ce5d4a42e6435cff23e5c0984527235f786d7f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Thu, 29 Feb 2024 15:20:33 -0500
Subject: [PATCH 27/42] Additional log data

---
 examm/examm.cxx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examm/examm.cxx b/examm/examm.cxx
index a90034f2..d0c784b9 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -96,7 +96,7 @@ void EXAMM::generate_log() {
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
         (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled"
-                       "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters";
+                       " Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
 
@@ -197,7 +197,8 @@ void EXAMM::update_log(RNN_Genome *genome) {
                     << genome->best_validation_mse << ","
                     << pre_insert_best_mse << ","
                     << (int32_t) (last_genome_inserted ? 1 : 0) << ","
-                    << genome->get_number_weights()
+                    << genome->get_number_weights() << ","
+                    << genome->get_group_id()  // island (group) id, matching the "Island Id" header column
                     << speciation_strategy->get_strategy_information_values(genome) << endl;
         Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }

From 5949736f7bc61d174a7b0465dd104769bcd73781 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Wed, 6 Mar 2024 15:17:26 -0500
Subject: [PATCH 28/42] Cluster script updates

---
 scripts/dnas/examm_bias_exp.zsh    | 58 ++++++++++++++++++++++++++++++
 scripts/dnas/examm_synchronous.zsh | 55 ++++++++++++++++++++++++++++
 scripts/dnas/mk_jobs.zsh           | 12 ++++---
 3 files changed, 120 insertions(+), 5 deletions(-)
 create mode 100644 scripts/dnas/examm_bias_exp.zsh
 create mode 100644 scripts/dnas/examm_synchronous.zsh

diff --git a/scripts/dnas/examm_bias_exp.zsh b/scripts/dnas/examm_bias_exp.zsh
new file mode 100644
index 00000000..52816f00
--- /dev/null
+++ b/scripts/dnas/examm_bias_exp.zsh
@@ -0,0 +1,58 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=18
+#SBATCH  --exclude theocho
+#SBATCH  --time=48:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_bias_ablation
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+cd /home/jak5763/exact
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1'
+
+offset=1
+
+run_examm() {
+  output_dir=results/$synchronous/$scramble_weights/$max_genomes/$fold
+  mkdir -p $output_dir
+  srun -n 18 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types simple ugrnn gru mgu lstm delta \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 10 \
+      --number_islands 10 \
+      --num_mutations 1 \
+      --$synchronous \
+      --$scramble_weights
+}
+
+run_group() {
+  for fold in $(seq 0 19); do
+    run_examm
+  done
+}
+
+let base_genomes=100000
+let max_genomes=$base_genomes/$bp_epoch
+run_group
diff --git a/scripts/dnas/examm_synchronous.zsh b/scripts/dnas/examm_synchronous.zsh
new file mode 100644
index 00000000..1d970272
--- /dev/null
+++ b/scripts/dnas/examm_synchronous.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1'
+
+offset=1
+
+run_examm() {
+  output_dir=results/synchronous/$max_genomes/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types simple ugrnn gru mgu lstm delta \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 10 \
+      --number_islands 10 \
+      --num_mutations 1 \
+      --synchronous
+}
+
+run_group() {
+  for fold in $(seq 0 9); do
+    run_examm
+  done
+}
+
+let base_genomes=100000
+let max_genomes=$base_genomes/$bp_epoch
+run_group
diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh
index 38a5526c..b996883e 100644
--- a/scripts/dnas/mk_jobs.zsh
+++ b/scripts/dnas/mk_jobs.zsh
@@ -1,6 +1,8 @@
-bp_ge=(8 8192 16 4096 32 2048 64 1024)
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh
+bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200)
+for bp_epoch in $bp; do
+  for synchronous in "async" "synchronous"; do
+    for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do
+      bp_epoch=$bp_epoch synchronous="$synchronous" scramble_weights="$scramble_weights" sbatch examm_bias_exp.zsh
+    done
+  done
 done

From 95277159201b2e0f7b5469223d76794820cd8ccd Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 13 Mar 2024 13:56:00 -0400
Subject: [PATCH 29/42] Remove scripts in root directory

---
 dnas_cluster.zsh | 69 ------------------------------------------------
 dnas_control.zsh | 60 -----------------------------------------
 2 files changed, 129 deletions(-)
 delete mode 100644 dnas_cluster.zsh
 delete mode 100644 dnas_control.zsh

diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh
deleted file mode 100644
index 55823c0c..00000000
--- a/dnas_cluster.zsh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 2 \
-      --use_dnas_seed true \
-      --use_burn_in_bp_epoch \
-      --burn_in_period 1024 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for crystalize_iters in 512; do
-    for k in 1; do
-      for fold in $(seq 0 19); do
-        run_examm
-      done
-    done
-  done
-}
-
-CELL_TYPE='dnas'
-# bp_ge=(8 8192 16 4096 32 2048 64 1024)
-# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-run_group
-# done
diff --git a/dnas_control.zsh b/dnas_control.zsh
deleted file mode 100644
index 88a7c882..00000000
--- a/dnas_control.zsh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 4 \
-      --burn_in_period 1024 \
-      --use_burn_in_bp_epoch
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for k in 1; do
-    for fold in $(seq 0 19); do
-      run_examm
-    done
-  done
-}
-
-run_group

From fbb32b2aa3b2adb282e87022b2394169971eb159 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 13 Mar 2024 13:56:17 -0400
Subject: [PATCH 30/42] Remove junk file

---
 key | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 key

diff --git a/key b/key
deleted file mode 100644
index 391a7405..00000000
--- a/key
+++ /dev/null
@@ -1,7 +0,0 @@
-v11 -> burn in schedule with 0.001 lr 4 mut
-v12 -> burn in schedule with 0.01 lr 4 mut
-v13 -> burn in schedule with 0.01 lr and period of 1024 and total genoms 8k 4 mut
-v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut
-v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut
-v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut
-

From 217e5bedae3fcce907d10ae823d6caff367241d2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 18 Mar 2024 13:58:35 -0400
Subject: [PATCH 31/42] First stage of removing old-style architectural hashes

---
 examm/island.cxx                     | 171 +++------------------------
 examm/island.hxx                     |   7 +-
 examm/island_speciation_strategy.cxx |   1 -
 rnn/rnn_genome.cxx                   |  15 ++-
 rnn/rnn_genome.hxx                   |  13 +-
 5 files changed, 49 insertions(+), 158 deletions(-)

diff --git a/examm/island.cxx b/examm/island.cxx
index 6d8b0b5f..6a26bbcd 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -160,104 +160,26 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
     // check and see if the structural hash of the genome is in the
     // set of hashes for this population
     Log::info("getting structural hash\n");
-    string structural_hash = genome->get_structural_hash();
-    if (structure_map.count(structural_hash) > 0) {
-        vector<RNN_Genome*>& potential_matches = structure_map.find(structural_hash)->second;
-        Log::debug(
-            "potential duplicate for hash '%s', had %d potential matches.\n", structural_hash.c_str(),
-            potential_matches.size()
-        );
+    auto duplicate_it = structure_set.find(genome);
 
-        for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) {
-            Log::debug(
-                "on potential match %d of %d\n", potential_match - potential_matches.begin(), potential_matches.size()
-            );
-            if ((*potential_match)->equals(genome)) {
-                if ((*potential_match)->get_fitness() > new_fitness) {
-                    Log::debug(
-                        "REPLACING DUPLICATE GENOME, fitness of genome in search: %s, new fitness: %s\n",
-                        parse_fitness((*potential_match)->get_fitness()).c_str(),
-                        parse_fitness(genome->get_fitness()).c_str()
-                    );
-                    // we have an exact match for this genome in the island and its fitness is worse
-                    // than the genome we're trying to remove, so remove the duplicate it from the genomes
-                    // as well from the potential matches vector
-
-                    auto duplicate_genome_iterator =
-                        lower_bound(genomes.begin(), genomes.end(), *potential_match, sort_genomes_by_fitness());
-                    bool found = false;
-                    for (; duplicate_genome_iterator != genomes.end(); duplicate_genome_iterator++) {
-                        Log::debug(
-                            "duplicate_genome_iterator: %p, (*potential_match): %p\n", (*duplicate_genome_iterator),
-                            (*potential_match)
-                        );
-                        if ((*duplicate_genome_iterator) == (*potential_match)) {
-                            found = true;
-                            break;
-                        }
-                    }
-                    if (!found) {
-                        Log::fatal(
-                            "ERROR: could not find duplicate genome even though its structural hash was in the island, "
-                            "this should never happen!\n"
-                        );
-                        exit(1);
-                    }
-                    Log::debug(
-                        "potential_match->get_fitness(): %lf, duplicate_genome_iterator->get_fitness(): %lf, "
-                        "new_fitness: %lf\n",
-                        (*potential_match)->get_fitness(), (*duplicate_genome_iterator)->get_fitness(), new_fitness
-                    );
-                    int32_t duplicate_genome_index = duplicate_genome_iterator - genomes.begin();
-                    Log::debug("duplicate_genome_index: %d\n", duplicate_genome_index);
-                    // int32_t test_index = contains(genome);
-                    // Log::info("test_index: %d\n", test_index);
-                    RNN_Genome* duplicate = genomes[duplicate_genome_index];
-                    // Log::info("duplicate.equals(potential_match)? %d\n", duplicate->equals(*potential_match));
-                    genomes.erase(genomes.begin() + duplicate_genome_index);
-                    Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size());
-
-                    // erase the potential match from the structure map as well
-                    // returns an iterator to next element after the deleted one so
-                    // we don't need to increment it
-                    potential_match = potential_matches.erase(potential_match);
-                    delete duplicate;
-
-                    Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size());
-                    Log::debug(
-                        "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(),
-                        structure_map[structural_hash].size()
-                    );
-                    if (potential_matches.size() == 0) {
-                        Log::debug(
-                            "deleting the potential_matches vector for hash '%s' because it was empty.\n",
-                            structural_hash.c_str()
-                        );
-                        structure_map.erase(structural_hash);
-                        break;  // break because this vector is now empty and deleted
-                    }
-                } else {
-                    Log::info(
-                        "Island %d: island already contains a duplicate genome with a better fitness! not inserting.\n",
-                        id
-                    );
-                    do_population_check(__LINE__, initial_size);
-                    return -1;
-                }
-            } else {
-                // increment potential match because we didn't delete an entry (or return from the method)
-                potential_match++;
-            }
+    if (duplicate_it != structure_set.end()) {
+        RNN_Genome* duplicate = *duplicate_it;
+        // TODO: Add annealment here
+        if (duplicate->get_fitness() > genome->get_fitness()) {
+            genomes.erase(std::find(genomes.begin(), genomes.end(), duplicate));
         }
     }
 
+
     // inorder insert the new individual
     RNN_Genome* copy = genome->copy();
+    copy->set_generation_id(genome->get_generation_id());
+    
     vector<double> best = copy->get_best_parameters();
     if (best.size() != 0) {
         copy->set_weights(best);
     }
-    copy->set_generation_id(genome->get_generation_id());
+
     Log::debug("created copy to insert to island: %d\n", copy->get_group_id());
     auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness());
     int32_t insert_index = index_iterator - genomes.begin();
@@ -274,12 +196,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
     }
 
     genomes.insert(index_iterator, copy);
-    // calculate the index the genome was inseretd at from the iterator
-
-    structural_hash = copy->get_structural_hash();
-    // add the genome to the vector for this structural hash
-    structure_map[structural_hash].push_back(copy);
-    Log::debug("adding to structure_map[%s] : %p\n", structural_hash.c_str(), &copy);
+    structure_set.insert(copy);
 
     if (insert_index == 0) {
         // this was a new best genome for this island
@@ -309,51 +226,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         Log::debug("deleting worst genome\n");
         RNN_Genome* worst = genomes.back();
         genomes.pop_back();
-        structural_hash = worst->get_structural_hash();
-
-        vector<RNN_Genome*>& potential_matches = structure_map.find(structural_hash)->second;
-
-        bool found = false;
-        for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) {
-            // make sure the addresses of the pointers are the same
-            Log::debug(
-                "checking to remove worst from structure_map - &worst: %p, &(*potential_match): %p\n", worst,
-                (*potential_match)
-            );
-            if ((*potential_match) == worst) {
-                found = true;
-                Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size());
-
-                // erase the potential match from the structure map as well
-                potential_match = potential_matches.erase(potential_match);
-
-                Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size());
-                Log::debug(
-                    "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(),
-                    structure_map[structural_hash].size()
-                );
-
-                // clean up the structure_map if no genomes in the population have this hash
-                if (potential_matches.size() == 0) {
-                    Log::debug(
-                        "deleting the potential_matches vector for hash '%s' because it was empty.\n",
-                        structural_hash.c_str()
-                    );
-                    structure_map.erase(structural_hash);
-                    break;
-                }
-            } else {
-                potential_match++;
-            }
-        }
-
-        if (!found) {
-            Log::debug(
-                "could not erase from structure_map[%s], genome not found! This should never happen.\n",
-                structural_hash.c_str()
-            );
-            exit(1);
-        }
+        structure_set.erase(worst);
 
         delete worst;
     }
@@ -382,24 +255,18 @@ void Island::print(string indent) {
 }
 
 void Island::erase_island() {
-    erased_generation_id = latest_generation_id;
-    for (int32_t i = 0; i < (int32_t) genomes.size(); i++) {
+    structure_set.clear();
+    
+    for (int32_t i = 0; i < (int32_t) genomes.size(); i++)
         delete genomes[i];
-    }
+
     genomes.clear();
+    
     erased = true;
     erase_again = 5;
+    erased_generation_id = latest_generation_id;
+    
     Log::debug("Worst island size after erased: %d\n", genomes.size());
-
-    if (genomes.size() != 0) {
-        Log::error("The worst island is not fully erased!\n");
-    }
-}
-
-void Island::erase_structure_map() {
-    Log::debug("Erasing the structure map in the worst performing island\n");
-    structure_map.clear();
-    Log::debug("after erase structure map size is %d\n", structure_map.size());
 }
 
 int32_t Island::get_erased_generation_id() {
diff --git a/examm/island.hxx b/examm/island.hxx
index c75921aa..86ffdbbc 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -18,6 +18,9 @@ using std::string;
 #include <unordered_map>
 using std::unordered_map;
 
+#include <unordered_set>
+using std::unordered_set;
+
 #include "rnn/rnn_genome.hxx"
 
 class Island {
@@ -34,8 +37,8 @@ class Island {
      * The genomes on this island, stored in sorted order best (front) to worst (back).
      */
     vector<RNN_Genome*> genomes;
+    unordered_set<RNN_Genome *, RNN_Genome::StructuralHash> structure_set;
 
-    unordered_map<string, vector<RNN_Genome*>> structure_map;
     int32_t
         status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or  Island::REPOPULATING */
 
@@ -172,8 +175,6 @@ class Island {
      */
     void erase_island();
 
-    void erase_structure_map();
-
     /**
      * returns the get_erased_generation_id.
      */
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index a2463b2d..6fd15bb4 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -228,7 +228,6 @@ void IslandSpeciationStrategy::repopulate() {
                     if (rank[i] >= 0) {
                         Log::info("found island: %d is the worst island \n", rank[0]);
                         islands[rank[i]]->erase_island();
-                        islands[rank[i]]->erase_structure_map();
                         islands[rank[i]]->set_status(Island::REPOPULATING);
                     } else {
                         Log::error("Didn't find the worst island!");
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index d0887808..47bd934f 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -1337,7 +1337,7 @@ bool RNN_Genome::has_node_with_innovation(int32_t innovation_number) const {
     return false;
 }
 
-bool RNN_Genome::equals(RNN_Genome* other) {
+bool RNN_Genome::equals(const RNN_Genome* other) const {
     if (nodes.size() != other->nodes.size()) {
         return false;
     }
@@ -1369,6 +1369,19 @@ bool RNN_Genome::equals(RNN_Genome* other) {
     return true;
 }
 
+bool RNN_Genome::operator==(const RNN_Genome& other) const {
+    return other.equals(this);
+}
+
+size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome* genome) const {
+    return this->operator()(*genome);
+}
+
+size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome& genome) const {
+    std::hash<string> hasher;
+    return hasher(genome.get_structural_hash());
+}
+
 void RNN_Genome::assign_reachability() {
     Log::trace("assigning reachability!\n");
     Log::trace("%6d nodes, %6d edges, %6d recurrent edges\n", nodes.size(), edges.size(), recurrent_edges.size());
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index 56977e76..74c174c9 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -295,7 +295,18 @@ class RNN_Genome {
      */
     bool has_node_with_innovation(int32_t innovation_number) const;
 
-    bool equals(RNN_Genome* other);
+    bool equals(const RNN_Genome* other) const;
+    bool operator==(const RNN_Genome& other) const;
+
+    /**
+     * Hash function implementation.
+     * Based on the hash code of the structural hash.
+     * */
+    struct StructuralHash {
+        size_t operator()(const RNN_Genome& other) const;
+        size_t operator()(const RNN_Genome* other) const;
+    };
+
 
     string get_color(double weight, bool is_recurrent);
     void write_graphviz(string filename);

From ed5ad176215af9c550947f94c3a2bb76ead39de3 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 18 Mar 2024 18:39:48 -0400
Subject: [PATCH 32/42] Add annealing policy

---
 examm/CMakeLists.txt                 |  2 +-
 examm/annealing.cxx                  | 81 ++++++++++++++++++++++++++++
 examm/annealing.hxx                  | 65 ++++++++++++++++++++++
 examm/island.cxx                     | 16 +++---
 examm/island.hxx                     | 11 ++--
 examm/island_speciation_strategy.cxx | 20 +++----
 examm/island_speciation_strategy.hxx |  7 +--
 7 files changed, 169 insertions(+), 33 deletions(-)
 create mode 100644 examm/annealing.cxx
 create mode 100644 examm/annealing.hxx

diff --git a/examm/CMakeLists.txt b/examm/CMakeLists.txt
index d5c532f9..2f5942b7 100644
--- a/examm/CMakeLists.txt
+++ b/examm/CMakeLists.txt
@@ -1 +1 @@
-add_library(examm_strategy examm.cxx  species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx)
+add_library(examm_strategy examm.cxx species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx annealing.cxx)
diff --git a/examm/annealing.cxx b/examm/annealing.cxx
new file mode 100644
index 00000000..d3c9db9c
--- /dev/null
+++ b/examm/annealing.cxx
@@ -0,0 +1,81 @@
+#include "annealing.hxx"
+#include "common/arguments.hxx"
+#include "common/log.hxx"
+
+#include <cmath>
+#include <memory>
+
+unique_ptr<AnnealingPolicy> AnnealingPolicy::from_arguments(const vector<string> &arguments) {
+  string type;
+  get_argument(arguments, "--annealing_policy", false, type);
+
+  if (type == "linear") {
+      return unique_ptr<AnnealingPolicy>(new LinearAnnealingPolicy(arguments));
+  } else if (type == "inv_exp") {
+      return unique_ptr<AnnealingPolicy>(new InvExpAnnealingPolicy(arguments));
+  } else if (type == "sin") {
+      return unique_ptr<AnnealingPolicy>(new SinAnnealingPolicy(arguments));
+  } else {
+      Log::info("Using default annealing policy\n");
+      return make_unique<AnnealingPolicy>();
+  }
+}
+
+double AnnealingPolicy::operator()(int32_t genome_number) {
+    return 0.0;
+}
+
+LinearAnnealingPolicy::LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes)
+    : start_value(start_value),
+      end_value(end_value),
+      start_genomes(start_genomes),
+      interp_genomes(interp_genomes) {}
+
+LinearAnnealingPolicy::LinearAnnealingPolicy(const vector<string> &arguments) {
+    get_argument(arguments, "--linear_start_value", true, start_value);
+    get_argument(arguments, "--linear_end_value", true, end_value);
+    get_argument(arguments, "--linear_start_genomes", true, start_genomes);
+    get_argument(arguments, "--linear_interp_genomes", true, interp_genomes);
+}
+
+double LinearAnnealingPolicy::operator()(int32_t genome_number) {
+    if (genome_number <= start_genomes) {  // plateau: start_value until start_genomes genomes have been seen
+        return start_value;
+    } else if (genome_number <= interp_genomes + start_genomes) {  // linear ramp from start_value to end_value
+        double weight = (double) (genome_number - start_genomes) / (double) interp_genomes;  // in (0, 1]
+        return weight * end_value + (1 - weight) * start_value;
+    } else {  // past the ramp: end_value from here on
+        return end_value;
+    }
+}
+
+InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) {}
+InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector<string> &arguments) {
+    get_argument(arguments, "--exp_decay_factor", true, decay_factor);
+}
+
+double InvExpAnnealingPolicy::operator()(int32_t genome_number) {
+    return std::pow(1. + genome_number, decay_factor);
+}
+
+SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) : period(period), min_p(min_p), max_p(max_p) {
+    if (min_p > max_p)
+        std::swap(min_p, max_p);
+
+    if (min_p > 1.0 || min_p < 0.0)
+        throw "Invalid min_p supplied to SinAnnealingPolicyConstructor";
+    if (max_p > 1.0 || max_p < 0.0)
+        throw "Invalid max_p supplied to SinAnnealingPolicyConstructor";
+
+}
+SinAnnealingPolicy::SinAnnealingPolicy(const vector<string> &arguments) {
+    get_argument(arguments, "--sin_min_p", true, min_p);
+    get_argument(arguments, "--sin_max_p", true, max_p);
+    get_argument(arguments, "--sin_period", true, period);
+}
+
+double SinAnnealingPolicy::operator()(int32_t genome_number) {
+    double range = max_p - min_p;
+    // Midpoint plus a sine scaled by half the range, so the result stays between min_p and max_p.
+    return (max_p + min_p) / 2. + range / 2. * std::sin(2. * M_PI * genome_number / period);
+}
diff --git a/examm/annealing.hxx b/examm/annealing.hxx
new file mode 100644
index 00000000..1cfada54
--- /dev/null
+++ b/examm/annealing.hxx
@@ -0,0 +1,72 @@
+#include <stdint.h>
+#include <memory>
+using std::unique_ptr;
+
+#include <string>
+using std::string;
+
+#include <vector>
+using std::vector;
+
+struct AnnealingPolicy {
+
+    static unique_ptr<AnnealingPolicy> from_arguments(const vector<string> &arguments);
+
+    /**
+     * Compute the probability to be used during genome insertion.
+     * This represents the probability of inserting the genome, even if it
+     * has a fitness value that is worse than the worst member in the population.
+     *
+     * Virtual so that calls made through an `AnnealingPolicy&` or a
+     * `unique_ptr<AnnealingPolicy>` reach the derived policy; this base
+     * implementation is the default policy and always returns 0.
+     */
+    virtual double operator()(int32_t genome_number);
+
+    /** Virtual because policies are owned and destroyed through `unique_ptr<AnnealingPolicy>`. */
+    virtual ~AnnealingPolicy() = default;
+};
+
+/**
+ * Interpolate between two values for a set number of genomes.
+ * The `start_value` will be returned for `start_genomes`,
+ * then a linear interpolation of `start_value` and `end_value` for
+ * `interp_genomes`. Then, `end_value` is given indefinitely.
+ */
+class LinearAnnealingPolicy : public AnnealingPolicy {
+    double start_value, end_value;
+    int32_t start_genomes, interp_genomes;
+
+  public:
+    LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes);
+    LinearAnnealingPolicy(const vector<string> &arguments);
+
+    double operator()(int32_t genome_number);
+};
+
+/**
+ * Calculates p as `(1 + genome_number)^decay_factor`; supply a negative `decay_factor` for a decaying p.
+ **/
+class InvExpAnnealingPolicy : public AnnealingPolicy {
+    double decay_factor;
+  
+  public:
+    InvExpAnnealingPolicy(double decay_factor);
+    InvExpAnnealingPolicy(const vector<string> &arguments);
+
+    double operator()(int32_t genome_number);
+};
+
+/**
+ * Computes `p` as a value falling on a sinusoidal curve with the supplied period.
+ * a `min_p` and a `max_p` specify the range of the curve.
+ **/
+class SinAnnealingPolicy : public AnnealingPolicy {
+    double period, min_p, max_p;
+
+  public:
+    SinAnnealingPolicy(double period, double min_p, double max_p);
+    SinAnnealingPolicy(const vector<string> &arguments);
+
+    double operator()(int32_t genome_number);
+};
diff --git a/examm/island.cxx b/examm/island.cxx
index 6a26bbcd..1313764a 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -1,10 +1,8 @@
 #include <algorithm>
-using std::lower_bound;
-using std::sort;
+#include <memory>
 using std::upper_bound;
 
 #include <iomanip>
-using std::setw;
 
 #include <random>
 using std::minstd_rand0;
@@ -14,9 +12,6 @@ using std::uniform_real_distribution;
 using std::string;
 using std::to_string;
 
-#include <unordered_map>
-using std::unordered_map;
-
 #include <vector>
 using std::vector;
 
@@ -24,17 +19,18 @@ using std::vector;
 #include "island.hxx"
 #include "rnn/rnn_genome.hxx"
 
-Island::Island(int32_t _id, int32_t _max_size)
-    : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false) {
+Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy)
+    : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false), annealing_policy(annealing_policy) {
 }
 
-Island::Island(int32_t _id, vector<RNN_Genome*> _genomes)
+Island::Island(int32_t _id, vector<RNN_Genome*> _genomes, AnnealingPolicy& annealing_policy)
     : id(_id),
       max_size((int32_t) _genomes.size()),
       genomes(_genomes),
       status(Island::FILLED),
       erase_again(0),
-      erased(false) {
+      erased(false),
+      annealing_policy(annealing_policy) {
 }
 
 RNN_Genome* Island::get_best_genome() {
diff --git a/examm/island.hxx b/examm/island.hxx
index 86ffdbbc..d2120bca 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -2,10 +2,9 @@
 #define EXAMM_ISLAND_STRATEGY_HXX
 
 #include <algorithm>
-using std::sort;
-using std::upper_bound;
 
 #include <functional>
+#include <memory>
 using std::function;
 
 #include <random>
@@ -16,12 +15,12 @@ using std::uniform_real_distribution;
 using std::string;
 
 #include <unordered_map>
-using std::unordered_map;
 
 #include <unordered_set>
 using std::unordered_set;
 
 #include "rnn/rnn_genome.hxx"
+#include "annealing.hxx"
 
 class Island {
    private:
@@ -39,6 +38,8 @@ class Island {
     vector<RNN_Genome*> genomes;
     unordered_set<RNN_Genome *, RNN_Genome::StructuralHash> structure_set;
 
+    AnnealingPolicy& annealing_policy;
+
     int32_t
         status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or  Island::REPOPULATING */
 
@@ -55,13 +56,13 @@ class Island {
      *
      *  \param max_size is the maximum number of genomes in the island.
      */
-    Island(int32_t id, int32_t max_size);
+    Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy);
 
     /**
      * Initializes an island filled the supplied genomes. The size of the island will be the size
      * of the supplied genome vector. The island status is set to filled.
      */
-    Island(int32_t id, vector<RNN_Genome*> genomes);
+    Island(int32_t id, vector<RNN_Genome*> genomes, AnnealingPolicy& annealing_policy);
 
     /**
      * Returns the fitness of the best genome in the island
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index 6fd15bb4..52941f3b 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -26,7 +26,8 @@ IslandSpeciationStrategy::IslandSpeciationStrategy(
     double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method,
     string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations,
     int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled,
-    bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs, bool _tl_epigenetic_weights
+    bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights,
+    unique_ptr<AnnealingPolicy>& annealing_policy
 )
     : generation_island(0),
       number_of_islands(_number_of_islands),
@@ -47,8 +48,8 @@ IslandSpeciationStrategy::IslandSpeciationStrategy(
       start_filled(_start_filled),
       transfer_learning(_transfer_learning),
       transfer_learning_version(_transfer_learning_version),
-      seed_stirs(_seed_stirs),
-      tl_epigenetic_weights(_tl_epigenetic_weights) {
+      tl_epigenetic_weights(_tl_epigenetic_weights),
+      annealing_policy(std::move(annealing_policy)) {
     double rate_sum = mutation_rate + intra_island_crossover_rate + inter_island_crossover_rate;
     if (rate_sum != 1.0) {
         mutation_rate = mutation_rate / rate_sum;
@@ -78,15 +79,14 @@ IslandSpeciationStrategy::IslandSpeciationStrategy(
 
     if (transfer_learning) {
         Log::info("Transfer learning version is %s\n", transfer_learning_version.c_str());
-        Log::info("Apply seed stirs: %d\n", seed_stirs);
     }
 }
 
 void IslandSpeciationStrategy::initialize_population(function<void(int32_t, RNN_Genome*)>& mutate) {
     for (int32_t i = 0; i < number_of_islands; i++) {
-        Island* new_island = new Island(i, max_island_size);
+        Island* new_island = new Island(i, max_island_size, *annealing_policy);
         if (start_filled) {
-            new_island->fill_with_mutated_genomes(seed_genome, seed_stirs, tl_epigenetic_weights, mutate);
+            new_island->fill_with_mutated_genomes(seed_genome, num_mutations, tl_epigenetic_weights, mutate);
         }
         islands.push_back(new_island);
     }
@@ -286,14 +286,6 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_initializing_island(
         new_genome = seed_genome->copy();
         new_genome->initialize_randomly();
 
-        bool stir_seed_genome = false;
-        if (stir_seed_genome) {
-            Log::info("Stir the seed genome with %d mutations\n", seed_stirs);
-            mutate(seed_stirs, new_genome);
-            if (!tl_epigenetic_weights) {
-                new_genome->initialize_randomly();
-            }
-        }
     } else {
         Log::info("Island %d: island is initializing but not empty, mutating a random genome\n", generation_island);
         while (new_genome == NULL) {
diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx
index 19eff273..fdc41024 100644
--- a/examm/island_speciation_strategy.hxx
+++ b/examm/island_speciation_strategy.hxx
@@ -72,9 +72,10 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
 
     bool transfer_learning;
     string transfer_learning_version;
-    int32_t seed_stirs;
     bool tl_epigenetic_weights;
 
+    unique_ptr<AnnealingPolicy> annealing_policy;
+
    public:
     // static void register_command_line_arguments();
     // static IslandSpeciationStrategy* generate_from_command_line();
@@ -90,8 +91,8 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
         double _intra_island_crossover_rate, double _inter_island_crossover_rate, RNN_Genome* _seed_genome,
         string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number,
         int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction,
-        bool _start_filled, bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs,
-        bool _tl_epigenetic_weights
+        bool _start_filled, bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights,
+        unique_ptr<AnnealingPolicy>& annealing_policy
     );
 
     // /**

From 7f9686b31f348cf51458863c115f123a82587f90 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 18 Mar 2024 18:50:21 -0400
Subject: [PATCH 33/42] Clang format

---
 common/process_arguments.cxx |  4 ++-
 examm/annealing.cxx          | 63 +++++++++++++++++++-----------------
 examm/annealing.hxx          | 18 +++++------
 examm/island.cxx             | 20 +++++++-----
 examm/island.hxx             |  6 ++--
 rnn/rnn_genome.hxx           |  1 -
 6 files changed, 60 insertions(+), 52 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 202ddd42..9419fdd7 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -1,3 +1,4 @@
+#include <memory>
 #include <string>
 using std::string;
 
@@ -120,12 +121,13 @@ IslandSpeciationStrategy* generate_island_speciation_strategy_from_arguments(
     get_argument(arguments, "--seed_stirs", false, seed_stirs);
     bool start_filled = argument_exists(arguments, "--start_filled");
     bool tl_epigenetic_weights = argument_exists(arguments, "--tl_epigenetic_weights");
+    unique_ptr<AnnealingPolicy> annealing_policy = AnnealingPolicy::from_arguments(arguments);
 
     IslandSpeciationStrategy* island_strategy = new IslandSpeciationStrategy(
         number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, seed_genome,
         island_ranking_method, repopulation_method, extinction_event_generation_number, num_mutations,
         islands_to_exterminate, max_genomes, repeat_extinction, start_filled, transfer_learning,
-        transfer_learning_version, seed_stirs, tl_epigenetic_weights
+        transfer_learning_version, tl_epigenetic_weights, annealing_policy
     );
 
     return island_strategy;
diff --git a/examm/annealing.cxx b/examm/annealing.cxx
index d3c9db9c..023e872f 100644
--- a/examm/annealing.cxx
+++ b/examm/annealing.cxx
@@ -1,37 +1,38 @@
 #include "annealing.hxx"
-#include "common/arguments.hxx"
-#include "common/log.hxx"
 
 #include <cmath>
 #include <memory>
 
-unique_ptr<AnnealingPolicy> AnnealingPolicy::from_arguments(const vector<string> &arguments) {
-  string type;
-  get_argument(arguments, "--annealing_policy", false, type);
+#include "common/arguments.hxx"
+#include "common/log.hxx"
 
-  if (type == "linear") {
-      return unique_ptr<AnnealingPolicy>(new LinearAnnealingPolicy(arguments));
-  } else if (type == "inv_exp") {
-      return unique_ptr<AnnealingPolicy>(new InvExpAnnealingPolicy(arguments));
-  } else if (type == "sin") {
-      return unique_ptr<AnnealingPolicy>(new SinAnnealingPolicy(arguments));
-  } else {
-      Log::info("Using default annealing policy\n");
-      return make_unique<AnnealingPolicy>();
-  }
+unique_ptr<AnnealingPolicy> AnnealingPolicy::from_arguments(const vector<string>& arguments) {
+    string type;
+    get_argument(arguments, "--annealing_policy", false, type);
+
+    if (type == "linear") {
+        return unique_ptr<AnnealingPolicy>(new LinearAnnealingPolicy(arguments));
+    } else if (type == "inv_exp") {
+        return unique_ptr<AnnealingPolicy>(new InvExpAnnealingPolicy(arguments));
+    } else if (type == "sin") {
+        return unique_ptr<AnnealingPolicy>(new SinAnnealingPolicy(arguments));
+    } else {
+        Log::info("Using default annealing policy\n");
+        return make_unique<AnnealingPolicy>();
+    }
 }
 
 double AnnealingPolicy::operator()(int32_t genome_number) {
     return 0.0;
 }
 
-LinearAnnealingPolicy::LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes)
-    : start_value(start_value),
-      end_value(end_value),
-      start_genomes(start_genomes),
-      interp_genomes(interp_genomes) {}
+LinearAnnealingPolicy::LinearAnnealingPolicy(
+    double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes
+)
+    : start_value(start_value), end_value(end_value), start_genomes(start_genomes), interp_genomes(interp_genomes) {
+}
 
-LinearAnnealingPolicy::LinearAnnealingPolicy(const vector<string> &arguments) {
+LinearAnnealingPolicy::LinearAnnealingPolicy(const vector<string>& arguments) {
     get_argument(arguments, "--linear_start_value", true, start_value);
     get_argument(arguments, "--linear_end_value", true, end_value);
     get_argument(arguments, "--linear_start_genomes", true, start_genomes);
@@ -49,8 +50,9 @@ double LinearAnnealingPolicy::operator()(int32_t genome_number) {
     }
 }
 
-InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) {}
-InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector<string> &arguments) {
+InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) {
+}
+InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector<string>& arguments) {
     get_argument(arguments, "--exp_decay_factor", true, decay_factor);
 }
 
@@ -58,17 +60,20 @@ double InvExpAnnealingPolicy::operator()(int32_t genome_number) {
     return std::pow(1. + genome_number, decay_factor);
 }
 
-SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) : period(period), min_p(min_p), max_p(max_p) {
-    if (min_p > max_p)
+SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p)
+    : period(period), min_p(min_p), max_p(max_p) {
+    if (min_p > max_p) {
         std::swap(min_p, max_p);
+    }
 
-    if (min_p > 1.0 || min_p < 0.0)
+    if (min_p > 1.0 || min_p < 0.0) {
         throw "Invalid min_p supplied to SinAnnealingPolicyConstructor";
-    if (max_p > 1.0 || max_p < 0.0)
+    }
+    if (max_p > 1.0 || max_p < 0.0) {
         throw "Invalid max_p supplied to SinAnnealingPolicyConstructor";
-
+    }
 }
-SinAnnealingPolicy::SinAnnealingPolicy(const vector<string> &arguments) {
+SinAnnealingPolicy::SinAnnealingPolicy(const vector<string>& arguments) {
     get_argument(arguments, "--sin_min_p", true, min_p);
     get_argument(arguments, "--sin_max_p", true, max_p);
     get_argument(arguments, "--sin_period", true, period);
diff --git a/examm/annealing.hxx b/examm/annealing.hxx
index 1cfada54..83addc91 100644
--- a/examm/annealing.hxx
+++ b/examm/annealing.hxx
@@ -1,4 +1,5 @@
 #include <stdint.h>
+
 #include <memory>
 using std::unique_ptr;
 
@@ -9,8 +10,7 @@ using std::string;
 using std::vector;
 
 struct AnnealingPolicy {
-
-    static unique_ptr<AnnealingPolicy> from_arguments(const vector<string> &arguments);
+    static unique_ptr<AnnealingPolicy> from_arguments(const vector<string>& arguments);
 
     /**
      * Compute the probability to be used during genome insertion.
@@ -30,9 +30,9 @@ class LinearAnnealingPolicy : public AnnealingPolicy {
     double start_value, end_value;
     int32_t start_genomes, interp_genomes;
 
-  public:
+   public:
     LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes);
-    LinearAnnealingPolicy(const vector<string> &arguments);
+    LinearAnnealingPolicy(const vector<string>& arguments);
 
     double operator()(int32_t genome_number);
 };
@@ -42,10 +42,10 @@ class LinearAnnealingPolicy : public AnnealingPolicy {
  **/
 class InvExpAnnealingPolicy : public AnnealingPolicy {
     double decay_factor;
-  
-  public:
+
+   public:
     InvExpAnnealingPolicy(double decay_factor);
-    InvExpAnnealingPolicy(const vector<string> &arguments);
+    InvExpAnnealingPolicy(const vector<string>& arguments);
 
     double operator()(int32_t genome_number);
 };
@@ -57,9 +57,9 @@ class InvExpAnnealingPolicy : public AnnealingPolicy {
 class SinAnnealingPolicy : public AnnealingPolicy {
     double period, min_p, max_p;
 
-  public:
+   public:
     SinAnnealingPolicy(double period, double min_p, double max_p);
-    SinAnnealingPolicy(const vector<string> &arguments);
+    SinAnnealingPolicy(const vector<string>& arguments);
 
     double operator()(int32_t genome_number);
 };
diff --git a/examm/island.cxx b/examm/island.cxx
index 1313764a..64af839b 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -3,7 +3,6 @@
 using std::upper_bound;
 
 #include <iomanip>
-
 #include <random>
 using std::minstd_rand0;
 using std::uniform_real_distribution;
@@ -20,7 +19,12 @@ using std::vector;
 #include "rnn/rnn_genome.hxx"
 
 Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy)
-    : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false), annealing_policy(annealing_policy) {
+    : id(_id),
+      max_size(_max_size),
+      status(Island::INITIALIZING),
+      erase_again(0),
+      erased(false),
+      annealing_policy(annealing_policy) {
 }
 
 Island::Island(int32_t _id, vector<RNN_Genome*> _genomes, AnnealingPolicy& annealing_policy)
@@ -166,11 +170,10 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         }
     }
 
-
     // inorder insert the new individual
     RNN_Genome* copy = genome->copy();
     copy->set_generation_id(genome->get_generation_id());
-    
+
     vector<double> best = copy->get_best_parameters();
     if (best.size() != 0) {
         copy->set_weights(best);
@@ -252,16 +255,17 @@ void Island::print(string indent) {
 
 void Island::erase_island() {
     structure_set.clear();
-    
-    for (int32_t i = 0; i < (int32_t) genomes.size(); i++)
+
+    for (int32_t i = 0; i < (int32_t) genomes.size(); i++) {
         delete genomes[i];
+    }
 
     genomes.clear();
-    
+
     erased = true;
     erase_again = 5;
     erased_generation_id = latest_generation_id;
-    
+
     Log::debug("Worst island size after erased: %d\n", genomes.size());
 }
 
diff --git a/examm/island.hxx b/examm/island.hxx
index d2120bca..5c5940e1 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -2,7 +2,6 @@
 #define EXAMM_ISLAND_STRATEGY_HXX
 
 #include <algorithm>
-
 #include <functional>
 #include <memory>
 using std::function;
@@ -15,12 +14,11 @@ using std::uniform_real_distribution;
 using std::string;
 
 #include <unordered_map>
-
 #include <unordered_set>
 using std::unordered_set;
 
-#include "rnn/rnn_genome.hxx"
 #include "annealing.hxx"
+#include "rnn/rnn_genome.hxx"
 
 class Island {
    private:
@@ -36,7 +34,7 @@ class Island {
      * The genomes on this island, stored in sorted order best (front) to worst (back).
      */
     vector<RNN_Genome*> genomes;
-    unordered_set<RNN_Genome *, RNN_Genome::StructuralHash> structure_set;
+    unordered_set<RNN_Genome*, RNN_Genome::StructuralHash> structure_set;
 
     AnnealingPolicy& annealing_policy;
 
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index 74c174c9..c3584e51 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -307,7 +307,6 @@ class RNN_Genome {
         size_t operator()(const RNN_Genome* other) const;
     };
 
-
     string get_color(double weight, bool is_recurrent);
     void write_graphviz(string filename);
     void print_equations();

From 9c0ec5bb787c644b0e54c7d134be10d88ded1165 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 18 Mar 2024 19:41:11 -0400
Subject: [PATCH 34/42] Initial implementation

---
 examm/island.cxx | 33 +++++++++++++++++++++++++--------
 examm/island.hxx |  2 ++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/examm/island.cxx b/examm/island.cxx
index 64af839b..2aa6cc19 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -21,20 +21,26 @@ using std::vector;
 Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy)
     : id(_id),
       max_size(_max_size),
+      annealing_policy(annealing_policy),
       status(Island::INITIALIZING),
       erase_again(0),
-      erased(false),
-      annealing_policy(annealing_policy) {
+      erased(false) {
+    using namespace std::chrono;
+    long long t = time_point_cast<nanoseconds>(system_clock::now()).time_since_epoch().count();
+    generator = minstd_rand0(t);
 }
 
 Island::Island(int32_t _id, vector<RNN_Genome*> _genomes, AnnealingPolicy& annealing_policy)
     : id(_id),
       max_size((int32_t) _genomes.size()),
       genomes(_genomes),
+      annealing_policy(annealing_policy),
       status(Island::FILLED),
       erase_again(0),
-      erased(false),
-      annealing_policy(annealing_policy) {
+      erased(false) {
+    using namespace std::chrono;
+    long long t = time_point_cast<nanoseconds>(system_clock::now()).time_since_epoch().count();
+    generator = minstd_rand0(t);
 }
 
 RNN_Genome* Island::get_best_genome() {
@@ -162,7 +168,8 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
     Log::info("getting structural hash\n");
     auto duplicate_it = structure_set.find(genome);
 
-    if (duplicate_it != structure_set.end()) {
+    bool duplicate_exists = duplicate_it != structure_set.end();
+    if (duplicate_exists) {
         RNN_Genome* duplicate = *duplicate_it;
         // TODO: Add annealment here
         if (duplicate->get_fitness() > genome->get_fitness()) {
@@ -179,13 +186,24 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         copy->set_weights(best);
     }
 
-    Log::debug("created copy to insert to island: %d\n", copy->get_group_id());
+    // Only do simulated annealing if the island is full
+    // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept
+    // genomes by deleting a random member of the population./
+    if (genomes.size() == max_size && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) {
+        int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size();
+        
+        RNN_Genome *victim = genomes[index];
+        genomes.erase(genomes.begin() + index);
+        structure_set.erase(victim);
+    }
+
     auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness());
     int32_t insert_index = index_iterator - genomes.begin();
     Log::debug("inserting genome at index: %d\n", insert_index);
 
     if (insert_index >= max_size) {
-        // if we're going to insert this at the back of the population
+        // For simulated annealing: if this is true, then we should remove a random member of the population to insert.
+            // if we're going to insert this at the back of the population
         // its just going to get removed anyways, so we can delete
         // it and report it was not inserted.
         Log::debug("not inserting genome because it is worse than the worst fitness\n");
@@ -199,7 +217,6 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
 
     if (insert_index == 0) {
         // this was a new best genome for this island
-
         Log::info("Island %d: new best fitness found!\n", id);
 
         if (genome->get_fitness() != EXAMM_MAX_DOUBLE) {
diff --git a/examm/island.hxx b/examm/island.hxx
index 5c5940e1..7b977d47 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -36,6 +36,8 @@ class Island {
     vector<RNN_Genome*> genomes;
     unordered_set<RNN_Genome*, RNN_Genome::StructuralHash> structure_set;
 
+    minstd_rand0 generator;
+
     AnnealingPolicy& annealing_policy;
 
     int32_t

From 050e2780859d2bc7e275b934e6e0b26eafa337c1 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 18 Mar 2024 19:41:24 -0400
Subject: [PATCH 35/42] Clang format

---
 examm/island.cxx | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examm/island.cxx b/examm/island.cxx
index 2aa6cc19..2681aade 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -189,10 +189,11 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
     // Only do simulated annealing if the island is full
     // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept
     // genomes by deleting a random member of the population./
-    if (genomes.size() == max_size && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) {
+    if (genomes.size() == max_size
+        && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) {
         int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size();
-        
-        RNN_Genome *victim = genomes[index];
+
+        RNN_Genome* victim = genomes[index];
         genomes.erase(genomes.begin() + index);
         structure_set.erase(victim);
     }
@@ -203,7 +204,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
 
     if (insert_index >= max_size) {
         // For simulated annealing: if this is true, then we should remove a random member of the population to insert.
-            // if we're going to insert this at the back of the population
+        // if we're going to insert this at the back of the population
         // its just going to get removed anyways, so we can delete
         // it and report it was not inserted.
         Log::debug("not inserting genome because it is worse than the worst fitness\n");

From e59c649de177b14d518f600b598200000eee56c6 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 20 Mar 2024 15:23:10 -0400
Subject: [PATCH 36/42] Finishing touches

---
 examm/annealing.cxx                  |  6 +-
 examm/annealing.hxx                  |  8 +--
 examm/examm.cxx                      | 21 +++++--
 examm/examm.hxx                      |  5 +-
 examm/island.cxx                     | 83 +++++++++++++---------------
 examm/island.hxx                     | 25 ++++++++-
 examm/island_speciation_strategy.cxx | 44 ++++++---------
 examm/species.cxx                    |  8 +--
 8 files changed, 104 insertions(+), 96 deletions(-)

diff --git a/examm/annealing.cxx b/examm/annealing.cxx
index 023e872f..6d0b5868 100644
--- a/examm/annealing.cxx
+++ b/examm/annealing.cxx
@@ -9,7 +9,7 @@
 unique_ptr<AnnealingPolicy> AnnealingPolicy::from_arguments(const vector<string>& arguments) {
     string type;
     get_argument(arguments, "--annealing_policy", false, type);
-
+    Log::info("Annealing policy = %s\n", type.c_str());
     if (type == "linear") {
         return unique_ptr<AnnealingPolicy>(new LinearAnnealingPolicy(arguments));
     } else if (type == "inv_exp") {
@@ -57,7 +57,7 @@ InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector<string>& arguments) {
 }
 
 double InvExpAnnealingPolicy::operator()(int32_t genome_number) {
-    return std::pow(1. + genome_number, decay_factor);
+    return std::pow(1. + genome_number, -decay_factor);
 }
 
 SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p)
@@ -82,5 +82,5 @@ SinAnnealingPolicy::SinAnnealingPolicy(const vector<string>& arguments) {
 double SinAnnealingPolicy::operator()(int32_t genome_number) {
     double range = max_p - min_p;
 
-    return (max_p + min_p) / 2. + range / 2. + std::sin(2. * M_PI * genome_number / period);
+    return (max_p + min_p) / 2. + range / 2. * std::sin(2. * M_PI * genome_number / period);
 }
diff --git a/examm/annealing.hxx b/examm/annealing.hxx
index 83addc91..4406610c 100644
--- a/examm/annealing.hxx
+++ b/examm/annealing.hxx
@@ -17,7 +17,7 @@ struct AnnealingPolicy {
      * This represents the probability of inserting the genome, even if it
      * has a fitness value that is worse than the worst member in the population.
      */
-    double operator()(int32_t genome_number);
+    virtual double operator()(int32_t genome_number);
 };
 
 /**
@@ -34,7 +34,7 @@ class LinearAnnealingPolicy : public AnnealingPolicy {
     LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes);
     LinearAnnealingPolicy(const vector<string>& arguments);
 
-    double operator()(int32_t genome_number);
+    virtual double operator()(int32_t genome_number);
 };
 
 /**
@@ -47,7 +47,7 @@ class InvExpAnnealingPolicy : public AnnealingPolicy {
     InvExpAnnealingPolicy(double decay_factor);
     InvExpAnnealingPolicy(const vector<string>& arguments);
 
-    double operator()(int32_t genome_number);
+    virtual double operator()(int32_t genome_number);
 };
 
 /**
@@ -61,5 +61,5 @@ class SinAnnealingPolicy : public AnnealingPolicy {
     SinAnnealingPolicy(double period, double min_p, double max_p);
     SinAnnealingPolicy(const vector<string>& arguments);
 
-    double operator()(int32_t genome_number);
+    virtual double operator()(int32_t genome_number);
 };
diff --git a/examm/examm.cxx b/examm/examm.cxx
index 0c76c500..164d84d1 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -97,8 +97,8 @@ void EXAMM::generate_log() {
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
         (*log_file
-        ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled"
-             "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id";
+        ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled Nodes,Enabled"
+             "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Genome Trainable Parameters,Island Id";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
 
@@ -194,6 +194,7 @@ void EXAMM::update_log(RNN_Genome* genome) {
         long milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(currentClock - startClock).count();
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
                     << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << ","
+                    << best_genome->get_number_weights() << ","
                     << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
                     << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << ","
                     << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << ","
@@ -263,10 +264,10 @@ bool EXAMM::insert_genome(RNN_Genome* genome) {
 
     // write this genome to disk if it was a new best found genome
     if (save_genome_option.compare("all_best_genomes") == 0) {
-        Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size());
-        for (int i = 0; i < 20 && i < save_genome_option.size(); i++) {
-            cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl;
-        }
+        // Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size());
+        // for (int i = 0; i < 20 && i < save_genome_option.size(); i++) {
+        //     cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl;
+        // }
 
         if (insert_position == 0) {
             Log::info("saving genome!");
@@ -288,6 +289,14 @@ bool EXAMM::insert_genome(RNN_Genome* genome) {
 
 // write function to save genomes to file
 void EXAMM::save_genome(RNN_Genome* genome, string genome_name = "rnn_genome") {
+    if (genome->get_fitness() != EXAMM_MAX_DOUBLE) {
+        // need to set the weights for non-initial genomes so we
+        // can generate a proper graphviz file
+        vector<double> best_parameters = genome->get_best_parameters();
+        genome->set_weights(best_parameters);
+        Log::info("set genome parameters to best\n");
+    }
+
     genome->write_graphviz(output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".gv");
     ofstream equations_filestream(
         output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".txt"
diff --git a/examm/examm.hxx b/examm/examm.hxx
index a1e7cc59..3ec90bd9 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -70,7 +70,7 @@ class EXAMM {
     map<string, int32_t> inserted_counts;
     map<string, int32_t> generated_counts;
 
-    string output_directory;
+    const string output_directory;
     ofstream* log_file;
     ofstream* op_log_file;
     double pre_insert_best_mse = 1000000;
@@ -78,8 +78,7 @@ class EXAMM {
 
     std::chrono::time_point<std::chrono::system_clock> startClock;
 
-    string genome_file_name;
-    string save_genome_option;
+    const string save_genome_option;
 
    public:
     EXAMM(
diff --git a/examm/island.cxx b/examm/island.cxx
index 2681aade..dc73e1d5 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -3,9 +3,6 @@
 using std::upper_bound;
 
 #include <iomanip>
-#include <random>
-using std::minstd_rand0;
-using std::uniform_real_distribution;
 
 #include <string>
 using std::string;
@@ -18,31 +15,23 @@ using std::vector;
 #include "island.hxx"
 #include "rnn/rnn_genome.hxx"
 
-Island::Island(int32_t _id, int32_t _max_size, AnnealingPolicy& annealing_policy)
-    : id(_id),
-      max_size(_max_size),
-      annealing_policy(annealing_policy),
-      status(Island::INITIALIZING),
-      erase_again(0),
-      erased(false) {
+Island::Island(int32_t id, int32_t max_size, vector<RNN_Genome *> genomes, int32_t status, AnnealingPolicy& annealing_policy) :
+  id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) {
     using namespace std::chrono;
     long long t = time_point_cast<nanoseconds>(system_clock::now()).time_since_epoch().count();
-    generator = minstd_rand0(t);
-}
+    generator = mt19937_64(t + 1123 * id + 12334 * max_size);
+
+    for (int i = 0; i < 100; i++)
+        generate_canonical<double, 10>(generator);
 
-Island::Island(int32_t _id, vector<RNN_Genome*> _genomes, AnnealingPolicy& annealing_policy)
-    : id(_id),
-      max_size((int32_t) _genomes.size()),
-      genomes(_genomes),
-      annealing_policy(annealing_policy),
-      status(Island::FILLED),
-      erase_again(0),
-      erased(false) {
-    using namespace std::chrono;
-    long long t = time_point_cast<nanoseconds>(system_clock::now()).time_since_epoch().count();
-    generator = minstd_rand0(t);
 }
 
+Island::Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy)
+    : Island(id, max_size, vector<RNN_Genome *>(), Island::INITIALIZING, annealing_policy) {}
+
+Island::Island(int32_t id, vector<RNN_Genome*> genomes, AnnealingPolicy& annealing_policy)
+    : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) {}
+
 RNN_Genome* Island::get_best_genome() {
     if (genomes.size() == 0) {
         return NULL;
@@ -68,6 +57,13 @@ double Island::get_best_fitness() {
     }
 }
 
+double Island::get_best_all_time_fitness() {
+    if (all_time_local_best)
+        return all_time_local_best->get_fitness();
+    else
+        return EXAMM_MAX_DOUBLE;
+}
+
 double Island::get_worst_fitness() {
     RNN_Genome* worst_genome = get_worst_genome();
     if (worst_genome == NULL) {
@@ -153,6 +149,22 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
     double new_fitness = genome->get_fitness();
     Log::info("inserting genome with fitness: %s to island %d\n", parse_fitness(genome->get_fitness()).c_str(), id);
 
+    // Only do simulated annealing if the island is full
+    // With a probability prescribed by the annealing policy (a function of genome number), randomly accept genomes
+    // by deleting a random member of the population.
+    double p = annealing_policy(genome->get_generation_id());
+    Log::info("Annealing policy p = %f\n", p);
+
+    if (is_full() && uniform_real_distribution<>(0.0, 1.0)(generator) < p) {
+        int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size();
+
+        Log::info("Simulated annealing triggered - deleting a random genome %d\n", index);
+
+        RNN_Genome* victim = genomes[index];
+        genomes.erase(genomes.begin() + index);
+        structure_set.erase(victim);
+    }
+
     // discard the genome if the island is full and it's fitness is worse than the worst in thte population
     if (is_full() && new_fitness > get_worst_fitness()) {
         Log::debug(
@@ -160,7 +172,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
             genomes.back()->get_fitness()
         );
         do_population_check(__LINE__, initial_size);
-        return false;
+        return -1;
     }
 
     // check and see if the structural hash of the genome is in the
@@ -186,21 +198,9 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         copy->set_weights(best);
     }
 
-    // Only do simulated annealing if the island is full
-    // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept
-    // genomes by deleting a random member of the population./
-    if (genomes.size() == max_size
-        && uniform_real_distribution<>(0.0, 1.0)(generator) < annealing_policy(copy->get_generation_id())) {
-        int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size();
-
-        RNN_Genome* victim = genomes[index];
-        genomes.erase(genomes.begin() + index);
-        structure_set.erase(victim);
-    }
-
     auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness());
     int32_t insert_index = index_iterator - genomes.begin();
-    Log::debug("inserting genome at index: %d\n", insert_index);
+    Log::info("inserting genome at index: %d\n", insert_index);
 
     if (insert_index >= max_size) {
         // For simulated annealing: if this is true, then we should remove a random member of the population to insert.
@@ -220,13 +220,8 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         // this was a new best genome for this island
         Log::info("Island %d: new best fitness found!\n", id);
 
-        if (genome->get_fitness() != EXAMM_MAX_DOUBLE) {
-            // need to set the weights for non-initial genomes so we
-            // can generate a proper graphviz file
-            vector<double> best_parameters = genome->get_best_parameters();
-            genome->set_weights(best_parameters);
-            Log::info("set genome parameters to best\n");
-        }
+        if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness())
+            all_time_local_best = unique_ptr<RNN_Genome>(genome->copy());
     }
 
     if ((int32_t) genomes.size() >= max_size) {
diff --git a/examm/island.hxx b/examm/island.hxx
index 7b977d47..b6d50104 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -7,6 +7,7 @@
 using std::function;
 
 #include <random>
+using std::mt19937_64;
 using std::minstd_rand0;
 using std::uniform_real_distribution;
 
@@ -34,23 +35,36 @@ class Island {
      * The genomes on this island, stored in sorted order best (front) to worst (back).
      */
     vector<RNN_Genome*> genomes;
+    
+    /**
+     * If we are using simulated annealing, then the genomes vector may not contain the best genome we have discovered.
+     * Keep an additional clone of the best genome here for logging.
+     **/
+    unique_ptr<RNN_Genome> all_time_local_best;
+
+    /**
+     * A set of the genomes this island contains (one entry per genome in Island::genomes).
+     * These are hashed by their structure: the nodes, edges, and their innovation numbers. Weights are not considered.
+     **/
     unordered_set<RNN_Genome*, RNN_Genome::StructuralHash> structure_set;
 
-    minstd_rand0 generator;
+    mt19937_64 generator;
 
     AnnealingPolicy& annealing_policy;
 
     int32_t
         status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or  Island::REPOPULATING */
 
-    int32_t erase_again; /**< a flag to track if this islands has been erased */
-    bool erased;         /**< a flag to track if this islands has been erased */
+    int32_t erase_again = 0; /**< a flag to track if this islands has been erased */
+    bool erased = false;     /**< a flag to track if this islands has been erased */
 
    public:
     const static int32_t INITIALIZING = 0; /**< status flag for if the island is initializing. */
     const static int32_t FILLED = 1;       /**< status flag for if the island is filled. */
     const static int32_t REPOPULATING = 2; /**< status flag for if the island is repopulating. */
 
+    Island(int32_t id, int32_t max_size, vector<RNN_Genome *> genomes, int32_t status, AnnealingPolicy& annealing_policy);
+
     /**
      *  Initializes an island with a given max size.
      *
@@ -71,6 +85,11 @@ class Island {
      */
     double get_best_fitness();
 
+    /**
+     * Returns the best fitness ever obtained by any genome in this island - even if that genome has been removed.
+     **/
+    double get_best_all_time_fitness();
+
     /**
      * Returns the fitness of the worst genome in the island
      *
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index 52941f3b..094cb304 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -1,4 +1,5 @@
 #include <functional>
+#include <sstream>
 using std::function;
 
 #include <chrono>
@@ -186,7 +187,7 @@ int32_t IslandSpeciationStrategy::insert_genome(RNN_Genome* genome) {
         Log::fatal("ERROR: island[%d] is null!\n", island);
     }
     int32_t insert_position = islands[island]->insert_genome(genome);
-    Log::info("Island %d: Insert position was: %d\n", insert_position);
+    Log::info("Island %d: Insert position was: %d\n", island, insert_position);
 
     if (insert_position == 0) {
         if (new_global_best) {
@@ -455,49 +456,40 @@ void IslandSpeciationStrategy::print(string indent) const {
  * Gets speciation strategy information headers for logs
  */
 string IslandSpeciationStrategy::get_strategy_information_headers() const {
+    stringstream oss;
+
     string info_header = "";
-    info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post");
+    oss << ",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post";
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
-        info_header.append(",");
-        info_header.append("Island_");
-        info_header.append(to_string(i));
-        info_header.append("_best_fitness");
-        info_header.append(",");
-        info_header.append("Island_");
-        info_header.append(to_string(i));
-        info_header.append("_worst_fitness");
-    }
-    return info_header;
+        oss << ",Island_" << i << "_best_fitness" 
+            << ",Island_" << i << "_wort_fitness"
+            << ",Island_" << i << "_all_time_best";
+    }
+
+    return oss.str();
 }
 
 /**
  * Gets speciation strategy information values for logs
  */
 string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const {
-    string info_value = "";
 
+    stringstream oss;
     auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id);
-    info_value.append(",");
-    info_value.append(to_string(min_mse_pre));
-    info_value.append(",");
-    info_value.append(to_string(max_mse_pre));
+    oss << "," << min_mse_pre << "," << max_mse_pre;
 
     float min_mse_post = this->get_best_fitness();
     float max_mse_post = this->get_worst_fitness();
-    info_value.append(",");
-    info_value.append(to_string(min_mse_post));
-    info_value.append(",");
-    info_value.append(to_string(max_mse_post));
+    oss << "," << min_mse_post << "," << max_mse_post;
 
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
         double best_fitness = islands[i]->get_best_fitness();
         double worst_fitness = islands[i]->get_worst_fitness();
-        info_value.append(",");
-        info_value.append(to_string(best_fitness));
-        info_value.append(",");
-        info_value.append(to_string(worst_fitness));
+        double all_time_best = islands[i]->get_best_all_time_fitness();
+        oss << "," << best_fitness << "," << worst_fitness << "," << all_time_best;
     }
-    return info_value;
+    
+    return oss.str();
 }
 
 RNN_Genome* IslandSpeciationStrategy::parents_repopulation(
diff --git a/examm/species.cxx b/examm/species.cxx
index 1e650ec7..9081b203 100644
--- a/examm/species.cxx
+++ b/examm/species.cxx
@@ -130,12 +130,6 @@ int32_t Species::insert_genome(RNN_Genome* genome) {
     if (insert_index == 0) {
         // this was a new best genome for this island
         Log::info("new best fitness for island: %d!\n", id);
-        if (genome->get_fitness() != EXAMM_MAX_DOUBLE) {
-            // need to set the weights for non-initial genomes so we
-            // can generate a proper graphviz file
-            vector<double> best_parameters = genome->get_best_parameters();
-            genome->set_weights(best_parameters);
-        }
         species_not_improving_count = 0;
     } else {
         species_not_improving_count++;
@@ -233,4 +227,4 @@ int32_t Species::get_species_not_improving_count() {
 
 void Species::set_species_not_improving_count(int32_t count) {
     species_not_improving_count = count;
-}
\ No newline at end of file
+}

From 19f43fe9a1502a4ceb66a9f8dbf5ddcf3075bb66 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 20 Mar 2024 15:23:30 -0400
Subject: [PATCH 37/42] Format

---
 examm/examm.cxx                      | 15 ++++++++-------
 examm/island.cxx                     | 25 +++++++++++++++----------
 examm/island.hxx                     |  8 +++++---
 examm/island_speciation_strategy.cxx |  8 +++-----
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/examm/examm.cxx b/examm/examm.cxx
index 164d84d1..7d9e7cd4 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -97,7 +97,8 @@ void EXAMM::generate_log() {
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
         (*log_file
-        ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled Nodes,Enabled"
+        ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled "
+             "Nodes,Enabled"
              "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Genome Trainable Parameters,Island Id";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
@@ -194,12 +195,12 @@ void EXAMM::update_log(RNN_Genome* genome) {
         long milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(currentClock - startClock).count();
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
                     << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << ","
-                    << best_genome->get_number_weights() << ","
-                    << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
-                    << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << ","
-                    << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << ","
-                    << genome->get_number_weights() << "," << genome->get_generation_id()
-                    << speciation_strategy->get_strategy_information_values(genome) << endl;
+                    << best_genome->get_number_weights() << "," << best_genome->get_enabled_node_count() << ","
+                    << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count()
+                    << "," << genome->best_validation_mse << "," << pre_insert_best_mse << ","
+                    << (int32_t) (last_genome_inserted ? 1 : 0) << "," << genome->get_number_weights() << ","
+                    << genome->get_generation_id() << speciation_strategy->get_strategy_information_values(genome)
+                    << endl;
         Log::info(
             "mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse,
             best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(),
diff --git a/examm/island.cxx b/examm/island.cxx
index dc73e1d5..eb39ed98 100644
--- a/examm/island.cxx
+++ b/examm/island.cxx
@@ -3,7 +3,6 @@
 using std::upper_bound;
 
 #include <iomanip>
-
 #include <string>
 using std::string;
 using std::to_string;
@@ -15,22 +14,26 @@ using std::vector;
 #include "island.hxx"
 #include "rnn/rnn_genome.hxx"
 
-Island::Island(int32_t id, int32_t max_size, vector<RNN_Genome *> genomes, int32_t status, AnnealingPolicy& annealing_policy) :
-  id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) {
+Island::Island(
+    int32_t id, int32_t max_size, vector<RNN_Genome*> genomes, int32_t status, AnnealingPolicy& annealing_policy
+)
+    : id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) {
     using namespace std::chrono;
     long long t = time_point_cast<nanoseconds>(system_clock::now()).time_since_epoch().count();
     generator = mt19937_64(t + 1123 * id + 12334 * max_size);
 
-    for (int i = 0; i < 100; i++)
+    for (int i = 0; i < 100; i++) {
         generate_canonical<double, 10>(generator);
-
+    }
 }
 
 Island::Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy)
-    : Island(id, max_size, vector<RNN_Genome *>(), Island::INITIALIZING, annealing_policy) {}
+    : Island(id, max_size, vector<RNN_Genome*>(), Island::INITIALIZING, annealing_policy) {
+}
 
 Island::Island(int32_t id, vector<RNN_Genome*> genomes, AnnealingPolicy& annealing_policy)
-    : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) {}
+    : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) {
+}
 
 RNN_Genome* Island::get_best_genome() {
     if (genomes.size() == 0) {
@@ -58,10 +61,11 @@ double Island::get_best_fitness() {
 }
 
 double Island::get_best_all_time_fitness() {
-    if (all_time_local_best)
+    if (all_time_local_best) {
         return all_time_local_best->get_fitness();
-    else
+    } else {
         return EXAMM_MAX_DOUBLE;
+    }
 }
 
 double Island::get_worst_fitness() {
@@ -220,8 +224,9 @@ int32_t Island::insert_genome(RNN_Genome* genome) {
         // this was a new best genome for this island
         Log::info("Island %d: new best fitness found!\n", id);
 
-        if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness())
+        if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness()) {
             all_time_local_best = unique_ptr<RNN_Genome>(genome->copy());
+        }
     }
 
     if ((int32_t) genomes.size() >= max_size) {
diff --git a/examm/island.hxx b/examm/island.hxx
index b6d50104..707bf9db 100644
--- a/examm/island.hxx
+++ b/examm/island.hxx
@@ -7,8 +7,8 @@
 using std::function;
 
 #include <random>
-using std::mt19937_64;
 using std::minstd_rand0;
+using std::mt19937_64;
 using std::uniform_real_distribution;
 
 #include <string>
@@ -35,7 +35,7 @@ class Island {
      * The genomes on this island, stored in sorted order best (front) to worst (back).
      */
     vector<RNN_Genome*> genomes;
-    
+
     /**
      * If we are using simulated annealing, then the genomes vector may not contain the best genome we have discovered.
      * Keep an additional clone of the best genome here for logging.
@@ -63,7 +63,9 @@ class Island {
     const static int32_t FILLED = 1;       /**< status flag for if the island is filled. */
     const static int32_t REPOPULATING = 2; /**< status flag for if the island is repopulating. */
 
-    Island(int32_t id, int32_t max_size, vector<RNN_Genome *> genomes, int32_t status, AnnealingPolicy& annealing_policy);
+    Island(
+        int32_t id, int32_t max_size, vector<RNN_Genome*> genomes, int32_t status, AnnealingPolicy& annealing_policy
+    );
 
     /**
      *  Initializes an island with a given max size.
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index 094cb304..c8605fbe 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -461,9 +461,8 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const {
     string info_header = "";
     oss << ",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post";
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
-        oss << ",Island_" << i << "_best_fitness" 
-            << ",Island_" << i << "_wort_fitness"
-            << ",Island_" << i << "_all_time_best";
+        oss << ",Island_" << i << "_best_fitness" << ",Island_" << i << "_wort_fitness" << ",Island_" << i
+            << "_all_time_best";
     }
 
     return oss.str();
@@ -473,7 +472,6 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const {
  * Gets speciation strategy information values for logs
  */
 string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const {
-
     stringstream oss;
     auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id);
     oss << "," << min_mse_pre << "," << max_mse_pre;
@@ -488,7 +486,7 @@ string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* gen
         double all_time_best = islands[i]->get_best_all_time_fitness();
         oss << "," << best_fitness << "," << worst_fitness << "," << all_time_best;
     }
-    
+
     return oss.str();
 }
 

From 3bfc9fac289773623cc1e85678e4cef6ea3f4712 Mon Sep 17 00:00:00 2001
From: aidanlabella <alabella@brown.edu>
Date: Wed, 3 Apr 2024 19:58:15 -0400
Subject: [PATCH 38/42] add ability to specify delimiter for CSV inputs

---
 time_series/time_series.cxx | 27 +++++++++++++++++++++++----
 time_series/time_series.hxx |  4 +++-
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx
index b91c13a2..2979e9ba 100644
--- a/time_series/time_series.cxx
+++ b/time_series/time_series.cxx
@@ -238,7 +238,7 @@ void TimeSeriesSet::add_time_series(string name) {
     }
 }
 
-TimeSeriesSet::TimeSeriesSet(string _filename, const vector<string>& _fields) {
+TimeSeriesSet::TimeSeriesSet(string _filename, const vector<string>& _fields, char delim) {
     filename = _filename;
     fields = _fields;
 
@@ -251,7 +251,7 @@ TimeSeriesSet::TimeSeriesSet(string _filename, const vector<string>& _fields) {
     }
 
     vector<string> file_fields;
-    string_split(line, ',', file_fields);
+    string_split(line, delim, file_fields);
     for (int32_t i = 0; i < (int32_t) file_fields.size(); i++) {
         // get rid of carriage returns (sometimes windows messes this up)
         file_fields[i].erase(std::remove(file_fields[i].begin(), file_fields[i].end(), '\r'), file_fields[i].end());
@@ -308,7 +308,7 @@ TimeSeriesSet::TimeSeriesSet(string _filename, const vector<string>& _fields) {
         }
 
         vector<string> parts;
-        string_split(line, ',', parts);
+        string_split(line, delim, parts);
 
         if (parts.size() != file_fields.size()) {
             Log::fatal(
@@ -734,7 +734,7 @@ void TimeSeriesSets::load_time_series() {
     for (int32_t i = 0; i < (int32_t) filenames.size(); i++) {
         Log::info("\t%s\n", filenames[i].c_str());
 
-        TimeSeriesSet* ts = new TimeSeriesSet(filenames[i], all_parameter_names);
+        TimeSeriesSet* ts = new TimeSeriesSet(filenames[i], all_parameter_names, this->csv_delimiter);
         time_series.push_back(ts);
 
         rows += ts->get_number_rows();
@@ -831,6 +831,25 @@ TimeSeriesSets* TimeSeriesSets::generate_from_arguments(const vector<string>& ar
         exit(1);
     }
 
+    if (argument_exists(arguments, "--csv_delimiter")) {
+        vector<string> delim_vec;
+        get_argument_vector(arguments, "--csv_delimiter", false, delim_vec);
+
+        // Check the element count before reading any element: calling front() on an empty
+        // vector is undefined behavior, so the size test must short-circuit first.
+        if (delim_vec.size() != 1 || delim_vec.front().size() != 1) {
+            // Exit unless exactly one single-character delimiter was supplied
+            Log::fatal(
+                "The delimiter for CSV files should be a single character.\n"
+            );
+
+            help_message();
+            exit(1);
+        }
+
+        tss->csv_delimiter = delim_vec.front().at(0);
+    }
+
     tss->load_time_series();
 
     tss->normalize_type = "";
diff --git a/time_series/time_series.hxx b/time_series/time_series.hxx
index fada6f51..2ccc75c0 100644
--- a/time_series/time_series.hxx
+++ b/time_series/time_series.hxx
@@ -72,7 +72,7 @@ class TimeSeriesSet {
     TimeSeriesSet();
 
    public:
-    TimeSeriesSet(string _filename, const vector<string>& _fields);
+    TimeSeriesSet(string _filename, const vector<string>& _fields, char delim);
     ~TimeSeriesSet();
     void add_time_series(string name);
 
@@ -115,6 +115,8 @@ class TimeSeriesSet {
 
 class TimeSeriesSets {
    private:
+    char csv_delimiter = ',';
+
     string normalize_type;
 
     vector<string> filenames;

From d665e842ee1618b0c1e5950c8d168b13de4c184a Mon Sep 17 00:00:00 2001
From: aidanlabella <alabella@brown.edu>
Date: Wed, 3 Apr 2024 20:05:56 -0400
Subject: [PATCH 39/42] add script for new delimiter option

---
 scripts/air_quality/evolve_aq.sh | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100755 scripts/air_quality/evolve_aq.sh

diff --git a/scripts/air_quality/evolve_aq.sh b/scripts/air_quality/evolve_aq.sh
new file mode 100755
index 00000000..e36407fe
--- /dev/null
+++ b/scripts/air_quality/evolve_aq.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# This is a script for evolving networks to predict Air Quality data
+# This also doubles as an example for the csv delimiter option
+
+cd build
+
+INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH"
+OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)"
+
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_2"
+mkdir -p $exp_name
+echo "Running base EXAMM code with UCI Air Quality dataset, results will be saved to: "$exp_name
+echo "###-------------------###"
+
+../../build/multithreaded/examm_mt \
+--training_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \
+--test_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \
+--time_offset 1 \
+--input_parameter_names $INPUT_PARAMETERS \
+--output_parameter_names $OUTPUT_PARAMETERS \
+--number_islands 10 \
+--island_size 10 \
+--max_genomes 20000 \
+--number_threads 14 \
+--bp_iterations 15 \
+--normalize min_max \
+--output_directory $exp_name \
+--possible_node_types simple UGRNN MGU GRU delta LSTM \
+--std_message_level INFO \
+--file_message_level NONE \
+--csv_delimiter ";"

From a5fdba975f0b917800c956c568e894ecd292faa2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Fri, 5 Apr 2024 13:11:15 -0400
Subject: [PATCH 40/42] Write island best to disk

---
 common/process_arguments.cxx         |  4 +++-
 examm/island_speciation_strategy.cxx | 15 +++++++++------
 examm/island_speciation_strategy.hxx |  3 ++-
 rnn_examples/train_rnn.cxx           | 18 +++++-------------
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 9419fdd7..fe2a49f0 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -122,9 +122,11 @@ IslandSpeciationStrategy* generate_island_speciation_strategy_from_arguments(
     bool start_filled = argument_exists(arguments, "--start_filled");
     bool tl_epigenetic_weights = argument_exists(arguments, "--tl_epigenetic_weights");
     unique_ptr<AnnealingPolicy> annealing_policy = AnnealingPolicy::from_arguments(arguments);
+    string output_directory = "";
+    get_argument(arguments, "--output_directory", false, output_directory);
 
     IslandSpeciationStrategy* island_strategy = new IslandSpeciationStrategy(
-        number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, seed_genome,
+        number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, output_directory, seed_genome,
         island_ranking_method, repopulation_method, extinction_event_generation_number, num_mutations,
         islands_to_exterminate, max_genomes, repeat_extinction, start_filled, transfer_learning,
         transfer_learning_version, tl_epigenetic_weights, annealing_policy
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index c8605fbe..e05b95d1 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -5,6 +5,8 @@ using std::function;
 #include <chrono>
 
 // #include <iostream>
+#include <sstream>
+using std::stringstream;
 
 #include <random>
 
@@ -24,7 +26,7 @@ using std::string;
  */
 IslandSpeciationStrategy::IslandSpeciationStrategy(
     int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate, double _intra_island_crossover_rate,
-    double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method,
+    double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome, string _island_ranking_method,
     string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations,
     int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled,
     bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights,
@@ -36,6 +38,7 @@ IslandSpeciationStrategy::IslandSpeciationStrategy(
       mutation_rate(_mutation_rate),
       intra_island_crossover_rate(_intra_island_crossover_rate),
       inter_island_crossover_rate(_inter_island_crossover_rate),
+      output_directory(output_directory),
       generated_genomes(0),
       evaluated_genomes(0),
       seed_genome(_seed_genome),
@@ -190,11 +193,11 @@ int32_t IslandSpeciationStrategy::insert_genome(RNN_Genome* genome) {
     Log::info("Island %d: Insert position was: %d\n", island, insert_position);
 
     if (insert_position == 0) {
-        if (new_global_best) {
-            return 0;
-        } else {
-            return 1;
-        }
+        // New best genome for this island: save it to <output_directory>/island_<island>_best.bin.
+        stringstream ss;
+        ss << output_directory << "/island_" << island << "_best.bin";
+        genome->write_to_file(ss.str());
+        return new_global_best ? 0 : 1;  // 0 only for a new global best, 1 for an island-only best
     } else {
         return insert_position;  // will be -1 if not inserted, or > 0 if not the global best
     }
diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx
index fdc41024..bd32d507 100644
--- a/examm/island_speciation_strategy.hxx
+++ b/examm/island_speciation_strategy.hxx
@@ -37,6 +37,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
     RNN_Genome* seed_genome; /**< keep a reference to the seed genome so we can re-use it across islands and not
                                 duplicate innovation numbers. */
 
+    string output_directory;
     string island_ranking_method; /**< The method used to find the worst island in population */
 
     string repopulation_method; /**< The method used to repopulate the island after being erased */
@@ -88,7 +89,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
      */
     IslandSpeciationStrategy(
         int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate,
-        double _intra_island_crossover_rate, double _inter_island_crossover_rate, RNN_Genome* _seed_genome,
+        double _intra_island_crossover_rate, double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome,
         string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number,
         int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction,
         bool _start_filled, bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights,
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index c790b112..a452fe1a 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -116,19 +116,8 @@ int main(int argc, char** argv) {
 
     if (genome_file.size() != 0) {
         genome = new RNN_Genome(genome_file);
-        Log::info("best weights: { ");
-        for (double& d : genome->get_best_parameters()) {
-            Log::info_no_header("%f, ", d);
-        }
-        Log::info("}\n");
-
-        vector<double> params;
-        genome->get_weights(params);
-        Log::info("current weights: { ");
-        for (double& d : params) {
-            Log::info_no_header("%f, ", d);
-        }
-        Log::info("}\n");
+        genome->set_weights(genome->get_best_parameters());
+        Log::info("Number of weights = %d\n", genome->get_number_weights());
     } else {
         string rnn_type;
         get_argument(arguments, "--rnn_type", true, rnn_type);
@@ -222,6 +211,9 @@ int main(int argc, char** argv) {
         genome->set_log_filename(output_directory + "/" + log_filename);
     }
 
+    string output_genome_name = "output_genome.bin";
+    get_argument(arguments, "--output_genome_name", false, output_genome_name);
+
     genome->set_parameter_names(
         time_series_sets->get_input_parameter_names(), time_series_sets->get_output_parameter_names()
     );

From 28fe8329fba140c3258e18a19e0c6c4372bf4019 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 9 Apr 2024 15:52:26 -0400
Subject: [PATCH 41/42] Update build for native optimization

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 65d1f2a8..f51b9f51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,11 +27,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
 # SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3 -fsanitize=address")
-SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3")
+# -march=native tunes the binaries for the CPU of the build host, so they may fail with illegal
+# instructions on other machines; configure with -DEXAMM_NATIVE_OPTIMIZATION=OFF for portable builds.
+option(EXAMM_NATIVE_OPTIMIZATION "Optimize for the build host CPU (-march=native)" ON)
+SET (CMAKE_CXX_FLAGS                " -Wall -O3")
+if (EXAMM_NATIVE_OPTIMIZATION)
+    SET (CMAKE_CXX_FLAGS            "${CMAKE_CXX_FLAGS} -march=native")
+endif ()
 #SET (CMAKE_CXX_FLAGS                " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG")
 #SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
 #SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
-SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
+SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -DNDEBUG")
 
 #SET (CMAKE_SHARED_LINKER_FLAGS      " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG")
 

From 99fda86862c63e256d78f5c3fcc192ba83fdde6b Mon Sep 17 00:00:00 2001
From: aidanlabella <alabella@brown.edu>
Date: Fri, 3 May 2024 15:44:54 -0400
Subject: [PATCH 42/42] add scripts for training/evolution with NASA merra

---
 rnn/inverse_node.cxx                |  2 +-
 rnn_examples/train_rnn.cxx          |  4 ++--
 scripts/air_quality/eval_merra.sh   | 17 ++++++++++++++
 scripts/air_quality/evolve_aq.sh    | 13 +++++++----
 scripts/air_quality/evolve_merra.sh | 35 +++++++++++++++++++++++++++++
 scripts/air_quality/train_aq.sh     | 32 ++++++++++++++++++++++++++
 scripts/air_quality/train_merra.sh  | 32 ++++++++++++++++++++++++++
 7 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100755 scripts/air_quality/eval_merra.sh
 create mode 100755 scripts/air_quality/evolve_merra.sh
 create mode 100755 scripts/air_quality/train_aq.sh
 create mode 100755 scripts/air_quality/train_merra.sh

diff --git a/rnn/inverse_node.cxx b/rnn/inverse_node.cxx
index 202dac29..fff9bcd4 100644
--- a/rnn/inverse_node.cxx
+++ b/rnn/inverse_node.cxx
@@ -19,7 +19,7 @@ double INVERSE_Node::activation_function(double input) {
 
 double INVERSE_Node::derivative_function(double input) {
     double gradient = -1.0 / ((input) * (input));
-    if (isnan(gradient) || isinf(gradient)) {
+    if (std::isnan(gradient) || std::isinf(gradient)) {
         gradient = -1000.0;
     }
     return gradient;
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index a452fe1a..64727f07 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -125,10 +125,10 @@ int main(int argc, char** argv) {
         Log::info("RNN TYPE = %s\n", rnn_type.c_str());
 
         int32_t num_hidden_layers;
-        get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers);
+        get_argument(arguments, "--num_hidden_layers", false, num_hidden_layers);
 
         int32_t max_recurrent_depth;
-        get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
+        get_argument(arguments, "--max_recurrent_depth", false, max_recurrent_depth);
 
         int32_t hidden_layer_size = number_inputs;
         get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
diff --git a/scripts/air_quality/eval_merra.sh b/scripts/air_quality/eval_merra.sh
new file mode 100755
index 00000000..d3de8abd
--- /dev/null
+++ b/scripts/air_quality/eval_merra.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Evaluates a trained genome with evaluate_rnn on the MERRA evaluation CSV.
+# Usage: eval_merra.sh <genome_file>
+
+INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V"
+# OUTPUT_PARAMETERS="CO"
+OUTPUT_PARAMETERS="CO SO4 SO2 O3"
+
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/multivar_B/evaluation"
+mkdir -p "$exp_name"
+
+../../build/rnn_examples/evaluate_rnn \
+--testing_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_eval_1000.csv \
+--time_offset 1 \
+--input_parameter_names $INPUT_PARAMETERS \
+--output_parameter_names $OUTPUT_PARAMETERS \
+--genome_file "${1:?usage: eval_merra.sh <genome_file>}" \
+--output_directory "$exp_name" \
+--std_message_level INFO \
+--file_message_level ERROR
+# --bp_iterations $epochs \
diff --git a/scripts/air_quality/evolve_aq.sh b/scripts/air_quality/evolve_aq.sh
index e36407fe..0136307e 100755
--- a/scripts/air_quality/evolve_aq.sh
+++ b/scripts/air_quality/evolve_aq.sh
@@ -4,10 +4,12 @@
 
 cd build
 
-INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH"
-OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)"
+# INPUT_PARAMETERS="Date Time PT08.S1(CO) PT08.S2(NMHC) PT08.S3(NOx) PT08.S4(NO2) PT08.S5(O3) T RH AH"
+INPUT_PARAMETERS="PT08.S5(O3) T RH AH"
+# OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)"
+OUTPUT_PARAMETERS="CO(GT)"
 
-exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_2"
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/univar3"
 mkdir -p $exp_name
 echo "Running base EXAMM code with UCI Air Quality dataset, results will be saved to: "$exp_name
 echo "###-------------------###"
@@ -19,10 +21,13 @@ echo "###-------------------###"
 --input_parameter_names $INPUT_PARAMETERS \
 --output_parameter_names $OUTPUT_PARAMETERS \
 --number_islands 10 \
+--min_recurrent_depth 10 \
+--max_recurrent_depth 40 \
 --island_size 10 \
 --max_genomes 20000 \
 --number_threads 14 \
---bp_iterations 15 \
+--num_mutations 20 \
+--bp_iterations 20 \
 --normalize min_max \
 --output_directory $exp_name \
 --possible_node_types simple UGRNN MGU GRU delta LSTM \
diff --git a/scripts/air_quality/evolve_merra.sh b/scripts/air_quality/evolve_merra.sh
new file mode 100755
index 00000000..d5a71e31
--- /dev/null
+++ b/scripts/air_quality/evolve_merra.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# This is a script for evolving networks to predict air quality data from the MERRA-2 dataset
+# This also doubles as an example for the csv delimiter option
+
+cd build
+
+INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V"
+# OUTPUT_PARAMETERS="CO"
+OUTPUT_PARAMETERS="CO SO4 SO2 O3"
+
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/mv-A"
+mkdir -p $exp_name
+echo "Running base EXAMM code with MERRA-2 dataset, results will be saved to: "$exp_name
+echo "###-------------------###"
+
+../../build/multithreaded/examm_mt \
+--training_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_100k_23.csv \
+--test_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_100k_23_test.csv \
+--time_offset 1 \
+--input_parameter_names $INPUT_PARAMETERS \
+--output_parameter_names $OUTPUT_PARAMETERS \
+--number_islands 10 \
+--min_recurrent_depth 1 \
+--max_recurrent_depth 100 \
+--island_size 10 \
+--max_genomes 1000 \
+--number_threads 14 \
+--num_mutations 20 \
+--bp_iterations 5 \
+--normalize none \
+--output_directory $exp_name \
+--possible_node_types simple UGRNN MGU GRU delta LSTM \
+--std_message_level INFO \
+--file_message_level NONE \
+--csv_delimiter ","
diff --git a/scripts/air_quality/train_aq.sh b/scripts/air_quality/train_aq.sh
new file mode 100755
index 00000000..1d323ab8
--- /dev/null
+++ b/scripts/air_quality/train_aq.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Trains a single RNN genome with train_rnn on the UCI Air Quality dataset (';'-delimited CSV).
+# Usage: train_aq.sh <genome_file>
+#
+# No --normalize argument is passed here; for data that is not already normalized, add:
+#    --normalize min_max for Min Max normalization, or
+#    --normalize avg_std_dev for Z-score normalization
+
+cd build
+
+INPUT_PARAMETERS="Date Time PT08.S5(O3) T RH AH"
+# OUTPUT_PARAMETERS="CO(GT) NO2(GT) NOx(GT) NMHC(GT)"
+OUTPUT_PARAMETERS="CO(GT)"
+
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/init_mvar_1"
+mkdir -p "${exp_name}/training"
+echo "Running base EXAMM rnn training code with UCI Air Quality dataset, results will be saved to: ${exp_name}/training"
+echo "###-------------------###"
+
+../../build/rnn_examples/train_rnn \
+--training_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \
+--test_filenames /home/aidan/sandbox/DEEPSPrj/data/AirQualityUCI.csv \
+--time_offset 1 \
+--input_parameter_names $INPUT_PARAMETERS \
+--output_parameter_names $OUTPUT_PARAMETERS \
+--bp_iterations 100000 \
+--output_directory "${exp_name}/training" \
+--std_message_level INFO \
+--file_message_level NONE \
+--genome_file "${1:?usage: train_aq.sh <genome_file>}" \
+--learning_rate 0.001 \
+--csv_delimiter ";"
diff --git a/scripts/air_quality/train_merra.sh b/scripts/air_quality/train_merra.sh
new file mode 100755
index 00000000..5fd2931e
--- /dev/null
+++ b/scripts/air_quality/train_merra.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Trains a single RNN genome with train_rnn on the MERRA-2 dataset (','-delimited CSV).
+# Usage: train_merra.sh <genome_file>
+#
+# No --normalize argument is passed here; for data that is not already normalized, add:
+#    --normalize min_max for Min Max normalization, or
+#    --normalize avg_std_dev for Z-score normalization
+
+cd build
+
+INPUT_PARAMETERS="lon lat lev AIRDENS SO4 SO2 RH PS H O3 T U V"
+# OUTPUT_PARAMETERS="CO"
+OUTPUT_PARAMETERS="CO SO4 SO2 O3"
+
+exp_name="/home/aidan/sandbox/DEEPSPrj/output/merra/multivar_B"
+mkdir -p "${exp_name}/training"
+echo "Running base EXAMM rnn training code with MERRA-2 dataset, results will be saved to: ${exp_name}/training"
+echo "###-------------------###"
+
+../../build/rnn_examples/train_rnn \
+--training_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/merra_post.csv \
+--test_filenames /home/aidan/sandbox/DEEPSPrj/data/MERRA/poc_merra_test.csv \
+--time_offset 1 \
+--input_parameter_names $INPUT_PARAMETERS \
+--output_parameter_names $OUTPUT_PARAMETERS \
+--bp_iterations 5 \
+--output_directory "${exp_name}/training" \
+--std_message_level INFO \
+--file_message_level NONE \
+--genome_file "${1:?usage: train_merra.sh <genome_file>}" \
+--learning_rate 0.01 \
+--csv_delimiter ","