From e69fc712b7cf59ca382743e779a16c09fff31aeb Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 30 Jan 2023 14:05:06 -0500
Subject: [PATCH 01/31] Removing c++20 features unsupported by GCC versions <
 12.2

---
 CMakeLists.txt    | 3 ++-
 rnn/dnas_node.cxx | 4 ++--
 rnn/dnas_node.hxx | 7 ++-----
 3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b34e8f0..9093410b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,7 @@ set (EXACT_VERSION_MINOR 33)
 #add_definitions( -DEXACT_VERSION="${EXACT_VERSION_MAJOR}.${EXACT_VERSION_MINOR}" )
 
 SET (PLATFORM 64)
+set(CMAKE_CXX_STANDARD 20)
 
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++")
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS")
@@ -23,7 +24,7 @@ SET (PLATFORM 64)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "-std=c++20 -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS                "-Wall -O3 -funroll-loops -msse3")
 SET (CMAKE_CXX_FLAGS_DEBUG          "-g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "-O4 -funroll-loops -DNDEBUG")
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 2f040703..957eaba5 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -67,13 +67,13 @@ DNASNode::~DNASNode() {
     delete node;
 }
 
-template <uniform_random_bit_generator Rng>
+template <typename Rng>
 void DNASNode::gumbel_noise(Rng &rng, vector<double> &output) {
   for (int i = 0; i < output.size(); i++)
     output[i] = -log(-log(uniform_real_distribution<double>(0.0, 1.0)(rng)));
 }
 
-template <uniform_random_bit_generator Rng>
+template <typename Rng>
 void DNASNode::sample_gumbel_softmax(Rng &rng) {
   z.assign(pi.size(), 0.0);
   x.assign(pi.size(), 0.0);
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index daa26605..1b63532c 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -7,7 +7,6 @@ using std::string;
 #include <random>
 using std::minstd_rand0;
 using std::uniform_real_distribution;
-using std::uniform_random_bit_generator;
 using std::generate_canonical;
 
 #include <vector>
@@ -27,9 +26,8 @@ using std::unique_ptr;
 #define CRYSTALLIZATION_THRESHOLD 50000
 
 class DNASNode : public RNN_Node_Interface {
-
  private:
-  template <uniform_random_bit_generator R>
+  template <typename R>
   static void gumbel_noise(R &rng, vector<double> &output);
   void calculate_maxi();
 
@@ -75,12 +73,11 @@ class DNASNode : public RNN_Node_Interface {
   vector<vector<double>> node_outputs;
 
  public:
-
   DNASNode(vector<RNN_Node_Interface *> &&nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1);
   DNASNode(const DNASNode &node);
   ~DNASNode();
 
-  template <uniform_random_bit_generator Rng>
+  template <typename Rng>
   void sample_gumbel_softmax(Rng &rng);
   void calculate_z();
 

From 903dcfb40baf8d06fa770875693a0d198efd95e2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:26:23 -0500
Subject: [PATCH 02/31] Fix minimum c++ requirement in CMakeLists.txt to be
 compatible with GCC and clang

---
 CMakeLists.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9093410b..2ec362ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.6)
+cmake_minimum_required (VERSION 2.8)
 project (EXACT)
 
 # The version number.
@@ -24,10 +24,10 @@ set(CMAKE_CXX_STANDARD 20)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "-Wall -O3 -funroll-loops -msse3")
-SET (CMAKE_CXX_FLAGS_DEBUG          "-g")
-SET (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG")
-SET (CMAKE_CXX_FLAGS_RELEASE        "-O4 -funroll-loops -DNDEBUG")
+SET (CMAKE_CXX_FLAGS                "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
+SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
+SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
 
 set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib)
 

From 09c5cbcafdb848f4a493c3502baba7fd5db275ca Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:37:33 -0500
Subject: [PATCH 03/31] properly specify minimum CMake version

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2ec362ca..558fbefe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.8)
+cmake_minimum_required (VERSION 3.1)
 project (EXACT)
 
 # The version number.

From 8ec09ca9826a8e7022c46382c14a7934e02a0731 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 30 Jan 2023 15:41:37 -0500
Subject: [PATCH 04/31] Fixed bug introduced during merge

---
 rnn/dnas_node.hxx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index f56b404b..82ebff65 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -27,7 +27,7 @@ using std::unique_ptr;
 
 class DNASNode : public RNN_Node_Interface {
    private:
-    template <uniform_random_bit_generator R>
+    template <typename R>
     static void gumbel_noise(R &rng, vector<double> &output);
     void calculate_maxi();
 
@@ -77,7 +77,7 @@ class DNASNode : public RNN_Node_Interface {
     DNASNode(const DNASNode &node);
     ~DNASNode();
 
-    template <uniform_random_bit_generator Rng>
+    template <typename Rng>
     void sample_gumbel_softmax(Rng &rng);
     void calculate_z();
 

From 1c9bcfff4042964727bc41f944738f4e82c2d5d2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 11:12:56 -0500
Subject: [PATCH 05/31] Tweaking for cluster

---
 CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 558fbefe..1af3a314 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 3.1)
+cmake_minimum_required (VERSION 3.8)
 project (EXACT)
 
 # The version number.
@@ -9,6 +9,7 @@ set (EXACT_VERSION_MINOR 33)
 
 SET (PLATFORM 64)
 set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -stdlib=libstdc++")
 #SET (CMAKE_CXX_FLAGS                "-std=c++11 -Wall -O3 -funroll-loops -msse3 -fsanitize=address -DNAN_CHECKS")
@@ -24,13 +25,14 @@ set(CMAKE_CXX_STANDARD 20)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                "${CMAKE_CXX_FLAGS} -Wall -O3 -funroll-loops -msse3")
+SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3")
 SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
 
 set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /opt/local/lib)
 
+message(STATUS "${CMAKE_CXX_FLAGS}")
 message(STATUS "project source dir is ${PROJECT_SOURCE_DIR}")
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/")

From 9c6be46a3141e5cbde600156f9f7b08134600acb Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 11:35:42 -0500
Subject: [PATCH 06/31] Added updated cluster instructions to the README.md

---
 README.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7fc588a3..449e7194 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # Getting Started and Prerequisites
 
 EXONA has been developed to compile using CMake, which should be installed before attempting to compile. To use the MPI version, a version of MPI (such as OpenMPI) should be installed. EXACT currently requires libtiff and libpng
-The EXACT algorithm can also checkpoint to a database, however this is not required.  To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX.  Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++11 compatible compiler.
+The EXACT algorithm can also checkpoint to a database, however this is not required.  To enable this I recommend installing libmysql-dev via apt-get on Linux systems, or mysql via [homebrew](https://brew.sh) on OSX.  Other than that, EXACT/EXALT/EXAMM has no prerequesites other than c++20 compatible compiler.
 
 If you are using OSX, to set up the environment:
 
@@ -15,6 +15,18 @@ brew install libpng
 xcode-select --install
 ```
 
+On the RIT Cluster Computer, load the following packages using spack:
+```
+# CMake
+spack load /ux27hbj
+
+# GCC
+spack load gcc@11.2.0 
+
+# libtiff
+spack load /ycf67m3
+```
+
 To build:
 
 ```
@@ -24,6 +36,8 @@ To build:
 ~/exact/build $ make
 ```
 
+You can add `-DCMAKE_BUILD_TYPE=Release` to the invocation of `cmake` for a release build (slower compile times, faster execution).
+
 You may also want to have graphviz installed so you can generate images of the evolved neural networks.  EXACT/EXALT/EXAMM will write out evolved genomes in a .gv (graphviz) format for this. For example, can generate a pdf from a gv file (assuming graphviz is installed with):
 
 ```

From d7023b64e771dad351ae946f962018de2c4a1acc Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 31 Jan 2023 11:41:24 -0500
Subject: [PATCH 07/31] Updated format script

---
 scripts/util/format.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/util/format.sh b/scripts/util/format.sh
index 85dddbfd..76fb9efa 100755
--- a/scripts/util/format.sh
+++ b/scripts/util/format.sh
@@ -1,2 +1,4 @@
 #!/bin/bash
-find . -type f -name "*.*xx" -exec clang-format -style=file -i {} \;
+for folder in common examm mpi multithreaded rnn rnn_examples rnn_tests time_series weights word_series; do
+  find $folder -type f -name "*.*xx" -exec clang-format -style=file -i {} \;
+done

From 89b3410cebc573430cd244b43ea070ca203fc98f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 31 Jan 2023 11:41:46 -0500
Subject: [PATCH 08/31] Formatting

---
 rnn/dnas_node.hxx | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 3b341195..435e5400 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -8,7 +8,6 @@ using std::string;
 using std::generate_canonical;
 using std::minstd_rand0;
 using std::uniform_real_distribution;
-using std::generate_canonical;
 
 #include <vector>
 using std::vector;
@@ -28,7 +27,7 @@ using std::unique_ptr;
 class DNASNode : public RNN_Node_Interface {
    private:
     template <typename R>
-    static void gumbel_noise(R &rng, vector<double> &output);
+    static void gumbel_noise(R& rng, vector<double>& output);
 
     void calculate_maxi();
 
@@ -81,7 +80,7 @@ class DNASNode : public RNN_Node_Interface {
     ~DNASNode();
 
     template <typename Rng>
-    void sample_gumbel_softmax(Rng &rng);
+    void sample_gumbel_softmax(Rng& rng);
     void calculate_z();
 
     virtual void initialize_lamarckian(

From cbf757457e0d56235e6c58a64114d3143f3c0f58 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Tue, 31 Jan 2023 12:49:34 -0500
Subject: [PATCH 09/31] Added OpenMPI package to cluster instructions

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 449e7194..c187316a 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,9 @@ spack load /ux27hbj
 # GCC
 spack load gcc@11.2.0 
 
+# OpenMPI
+spack load openmpi@4.1.2
+
 # libtiff
 spack load /ycf67m3
 ```

From 06f12b588b1bcf1ba4380192eaa7ad2eb18dbf0f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 7 Feb 2023 12:09:54 -0500
Subject: [PATCH 10/31] Adding argument parsing for DNAS

---
 common/process_arguments.cxx |  9 +++++
 examm/examm.cxx              | 18 +--------
 examm/examm.hxx              |  2 +-
 rnn/dnas_node.cxx            | 73 +++++++++++++++++++++++++-----------
 rnn/dnas_node.hxx            |  2 +-
 rnn/generate_nn.cxx          | 11 ++++++
 rnn/rnn_genome.cxx           | 30 ++++-----------
 rnn/rnn_genome.hxx           |  2 +
 rnn/rnn_node_interface.cxx   | 35 ++++++++++++++---
 rnn/rnn_node_interface.hxx   |  9 ++++-
 10 files changed, 124 insertions(+), 67 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index f4bf87a8..885f28a0 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -32,6 +32,15 @@ EXAMM* generate_examm_from_arguments(
     // get_argument(arguments, "--sequence_length_lower_bound", false, sequence_length_lower_bound);
     // get_argument(arguments, "--sequence_length_upper_bound", false, sequence_length_upper_bound);
 
+    vector<string> dnas_node_type_strings;
+    get_argument_vector(arguments, "--dnas_node_types", false, dnas_node_type_strings);
+    if (dnas_node_type_strings.size() != 0) {
+        dnas_node_types.clear();
+        for (auto node_type : dnas_node_type_strings) {
+            dnas_node_types.push_back(node_type_from_string(node_type));
+        }
+    }
+
     GenomeProperty* genome_property = new GenomeProperty();
     genome_property->generate_genome_property_from_arguments(arguments);
     genome_property->get_time_series_parameters(time_series_sets);
diff --git a/examm/examm.cxx b/examm/examm.cxx
index ce137d6f..f017ab8b 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -205,22 +205,8 @@ void EXAMM::update_log() {
 void EXAMM::set_possible_node_types(vector<string> possible_node_type_strings) {
     possible_node_types.clear();
 
-    for (int32_t i = 0; i < (int32_t) possible_node_type_strings.size(); i++) {
-        string node_type_s = possible_node_type_strings[i];
-
-        bool found = false;
-
-        for (int32_t j = 0; j < NUMBER_NODE_TYPES; j++) {
-            if (NODE_TYPES[j].compare(node_type_s) == 0) {
-                found = true;
-                possible_node_types.push_back(j);
-            }
-        }
-
-        if (!found) {
-            Log::error("unknown node type: '%s'\n", node_type_s.c_str());
-            exit(1);
-        }
+    for (auto node_type : possible_node_type_strings) {
+        possible_node_types.push_back(node_type_from_string(node_type));
     }
 }
 
diff --git a/examm/examm.hxx b/examm/examm.hxx
index 5ccb545e..ac5d56eb 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -63,7 +63,7 @@ class EXAMM {
     double split_node_rate;
     double merge_node_rate;
 
-    vector<int32_t> possible_node_types;
+    vector<int32_t> possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
 
     vector<string> op_log_ordering;
     map<string, int32_t> inserted_counts;
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index ba6f3ba3..46a20c69 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -1,4 +1,9 @@
 #include <algorithm>
+using std::sort;
+
+#include <utility>
+using std::pair;
+
 #include <cassert>
 #include <cmath>
 using std::max;
@@ -72,7 +77,7 @@ DNASNode::~DNASNode() {
 
 template <typename Rng>
 void DNASNode::gumbel_noise(Rng& rng, vector<double>& output) {
-    for (int i = 0; i < output.size(); i++) {
+    for (auto i = 0; i < output.size(); i++) {
         output[i] = -log(-log(uniform_real_distribution<double>(0.0, 1.0)(rng)));
     }
 }
@@ -92,18 +97,45 @@ void DNASNode::calculate_z() {
 
     xtotal = 0.0;
     double emax = -10000000;
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         x[i] = g[i] + log(pi[i]);
         x[i] /= tao;
         emax = max(emax, x[i]);
     }
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         x[i] = exp(emax - x[i]);
         xtotal += x[i];
     }
-    for (int i = 0; i < z.size(); i++) {
+    for (auto i = 0; i < z.size(); i++) {
         z[i] = x[i] / xtotal;
     }
+
+    if (k > 0) {
+        pair<int32_t, double> ps_with_indices[z.size()];
+        for (int32_t i = 0; i < (int32_t) z.size(); i++) {
+            ps_with_indices[i] = pair(i, z[i]);
+        }
+
+        std::sort(ps_with_indices, ps_with_indices + z.size(),
+            [](const pair<int32_t, double>& a, const pair<int32_t, double>& b) {
+                // Descending order
+                return a.second > b.second;
+            }
+        );
+
+        double total = 0.0;
+        for (int i = 0; i < k; i++) {
+            total += ps_with_indices[i].second;
+        }
+
+        for (int i = 0; i < z.size(); i++) {
+            z[i] = 0.0;
+        }
+
+        for (int i = 0; i < k; i++) {
+            z[ps_with_indices[i].first] = ps_with_indices[i].second / total;
+        }
+    }
 }
 
 void DNASNode::reset(int32_t series_length) {
@@ -151,7 +183,7 @@ void DNASNode::input_fired(int32_t time, double incoming_output) {
         node_outputs[time][maxi] = nodes[maxi]->output_values[time];
         output_values[time] = nodes[maxi]->output_values[time];
     } else {
-        for (int i = 0; i < nodes.size(); i++) {
+        for (auto i = 0; i < nodes.size(); i++) {
             auto node = nodes[i];
             node->input_fired(time, input_values[time]);
             node_outputs[time][i] = node->output_values[time];
@@ -190,7 +222,7 @@ void DNASNode::try_update_deltas(int32_t time) {
         d_input[time] += nodes[maxi]->d_input[time];
 
     } else {
-        for (int i = 0; i < z.size(); i++) {
+        for (auto i = 0; i < z.size(); i++) {
             nodes[i]->output_fired(time, delta * z[i]);
             double p = (x[i] / pi[i]);
             p *= ((delta * node_outputs[time][i]) / xtotal);
@@ -254,7 +286,7 @@ void DNASNode::set_weights(const vector<double>& parameters) {
 
 void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
     // Log::info("pi start %d; ", offset);
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         parameters[offset++] = pi[i];
     }
     // Log::info_no_header("pi end %d \n", offset);
@@ -265,24 +297,23 @@ void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
 
 void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
     }
     // Log::info("Pi indices: %d-%d\n", start, offset);
     for (auto node : nodes) {
         node->set_weights(offset, parameters);
     }
-    Log::info("Just set weights\n");
     calculate_z();
-    string s = "Pi = { ";
-    for (auto p : pi) {
-        s += std::to_string(p) + ", ";
-    }
-    Log::info("%s }\n", s.c_str());
+    // string s = "Pi = { ";
+    // for (auto p : pi) {
+    //     s += std::to_string(p) + ", ";
+    // }
+    // Log::info("%s }\n", s.c_str());
 }
 
 void DNASNode::set_pi(const vector<double>& new_pi) {
-    for (int i = 0; i < pi.size(); i++) {
+    for (auto i = 0; i < pi.size(); i++) {
         pi[i] = new_pi[i];
     }
     calculate_maxi();
@@ -293,7 +324,7 @@ void DNASNode::calculate_maxi() {
         maxi = 0;
         double max_pi = pi[0];
 
-        for (int i = 1; i < nodes.size(); i++) {
+        for (auto i = 1; i < nodes.size(); i++) {
             if (pi[i] > max_pi) {
                 max_pi = pi[i];
                 maxi = i;
@@ -314,11 +345,11 @@ void DNASNode::get_gradients(vector<double>& gradients) {
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
         offset += pi.size();
-        for (int i = 0; i < nodes.size(); i++) {
+        for (auto i = 0; i < nodes.size(); i++) {
             RNN_Node_Interface* node = nodes[i];
             if (i == maxi) {
                 node->get_gradients(temp);
-                for (int j = 0; j < temp.size(); j++) {
+                for (auto j = 0; j < temp.size(); j++) {
                     gradients[offset++] = temp[j];
                 }
             } else {
@@ -328,13 +359,13 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
-        for (int i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i];
+        for (auto i = 0; i < pi.size(); i++) {
+            gradients[offset++] = d_pi[i] * 0.1;
         }
 
         for (auto node : nodes) {
             node->get_gradients(temp);
-            for (int i = 0; i < temp.size(); i++) {
+            for (auto i = 0; i < temp.size(); i++) {
                 gradients[offset++] = temp[i];
             }
         }
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 435e5400..776119cc 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -22,7 +22,7 @@ using std::unique_ptr;
 #include "rnn_node.hxx"
 #include "rnn_node_interface.hxx"
 
-#define CRYSTALLIZATION_THRESHOLD 50000
+#define CRYSTALLIZATION_THRESHOLD 1000
 
 class DNASNode : public RNN_Node_Interface {
    private:
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index c451a098..f9e3c61a 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -39,12 +39,23 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co
         case DNAS_NODE:
             Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n");
             exit(1);
+        default:
+            Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind);
+            exit(1);
     }
+
+    // Unreachable
+    return nullptr;
 }
 
 DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector<int32_t>& node_types) {
     vector<RNN_Node_Interface*> nodes(node_types.size());
 
+    if (node_types.size() == 0) {
+        Log::fatal("Node types cannot be empty - failed to create DNAS node!\n");
+        exit(1);
+    }
+
     int i = 0;
     for (auto node_type : node_types) {
         nodes[i++] = create_hidden_node(node_type, innovation_counter, depth);
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index c7b60b43..370998ee 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -59,6 +59,9 @@ using std::vector;
 #include "rnn_node.hxx"
 #include "time_series/time_series.hxx"
 #include "ugrnn_node.hxx"
+#include "generate_nn.hxx"
+
+extern vector<int32_t> dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
 
 string parse_fitness(double fitness) {
     if (fitness == EXAMM_MAX_DOUBLE) {
@@ -1628,27 +1631,10 @@ RNN_Node_Interface* RNN_Genome::create_node(
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
 
     Log::trace("CREATING NODE, type: '%s'\n", NODE_TYPES[node_type].c_str());
-    if (node_type == LSTM_NODE) {
-        n = new LSTM_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == DELTA_NODE) {
-        n = new Delta_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == GRU_NODE) {
-        n = new GRU_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == ENARC_NODE) {
-        n = new ENARC_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == ENAS_DAG_NODE) {
-        n = new ENAS_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == RANDOM_DAG_NODE) {
-        n = new RANDOM_DAG_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == MGU_NODE) {
-        n = new MGU_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == UGRNN_NODE) {
-        n = new UGRNN_Node(++node_innovation_count, HIDDEN_LAYER, depth);
-    } else if (node_type == SIMPLE_NODE || node_type == JORDAN_NODE || node_type == ELMAN_NODE) {
-        n = new RNN_Node(++node_innovation_count, HIDDEN_LAYER, depth, node_type);
+    if (node_type != DNAS_NODE) {
+        n = create_hidden_node(node_type, node_innovation_count, depth);
     } else {
-        Log::fatal("ERROR: attempted to create a node with an unknown node type: %d\n", node_type);
-        exit(1);
+        n = create_dnas_node(node_innovation_count, depth, dnas_node_types);
     }
 
     if (mutated_component_weight == WeightType::LAMARCKIAN) {
@@ -3213,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     } else if (node_type == DNAS_NODE) {
         int32_t n_nodes;
         bin_istream.read((char*) &n_nodes, sizeof(int32_t));
-
+        
         int32_t counter;
         bin_istream.read((char*) &counter, sizeof(int32_t));
         vector<double> pi(n_nodes, 0.0);
@@ -3224,7 +3210,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
             nodes[i] = RNN_Genome::read_node_from_stream(bin_istream);
         }
 
-        DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, node_type, depth, counter);
+        DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, layer_type, depth, counter);
         dnas_node->set_pi(pi);
         node = (RNN_Node_Interface*) dnas_node;
     } else {
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index acba093c..deaf8bce 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -32,6 +32,8 @@ using std::vector;
 // mysql can't handle the max float value for some reason
 #define EXAMM_MAX_DOUBLE 10000000
 
+extern vector<int32_t> dnas_node_types;
+
 string parse_fitness(double fitness);
 
 class RNN_Genome {
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index 2ad8d065..55f5e057 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -1,20 +1,45 @@
 #include <cmath>
+using std::max;
+
+#include <algorithm>
+
 #include <fstream>
 using std::ostream;
 
 #include <string>
 using std::string;
 
-#include <cmath>
-using std::max;
-
 #include "common/log.hxx"
 #include "rnn/rnn_genome.hxx"
 #include "rnn_node_interface.hxx"
 
-extern const int32_t NUMBER_NODE_TYPES = 9;
+extern const int32_t NUMBER_NODE_TYPES = 11;
 extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU",
-                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG"};
+                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG", "dnas"};
+extern const unordered_map<string, int32_t> string_to_node_type = {
+  { "simple", SIMPLE_NODE     },
+  { "jordan", JORDAN_NODE     },
+  { "elman", ELMAN_NODE      },
+  { "ugrnn", UGRNN_NODE      },
+  { "mgu", MGU_NODE        },
+  { "gru", GRU_NODE        },
+  { "delta", DELTA_NODE   },
+  { "lstm", LSTM_NODE },
+  { "enarc", ENARC_NODE      },
+  { "enas", ENAS_DAG_NODE   },
+  { "dnas", DNAS_NODE       }
+};
+
+int32_t node_type_from_string(string& node_type) {
+    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); });
+
+    if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) {
+        return it->second;
+    } else {
+        Log::fatal("Invalid node type '%s'\n", node_type.c_str());
+        exit(1);
+    }
+}
 
 double bound(double value) {
     if (value < -10.0) {
diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx
index 26dc0f3c..15ec45cd 100644
--- a/rnn/rnn_node_interface.hxx
+++ b/rnn/rnn_node_interface.hxx
@@ -12,6 +12,9 @@ using std::uniform_real_distribution;
 #include <string>
 using std::string;
 
+#include <unordered_map>
+using std::unordered_map;
+
 #include <vector>
 using std::vector;
 
@@ -25,6 +28,8 @@ class RNN;
 
 extern const int32_t NUMBER_NODE_TYPES;
 extern const string NODE_TYPES[];
+extern const unordered_map<string, int32_t> string_to_node_type;
+int32_t node_type_from_string(string& node_type);
 
 #define SIMPLE_NODE     0
 #define JORDAN_NODE     1
@@ -39,6 +44,8 @@ extern const string NODE_TYPES[];
 #define RANDOM_DAG_NODE 10
 #define DNAS_NODE       11
 
+int32_t node_type_from_string(string& node_type);
+
 double sigmoid(double value);
 double sigmoid_derivative(double value);
 double tanh_derivative(double value);
@@ -112,7 +119,7 @@ class RNN_Node_Interface {
 
     virtual RNN_Node_Interface* copy() const = 0;
 
-    void write_to_stream(ostream& out);
+    virtual void write_to_stream(ostream& out);
 
     int32_t get_node_type() const;
     int32_t get_layer_type() const;

From ad5a7a3523fa05a965fb28d3baf4f089b2c67910 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Thu, 9 Feb 2023 13:54:11 -0500
Subject: [PATCH 11/31] Formatting

---
 common/process_arguments.cxx |  6 ++++++
 examm/examm.hxx              |  3 ++-
 rnn/dnas_node.cxx            |  3 ++-
 rnn/dnas_node.hxx            |  2 +-
 rnn/generate_nn.cxx          |  4 +++-
 rnn/rnn_genome.cxx           |  6 +++---
 rnn/rnn_node_interface.cxx   | 31 ++++++++++++++++---------------
 7 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 885f28a0..f2e29ac0 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -39,6 +39,12 @@ EXAMM* generate_examm_from_arguments(
         for (auto node_type : dnas_node_type_strings) {
             dnas_node_types.push_back(node_type_from_string(node_type));
         }
+
+        Log::info("Using following node types for dnas: ");
+        for (auto s : dnas_node_type_strings) {
+            Log::info_no_header("%s", s.c_str());
+        }
+        Log::info_no_header("\n");
     }
 
     GenomeProperty* genome_property = new GenomeProperty();
diff --git a/examm/examm.hxx b/examm/examm.hxx
index ac5d56eb..c0c0ee03 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -63,7 +63,8 @@ class EXAMM {
     double split_node_rate;
     double merge_node_rate;
 
-    vector<int32_t> possible_node_types = { SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
+    vector<int32_t> possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE,
+                                           MGU_NODE,    GRU_NODE,    DELTA_NODE, LSTM_NODE};
 
     vector<string> op_log_ordering;
     map<string, int32_t> inserted_counts;
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 46a20c69..dcdab7e0 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -116,7 +116,8 @@ void DNASNode::calculate_z() {
             ps_with_indices[i] = pair(i, z[i]);
         }
 
-        std::sort(ps_with_indices, ps_with_indices + z.size(),
+        std::sort(
+            ps_with_indices, ps_with_indices + z.size(),
             [](const pair<int32_t, double>& a, const pair<int32_t, double>& b) {
                 // Descending order
                 return a.second > b.second;
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 776119cc..c3d74e6b 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -62,7 +62,7 @@ class DNASNode : public RNN_Node_Interface {
     int32_t maxi = -1;
 
     // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
-    int32_t k = -1;
+    int32_t k = 1;
 
     // Whether to re-sample the gumbel softmax distribution when resetting the node.
     // Can be set externally using DNASNode::set_stochastic
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index f9e3c61a..a84fb36f 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -40,7 +40,9 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co
             Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n");
             exit(1);
         default:
-            Log::fatal("If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind);
+            Log::fatal(
+                "If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind
+            );
             exit(1);
     }
 
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 370998ee..7e452ad0 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -50,6 +50,7 @@ using std::vector;
 #include "dnas_node.hxx"
 #include "enarc_node.hxx"
 #include "enas_dag_node.hxx"
+#include "generate_nn.hxx"
 #include "gru_node.hxx"
 #include "lstm_node.hxx"
 #include "mgu_node.hxx"
@@ -59,9 +60,8 @@ using std::vector;
 #include "rnn_node.hxx"
 #include "time_series/time_series.hxx"
 #include "ugrnn_node.hxx"
-#include "generate_nn.hxx"
 
-extern vector<int32_t> dnas_node_types = { SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE };
+extern vector<int32_t> dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE};
 
 string parse_fitness(double fitness) {
     if (fitness == EXAMM_MAX_DOUBLE) {
@@ -3199,7 +3199,7 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     } else if (node_type == DNAS_NODE) {
         int32_t n_nodes;
         bin_istream.read((char*) &n_nodes, sizeof(int32_t));
-        
+
         int32_t counter;
         bin_istream.read((char*) &counter, sizeof(int32_t));
         vector<double> pi(n_nodes, 0.0);
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index 55f5e057..ab5796b2 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -2,7 +2,6 @@
 using std::max;
 
 #include <algorithm>
-
 #include <fstream>
 using std::ostream;
 
@@ -14,24 +13,26 @@ using std::string;
 #include "rnn_node_interface.hxx"
 
 extern const int32_t NUMBER_NODE_TYPES = 11;
-extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU",
-                                    "GRU",    "delta",  "LSTM",  "ENARC", "ENAS_DAG", "dnas"};
+extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN",    "MGU", "GRU",
+                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "dnas"};
 extern const unordered_map<string, int32_t> string_to_node_type = {
-  { "simple", SIMPLE_NODE     },
-  { "jordan", JORDAN_NODE     },
-  { "elman", ELMAN_NODE      },
-  { "ugrnn", UGRNN_NODE      },
-  { "mgu", MGU_NODE        },
-  { "gru", GRU_NODE        },
-  { "delta", DELTA_NODE   },
-  { "lstm", LSTM_NODE },
-  { "enarc", ENARC_NODE      },
-  { "enas", ENAS_DAG_NODE   },
-  { "dnas", DNAS_NODE       }
+    {"simple",   SIMPLE_NODE},
+    {"jordan",   JORDAN_NODE},
+    { "elman",    ELMAN_NODE},
+    { "ugrnn",    UGRNN_NODE},
+    {   "mgu",      MGU_NODE},
+    {   "gru",      GRU_NODE},
+    { "delta",    DELTA_NODE},
+    {  "lstm",     LSTM_NODE},
+    { "enarc",    ENARC_NODE},
+    {  "enas", ENAS_DAG_NODE},
+    {  "dnas",     DNAS_NODE}
 };
 
 int32_t node_type_from_string(string& node_type) {
-    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c){ return std::tolower(c); });
+    std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c) {
+        return std::tolower(c);
+    });
 
     if (auto it = string_to_node_type.find(node_type); it != string_to_node_type.end()) {
         return it->second;

From eda79a02f422d6b1a5dc600acefcccac5f09f7f9 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 12 Apr 2023 10:54:22 -0400
Subject: [PATCH 12/31] Committing experiment scripts

---
 ground_truth_experiments/cell_experiments.sh | 40 ++++++++++++++++++++
 ground_truth_experiments/source_genomes.sh   | 33 ++++++++++++++++
 rnn/dnas_node.cxx                            | 30 +++++++++++++--
 rnn/dnas_node.hxx                            |  5 ++-
 rnn_examples/train_rnn.cxx                   | 25 +++++++-----
 5 files changed, 118 insertions(+), 15 deletions(-)
 create mode 100755 ground_truth_experiments/cell_experiments.sh
 create mode 100755 ground_truth_experiments/source_genomes.sh

diff --git a/ground_truth_experiments/cell_experiments.sh b/ground_truth_experiments/cell_experiments.sh
new file mode 100755
index 00000000..9c0e29d4
--- /dev/null
+++ b/ground_truth_experiments/cell_experiments.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+
+offset=1
+bp_epoch=1000
+
+for SIZE in 1 2 4; do
+  for CELL_TYPE in dnas; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      output_dir=ground_truth_experiments/results/$CELL_TYPE/$SIZE/$fold
+      mkdir -p $output_dir
+      Release/rnn_examples/train_rnn \
+          --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+          --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+          --time_offset $offset \
+          --input_parameter_names ${=INPUT_PARAMETERS} \
+          --output_parameter_names ${=OUTPUT_PARAMETERS} \
+          --bp_iterations $bp_epoch \
+          --stochastic \
+          --rnn_type $CELL_TYPE \
+          --normalize min_max \
+          --num_hidden_layers $SIZE \
+          --hidden_layer_size $SIZE \
+          --random_sequence_length \
+          --sequence_length_lower_bound 50 \
+          --sequence_length_upper_bound 100 \
+          --max_recurrent_depth 1 \
+          --weight_update adagrad \
+          --output_directory $output_dir \
+          --log_filename fitness.csv \
+          --learning_rate 0.01 \
+          --std_message_level ERROR \
+          --file_message_level INFO &
+    done
+  done
+  wait
+done
+
diff --git a/ground_truth_experiments/source_genomes.sh b/ground_truth_experiments/source_genomes.sh
new file mode 100755
index 00000000..1c251134
--- /dev/null
+++ b/ground_truth_experiments/source_genomes.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/zsh
+# This is an example of running the EXAMM MPI version on the c172 dataset
+#
+# The c172 dataset is not normalized
+# To run a dataset that is not normalized, make sure to add one of these arguments:
+#    --normalize min_max for Min Max normalization, or
+#    --normalize avg_std_dev for Z-score normalization
+
+INPUT_PARAMETERS="AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd"
+OUTPUT_PARAMETERS="Pitch"
+
+for i in 0 1 2 3 4 5 6 7 8 9; do
+  exp_name="ground_truth_experiments/results/source_genomes/$i"
+  mkdir -p $exp_name
+  echo $exp_name
+  mpirun -np 5 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset 1 \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=OUTPUT_PARAMETERS} \
+    --number_islands 8 \
+    --island_size 8 \
+    --max_genomes 10000 \
+    --bp_iterations 5 \
+    --num_mutations 2 \
+    --normalize min_max \
+    --output_directory $exp_name \
+    --possible_node_types simple UGRNN MGU GRU delta LSTM \
+    --std_message_level ERROR \
+    --file_message_level INFO &
+done
+wait
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index dcdab7e0..f87868f3 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -49,7 +49,6 @@ DNASNode::DNASNode(const DNASNode& src) : RNN_Node_Interface(src.innovation_numb
     g = src.g;
     x = src.x;
     xtotal = src.xtotal;
-    tao = src.tao;
     stochastic = src.stochastic;
     counter = src.counter;
     maxi = src.maxi;
@@ -92,8 +91,32 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) {
     calculate_z();
 }
 
+double DNASNode::calculate_pi_lr() {
+    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
+    if (percentage_done < 0.33) {
+        return 0.0;
+    } else if (percentage_done < 0.66) {
+        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
+        return 0.5 + percentage_done_with_phase * .5;
+    } else {
+        return 0.1;
+    }
+}
+
+double DNASNode::calculate_tao() {
+    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
+    if (percentage_done < 0.33) {
+        return 1.33;
+    } else if (percentage_done < 0.66) {
+        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
+        return 1.33 - percentage_done_with_phase * 0.66;
+    } else {
+        return 0.33;
+    }
+}
+
 void DNASNode::calculate_z() {
-    tao = max(1.0 / 3.0, 1.0 / (1.0 + (double) counter * 0.05));
+    tao = calculate_tao();
 
     xtotal = 0.0;
     double emax = -10000000;
@@ -360,8 +383,9 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
+        double pi_lr = calculate_pi_lr();
         for (auto i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i] * 0.1;
+            gradients[offset++] = d_pi[i] * pi_lr;
         }
 
         for (auto node : nodes) {
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index c3d74e6b..76aa6969 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -56,10 +56,9 @@ class DNASNode : public RNN_Node_Interface {
     // A vector to put gumbel noise into; just to avoid re-allocation
     vector<double> noise;
 
-    // Temperature used when drawing samples from Gumbel-Softmax(pi)
-    double tao = 1.0;
     int32_t counter = 0;
     int32_t maxi = -1;
+    double tao;
 
     // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
     int32_t k = 1;
@@ -82,6 +81,8 @@ class DNASNode : public RNN_Node_Interface {
     template <typename Rng>
     void sample_gumbel_softmax(Rng& rng);
     void calculate_z();
+    double calculate_tao();
+    double calculate_pi_lr();
 
     virtual void initialize_lamarckian(
         minstd_rand0& generator, NormalDistribution& normal_distribution, double mu, double sigma
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index ffdf8999..8c5b0b1c 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -100,6 +100,9 @@ int main(int argc, char** argv) {
     int32_t max_recurrent_depth;
     get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
 
+    int32_t hidden_layer_size = number_inputs;
+    get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
+
     WeightRules* weight_rules = new WeightRules(arguments);
 
     weight_update_method = new WeightUpdate();
@@ -112,59 +115,59 @@ int main(int argc, char** argv) {
     Log::info("RNN TYPE = %s\n", rnn_type.c_str());
     if (rnn_type == "lstm") {
         genome = create_lstm(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "gru") {
         genome = create_gru(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "delta") {
         genome = create_delta(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "mgu") {
         genome = create_mgu(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "ugrnn") {
         genome = create_ugrnn(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "ff") {
         genome = create_ff(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "jordan") {
         genome = create_jordan(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
 
     } else if (rnn_type == "elman") {
         genome = create_elman(
-            input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth,
+            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
             weight_rules
         );
     } else if (rnn_type == "dnas") {
-        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE};
+        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
         genome = create_dnas_nn(
             input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
             weight_rules
         );
     } else {
-        Log::fatal("ERROR: incorrect rnn type\n");
+        Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
         Log::fatal("Possibilities are:\n");
         Log::fatal("    lstm\n");
         Log::fatal("    gru\n");
@@ -232,6 +235,8 @@ int main(int argc, char** argv) {
     genome->get_weights(best_parameters);
     rnn->set_weights(best_parameters);
 
+    genome->write_to_file(output_directory + "/output_genome.bin");
+
     Log::info("TRAINING ERRORS:\n");
     Log::info("MSE: %lf\n", genome->get_mse(best_parameters, training_inputs, training_outputs));
     Log::info("MAE: %lf\n", genome->get_mae(best_parameters, training_inputs, training_outputs));

From cd597a43b03a901a23ed5fdbb4a0f97a964ae997 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 20 Sep 2023 18:01:43 -0400
Subject: [PATCH 13/31] Commit for AISTATS results

---
 common/files.hxx                              |   2 +
 common/log.cxx                                |   8 +-
 common/process_arguments.cxx                  |  17 +-
 initial_integration_experiments/dnas.zsh      |  55 ++++++
 .../post_training_dnas.zsh                    |  31 +++
 mpi/examm_mpi.cxx                             |   7 +-
 rnn/dnas_node.cxx                             |  49 +++--
 rnn/dnas_node.hxx                             |  12 +-
 rnn/rnn_genome.cxx                            |  76 +++++++-
 rnn/rnn_genome.hxx                            |   4 +
 rnn_examples/CMakeLists.txt                   |   3 +
 rnn_examples/dnas_info.cxx                    |  96 +++++++++
 rnn_examples/train_rnn.cxx                    | 182 ++++++++++--------
 13 files changed, 427 insertions(+), 115 deletions(-)
 create mode 100755 initial_integration_experiments/dnas.zsh
 create mode 100755 initial_integration_experiments/post_training_dnas.zsh
 create mode 100644 rnn_examples/dnas_info.cxx

diff --git a/common/files.hxx b/common/files.hxx
index ac23ff0d..8c4c8a43 100644
--- a/common/files.hxx
+++ b/common/files.hxx
@@ -1,6 +1,8 @@
 #ifndef EXACT_BOINC_COMMON_HXX
 #define EXACT_BOINC_COMMON_HXX
 
+#include <stdint.h>
+
 #include <stdexcept>
 using std::runtime_error;
 
diff --git a/common/log.cxx b/common/log.cxx
index 623475e8..6f82e67f 100644
--- a/common/log.cxx
+++ b/common/log.cxx
@@ -79,11 +79,11 @@ int8_t Log::parse_level_from_string(string level) {
 void Log::initialize(const vector<string>& arguments) {
     // TODO: should read these from the CommandLine (to be created)
 
-    string std_message_level_str, file_message_level_str;
+    string std_message_level_str = "INFO", file_message_level_str = "NONE";
 
-    get_argument(arguments, "--std_message_level", true, std_message_level_str);
-    get_argument(arguments, "--file_message_level", true, file_message_level_str);
-    get_argument(arguments, "--output_directory", true, output_directory);
+    get_argument(arguments, "--std_message_level", false, std_message_level_str);
+    get_argument(arguments, "--file_message_level", false, file_message_level_str);
+    get_argument(arguments, "--output_directory", false, output_directory);
 
     std_message_level = parse_level_from_string(std_message_level_str);
     file_message_level = parse_level_from_string(file_message_level_str);
diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index f2e29ac0..4577d86c 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -10,7 +10,7 @@ using std::vector;
 EXAMM* generate_examm_from_arguments(
     const vector<string>& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules,
     RNN_Genome* seed_genome
-) {
+) { 
     Log::info("Getting arguments for EXAMM\n");
     int32_t island_size;
     get_argument(arguments, "--island_size", true, island_size);
@@ -186,11 +186,18 @@ void get_train_validation_data(
     time_series_sets->export_training_series(time_offset, train_inputs, train_outputs);
     time_series_sets->export_test_series(time_offset, validation_inputs, validation_outputs);
 
-    int32_t sequence_length = 0;
-    if (get_argument(arguments, "--sequence_length", false, sequence_length)) {
-        Log::info("Slicing input training data with time sequence length: %d\n", sequence_length);
-        slice_input_data(train_inputs, train_outputs, sequence_length);
+    int32_t train_sequence_length = 0;
+    if (get_argument(arguments, "--train_sequence_length", false, train_sequence_length)) {
+        Log::info("Slicing input training data with time sequence length: %d\n", train_sequence_length);
+        slice_input_data(train_inputs, train_outputs, train_sequence_length);
+    }
+
+    int32_t validation_sequence_length = 0;
+    if (get_argument(arguments, "--validation_sequence_length", false, validation_sequence_length)) {
+        Log::info("Slicing input validation data with time sequence length: %d\n", validation_sequence_length);
+        slice_input_data(validation_inputs, validation_outputs, validation_sequence_length);
     }
+
     Log::info("Generating time series data finished! \n");
 }
 
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
new file mode 100755
index 00000000..490e0c3b
--- /dev/null
+++ b/initial_integration_experiments/dnas.zsh
@@ -0,0 +1,55 @@
+#!/usr/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --stochastic \
+      --possible_node_types DNAS \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --train_sequence_length 100 \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 10000 \
+      --island_size 8 \
+      --number_islands 8 \
+      --dnas_k $k
+
+  best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+for crystalize_iters in 128 256 512 1024; do
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3; do
+        run_examm &
+      done
+      wait
+      for fold in 4 5 6 7; do
+        run_examm
+      done
+      wait
+    done
+  done
+done
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
new file mode 100755
index 00000000..f3d355f1
--- /dev/null
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -0,0 +1,31 @@
+#!/usr/bin/zsh
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+
+offset=1
+
+post_training() {
+
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset 1 \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $OUTPUT_DIRECTORY \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 100 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $CRYSTALIZE_ITERS \
+        --dnas_k $k
+ 
+}
+
+post_training
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index 7886d91d..c1f1dd1c 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -203,13 +203,18 @@ void worker(int32_t rank) {
         } else if (tag == GENOME_LENGTH_TAG) {
             Log::debug("received genome!\n");
             RNN_Genome* genome = receive_genome_from(0);
-
+            
             // have each worker write the backproagation to a separate log file
             string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank);
             Log::set_id(log_id);
+
+            vector<double> params;
+            genome->get_weights(params);
+
             genome->backpropagate_stochastic(
                 training_inputs, training_outputs, validation_inputs, validation_outputs, weight_update_method
             );
+
             Log::release_id(log_id);
 
             // go back to the worker's log for MPI communication
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index f87868f3..ac0e0aa1 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -1,6 +1,8 @@
 #include <algorithm>
 using std::sort;
 
+#include <iomanip>
+
 #include <utility>
 using std::pair;
 
@@ -13,13 +15,16 @@ using std::max;
 #include "common/log.hxx"
 #include "dnas_node.hxx"
 
+int32_t DNASNode::CRYSTALLIZATION_THRESHOLD = 1000;
+int32_t DNASNode::k = -1;
+
 DNASNode::DNASNode(
     vector<RNN_Node_Interface*>&& _nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter
 )
     : RNN_Node_Interface(_innovation_number, _type, _depth),
       nodes(_nodes),
       pi(vector<double>(nodes.size(), 1.0)),
-      z(vector<double>(nodes.size())),
+      z(vector<double>(nodes.size(), 0.0)),
       x(vector<double>(nodes.size())),
       g(vector<double>(nodes.size())),
       d_pi(vector<double>(nodes.size())),
@@ -87,20 +92,11 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) {
     x.assign(pi.size(), 0.0);
 
     gumbel_noise(rng, g);
-
     calculate_z();
 }
 
 double DNASNode::calculate_pi_lr() {
-    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
-    if (percentage_done < 0.33) {
-        return 0.0;
-    } else if (percentage_done < 0.66) {
-        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 0.5 + percentage_done_with_phase * .5;
-    } else {
-        return 0.1;
-    }
+    return 0.1;
 }
 
 double DNASNode::calculate_tao() {
@@ -109,9 +105,9 @@ double DNASNode::calculate_tao() {
         return 1.33;
     } else if (percentage_done < 0.66) {
         double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 1.33 - percentage_done_with_phase * 0.66;
+        return 1.33 - percentage_done_with_phase * 1.15;
     } else {
-        return 0.33;
+        return 0.18;
     }
 }
 
@@ -162,6 +158,23 @@ void DNASNode::calculate_z() {
     }
 }
 
+void DNASNode::print_info() {
+    printf(" ");
+    int best_pi_idx = 0;
+    for (int i = 0; i < nodes.size(); i++) {
+       printf("%-10s & ", std::to_string(pi[i]).c_str());
+        if (pi[i] > pi[best_pi_idx])
+            best_pi_idx = i;
+    }
+    printf("\n");
+    Log::info("Node types: ");
+    for (auto node : nodes) {
+        Log::info_no_header("%d ", node->node_type);
+    }
+    Log::info_no_header("\n ");
+    Log::info("Best node: %i, node type: %d\n", best_pi_idx, nodes[best_pi_idx]->node_type);
+}
+
 void DNASNode::reset(int32_t series_length) {
     d_pi = vector<double>(pi.size(), 0.0);
     d_input = vector<double>(series_length, 0.0);
@@ -309,6 +322,7 @@ void DNASNode::set_weights(const vector<double>& parameters) {
 }
 
 void DNASNode::get_weights(int32_t& offset, vector<double>& parameters) const {
+    int start = offset;
     // Log::info("pi start %d; ", offset);
     for (auto i = 0; i < pi.size(); i++) {
         parameters[offset++] = pi[i];
@@ -323,17 +337,14 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
+        if (pi[i] < 0.1)
+            pi[i] = 0.1;
     }
-    // Log::info("Pi indices: %d-%d\n", start, offset);
+
     for (auto node : nodes) {
         node->set_weights(offset, parameters);
     }
     calculate_z();
-    // string s = "Pi = { ";
-    // for (auto p : pi) {
-    //     s += std::to_string(p) + ", ";
-    // }
-    // Log::info("%s }\n", s.c_str());
 }
 
 void DNASNode::set_pi(const vector<double>& new_pi) {
diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx
index 76aa6969..00867ffe 100644
--- a/rnn/dnas_node.hxx
+++ b/rnn/dnas_node.hxx
@@ -22,8 +22,6 @@ using std::unique_ptr;
 #include "rnn_node.hxx"
 #include "rnn_node_interface.hxx"
 
-#define CRYSTALLIZATION_THRESHOLD 1000
-
 class DNASNode : public RNN_Node_Interface {
    private:
     template <typename R>
@@ -60,9 +58,6 @@ class DNASNode : public RNN_Node_Interface {
     int32_t maxi = -1;
     double tao;
 
-    // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
-    int32_t k = 1;
-
     // Whether to re-sample the gumbel softmax distribution when resetting the node.
     // Can be set externally using DNASNode::set_stochastic
     bool stochastic = true;
@@ -71,6 +66,11 @@ class DNASNode : public RNN_Node_Interface {
     vector<vector<double>> node_outputs;
 
    public:
+    static int32_t CRYSTALLIZATION_THRESHOLD;
+
+    // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one)
+    static int32_t k;
+
     DNASNode(
         vector<RNN_Node_Interface*>&& nodes, int32_t _innovation_number, int32_t _type, double _depth,
         int32_t counter = -1
@@ -110,6 +110,8 @@ class DNASNode : public RNN_Node_Interface {
     virtual void reset(int32_t _series_length);
     virtual void write_to_stream(ostream& out);
 
+    void print_info();
+
     virtual RNN_Node_Interface* copy() const;
 
     void set_stochastic(bool stochastic);
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 7e452ad0..72868d5e 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -3221,16 +3221,35 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
     node->enabled = enabled;
     return node;
 }
+
+#define MAGIC 0xFA
+
+#define read_magic(place) \
+      { \
+      uint8_t boo = MAGIC;\
+      bin_istream.read((char *) &boo, sizeof(uint8_t)); \
+      if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \
+      }
+
+#define write_magic() \
+  {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));}
+
 void RNN_Genome::read_from_stream(istream& bin_istream) {
     Log::debug("READING GENOME FROM STREAM\n");
+    
+    read_magic(__LINE__);
 
     bin_istream.read((char*) &generation_id, sizeof(int32_t));
     bin_istream.read((char*) &group_id, sizeof(int32_t));
     bin_istream.read((char*) &bp_iterations, sizeof(int32_t));
 
+    read_magic(__LINE__);
+    
     bin_istream.read((char*) &use_dropout, sizeof(bool));
     bin_istream.read((char*) &dropout_probability, sizeof(double));
 
+    read_magic(__LINE__);
+    
     WeightType weight_initialize = WeightType::NONE;
     WeightType weight_inheritance = WeightType::NONE;
     WeightType mutated_component_weight = WeightType::NONE;
@@ -3239,6 +3258,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &weight_inheritance, sizeof(int32_t));
     bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t));
 
+    read_magic(__LINE__);
+    
     weight_rules = new WeightRules();
     weight_rules->set_weight_initialize_method(weight_initialize);
     weight_rules->set_weight_inheritance_method(weight_inheritance);
@@ -3260,8 +3281,10 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream generator_iss(generator_str);
     generator_iss >> generator;
 
-    string rng_0_1_str;
-    read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
+    read_magic(__LINE__);
+    
+    // string rng_0_1_str;
+    // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
     // So for some reason this was serialized incorrectly for some genomes,
     // but the value should always be the same so we really don't need to de-serialize it anways and can just
     // assign it a constant value
@@ -3275,6 +3298,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream generated_by_map_iss(generated_by_map_str);
     read_map(generated_by_map_iss, generated_by_map);
 
+    read_magic(__LINE__);
+    
     bin_istream.read((char*) &best_validation_mse, sizeof(double));
     bin_istream.read((char*) &best_validation_mae, sizeof(double));
 
@@ -3286,6 +3311,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     initial_parameters.assign(initial_parameters_v, initial_parameters_v + n_initial_parameters);
     delete[] initial_parameters_v;
 
+    read_magic(__LINE__);
+    
     int32_t n_best_parameters;
     bin_istream.read((char*) &n_best_parameters, sizeof(int32_t));
     Log::debug("reading %d best parameters.\n", n_best_parameters);
@@ -3294,6 +3321,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     best_parameters.assign(best_parameters_v, best_parameters_v + n_best_parameters);
     delete[] best_parameters_v;
 
+    read_magic(__LINE__);
+    
     input_parameter_names.clear();
     int32_t n_input_parameter_names;
     bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t));
@@ -3304,6 +3333,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         input_parameter_names.push_back(input_parameter_name);
     }
 
+    read_magic(__LINE__);
+
     output_parameter_names.clear();
     int32_t n_output_parameter_names;
     bin_istream.read((char*) &n_output_parameter_names, sizeof(int32_t));
@@ -3314,6 +3345,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         output_parameter_names.push_back(output_parameter_name);
     }
 
+    read_magic(__LINE__);
+    
     int32_t n_nodes;
     bin_istream.read((char*) &n_nodes, sizeof(int32_t));
     Log::debug("reading %d nodes.\n", n_nodes);
@@ -3321,6 +3354,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     nodes.clear();
     for (int32_t i = 0; i < n_nodes; i++) {
         nodes.push_back(RNN_Genome::read_node_from_stream(bin_istream));
+        read_magic(__LINE__);
     }
 
     int32_t n_edges;
@@ -3347,6 +3381,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         // innovation_list.push_back(innovation_number);
         edge->enabled = enabled;
         edges.push_back(edge);
+        read_magic(__LINE__);
     }
 
     int32_t n_recurrent_edges;
@@ -3378,6 +3413,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
         // innovation_list.push_back(innovation_number);
         recurrent_edge->enabled = enabled;
         recurrent_edges.push_back(recurrent_edge);
+        read_magic(__LINE__);
     }
 
     read_binary_string(bin_istream, normalize_type, "normalize_type");
@@ -3402,6 +3438,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     istringstream normalize_std_devs_iss(normalize_std_devs_str);
     read_map(normalize_std_devs_iss, normalize_std_devs);
 
+    read_magic(__LINE__);
+    
     assign_reachability();
 }
 
@@ -3425,13 +3463,20 @@ void RNN_Genome::write_to_file(string bin_filename) {
 
 void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     Log::debug("WRITING GENOME TO STREAM\n");
+
+    write_magic();
+
     bin_ostream.write((char*) &generation_id, sizeof(int32_t));
     bin_ostream.write((char*) &group_id, sizeof(int32_t));
     bin_ostream.write((char*) &bp_iterations, sizeof(int32_t));
 
+    write_magic();
+    
     bin_ostream.write((char*) &use_dropout, sizeof(bool));
     bin_ostream.write((char*) &dropout_probability, sizeof(double));
 
+    write_magic();
+    
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
     WeightType weight_inheritance = weight_rules->get_weight_inheritance_method();
     WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method();
@@ -3439,6 +3484,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &weight_inheritance, sizeof(int32_t));
     bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t));
 
+    write_magic();
+    
     Log::debug("generation_id: %d\n", generation_id);
     Log::debug("bp_iterations: %d\n", bp_iterations);
 
@@ -3456,16 +3503,20 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     string generator_str = generator_oss.str();
     write_binary_string(bin_ostream, generator_str, "generator");
 
-    ostringstream rng_0_1_oss;
-    rng_0_1_oss << rng_0_1;
-    string rng_0_1_str = rng_0_1_oss.str();
-    write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1");
+    write_magic();
+    
+    // ostringstream rng_0_1_oss;
+    // rng_0_1_oss << rng_0_1;
+    // string rng_0_1_str = rng_0_1_oss.str();
+    // write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1");
 
     ostringstream generated_by_map_oss;
     write_map(generated_by_map_oss, generated_by_map);
     string generated_by_map_str = generated_by_map_oss.str();
     write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map");
 
+    write_magic();
+    
     bin_ostream.write((char*) &best_validation_mse, sizeof(double));
     bin_ostream.write((char*) &best_validation_mae, sizeof(double));
 
@@ -3474,18 +3525,24 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &n_initial_parameters, sizeof(int32_t));
     bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size());
 
+    write_magic();
+    
     int32_t n_best_parameters = (int32_t) best_parameters.size();
     bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t));
     if (n_best_parameters) {
         bin_ostream.write((char*) &best_parameters[0], sizeof(double) * best_parameters.size());
     }
 
+    write_magic();
+    
     int32_t n_input_parameter_names = (int32_t) input_parameter_names.size();
     bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) {
         write_binary_string(bin_ostream, input_parameter_names[i], "input_parameter_names[" + std::to_string(i) + "]");
     }
 
+    write_magic();
+    
     int32_t n_output_parameter_names = (int32_t) output_parameter_names.size();
     bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) {
@@ -3494,6 +3551,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
         );
     }
 
+    write_magic();
+    
     int32_t n_nodes = (int32_t) nodes.size();
     bin_ostream.write((char*) &n_nodes, sizeof(int32_t));
     Log::debug("writing %d nodes.\n", n_nodes);
@@ -3504,6 +3563,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
             nodes[i]->depth, nodes[i]->parameter_name.c_str()
         );
         nodes[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     int32_t n_edges = (int32_t) edges.size();
@@ -3516,6 +3576,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
             edges[i]->output_innovation_number
         );
         edges[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     int32_t n_recurrent_edges = (int32_t) recurrent_edges.size();
@@ -3529,6 +3590,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
         );
 
         recurrent_edges[i]->write_to_stream(bin_ostream);
+        write_magic();
     }
 
     write_binary_string(bin_ostream, normalize_type, "normalize_type");
@@ -3552,6 +3614,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_map(normalize_std_devs_oss, normalize_std_devs);
     string normalize_std_devs_str = normalize_std_devs_oss.str();
     write_binary_string(bin_ostream, normalize_std_devs_str, "normalize_std_devs");
+
+    write_magic();
 }
 
 void RNN_Genome::update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count) {
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index deaf8bce..d6330512 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -325,6 +325,10 @@ class RNN_Genome {
     );
     vector<RNN_Node_Interface*> pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type);
 
+    const vector<RNN_Node_Interface *> &get_nodes() {
+        return this->nodes;
+    }
+
     void update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count);
 
     vector<int32_t> get_innovation_list();
diff --git a/rnn_examples/CMakeLists.txt b/rnn_examples/CMakeLists.txt
index 2bfda532..f5e294c6 100644
--- a/rnn_examples/CMakeLists.txt
+++ b/rnn_examples/CMakeLists.txt
@@ -16,3 +16,6 @@ target_link_libraries(evaluate_rnns_multi_offset examm_strategy exact_common exa
 add_executable(rnn_statistics rnn_statistics.cxx)
 target_link_libraries(rnn_statistics examm_strategy exact_common exact_time_series exact_weights examm_nn  ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread)
 
+add_executable(dnas_info dnas_info.cxx)
+target_link_libraries(dnas_info examm_strategy exact_common exact_time_series exact_weights examm_nn  ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread)
+
diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx
new file mode 100644
index 00000000..74fd6519
--- /dev/null
+++ b/rnn_examples/dnas_info.cxx
@@ -0,0 +1,96 @@
+#include <chrono>
+#include <fstream>
+using std::getline;
+using std::ifstream;
+using std::ofstream;
+
+#include <random>
+using std::minstd_rand0;
+using std::uniform_real_distribution;
+
+#include <string>
+using std::string;
+
+#include <vector>
+using std::vector;
+
+#include "common/arguments.hxx"
+#include "common/files.hxx"
+#include "common/log.hxx"
+#include "rnn/generate_nn.hxx"
+#include "rnn/gru_node.hxx"
+#include "rnn/lstm_node.hxx"
+#include "rnn/rnn_edge.hxx"
+#include "rnn/rnn_genome.hxx"
+#include "rnn/rnn_node.hxx"
+#include "rnn/rnn_node_interface.hxx"
+#include "time_series/time_series.hxx"
+#include "weights/weight_rules.hxx"
+#include "weights/weight_update.hxx"
+
+vector<vector<vector<double> > > training_inputs;
+vector<vector<vector<double> > > training_outputs;
+vector<vector<vector<double> > > test_inputs;
+vector<vector<vector<double> > > test_outputs;
+// NOTE(review): the three sequence-length settings below are not referenced anywhere else in this file.
+bool random_sequence_length;
+int32_t sequence_length_lower_bound = 30;
+int32_t sequence_length_upper_bound = 100;
+// NOTE(review): of these, only `rnn` is read below and nothing in this file assigns it; main() shadows `genome`.
+RNN_Genome* genome;
+RNN* rnn;
+WeightUpdate* weight_update_method;
+int32_t bp_iterations;
+bool using_dropout;
+double dropout_probability;
+// NOTE(review): neither of the two output-related globals below is referenced again in this file.
+ofstream* log_file;
+string output_directory;
+
+// Negated sum of rnn->prediction_mae over every training series, after loading `parameters` into rnn.
+double objective_function(const vector<double>& parameters) {
+    rnn->set_weights(parameters);
+
+    const int32_t series_count = (int32_t) training_inputs.size();
+    double mae_total = 0.0;
+    for (int32_t s = 0; s < series_count; s++) {
+        mae_total += rnn->prediction_mae(training_inputs[s], training_outputs[s], false, true, 0.0);
+    }
+    return -mae_total;
+}
+
+// Loads `parameters` into rnn, logs rnn->prediction_mse for each test series, and returns the
+// negated sum of those per-series values.
+double test_objective_function(const vector<double>& parameters) {
+    rnn->set_weights(parameters);
+
+    const int32_t series_count = (int32_t) test_inputs.size();
+    double mse_total = 0.0;
+    for (int32_t s = 0; s < series_count; s++) {
+        const double series_mse = rnn->prediction_mse(test_inputs[s], test_outputs[s], false, true, 0.0);
+        mse_total += series_mse;
+        Log::info("output for series[%d]: %lf\n", s, series_mse);
+    }
+    return -mse_total;
+}
+
+int main(int argc, char** argv) {
+    vector<string> arguments = vector<string>(argv, argv + argc);
+    // Logging is initialized from the same argument vector before anything else runs.
+    Log::initialize(arguments);
+    Log::set_id("main");
+    // --filename is handed to the RNN_Genome constructor below; `true` presumably marks it required (confirm).
+    string filename;
+    get_argument(arguments, "--filename", true, filename);
+    // For every node that is a DNASNode, print the file name followed by that node's print_info() output.
+    RNN_Genome genome(filename);
+
+    for (auto node : genome.get_nodes()) {
+        if (DNASNode *d = dynamic_cast<DNASNode*>(node)) {
+          std::cout << "'" << filename << "': ";
+          d->print_info();
+        }
+    }
+
+    Log::release_id("main");
+}
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 8c5b0b1c..02d7db80 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -85,24 +85,20 @@ int main(int argc, char** argv) {
     int32_t time_offset = 1;
     get_argument(arguments, "--time_offset", true, time_offset);
 
+    int32_t crystallization_threshold = 1000;
+    get_argument(arguments, "--crystalize_iters", false, crystallization_threshold);
+    DNASNode::CRYSTALLIZATION_THRESHOLD = crystallization_threshold;
+
+    int32_t k = -1;
+    get_argument(arguments, "--dnas_k", false, k);
+    DNASNode::k = k;
+
     time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
     time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
 
     int number_inputs = time_series_sets->get_number_inputs();
     // int number_outputs = time_series_sets->get_number_outputs();
 
-    string rnn_type;
-    get_argument(arguments, "--rnn_type", true, rnn_type);
-
-    int32_t num_hidden_layers;
-    get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers);
-
-    int32_t max_recurrent_depth;
-    get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
-
-    int32_t hidden_layer_size = number_inputs;
-    get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
-
     WeightRules* weight_rules = new WeightRules(arguments);
 
     weight_update_method = new WeightUpdate();
@@ -111,74 +107,110 @@ int main(int argc, char** argv) {
     vector<string> input_parameter_names = time_series_sets->get_input_parameter_names();
     vector<string> output_parameter_names = time_series_sets->get_output_parameter_names();
 
-    RNN_Genome* genome;
-    Log::info("RNN TYPE = %s\n", rnn_type.c_str());
-    if (rnn_type == "lstm") {
-        genome = create_lstm(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "gru") {
-        genome = create_gru(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "delta") {
-        genome = create_delta(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
+    string genome_file;
+    get_argument(arguments, "--genome_file", false, genome_file);
+    Log::info("RNN_GENOME = <%s> \n", genome_file.c_str());
 
-    } else if (rnn_type == "mgu") {
-        genome = create_mgu(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "ugrnn") {
-        genome = create_ugrnn(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "ff") {
-        genome = create_ff(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-
-    } else if (rnn_type == "jordan") {
-        genome = create_jordan(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
+    RNN_Genome* genome;
 
-    } else if (rnn_type == "elman") {
-        genome = create_elman(
-            input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-            weight_rules
-        );
-    } else if (rnn_type == "dnas") {
-        vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
-        genome = create_dnas_nn(
-            input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
-            weight_rules
-        );
+    if (genome_file.size() != 0) {
+        genome = new RNN_Genome(genome_file);
+        Log::info("best weights: { ");
+        for (double &d : genome->get_best_parameters()) {
+            Log::info_no_header("%f, ", d);
+        }
+        Log::info("}\n");
+
+        vector<double> params;
+        genome->get_weights(params);
+        Log::info("current weights: { ");
+        for (double &d : params) {
+            Log::info_no_header("%f, ", d);
+        }
+        Log::info("}\n");
     } else {
-        Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
-        Log::fatal("Possibilities are:\n");
-        Log::fatal("    lstm\n");
-        Log::fatal("    gru\n");
-        Log::fatal("    ff\n");
-        Log::fatal("    jordan\n");
-        Log::fatal("    elman\n");
-        exit(1);
+
+        string rnn_type;
+        get_argument(arguments, "--rnn_type", true, rnn_type);
+        
+        Log::info("RNN TYPE = %s\n", rnn_type.c_str());
+
+        int32_t num_hidden_layers;
+        get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers);
+
+        int32_t max_recurrent_depth;
+        get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth);
+
+        int32_t hidden_layer_size = number_inputs;
+        get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size);
+
+        if (rnn_type == "lstm") {
+            genome = create_lstm(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "gru") {
+            genome = create_gru(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "delta") {
+            genome = create_delta(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "mgu") {
+            genome = create_mgu(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "ugrnn") {
+            genome = create_ugrnn(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "ff") {
+            genome = create_ff(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "jordan") {
+            genome = create_jordan(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+
+        } else if (rnn_type == "elman") {
+            genome = create_elman(
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
+                weight_rules
+            );
+        } else if (rnn_type == "dnas") {
+            vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};
+            genome = create_dnas_nn(
+                input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types,
+                weight_rules
+            );
+        } else {
+            Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str());
+            Log::fatal("Possibilities are:\n");
+            Log::fatal("    lstm\n");
+            Log::fatal("    gru\n");
+            Log::fatal("    ff\n");
+            Log::fatal("    jordan\n");
+            Log::fatal("    elman\n");
+            exit(1);
+        }
     }
 
     get_argument(arguments, "--bp_iterations", true, bp_iterations);
-    genome->set_bp_iterations(bp_iterations);
+    genome->set_bp_iterations(bp_iterations + genome->get_bp_iterations());
 
     get_argument(arguments, "--output_directory", true, output_directory);
     if (output_directory != "") {
@@ -211,7 +243,7 @@ int main(int argc, char** argv) {
 
     using_dropout = false;
 
-    genome->initialize_randomly();
+    genome->set_weights(genome->get_best_parameters());
 
     double learning_rate = 0.001;
     get_argument(arguments, "--learning_rate", false, learning_rate);

From fa03e78c21bd98f20b5e4301a80d569eae9b8c67 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 12 Dec 2023 19:18:05 -0500
Subject: [PATCH 14/31] Preparing for GECCO 2024 experiments

---
 common/color_table.cxx                        |  3 +-
 common/process_arguments.cxx                  |  2 +-
 initial_integration_experiments/dnas.zsh      |  4 +-
 .../post_training_dnas.zsh                    |  2 +-
 mpi/examm_mpi.cxx                             |  2 +-
 rnn/dnas_node.cxx                             |  9 +--
 rnn/rnn_genome.cxx                            | 60 ++++++++++---------
 rnn/rnn_genome.hxx                            |  2 +-
 rnn_examples/dnas_info.cxx                    |  6 +-
 rnn_examples/train_rnn.cxx                    | 39 ++++++------
 10 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/common/color_table.cxx b/common/color_table.cxx
index d9e743b0..d0c42a21 100644
--- a/common/color_table.cxx
+++ b/common/color_table.cxx
@@ -1026,7 +1026,8 @@ const static double bent_cool_warm[] = {
     1.0,
     177,
     1,
-    39};
+    39,
+};
 
 Color get_colormap(double value) {
     Color c;
diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx
index 4577d86c..65efa7b6 100644
--- a/common/process_arguments.cxx
+++ b/common/process_arguments.cxx
@@ -10,7 +10,7 @@ using std::vector;
 EXAMM* generate_examm_from_arguments(
     const vector<string>& arguments, TimeSeriesSets* time_series_sets, WeightRules* weight_rules,
     RNN_Genome* seed_genome
-) { 
+) {
     Log::info("Getting arguments for EXAMM\n");
     int32_t island_size;
     get_argument(arguments, "--island_size", true, island_size);
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 490e0c3b..9ef90cca 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -1,12 +1,12 @@
 #!/usr/bin/zsh
 
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+OUTPUT_PARAMETERS='E1_EGT1'
 
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v1/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
   mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index f3d355f1..8117dadb 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,6 +1,6 @@
 #!/usr/bin/zsh
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM'
+OUTPUT_PARAMETERS='E1_EGT1'
 
 offset=1
 
diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index c1f1dd1c..227c3a85 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -203,7 +203,7 @@ void worker(int32_t rank) {
         } else if (tag == GENOME_LENGTH_TAG) {
             Log::debug("received genome!\n");
             RNN_Genome* genome = receive_genome_from(0);
-            
+
             // have each worker write the backproagation to a separate log file
             string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank);
             Log::set_id(log_id);
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index ac0e0aa1..465c024c 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -2,7 +2,6 @@
 using std::sort;
 
 #include <iomanip>
-
 #include <utility>
 using std::pair;
 
@@ -162,9 +161,10 @@ void DNASNode::print_info() {
     printf(" ");
     int best_pi_idx = 0;
     for (int i = 0; i < nodes.size(); i++) {
-       printf("%-10s & ", std::to_string(pi[i]).c_str());
-        if (pi[i] > pi[best_pi_idx])
+        printf("%-10s & ", std::to_string(pi[i]).c_str());
+        if (pi[i] > pi[best_pi_idx]) {
             best_pi_idx = i;
+        }
     }
     printf("\n");
     Log::info("Node types: ");
@@ -337,8 +337,9 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
-        if (pi[i] < 0.1)
+        if (pi[i] < 0.1) {
             pi[i] = 0.1;
+        }
     }
 
     for (auto node : nodes) {
diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx
index 72868d5e..833feee6 100644
--- a/rnn/rnn_genome.cxx
+++ b/rnn/rnn_genome.cxx
@@ -3224,19 +3224,25 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) {
 
 #define MAGIC 0xFA
 
-#define read_magic(place) \
-      { \
-      uint8_t boo = MAGIC;\
-      bin_istream.read((char *) &boo, sizeof(uint8_t)); \
-      if (boo != MAGIC) { Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); exit(-1); } \
-      }
+// Reads one sentinel byte from bin_istream; logs an error and exits when the read fails or the byte is not MAGIC.
+#define read_magic(place)                                                                                   \
+    do {                                                                                                    \
+        uint8_t boo = 0; /* not MAGIC, so a read that stores nothing cannot pass the check */               \
+        if (!bin_istream.read((char*) &boo, sizeof(uint8_t)) || boo != MAGIC) {                             \
+            Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); \
+            exit(-1);                                                                                       \
+        }                                                                                                   \
+    } while (0)
 
-#define write_magic() \
-  {uint8_t xxmagic = MAGIC; bin_ostream.write((char *) &xxmagic, sizeof(uint8_t));}
+// Writes the one-byte MAGIC sentinel to bin_ostream; read_magic() consumes and checks it on load.
+#define write_magic()                              \
+    do {                                           \
+        bin_ostream.put(static_cast<char>(MAGIC)); \
+    } while (0)
 
 void RNN_Genome::read_from_stream(istream& bin_istream) {
     Log::debug("READING GENOME FROM STREAM\n");
-    
+
     read_magic(__LINE__);
 
     bin_istream.read((char*) &generation_id, sizeof(int32_t));
@@ -3244,12 +3250,12 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &bp_iterations, sizeof(int32_t));
 
     read_magic(__LINE__);
-    
+
     bin_istream.read((char*) &use_dropout, sizeof(bool));
     bin_istream.read((char*) &dropout_probability, sizeof(double));
 
     read_magic(__LINE__);
-    
+
     WeightType weight_initialize = WeightType::NONE;
     WeightType weight_inheritance = WeightType::NONE;
     WeightType mutated_component_weight = WeightType::NONE;
@@ -3259,7 +3265,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t));
 
     read_magic(__LINE__);
-    
+
     weight_rules = new WeightRules();
     weight_rules->set_weight_initialize_method(weight_initialize);
     weight_rules->set_weight_inheritance_method(weight_inheritance);
@@ -3282,7 +3288,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     generator_iss >> generator;
 
     read_magic(__LINE__);
-    
+
     // string rng_0_1_str;
     // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1");
     // So for some reason this was serialized incorrectly for some genomes,
@@ -3299,7 +3305,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     read_map(generated_by_map_iss, generated_by_map);
 
     read_magic(__LINE__);
-    
+
     bin_istream.read((char*) &best_validation_mse, sizeof(double));
     bin_istream.read((char*) &best_validation_mae, sizeof(double));
 
@@ -3312,7 +3318,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     delete[] initial_parameters_v;
 
     read_magic(__LINE__);
-    
+
     int32_t n_best_parameters;
     bin_istream.read((char*) &n_best_parameters, sizeof(int32_t));
     Log::debug("reading %d best parameters.\n", n_best_parameters);
@@ -3322,7 +3328,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     delete[] best_parameters_v;
 
     read_magic(__LINE__);
-    
+
     input_parameter_names.clear();
     int32_t n_input_parameter_names;
     bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t));
@@ -3346,7 +3352,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     }
 
     read_magic(__LINE__);
-    
+
     int32_t n_nodes;
     bin_istream.read((char*) &n_nodes, sizeof(int32_t));
     Log::debug("reading %d nodes.\n", n_nodes);
@@ -3439,7 +3445,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) {
     read_map(normalize_std_devs_iss, normalize_std_devs);
 
     read_magic(__LINE__);
-    
+
     assign_reachability();
 }
 
@@ -3471,12 +3477,12 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &bp_iterations, sizeof(int32_t));
 
     write_magic();
-    
+
     bin_ostream.write((char*) &use_dropout, sizeof(bool));
     bin_ostream.write((char*) &dropout_probability, sizeof(double));
 
     write_magic();
-    
+
     WeightType weight_initialize = weight_rules->get_weight_initialize_method();
     WeightType weight_inheritance = weight_rules->get_weight_inheritance_method();
     WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method();
@@ -3485,7 +3491,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t));
 
     write_magic();
-    
+
     Log::debug("generation_id: %d\n", generation_id);
     Log::debug("bp_iterations: %d\n", bp_iterations);
 
@@ -3504,7 +3510,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_binary_string(bin_ostream, generator_str, "generator");
 
     write_magic();
-    
+
     // ostringstream rng_0_1_oss;
     // rng_0_1_oss << rng_0_1;
     // string rng_0_1_str = rng_0_1_oss.str();
@@ -3516,7 +3522,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map");
 
     write_magic();
-    
+
     bin_ostream.write((char*) &best_validation_mse, sizeof(double));
     bin_ostream.write((char*) &best_validation_mae, sizeof(double));
 
@@ -3526,7 +3532,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size());
 
     write_magic();
-    
+
     int32_t n_best_parameters = (int32_t) best_parameters.size();
     bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t));
     if (n_best_parameters) {
@@ -3534,7 +3540,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_input_parameter_names = (int32_t) input_parameter_names.size();
     bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) {
@@ -3542,7 +3548,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_output_parameter_names = (int32_t) output_parameter_names.size();
     bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t));
     for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) {
@@ -3552,7 +3558,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) {
     }
 
     write_magic();
-    
+
     int32_t n_nodes = (int32_t) nodes.size();
     bin_ostream.write((char*) &n_nodes, sizeof(int32_t));
     Log::debug("writing %d nodes.\n", n_nodes);
diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx
index d6330512..01c7e9e3 100644
--- a/rnn/rnn_genome.hxx
+++ b/rnn/rnn_genome.hxx
@@ -325,7 +325,7 @@ class RNN_Genome {
     );
     vector<RNN_Node_Interface*> pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type);
 
-    const vector<RNN_Node_Interface *> &get_nodes() {
+    const vector<RNN_Node_Interface*>& get_nodes() {
         return this->nodes;
     }
 
diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx
index 74fd6519..fac60c84 100644
--- a/rnn_examples/dnas_info.cxx
+++ b/rnn_examples/dnas_info.cxx
@@ -86,9 +86,9 @@ int main(int argc, char** argv) {
     RNN_Genome genome(filename);
 
     for (auto node : genome.get_nodes()) {
-        if (DNASNode *d = dynamic_cast<DNASNode*>(node)) {
-          std::cout << "'" << filename << "': ";
-          d->print_info();
+        if (DNASNode* d = dynamic_cast<DNASNode*>(node)) {
+            std::cout << "'" << filename << "': ";
+            d->print_info();
         }
     }
 
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 02d7db80..7bd5647c 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -116,7 +116,7 @@ int main(int argc, char** argv) {
     if (genome_file.size() != 0) {
         genome = new RNN_Genome(genome_file);
         Log::info("best weights: { ");
-        for (double &d : genome->get_best_parameters()) {
+        for (double& d : genome->get_best_parameters()) {
             Log::info_no_header("%f, ", d);
         }
         Log::info("}\n");
@@ -124,15 +124,14 @@ int main(int argc, char** argv) {
         vector<double> params;
         genome->get_weights(params);
         Log::info("current weights: { ");
-        for (double &d : params) {
+        for (double& d : params) {
             Log::info_no_header("%f, ", d);
         }
         Log::info("}\n");
     } else {
-
         string rnn_type;
         get_argument(arguments, "--rnn_type", true, rnn_type);
-        
+
         Log::info("RNN TYPE = %s\n", rnn_type.c_str());
 
         int32_t num_hidden_layers;
@@ -146,50 +145,50 @@ int main(int argc, char** argv) {
 
         if (rnn_type == "lstm") {
             genome = create_lstm(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "gru") {
             genome = create_gru(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "delta") {
             genome = create_delta(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "mgu") {
             genome = create_mgu(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "ugrnn") {
             genome = create_ugrnn(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "ff") {
             genome = create_ff(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "jordan") {
             genome = create_jordan(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
 
         } else if (rnn_type == "elman") {
             genome = create_elman(
-                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, max_recurrent_depth,
-                weight_rules
+                input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names,
+                max_recurrent_depth, weight_rules
             );
         } else if (rnn_type == "dnas") {
             vector<int> node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE};

From c0264d9ad2f82ae86d3369f7cd578e77d0a074eb Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Tue, 12 Dec 2023 19:36:16 -0500
Subject: [PATCH 15/31] Tweak experimental parameters

---
 initial_integration_experiments/dnas.zsh |  8 ++++----
 rnn/dnas_node.cxx                        | 17 ++++-------------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 9ef90cca..0c2a615f 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -8,7 +8,7 @@ offset=1
 run_examm() {
   output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 16 --bind-to socket Release/mpi/examm_mpi \
+  mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
@@ -29,7 +29,7 @@ run_examm() {
       --std_message_level INFO \
       --file_message_level INFO \
       --crystalize_iters $crystalize_iters \
-      --max_genomes 10000 \
+      --max_genomes 4000 \
       --island_size 8 \
       --number_islands 8 \
       --dnas_k $k
@@ -39,7 +39,7 @@ run_examm() {
 }
 
 CELL_TYPE='dnas'
-for crystalize_iters in 128 256 512 1024; do
+for crystalize_iters in 64 128 256 512; do
   for bp_epoch in 8 16 32 64 128; do
     for k in 1; do
       for fold in 0 1 2 3; do
@@ -47,7 +47,7 @@ for crystalize_iters in 128 256 512 1024; do
       done
       wait
       for fold in 4 5 6 7; do
-        run_examm
+        run_examm &
       done
       wait
     done
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 465c024c..87e05d9b 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -99,15 +99,7 @@ double DNASNode::calculate_pi_lr() {
 }
 
 double DNASNode::calculate_tao() {
-    double percentage_done = (double) counter / (double) CRYSTALLIZATION_THRESHOLD;
-    if (percentage_done < 0.33) {
-        return 1.33;
-    } else if (percentage_done < 0.66) {
-        double percentage_done_with_phase = (0.66 - percentage_done) / 0.33;
-        return 1.33 - percentage_done_with_phase * 1.15;
-    } else {
-        return 0.18;
-    }
+    return 6.0;
 }
 
 void DNASNode::calculate_z() {
@@ -337,8 +329,8 @@ void DNASNode::set_weights(int32_t& offset, const vector<double>& parameters) {
     // int start = offset;
     for (auto i = 0; i < pi.size(); i++) {
         pi[i] = parameters[offset++];
-        if (pi[i] < 0.1) {
-            pi[i] = 0.1;
+        if (pi[i] < 0.01) {
+            pi[i] = 0.01;
         }
     }
 
@@ -395,9 +387,8 @@ void DNASNode::get_gradients(vector<double>& gradients) {
     } else {
         gradients.assign(get_number_weights(), 0.0);
         int offset = 0;
-        double pi_lr = calculate_pi_lr();
         for (auto i = 0; i < pi.size(); i++) {
-            gradients[offset++] = d_pi[i] * pi_lr;
+            gradients[offset++] = d_pi[i];
         }
 
         for (auto node : nodes) {

From 1607c268ee922a8b042e555a0cf32f96fdae94bb Mon Sep 17 00:00:00 2001
From: Josh Karns <karns@meta.com>
Date: Tue, 26 Dec 2023 13:27:35 -0500
Subject: [PATCH 16/31] Tweaking experiments

---
 examm/examm.cxx                               |  3 +
 initial_integration_experiments/analyze.py    | 77 +++++++++++++++++++
 initial_integration_experiments/analyze.zsh   | 12 +++
 initial_integration_experiments/dnas.zsh      | 31 ++++----
 .../post_training_dnas.zsh                    |  2 +-
 rnn/generate_nn.cxx                           |  1 +
 rnn/rnn_node_interface.cxx                    |  3 +-
 7 files changed, 110 insertions(+), 19 deletions(-)
 create mode 100644 initial_integration_experiments/analyze.py
 create mode 100644 initial_integration_experiments/analyze.zsh

diff --git a/examm/examm.cxx b/examm/examm.cxx
index f017ab8b..e0be2d07 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -182,10 +182,12 @@ void EXAMM::update_log() {
             }
             (*op_log_file) << endl;
         }
+
         RNN_Genome* best_genome = get_best_genome();
         if (best_genome == NULL) {
             best_genome = speciation_strategy->get_global_best_genome();
         }
+
         std::chrono::time_point<std::chrono::system_clock> currentClock = std::chrono::system_clock::now();
         long milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(currentClock - startClock).count();
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
@@ -193,6 +195,7 @@ void EXAMM::update_log() {
                     << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
                     << best_genome->get_enabled_recurrent_edge_count()
                     << speciation_strategy->get_strategy_information_values() << endl;
+        Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }
 }
 
diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
new file mode 100644
index 00000000..58ba95b9
--- /dev/null
+++ b/initial_integration_experiments/analyze.py
@@ -0,0 +1,77 @@
+import pandas
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1)
+
+plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128}
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    if k == 8:
+        continue
+    v.sharey(a8)
+    v.sharex(a8)
+
+results = {}
+for ci in [64, 128, 256, 512]:
+    results[ci] = {}
+    for bpe in [8, 16, 32, 64, 128]:
+        results[ci][bpe] = {}
+        for k in [1]:
+            x = []
+            results[ci][bpe][k] = x
+
+            for fold in range(8):
+                f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
+                results[ci][bpe][k].append(f)
+
+
+            enabled_nodes = []
+            enabled_edges = []
+            enabled_rec_edges = []
+
+            bpi_columns = []
+            mse_columns = []
+
+            minlen = 100000000
+
+            for f in x:
+                bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+                mse_columns.append(f[' Best Val. MSE'].to_numpy())
+                enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+                enabled_edges.append(f[' Enabled Edges'].to_numpy())
+                enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+                minlen = min(minlen, len(bpi_columns[-1]))
+
+            enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+            enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+            enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+            bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+            mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+
+
+            nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+            edgesmean = np.mean(np.array(enabled_edges), axis=0)
+            redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+            print(f"Nodes at end mean: {nodesmean[-1]}")
+            print(f"edges at end mean: {edgesmean[-1]}")
+            print(f"redges at end mean: {redgesmean[-1]}")
+
+
+            bpimean = np.mean(np.array(bpi_columns), axis=0)
+            msemean = np.mean(np.array(mse_columns), axis=0)
+            msestd = np.std(np.array(mse_columns), axis=0)
+
+            g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0]
+            plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd,
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    v.legend(fontsize=12, loc="upper right")
+
+plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
new file mode 100644
index 00000000..5c2876f3
--- /dev/null
+++ b/initial_integration_experiments/analyze.zsh
@@ -0,0 +1,12 @@
+#!/usr/bin/zsh
+#
+for crystalize_iters in 64 128 256 512; do
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3 4 5 6 7; do
+        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+        tail -1 $output_dir/fitness_log.csv
+      done
+    done
+  done
+done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 0c2a615f..995d072a 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -1,26 +1,24 @@
-#!/usr/bin/zsh
+#!/bin/zsh
 
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 
 offset=1
 
 run_examm() {
   output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 8 --bind-to socket Release/mpi/examm_mpi \
+  mpirun -np 8 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
       --input_parameter_names ${=INPUT_PARAMETERS} \
       --output_parameter_names ${=OUTPUT_PARAMETERS} \
       --bp_iterations $bp_epoch \
-      --stochastic \
-      --possible_node_types DNAS \
       --normalize min_max \
       --num_hidden_layers $SIZE \
       --hidden_layer_size $SIZE \
-      --train_sequence_length 100 \
+      --train_sequence_length 1000 \
       --validation_sequence_length 100 \
       --max_recurrent_depth 1 \
       --output_directory $output_dir \
@@ -30,7 +28,7 @@ run_examm() {
       --file_message_level INFO \
       --crystalize_iters $crystalize_iters \
       --max_genomes 4000 \
-      --island_size 8 \
+      --island_size 32 \
       --number_islands 8 \
       --dnas_k $k
 
@@ -40,16 +38,17 @@ run_examm() {
 
 CELL_TYPE='dnas'
 for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
+  for bp_epoch in 1 2 4 8 16 32 64 128; do
     for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
+        fold=1 run_examm
+#       for fold in 0 1 2 3; do
+#         run_examm &
+#       done
+#       wait
+#       for fold in 4 5 6 7; do
+#         run_examm &
+#       done
+#       wait
     done
   done
 done
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index 8117dadb..b25171a1 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,4 +1,4 @@
-#!/usr/bin/zsh
+#!/bin/zsh
 INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
 OUTPUT_PARAMETERS='E1_EGT1'
 
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index a84fb36f..c4068495 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -190,6 +190,7 @@ RNN_Genome* get_seed_genome(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
+            // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules);
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
         }
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index ab5796b2..f86eddd7 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -1,7 +1,6 @@
-#include <cmath>
+#include <algorithm>
 using std::max;
 
-#include <algorithm>
 #include <fstream>
 using std::ostream;
 

From 778d24aa8d695394203cc7d86b8d6d835725a1f2 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 8 Jan 2024 13:34:21 -0500
Subject: [PATCH 17/31] Modified scripts

---
 initial_integration_experiments/analyze.py | 17 ++++++++-------
 initial_integration_experiments/dnas.zsh   | 24 ++++++++++------------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
index 58ba95b9..cee900d6 100644
--- a/initial_integration_experiments/analyze.py
+++ b/initial_integration_experiments/analyze.py
@@ -4,28 +4,31 @@
 
 import matplotlib.pyplot as plt
 
-fig, [a8, a16, a32, a64, a128] = plt.subplots(5, 1)
+fig, subplts = plt.subplots(6, 1)
 
-plts = {8: a8, 16: a16, 32: a32, 64: a64, 128: a128}
+bprange = [1, 2, 4, 8, 16, 32]
+plts = {k:v for k, v in zip(bprange, subplts)}
+print(plts)
+base = plts[bprange[0]]
 
 for k, v in plts.items():
     v.set_title(f"{k} BPI")
-    if k == 8:
+    if k == bprange[0]:
         continue
-    v.sharey(a8)
-    v.sharex(a8)
+    v.sharey(base)
+    v.sharex(base)
 
 results = {}
 for ci in [64, 128, 256, 512]:
     results[ci] = {}
-    for bpe in [8, 16, 32, 64, 128]:
+    for bpe in bprange:
         results[ci][bpe] = {}
         for k in [1]:
             x = []
             results[ci][bpe][k] = x
 
             for fold in range(8):
-                f = pandas.read_csv(f"initial_integration_experiments/results/v2/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
+                f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
                 results[ci][bpe][k].append(f)
 
 
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
index 995d072a..5acc8b06 100755
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -6,7 +6,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
   mpirun -np 8 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
@@ -29,7 +29,7 @@ run_examm() {
       --crystalize_iters $crystalize_iters \
       --max_genomes 4000 \
       --island_size 32 \
-      --number_islands 8 \
+      --number_islands 4 \
       --dnas_k $k
 
   best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
@@ -38,17 +38,15 @@ run_examm() {
 
 CELL_TYPE='dnas'
 for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 1 2 4 8 16 32 64 128; do
-    for k in 1; do
-        fold=1 run_examm
-#       for fold in 0 1 2 3; do
-#         run_examm &
-#       done
-#       wait
-#       for fold in 4 5 6 7; do
-#         run_examm &
-#       done
-#       wait
+  for bp_epoch in 1 2 4 8 16 32; do
+    for k in 1 2; do
+      for fold in 0 1 2 3; do
+        run_examm &
+      done
+      for fold in 4 5 6 7; do
+        run_examm &
+      done
+      wait
     done
   done
 done

From ffa684a5b3bcbd1d2533efe61fcd3ca18c6f600c Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 29 Jan 2024 19:44:50 -0500
Subject: [PATCH 18/31] Prepping for cluster

---
 CMakeLists.txt                                |   3 +-
 examm/examm.cxx                               |   1 +
 initial_integration_experiments/analyze.py    | 122 +++++++++++-------
 initial_integration_experiments/aviation.zsh  |  37 ++++++
 initial_integration_experiments/control.zsh   |  50 +++++++
 initial_integration_experiments/debug.zsh     |  55 ++++++++
 initial_integration_experiments/dnas.zsh      |  25 ++--
 .../gp_control.zsh                            |  59 +++++++++
 .../post_training_dnas.zsh                    |   8 +-
 initial_integration_experiments/posttrain.zsh |   3 +
 initial_integration_experiments/run_examm.zsh |  25 ++++
 .../run_experiments.zsh                       |   4 +
 initial_integration_experiments/wind.zsh      |  39 ++++++
 rnn/dnas_node.cxx                             |  21 +--
 rnn/generate_nn.cxx                           |   1 -
 rnn/rnn_edge.cxx                              |   3 +-
 rnn/rnn_node.cxx                              |   4 +-
 rnn/rnn_node_interface.cxx                    |  12 +-
 rnn/rnn_node_interface.hxx                    |   5 +-
 rnn_examples/train_rnn.cxx                    |  11 +-
 time_series/time_series.cxx                   |   8 +-
 21 files changed, 404 insertions(+), 92 deletions(-)
 create mode 100644 initial_integration_experiments/aviation.zsh
 create mode 100644 initial_integration_experiments/control.zsh
 create mode 100755 initial_integration_experiments/debug.zsh
 mode change 100755 => 100644 initial_integration_experiments/dnas.zsh
 create mode 100644 initial_integration_experiments/gp_control.zsh
 create mode 100644 initial_integration_experiments/posttrain.zsh
 create mode 100644 initial_integration_experiments/run_examm.zsh
 create mode 100755 initial_integration_experiments/run_experiments.zsh
 create mode 100644 initial_integration_experiments/wind.zsh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1af3a314..5d62df91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # 2 This line for cluster
 #SET (CMAKE_CXX_FLAGS                "-std=gnu++17 -Wall -O3 -funroll-loops  -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG")
 
-SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3")
+# SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3 -fsanitize=address")
+SET (CMAKE_CXX_FLAGS                " -Wall -O3 -funroll-loops -msse3 ")
 SET (CMAKE_CXX_FLAGS_DEBUG          "${CMAKE_CXX_FLAGS} -g")
 SET (CMAKE_CXX_FLAGS_MINSIZEREL     "${CMAKE_CXX_FLAGS} -Os -DNDEBUG")
 SET (CMAKE_CXX_FLAGS_RELEASE        "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG")
diff --git a/examm/examm.cxx b/examm/examm.cxx
index e0be2d07..1e1c2314 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -324,6 +324,7 @@ void EXAMM::mutate(int32_t max_mutations, RNN_Genome* g) {
         g->assign_reachability();
         double rng = rng_0_1(generator) * total;
         int32_t new_node_type = get_random_node_type();
+        Log::info("%d %d\n", new_node_type, NODE_TYPES.size());
         string node_type_str = NODE_TYPES[new_node_type];
         Log::debug("rng: %lf, total: %lf, new node type: %d (%s)\n", rng, total, new_node_type, node_type_str.c_str());
 
diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
index cee900d6..78d51466 100644
--- a/initial_integration_experiments/analyze.py
+++ b/initial_integration_experiments/analyze.py
@@ -6,7 +6,7 @@
 
 fig, subplts = plt.subplots(6, 1)
 
-bprange = [1, 2, 4, 8, 16, 32]
+bprange = [8, 16]
 plts = {k:v for k, v in zip(bprange, subplts)}
 print(plts)
 base = plts[bprange[0]]
@@ -18,63 +18,93 @@
     v.sharey(base)
     v.sharex(base)
 
+def avg(files, slice_at=-1):
+    r = {}
+    for file in files:
+        x = []
+
+        for fold in range(8):
+            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
+            print(f"{file}/{fold} -> {len(f)}")
+            x.append(f)
+
+
+        enabled_nodes = []
+        enabled_edges = []
+        enabled_rec_edges = []
+
+        bpi_columns = []
+        mse_columns = []
+
+        minlen = 100000000
+
+        for f in x:
+            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+            mse_columns.append(f[' Best Val. MSE'].to_numpy())
+            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+            enabled_edges.append(f[' Enabled Edges'].to_numpy())
+            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+            minlen = min(minlen, len(bpi_columns[-1]))
+
+        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+
+        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+        edgesmean = np.mean(np.array(enabled_edges), axis=0)
+        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+        print(f"Nodes at end mean: {nodesmean[-1]}")
+        print(f"edges at end mean: {edgesmean[-1]}")
+        print(f"redges at end mean: {redgesmean[-1]}")
+
+
+        bpimean = np.mean(np.array(bpi_columns), axis=0)
+        msemean = np.mean(np.array(mse_columns), axis=0)
+        msestd = np.std(np.array(mse_columns), axis=0)
+        
+        r[file] = {
+            'mean_nodes': nodesmean,
+            'mean_edges': edgesmean,
+            'mean_rec_edges':redgesmean,
+            'bpi': bpimean,
+            'mean_mse': msemean,
+            'std_mse': msestd,
+        }
+    return r
+
 results = {}
-for ci in [64, 128, 256, 512]:
+for ci in [64]:
     results[ci] = {}
     for bpe in bprange:
         results[ci][bpe] = {}
         for k in [1]:
-            x = []
+            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
+            x = avg([f])[f]
             results[ci][bpe][k] = x
+            print(x)
 
-            for fold in range(8):
-                f = pandas.read_csv(f"initial_integration_experiments/results/v3/{ci}/{bpe}/{k}/{fold}/fitness_log.csv")
-                results[ci][bpe][k].append(f)
-
-
-            enabled_nodes = []
-            enabled_edges = []
-            enabled_rec_edges = []
-
-            bpi_columns = []
-            mse_columns = []
-
-            minlen = 100000000
-
-            for f in x:
-                bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-                mse_columns.append(f[' Best Val. MSE'].to_numpy())
-                enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-                enabled_edges.append(f[' Enabled Edges'].to_numpy())
-                enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-                minlen = min(minlen, len(bpi_columns[-1]))
-
-            enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-            enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-            enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-            bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-            mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-
-            nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-            edgesmean = np.mean(np.array(enabled_edges), axis=0)
-            redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-            print(f"Nodes at end mean: {nodesmean[-1]}")
-            print(f"edges at end mean: {edgesmean[-1]}")
-            print(f"redges at end mean: {redgesmean[-1]}")
-
+            print(x['mean_mse'] - x['std_mse'])
+            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
+            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
-            bpimean = np.mean(np.array(bpi_columns), axis=0)
-            msemean = np.mean(np.array(mse_columns), axis=0)
-            msestd = np.std(np.array(mse_columns), axis=0)
+control_results = {}
+for bp in [8, 16]:
+    key = f"initial_integration_experiments/results/control_v7/{bp}"
+    r = avg([key])[key]
+    control_results[bp] = r
+    print(list(r.keys()))
+    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
+    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
+        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
-            g = plts[bpe].plot(bpimean, msemean, label=f"ci={ci}")[0]
-            plts[bpe].fill_between(bpimean, msemean - msestd, msemean + msestd,
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
 
 for k, v in plts.items():
     v.set_title(f"{k} BPI")
     v.legend(fontsize=12, loc="upper right")
 
+
 plt.show()
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
new file mode 100644
index 00000000..7059da3e
--- /dev/null
+++ b/initial_integration_experiments/aviation.zsh
@@ -0,0 +1,37 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+
+offset=1
+
+run_examm() {
+  output_dir=results/v0/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+
+  touch $output_dir/completed
+}
+
+for output_params in "E1_CHT1" "Pitch"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/initial_integration_experiments/control.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+   for fold in 0 1 2 3; do
+     run_examm &
+   done
+   wait
+   for fold in 4 5 6 7; do
+     run_examm &
+   done
+   wait
+done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
new file mode 100755
index 00000000..ce159c01
--- /dev/null
+++ b/initial_integration_experiments/debug.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 8192 \
+      --island_size 32 \
+      --number_islands 4 \
+      --stochastic \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+for crystalize_iters in 128; do
+  for bp_epoch in 8; do
+    for k in 1; do
+      for fold in 0; do
+        run_examm
+      done
+ #      wait
+ #      for fold in 4 5 6 7; do
+ #        run_examm &
+ #      done
+ #      wait
+    done
+  done
+done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
old mode 100755
new mode 100644
index 5acc8b06..8b525b09
--- a/initial_integration_experiments/dnas.zsh
+++ b/initial_integration_experiments/dnas.zsh
@@ -6,43 +6,46 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/v3/$crystalize_iters/$bp_epoch/$k/$fold
+  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
   mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
+  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
       --input_parameter_names ${=INPUT_PARAMETERS} \
       --output_parameter_names ${=OUTPUT_PARAMETERS} \
       --bp_iterations $bp_epoch \
       --normalize min_max \
       --num_hidden_layers $SIZE \
       --hidden_layer_size $SIZE \
-      --train_sequence_length 1000 \
       --validation_sequence_length 100 \
       --max_recurrent_depth 1 \
       --output_directory $output_dir \
       --log_filename fitness.csv \
       --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
       --crystalize_iters $crystalize_iters \
-      --max_genomes 4000 \
+      --max_genomes $max_genomes \
       --island_size 32 \
       --number_islands 4 \
       --dnas_k $k
 
-  best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
 }
 
 CELL_TYPE='dnas'
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 1 2 4 8 16 32; do
-    for k in 1 2; do
+bp_ge=(8 8192 16 4096 32 2048)
+for crystalize_iters in 256; do
+  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+    for k in 1; do
       for fold in 0 1 2 3; do
         run_examm &
       done
+      wait
       for fold in 4 5 6 7; do
         run_examm &
       done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
new file mode 100644
index 00000000..049e9750
--- /dev/null
+++ b/initial_integration_experiments/gp_control.zsh
@@ -0,0 +1,59 @@
+#!/bin/zsh
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+run_examm() {
+  output_dir=test_results/v0/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames ${=training_filenames} \
+      --test_filenames ${=test_filenames} \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names $output_params \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes $MAX_GENOMES \
+      --island_size $ISLAND_SIZE \
+      --number_islands $N_ISLANDS
+
+  touch $output_dir/completed
+}
+
+run_group() {
+  for output_params in $OUTPUTS; do
+    for bp_epoch in 2 4 8 16 32; do
+      for fold in 0 1 2 3 4 5 6 7 8 9; do
+        run_examm
+      done
+    done
+  done
+}
+
+INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
+training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+run_group
+
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUTS=("E1_CHT1" "Pitch")
+training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+run_group
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+OUTPUTS=("Cm_avg" "P_avg")
+training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
index b25171a1..1c226178 100755
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ b/initial_integration_experiments/post_training_dnas.zsh
@@ -1,7 +1,4 @@
 #!/bin/zsh
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_EGT1'
-
 offset=1
 
 post_training() {
@@ -21,11 +18,12 @@ post_training() {
         --log_filename post_training.csv \
         --learning_rate 0.01 \
         --weight_update adagrad \
-        --train_sequence_length 100 \
+        --train_sequence_length 1000 \
         --validation_sequence_length 100 \
         --crystalize_iters $CRYSTALIZE_ITERS \
         --dnas_k $k
- 
+
+      tail -1 $OUTPUT_DIRECTORY/post_training.csv
 }
 
 post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
new file mode 100644
index 00000000..cc54a2eb
--- /dev/null
+++ b/initial_integration_experiments/posttrain.zsh
@@ -0,0 +1,3 @@
+#!/bin/zsh
+
+
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
new file mode 100644
index 00000000..77d2893f
--- /dev/null
+++ b/initial_integration_experiments/run_examm.zsh
@@ -0,0 +1,25 @@
+#!/bin/zsh
+
+output_dir=results/v0/$bp_epoch/$fold
+mkdir -p $output_dir
+
+mpirun -np 32 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset $offset \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=output_params} \
+    --bp_iterations $bp_epoch \
+    --normalize min_max \
+    --max_recurrent_depth 1 \
+    --output_directory $output_dir \
+    --log_filename fitness.csv \
+    --learning_rate 0.01 \
+    --std_message_level INFO \
+    --file_message_level INFO \
+    --max_genomes 4000 \
+    --island_size 32 \
+    --number_islands 4
+
+touch $output_dir/completed
+
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
new file mode 100755
index 00000000..7dd8e956
--- /dev/null
+++ b/initial_integration_experiments/run_experiments.zsh
@@ -0,0 +1,4 @@
+#!/bin/zsh
+
+initial_integration_experiments/control.zsh
+initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
new file mode 100644
index 00000000..7e68f482
--- /dev/null
+++ b/initial_integration_experiments/wind.zsh
@@ -0,0 +1,39 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+
+
+offset=1
+
+run_examm() {
+  output_dir=results/v0/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
+      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+
+  touch $output_dir/completed
+}
+
+
+for output_params in "Cm_avg" "P_avg"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done
diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx
index 87e05d9b..f6d42bfe 100644
--- a/rnn/dnas_node.cxx
+++ b/rnn/dnas_node.cxx
@@ -168,14 +168,15 @@ void DNASNode::print_info() {
 }
 
 void DNASNode::reset(int32_t series_length) {
-    d_pi = vector<double>(pi.size(), 0.0);
-    d_input = vector<double>(series_length, 0.0);
-    node_outputs = vector<vector<double>>(series_length, vector<double>(pi.size(), 0.0));
-    output_values = vector<double>(series_length, 0.0);
-    error_values = vector<double>(series_length, 0.0);
-    inputs_fired = vector<int>(series_length, 0);
-    outputs_fired = vector<int>(series_length, 0);
-    input_values = vector<double>(series_length, 0.0);
+    d_pi.assign(pi.size(), 0.0);
+    d_input.assign(series_length, 0.0);
+    node_outputs.clear();
+    for (int i = 0; i < series_length; i++) node_outputs.emplace_back(pi.size(), 0.0);
+    output_values.assign(series_length, 0.0);
+    error_values.assign(series_length, 0.0);
+    inputs_fired.assign(series_length, 0);
+    outputs_fired.assign(series_length, 0);
+    input_values.assign(series_length, 0.0);
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
         nodes[maxi]->reset(series_length);
@@ -206,8 +207,10 @@ void DNASNode::input_fired(int32_t time, double incoming_output) {
     }
 
     if (counter >= CRYSTALLIZATION_THRESHOLD) {
+        Log::info("%d hmm\n", maxi >= 0);
         assert(maxi >= 0);
-
+        
+        Log::info("%d %d %p\n", maxi, time, nodes[maxi]);
         nodes[maxi]->input_fired(time, input_values[time]);
         node_outputs[time][maxi] = nodes[maxi]->output_values[time];
         output_values[time] = nodes[maxi]->output_values[time];
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index c4068495..a84fb36f 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -190,7 +190,6 @@ RNN_Genome* get_seed_genome(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
-            // seed_genome = create_dnas_nn(time_series_sets->get_input_parameter_names(), 1, 2, time_series_sets->get_output_parameter_names(), 0, dnas_node_types, weight_rules);
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
         }
diff --git a/rnn/rnn_edge.cxx b/rnn/rnn_edge.cxx
index 3227e961..babb2552 100644
--- a/rnn/rnn_edge.cxx
+++ b/rnn/rnn_edge.cxx
@@ -92,7 +92,7 @@ RNN_Edge* RNN_Edge::copy(const vector<RNN_Node_Interface*> new_nodes) {
 }
 
 void RNN_Edge::propagate_forward(int32_t time) {
-    if (input_node->inputs_fired[time] != input_node->total_inputs) {
+    if (input_node->inputs_fired[time] != input_node->total_inputs || time < 0 || time >= input_node->output_values.size()) {
         Log::fatal(
             "ERROR! propagate forward called on edge %d where input_node->inputs_fired[%d] (%d) != total_inputs (%d)\n",
             innovation_number, time, input_node->inputs_fired[time], input_node->total_inputs
@@ -104,7 +104,6 @@ void RNN_Edge::propagate_forward(int32_t time) {
         exit(1);
     }
 
-    // Log::debug("input_node %p %d\n", input_node, input_node->output_values.size());
     double output = input_node->output_values[time] * weight;
 
     // Log::debug("propagating forward at time %d from %d to %d, value: %lf, input: %lf, weight: %lf\n", time,
diff --git a/rnn/rnn_node.cxx b/rnn/rnn_node.cxx
index 075c11ed..3e79a1df 100644
--- a/rnn/rnn_node.cxx
+++ b/rnn/rnn_node.cxx
@@ -57,8 +57,6 @@ void RNN_Node::input_fired(int32_t time, double incoming_output) {
         exit(1);
     }
 
-    Log::debug("node %d - input value[%d]: %lf\n", innovation_number, time, input_values[time]);
-
     output_values[time] = tanh(input_values[time] + bias);
     ld_output[time] = tanh_derivative(output_values[time]);
 
@@ -86,6 +84,8 @@ void RNN_Node::try_update_deltas(int32_t time) {
             outputs_fired[time], total_outputs
         );
         exit(1);
+    } else if (time >= d_input.size() || time < 0) {
+        Log::fatal("invalid time %d\n", time);
     }
 
     d_input[time] *= ld_output[time];
diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx
index f86eddd7..210706a2 100644
--- a/rnn/rnn_node_interface.cxx
+++ b/rnn/rnn_node_interface.cxx
@@ -7,13 +7,16 @@ using std::ostream;
 #include <string>
 using std::string;
 
+#include <vector>
+using std::vector;
+
 #include "common/log.hxx"
 #include "rnn/rnn_genome.hxx"
 #include "rnn_node_interface.hxx"
 
-extern const int32_t NUMBER_NODE_TYPES = 11;
-extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN",    "MGU", "GRU",
-                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "dnas"};
+extern const vector<string> NODE_TYPES = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU",
+                                    "delta",  "LSTM",   "ENARC", "ENAS_DAG", "random_dag", "dnas"};
+
 extern const unordered_map<string, int32_t> string_to_node_type = {
     {"simple",   SIMPLE_NODE},
     {"jordan",   JORDAN_NODE},
@@ -25,7 +28,8 @@ extern const unordered_map<string, int32_t> string_to_node_type = {
     {  "lstm",     LSTM_NODE},
     { "enarc",    ENARC_NODE},
     {  "enas", ENAS_DAG_NODE},
-    {  "dnas",     DNAS_NODE}
+    {  "dnas",     DNAS_NODE},
+    {  "random_dag", RANDOM_DAG_NODE},
 };
 
 int32_t node_type_from_string(string& node_type) {
diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx
index 15ec45cd..d1b56fa3 100644
--- a/rnn/rnn_node_interface.hxx
+++ b/rnn/rnn_node_interface.hxx
@@ -26,8 +26,9 @@ class RNN;
 #define HIDDEN_LAYER 1
 #define OUTPUT_LAYER 2
 
-extern const int32_t NUMBER_NODE_TYPES;
-extern const string NODE_TYPES[];
+extern const vector<string> NODE_TYPES;
+#define NUMBER_NODE_TYPES NODE_TYPES.size()
+
 extern const unordered_map<string, int32_t> string_to_node_type;
 int32_t node_type_from_string(string& node_type);
 
diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx
index 7bd5647c..265b9669 100644
--- a/rnn_examples/train_rnn.cxx
+++ b/rnn_examples/train_rnn.cxx
@@ -15,6 +15,7 @@ using std::string;
 using std::vector;
 
 #include "common/arguments.hxx"
+#include "common/process_arguments.hxx"
 #include "common/files.hxx"
 #include "common/log.hxx"
 #include "rnn/generate_nn.hxx"
@@ -81,9 +82,9 @@ int main(int argc, char** argv) {
     Log::set_id("main");
 
     TimeSeriesSets* time_series_sets = TimeSeriesSets::generate_from_arguments(arguments);
-
-    int32_t time_offset = 1;
-    get_argument(arguments, "--time_offset", true, time_offset);
+    get_train_validation_data(
+        arguments, time_series_sets, training_inputs, training_outputs, test_inputs, test_outputs
+    );
 
     int32_t crystallization_threshold = 1000;
     get_argument(arguments, "--crystalize_iters", false, crystallization_threshold);
@@ -93,8 +94,8 @@ int main(int argc, char** argv) {
     get_argument(arguments, "--dnas_k", false, k);
     DNASNode::k = k;
 
-    time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
-    time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
+    // time_series_sets->export_training_series(time_offset, training_inputs, training_outputs);
+    // time_series_sets->export_test_series(time_offset, test_inputs, test_outputs);
 
     int number_inputs = time_series_sets->get_number_inputs();
     // int number_outputs = time_series_sets->get_number_outputs();
diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx
index de143147..e315164e 100644
--- a/time_series/time_series.cxx
+++ b/time_series/time_series.cxx
@@ -472,7 +472,7 @@ void TimeSeriesSet::export_time_series(
     if (time_offset == 0) {
         for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) {
             for (int32_t j = 0; j < number_rows; j++) {
-                data[i][j] = time_series[requested_fields[i]]->get_value(j);
+                data[i][j] = time_series.at(requested_fields[i])->get_value(j);
             }
         }
 
@@ -480,7 +480,7 @@ void TimeSeriesSet::export_time_series(
         // output data, ignore the first N values
         for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) {
             for (int32_t j = time_offset; j < number_rows; j++) {
-                data[i][j - time_offset] = time_series[requested_fields[i]]->get_value(j);
+                data[i][j - time_offset] = time_series.at(requested_fields[i])->get_value(j);
             }
         }
 
@@ -492,13 +492,13 @@ void TimeSeriesSet::export_time_series(
                 Log::debug("doing shift for field: '%s'\n", requested_fields[i].c_str());
                 // shift the shifted fields to the same as the output, not the input
                 for (int32_t j = -time_offset; j < number_rows; j++) {
-                    data[i][j + time_offset] = time_series[requested_fields[i]]->get_value(j);
+                    data[i][j + time_offset] = time_series.at(requested_fields[i])->get_value(j);
                     // Log::info("\tdata[%d][%d]: %lf\n", i, j + time_offset, data[i][j + time_offset]);
                 }
             } else {
                 Log::debug("not doing shift for field: '%s'\n", requested_fields[i].c_str());
                 for (int32_t j = 0; j < number_rows + time_offset; j++) {
-                    data[i][j] = time_series[requested_fields[i]]->get_value(j);
+                    data[i][j] = time_series.at(requested_fields[i])->get_value(j);
                 }
             }
         }

From 60acb2c8c08d60844fdc161843f0a94771aa5158 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Wed, 31 Jan 2024 04:28:06 -0500
Subject: [PATCH 19/31] gecco 2024 related experiment files + hacky changes

---
 dnas_cluster.zsh                     | 69 ++++++++++++++++++++++++++++
 dnas_control.zsh                     | 60 ++++++++++++++++++++++++
 examm/island_speciation_strategy.cxx |  5 +-
 key                                  |  7 +++
 rnn/generate_nn.cxx                  | 16 ++++++-
 rnn/genome_property.cxx              | 31 +++++++++++--
 rnn/genome_property.hxx              | 12 ++++-
 7 files changed, 191 insertions(+), 9 deletions(-)
 create mode 100644 dnas_cluster.zsh
 create mode 100644 dnas_control.zsh
 create mode 100644 key

diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh
new file mode 100644
index 00000000..55823c0c
--- /dev/null
+++ b/dnas_cluster.zsh
@@ -0,0 +1,69 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --use_dnas_seed true \
+      --use_burn_in_bp_epoch \
+      --burn_in_period 1024 \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for crystalize_iters in 512; do
+    for k in 1; do
+      for fold in $(seq 0 19); do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/dnas_control.zsh b/dnas_control.zsh
new file mode 100644
index 00000000..5e6982c8
--- /dev/null
+++ b/dnas_control.zsh
@@ -0,0 +1,60 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --burn_in_period 1024 \
+      --use_burn_in_bp_epoch
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for k in 1; do
+    for fold in $(seq 0 19); do
+      run_examm
+    done
+  done
+}
+
+run_group
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index 920eb203..d8eaabab 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island(
         Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str());
         exit(1);
     }
-    return new_genome;
+    return new_genome;
 }
 
 RNN_Genome* IslandSpeciationStrategy::generate_genome(
@@ -370,6 +370,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
         Log::info("Island %d: new genome is still null, regenerating\n", generation_island);
         new_genome = generate_genome(rng_0_1, generator, mutate, crossover);
     }
+
     generated_genomes++;
     new_genome->set_generation_id(generated_genomes);
     islands[generation_island]->set_latest_generation_id(generated_genomes);
@@ -577,4 +578,4 @@ void IslandSpeciationStrategy::set_erased_islands_status() {
 
 RNN_Genome* IslandSpeciationStrategy::get_seed_genome() {
     return seed_genome;
-}
\ No newline at end of file
+}
diff --git a/key b/key
new file mode 100644
index 00000000..391a7405
--- /dev/null
+++ b/key
@@ -0,0 +1,7 @@
+v11 -> burn in schedule with 0.001 lr 4 mut
+v12 -> burn in schedule with 0.01 lr 4 mut
+v13 -> burn in schedule with 0.01 lr and period of 1024 and total genomes 8k 4 mut
+v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut
+v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut
+v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut
+
diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx
index a84fb36f..d9fd2eac 100644
--- a/rnn/generate_nn.cxx
+++ b/rnn/generate_nn.cxx
@@ -185,13 +185,27 @@ RNN_Genome* get_seed_genome(
         );
         Log::info("Finished transfering seed genome\n");
     } else {
-        if (seed_genome == NULL) {
+        bool use_dnas_seed = argument_exists(arguments, "--use_dnas_seed");
+
+        if (!use_dnas_seed) {
             seed_genome = create_ff(
                 time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0,
                 weight_rules
             );
             seed_genome->initialize_randomly();
             Log::info("Generated seed genome, seed genome is minimal\n");
+        } else {
+            vector<int32_t> node_types = {
+                SIMPLE_NODE,
+                UGRNN_NODE,
+                MGU_NODE,
+                GRU_NODE,
+                DELTA_NODE,
+                LSTM_NODE
+            };
+            seed_genome = create_dnas_nn(
+                time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, node_types, weight_rules
+            );
         }
     }
 
diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx
index 6bf061b9..09ea1ae8 100644
--- a/rnn/genome_property.cxx
+++ b/rnn/genome_property.cxx
@@ -10,6 +10,20 @@ GenomeProperty::GenomeProperty() {
     max_recurrent_depth = 10;
 }
 
+// Backprop epochs for `genome`: bp_iterations unless burn-in is enabled, in which case
+// bp_epochs_start is multiplied by burn_in_ratio once per completed burn_in_period (capped).
+int32_t GenomeProperty::compute_bp_iterations(RNN_Genome* genome) {
+    if (!use_burn_in_bp_epoch) return bp_iterations;
+    // A non-positive period would divide by zero; treat it as "no cycles completed".
+    int32_t n = burn_in_period > 0 ? genome->generation_id / burn_in_period : 0;
+    n = n > max_burn_in_cycles ? max_burn_in_cycles : n;
+    // Accumulate in double: both factors are double members, float would narrow them.
+    double epochs = bp_epochs_start;
+    for (int32_t i = 0; i < n; i++) epochs *= burn_in_ratio;
+    // The cast truncates toward zero, so a fractional start value can yield 0 epochs.
+    return (int32_t) epochs;
+}
+
 void GenomeProperty::generate_genome_property_from_arguments(const vector<string>& arguments) {
     get_argument(arguments, "--bp_iterations", true, bp_iterations);
     use_dropout = get_argument(arguments, "--dropout_probability", false, dropout_probability);
@@ -17,6 +31,13 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector<string
     get_argument(arguments, "--min_recurrent_depth", false, min_recurrent_depth);
     get_argument(arguments, "--max_recurrent_depth", false, max_recurrent_depth);
 
+    use_burn_in_bp_epoch = argument_exists(arguments, "--use_burn_in_bp_epoch");
+    get_argument(arguments, "--burn_in_period", false, burn_in_period);
+    get_argument(arguments, "--burn_in_cycles", false, max_burn_in_cycles);
+    get_argument(arguments, "--bp_epochs_start", false, bp_epochs_start);
+    get_argument(arguments, "--burn_in_ratio", false, burn_in_ratio);
+
+
     Log::info("Each generated genome is trained for %d epochs\n", bp_iterations);
     Log::info(
         "Use dropout is set to %s, dropout probability is %f\n", use_dropout ? "True" : "False", dropout_probability
@@ -25,10 +46,10 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector<string
 }
 
 void GenomeProperty::set_genome_properties(RNN_Genome* genome) {
-    genome->set_bp_iterations(bp_iterations);
-    if (use_dropout) {
-        genome->enable_dropout(dropout_probability);
-    }
+    genome->set_bp_iterations(compute_bp_iterations(genome));
+    
+    if (use_dropout) genome->enable_dropout(dropout_probability);
+    
     genome->normalize_type = normalize_type;
     genome->set_parameter_names(input_parameter_names, output_parameter_names);
     genome->set_normalize_bounds(normalize_type, normalize_mins, normalize_maxs, normalize_avgs, normalize_std_devs);
@@ -48,4 +69,4 @@ void GenomeProperty::get_time_series_parameters(TimeSeriesSets* time_series_sets
 
 uniform_int_distribution<int32_t> GenomeProperty::get_recurrent_depth_dist() {
     return uniform_int_distribution<int32_t>(this->min_recurrent_depth, this->max_recurrent_depth);
-}
\ No newline at end of file
+}
diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx
index 7d220ff6..130b26de 100644
--- a/rnn/genome_property.hxx
+++ b/rnn/genome_property.hxx
@@ -18,6 +18,12 @@ class GenomeProperty {
     int32_t min_recurrent_depth;
     int32_t max_recurrent_depth;
 
+    bool use_burn_in_bp_epoch = false;  // turned on by --use_burn_in_bp_epoch
+    int32_t burn_in_period = 2048;      // --burn_in_period: generation ids per burn-in cycle
+    int32_t max_burn_in_cycles = 4;     // --burn_in_cycles: cap on the cycles applied
+    double bp_epochs_start = 0.5;       // --bp_epochs_start: epoch count before any cycle
+    double burn_in_ratio = 2.0;         // --burn_in_ratio: multiplier applied once per cycle
+
     // TimeSeriesSets *time_series_sets;
     int32_t number_inputs;
     int32_t number_outputs;
@@ -30,12 +36,16 @@ class GenomeProperty {
     map<string, double> normalize_avgs;
     map<string, double> normalize_std_devs;
 
+    int32_t compute_bp_iterations(RNN_Genome* genome);
+
    public:
     GenomeProperty();
+
     void generate_genome_property_from_arguments(const vector<string>& arguments);
     void set_genome_properties(RNN_Genome* genome);
     void get_time_series_parameters(TimeSeriesSets* time_series_sets);
+    
     uniform_int_distribution<int32_t> get_recurrent_depth_dist();
 };
 
-#endif
\ No newline at end of file
+#endif

From 5730472f918a1ebf459e68c88092bf2e37ea1ba1 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Fri, 2 Feb 2024 03:16:10 -0500
Subject: [PATCH 20/31] BP schedule

---
 dnas_control.zsh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dnas_control.zsh b/dnas_control.zsh
index 5e6982c8..88a7c882 100644
--- a/dnas_control.zsh
+++ b/dnas_control.zsh
@@ -17,7 +17,7 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/control_v16/$bp_epoch/$fold
+  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
   mkdir -p $output_dir
   srun -n 36 Release/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
@@ -41,7 +41,7 @@ run_examm() {
       --max_genomes $max_genomes \
       --island_size 32 \
       --number_islands 8 \
-      --num_mutations 2 \
+      --num_mutations 4 \
       --burn_in_period 1024 \
       --use_burn_in_bp_epoch
 

From 70e79d442b1d4fea6c2c752739778f096a9b70ff Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Mon, 19 Feb 2024 14:30:54 -0500
Subject: [PATCH 21/31] moving scripts

---
 scripts/dnas/analyze.py             | 110 ++++++++++++++++++++++++++++
 scripts/dnas/analyze.zsh            |  12 +++
 scripts/dnas/aviation.zsh           |  37 ++++++++++
 scripts/dnas/coal_dnas_control.zsh  |  22 ++++++
 scripts/dnas/coal_gp.zsh            |  22 ++++++
 scripts/dnas/control.zsh            |  50 +++++++++++++
 scripts/dnas/control_cluster.zsh    |  50 +++++++++++++
 scripts/dnas/debug.zsh              |  55 ++++++++++++++
 scripts/dnas/dnas.zsh               |  55 ++++++++++++++
 scripts/dnas/dnas_cluster.zsh       |  69 +++++++++++++++++
 scripts/dnas/dnas_control.zsh       |  60 +++++++++++++++
 scripts/dnas/dnas_r2_cluster.zsh    |  67 +++++++++++++++++
 scripts/dnas/experiment.zsh         |  34 +++++++++
 scripts/dnas/gp_control.zsh         |  59 +++++++++++++++
 scripts/dnas/lib.zsh                |  65 ++++++++++++++++
 scripts/dnas/mk_jobs.zsh            |   6 ++
 scripts/dnas/populate_queue.zsh     |  29 ++++++++
 scripts/dnas/post_training.zsh      |  28 +++++++
 scripts/dnas/post_training_dnas.zsh |  29 ++++++++
 scripts/dnas/posttrain.zsh          |   3 +
 scripts/dnas/run_examm.zsh          |  25 +++++++
 scripts/dnas/run_experiments.zsh    |   4 +
 scripts/dnas/wind.zsh               |  39 ++++++++++
 23 files changed, 930 insertions(+)
 create mode 100644 scripts/dnas/analyze.py
 create mode 100644 scripts/dnas/analyze.zsh
 create mode 100644 scripts/dnas/aviation.zsh
 create mode 100644 scripts/dnas/coal_dnas_control.zsh
 create mode 100644 scripts/dnas/coal_gp.zsh
 create mode 100644 scripts/dnas/control.zsh
 create mode 100644 scripts/dnas/control_cluster.zsh
 create mode 100755 scripts/dnas/debug.zsh
 create mode 100644 scripts/dnas/dnas.zsh
 create mode 100644 scripts/dnas/dnas_cluster.zsh
 create mode 100644 scripts/dnas/dnas_control.zsh
 create mode 100644 scripts/dnas/dnas_r2_cluster.zsh
 create mode 100755 scripts/dnas/experiment.zsh
 create mode 100644 scripts/dnas/gp_control.zsh
 create mode 100644 scripts/dnas/lib.zsh
 create mode 100644 scripts/dnas/mk_jobs.zsh
 create mode 100755 scripts/dnas/populate_queue.zsh
 create mode 100755 scripts/dnas/post_training.zsh
 create mode 100755 scripts/dnas/post_training_dnas.zsh
 create mode 100644 scripts/dnas/posttrain.zsh
 create mode 100644 scripts/dnas/run_examm.zsh
 create mode 100755 scripts/dnas/run_experiments.zsh
 create mode 100644 scripts/dnas/wind.zsh

diff --git a/scripts/dnas/analyze.py b/scripts/dnas/analyze.py
new file mode 100644
index 00000000..78d51466
--- /dev/null
+++ b/scripts/dnas/analyze.py
@@ -0,0 +1,110 @@
+import pandas
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+# One stacked panel per backprop-epoch setting; squeeze=False keeps a 2-D axes array for any count.
+bprange = [8, 16]
+fig, subplts = plt.subplots(len(bprange), 1, squeeze=False)
+plts = {k:v for k, v in zip(bprange, subplts[:, 0])}
+print(plts)
+base = plts[bprange[0]]
+# Panels after the first share both axes with the first one.
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    if k == bprange[0]:
+        continue
+    v.sharey(base)
+    v.sharex(base)
+# avg(files, slice_at=-1): per-directory mean curves over folds 0-7 of {file}/{fold}/fitness_log.csv.
+def avg(files, slice_at=-1):
+    r = {}
+    for file in files:
+        x = []
+        # The default slice_at=-1 drops the last row of every log. NOTE(review): confirm that is intended.
+        for fold in range(8):
+            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
+            print(f"{file}/{fold} -> {len(f)}")
+            x.append(f)
+
+        # One numpy column per fold for each logged quantity (header names keep their leading space).
+        enabled_nodes = []
+        enabled_edges = []
+        enabled_rec_edges = []
+
+        bpi_columns = []
+        mse_columns = []
+
+        minlen = 100000000
+
+        for f in x:
+            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
+            mse_columns.append(f[' Best Val. MSE'].to_numpy())
+            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
+            enabled_edges.append(f[' Enabled Edges'].to_numpy())
+            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
+
+            minlen = min(minlen, len(bpi_columns[-1]))
+        # Cut every fold to the shortest one so the columns stack into a rectangular array.
+        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
+        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
+        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
+        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
+        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
+        # axis=0 reduces across folds, leaving one value per log row.
+        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
+        edgesmean = np.mean(np.array(enabled_edges), axis=0)
+        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
+        print(f"Nodes at end mean: {nodesmean[-1]}")
+        print(f"edges at end mean: {edgesmean[-1]}")
+        print(f"redges at end mean: {redgesmean[-1]}")
+
+
+        bpimean = np.mean(np.array(bpi_columns), axis=0)
+        msemean = np.mean(np.array(mse_columns), axis=0)
+        msestd = np.std(np.array(mse_columns), axis=0)
+        # Result for this directory, stored under the directory string that was passed in.
+        r[file] = {
+            'mean_nodes': nodesmean,
+            'mean_edges': edgesmean,
+            'mean_rec_edges':redgesmean,
+            'bpi': bpimean,
+            'mean_mse': msemean,
+            'std_mse': msestd,
+        }
+    return r
+
+results = {}
+for ci in [64]:
+    results[ci] = {}
+    for bpe in bprange:
+        results[ci][bpe] = {}
+        for k in [1]:
+            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
+            x = avg([f])[f]
+            results[ci][bpe][k] = x
+            print(x)
+
+            print(x['mean_mse'] - x['std_mse'])
+            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
+            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
+                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+control_results = {}
+for bp in [8, 16]:
+    key = f"initial_integration_experiments/results/control_v7/{bp}"
+    r = avg([key])[key]
+    control_results[bp] = r
+    print(list(r.keys()))
+    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
+    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
+        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
+
+
+for k, v in plts.items():
+    v.set_title(f"{k} BPI")
+    v.legend(fontsize=12, loc="upper right")
+
+
+plt.show()
diff --git a/scripts/dnas/analyze.zsh b/scripts/dnas/analyze.zsh
new file mode 100644
index 00000000..5c2876f3
--- /dev/null
+++ b/scripts/dnas/analyze.zsh
@@ -0,0 +1,12 @@
+#!/usr/bin/zsh
+#
+for crystalize_iters in 64 128 256 512; do
+  for bp_epoch in 8 16 32 64 128; do
+    for k in 1; do
+      for fold in 0 1 2 3 4 5 6 7; do
+        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
+        tail -1 $output_dir/fitness_log.csv
+      done
+    done
+  done
+done
diff --git a/scripts/dnas/aviation.zsh b/scripts/dnas/aviation.zsh
new file mode 100644
index 00000000..7059da3e
--- /dev/null
+++ b/scripts/dnas/aviation.zsh
@@ -0,0 +1,37 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+
+offset=1
+
+run_examm() {
+  output_dir=results/v0/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+
+  touch $output_dir/completed
+}
+
+for output_params in "E1_CHT1" "Pitch"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done
diff --git a/scripts/dnas/coal_dnas_control.zsh b/scripts/dnas/coal_dnas_control.zsh
new file mode 100644
index 00000000..9543cc09
--- /dev/null
+++ b/scripts/dnas/coal_dnas_control.zsh
@@ -0,0 +1,22 @@
+#!/bin/zsh
+# sbatch stops reading #SBATCH directives at the first command, so they must precede any code.
+#SBATCH  --ntasks=8
+#SBATCH  --exclude theocho
+#SBATCH  --time=8-00:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=TIER
+#SBATCH  -J examm_coal_gp_control
+#SBATCH  -o /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+let np=8
+source lib.zsh
+# NOTE(review): job name and prefix say gp_control while the Slurm logs go to dnas_control - confirm.
+output_dir_prefix=/home/jak5763/exact/results/gp_control
+bp_epoch_set=(8 16 32 64 128)
+nfolds=20
+MAX_GENOMES=4000
+ISLAND_SIZE=10
+N_ISLANDS=10
+coal
diff --git a/scripts/dnas/coal_gp.zsh b/scripts/dnas/coal_gp.zsh
new file mode 100644
index 00000000..c1318793
--- /dev/null
+++ b/scripts/dnas/coal_gp.zsh
@@ -0,0 +1,22 @@
+#!/bin/zsh
+# sbatch stops reading #SBATCH directives at the first command, so they must precede any code.
+#SBATCH  --ntasks=8
+#SBATCH  --exclude theocho
+#SBATCH  --time=8-00:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=TIER
+#SBATCH  -J examm_coal_gp_control
+#SBATCH  -o /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+let np=8
+source lib.zsh
+
+output_dir_prefix=/home/jak5763/exact/results/gp_control
+bp_epoch_set=(8)
+nfolds=20
+MAX_GENOMES=10000
+ISLAND_SIZE=10
+N_ISLANDS=10
+coal
diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/scripts/dnas/control.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+   for fold in 0 1 2 3; do
+     run_examm &
+   done
+   wait
+   for fold in 4 5 6 7; do
+     run_examm &
+   done
+   wait
+done
diff --git a/scripts/dnas/control_cluster.zsh b/scripts/dnas/control_cluster.zsh
new file mode 100644
index 00000000..a848302b
--- /dev/null
+++ b/scripts/dnas/control_cluster.zsh
@@ -0,0 +1,50 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
+}
+
+bp_ge=(8 8192 16 4096 32 2048)
+
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+   for fold in 0 1 2 3; do
+     run_examm &
+   done
+   wait
+   for fold in 4 5 6 7; do
+     run_examm &
+   done
+   wait
+done
diff --git a/scripts/dnas/debug.zsh b/scripts/dnas/debug.zsh
new file mode 100755
index 00000000..ce159c01
--- /dev/null
+++ b/scripts/dnas/debug.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes 8192 \
+      --island_size 32 \
+      --number_islands 4 \
+      --stochastic \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+for crystalize_iters in 128; do
+  for bp_epoch in 8; do
+    for k in 1; do
+      for fold in 0; do
+        run_examm
+      done
+ #      wait
+ #      for fold in 4 5 6 7; do
+ #        run_examm &
+ #      done
+ #      wait
+    done
+  done
+done
diff --git a/scripts/dnas/dnas.zsh b/scripts/dnas/dnas.zsh
new file mode 100644
index 00000000..8b525b09
--- /dev/null
+++ b/scripts/dnas/dnas.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 4 \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+CELL_TYPE='dnas'
+bp_ge=(8 8192 16 4096 32 2048)
+for crystalize_iters in 256; do
+  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+    for k in 1; do
+      for fold in 0 1 2 3; do
+        run_examm &
+      done
+      wait
+      for fold in 4 5 6 7; do
+        run_examm &
+      done
+      wait
+    done
+  done
+done
diff --git a/scripts/dnas/dnas_cluster.zsh b/scripts/dnas/dnas_cluster.zsh
new file mode 100644
index 00000000..55823c0c
--- /dev/null
+++ b/scripts/dnas/dnas_cluster.zsh
@@ -0,0 +1,69 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 2 \
+      --use_dnas_seed true \
+      --use_burn_in_bp_epoch \
+      --burn_in_period 1024 \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for crystalize_iters in 512; do
+    for k in 1; do
+      for fold in $(seq 0 19); do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/scripts/dnas/dnas_control.zsh b/scripts/dnas/dnas_control.zsh
new file mode 100644
index 00000000..88a7c882
--- /dev/null
+++ b/scripts/dnas/dnas_control.zsh
@@ -0,0 +1,60 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types lstm mgu gru ugrnn delta simple \
+      --stochastic 0 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 32 \
+      --number_islands 8 \
+      --num_mutations 4 \
+      --burn_in_period 1024 \
+      --use_burn_in_bp_epoch
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for k in 1; do
+    for fold in $(seq 0 19); do
+      run_examm
+    done
+  done
+}
+
+run_group
diff --git a/scripts/dnas/dnas_r2_cluster.zsh b/scripts/dnas/dnas_r2_cluster.zsh
new file mode 100644
index 00000000..a8bce387
--- /dev/null
+++ b/scripts/dnas/dnas_r2_cluster.zsh
@@ -0,0 +1,67 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
+
+offset=1
+
+run_examm() {
+  output_dir=initial_integration_experiments/results/v9/$crystalize_iters/$bp_epoch/$k/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types dnas \
+      --stochastic 1 \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --validation_sequence_length 100 \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.001 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 16 \
+      --number_islands 8 \
+      --num_mutations 4 \
+      --use_dnas_seed true \
+      --dnas_k $k
+
+  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
+  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
+}
+
+run_group() {
+  for crystalize_iters in 1000000; do
+    for k in 1; do
+      for fold in $(seq 0 19); do
+        run_examm
+      done
+    done
+  done
+}
+
+CELL_TYPE='dnas'
+# bp_ge=(8 8192 16 4096 32 2048 64 1024)
+# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+run_group
+# done
diff --git a/scripts/dnas/experiment.zsh b/scripts/dnas/experiment.zsh
new file mode 100755
index 00000000..32a1db55
--- /dev/null
+++ b/scripts/dnas/experiment.zsh
@@ -0,0 +1,44 @@
+#!/bin/zsh
+#SBATCH -n 1
+#SBATCH -A examm
+#SBATCH --partition=tier3
+#SBATCH -o /home/jak5763/exact/aistats/slurm_out/%x.%j.out
+#SBATCH -e /home/jak5763/exact/aistats/slurm_out/%x.%j.err
+#SBATCH --mem=10G
+
+# Runs post_training.zsh on one saved genome for each of the 20 fold
+# directories of a single configuration. Reads `control`, `maxt`, `crystal`
+# and `bp` from the environment.
+
+spack load gcc
+spack load openmpi
+spack load /5aoa7oi
+spack load /dd7nzzh
+
+# The node-type list depends only on $control, so choose it once up front.
+if [ "$control" = "control" ]; then
+    node_types="simple UGRNN MGU GRU delta LSTM"
+else
+    node_types="DNAS"
+fi
+export node_types
+
+for i in $(seq 0 19); do
+  export i
+  export output_dir=/home/jak5763/exact/aistats/$control/maxt$maxt/crystal$crystal/bp$bp/$i
+
+  echo $node_types $control
+
+  # ./run_examm.zsh
+
+  # (N) lets a fold without genome files expand to an empty array instead of
+  # raising "no matches found", which would abort all remaining folds.
+  best_genome_file=( $output_dir/rnn_genome_*.bin(N[-1]) )
+  if (( ${#best_genome_file} == 0 )); then
+    echo "no rnn_genome_*.bin in $output_dir; skipping fold $i" >&2
+    continue
+  fi
+  export BP_ITERS=1
+  export GENOME=$best_genome_file
+  ./post_training.zsh
+done
diff --git a/scripts/dnas/gp_control.zsh b/scripts/dnas/gp_control.zsh
new file mode 100644
index 00000000..049e9750
--- /dev/null
+++ b/scripts/dnas/gp_control.zsh
@@ -0,0 +1,69 @@
+#!/bin/zsh
+
+# Short EXAMM sweep (MAX_GENOMES=10) over three datasets, one after another.
+# Relative paths (Release/, datasets/, test_results/) are resolved against the
+# current directory.
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+# Run examm_mpi once for the current output_params / bp_epoch / fold globals.
+run_examm() {
+  # output_params is part of the path: every sweep below reuses the same
+  # bp_epoch/fold values and would otherwise overwrite the previous sweep's logs.
+  output_dir=test_results/v0/output_$output_params/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames ${=training_filenames} \
+      --test_filenames ${=test_filenames} \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names $output_params \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes $MAX_GENOMES \
+      --island_size $ISLAND_SIZE \
+      --number_islands $N_ISLANDS
+
+  touch $output_dir/completed
+}
+
+# Sweep each entry of $OUTPUTS over five bp_epoch settings and ten folds.
+run_group() {
+  for output_params in $OUTPUTS; do
+    for bp_epoch in 2 4 8 16 32; do
+      for fold in 0 1 2 3 4 5 6 7 8 9; do
+        run_examm
+      done
+    done
+  done
+}
+
+# datasets/2018_coal burner files
+INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int"
+training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+run_group
+
+# datasets/2019_ngafid_transfer c172 files
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUTS=("E1_CHT1" "Pitch")
+training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+run_group
+
+# datasets/2020_wind_engine turbine files
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+OUTPUTS=("Cm_avg" "P_avg")
+training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+run_group
diff --git a/scripts/dnas/lib.zsh b/scripts/dnas/lib.zsh
new file mode 100644
index 00000000..49ebc581
--- /dev/null
+++ b/scripts/dnas/lib.zsh
@@ -0,0 +1,87 @@
+#!/bin/zsh
+
+# Helper functions for EXAMM sweeps: coal, aviation and wind each set the
+# dataset variables and call run_group. The functions read output_dir_prefix,
+# np, bp_epoch_set and nfolds, which are not defined in this file.
+# DRY_RUN (default 1) only prints the srun command; set DRY_RUN=0 to execute it.
+
+offset=1
+MAX_GENOMES=10
+N_ISLANDS=4
+ISLAND_SIZE=32
+
+# Build the examm_mpi command for the current output_params / bp_epoch / fold
+# globals, then print it (dry run) or execute it.
+run_examm() {
+  output_dir=$output_dir_prefix/bp_$bp_epoch/output_$output_params/$fold
+  mkdir -p $output_dir
+  local -a examm_cmd
+  examm_cmd=(
+      srun -n $np Release/mpi/examm_mpi
+      --training_filenames ${=training_filenames}
+      --test_filenames ${=test_filenames}
+      --time_offset $offset
+      --input_parameter_names ${=INPUT_PARAMETERS}
+      --output_parameter_names $output_params
+      --bp_iterations $bp_epoch
+      --normalize min_max
+      --max_recurrent_depth 1
+      --output_directory $output_dir
+      --log_filename fitness.csv
+      --learning_rate 0.01
+      --std_message_level INFO
+      --file_message_level INFO
+      --max_genomes $MAX_GENOMES
+      --island_size $ISLAND_SIZE
+      --number_islands $N_ISLANDS
+  )
+
+  if [[ ${DRY_RUN:-1} != 0 ]]; then
+    # Nothing was executed, so do not leave a misleading `completed` marker.
+    echo $examm_cmd
+    return 0
+  fi
+
+  "${examm_cmd[@]}"
+  touch $output_dir/completed
+}
+
+# Call run_examm for every output / bp_epoch / fold (1..nfolds) combination.
+# NOTE(review): $bp_epoch_set is expanded without word splitting, so it
+# presumably has to be an array — confirm against the callers.
+run_group() {
+  for output_params in $OUTPUTS; do
+    for bp_epoch in $bp_epoch_set; do
+      for fold in $(seq 1 $nfolds); do
+        run_examm
+      done
+    done
+  done
+}
+
+# Sweep over the datasets/2018_coal burner files.
+coal() {
+    INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int"
+    training_filenames=(datasets/2018_coal/burner_[0-9].csv)
+    test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
+    OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
+    run_group
+}
+
+# Sweep over the datasets/2019_ngafid_transfer c172 files.
+aviation() {
+    INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+    OUTPUTS=("E1_CHT1" "Pitch")
+    training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
+    test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
+    run_group
+}
+
+# Sweep over the datasets/2020_wind_engine turbine files.
+wind() {
+    INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+    OUTPUTS=("Cm_avg" "P_avg")
+    training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
+    test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
+    run_group
+}
diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh
new file mode 100644
index 00000000..38a5526c
--- /dev/null
+++ b/scripts/dnas/mk_jobs.zsh
@@ -0,0 +1,8 @@
+# Submit the three job scripts once per (bp_epoch, max_genomes) pair.
+# bp_ge is a flat list consumed two items at a time: bp_epoch, then max_genomes.
+bp_ge=(8 8192 16 4096 32 2048 64 1024)
+for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
+  for job_script in dnas_cluster.zsh dnas_r2_cluster.zsh dnas_control.zsh; do
+    bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch $job_script
+  done
+done
diff --git a/scripts/dnas/populate_queue.zsh b/scripts/dnas/populate_queue.zsh
new file mode 100755
index 00000000..43a09dbb
--- /dev/null
+++ b/scripts/dnas/populate_queue.zsh
@@ -0,0 +1,30 @@
+#!/bin/zsh
+# Submit experiment.zsh once per hyper-parameter combination. Settings are
+# exported, presumably so that sbatch forwards them to the job.
+export INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+export OUTPUT_PARAMETERS='E1_EGT1'
+
+export offset=1
+export k=1
+
+# Export the current maxt/crystal/bp/control values and submit one job named after them.
+push_job() {
+  export maxt crystal bp control
+  sbatch -J $control.maxt$maxt.cr$crystal.bp$bp ./experiment.zsh
+}
+
+export control="exp"
+for maxt in 1.66 1.33 1.0; do
+  for crystal in 64 128 256; do
+    for bp in 4 8 16; do
+      push_job
+    done
+  done
+done
+
+# NOTE(review): maxt and crystal still hold the last values of the loops above
+# (1.0 and 256) while the control jobs are submitted — confirm this is intended.
+export control="control"
+for bp in 4 8 16; do
+  push_job
+done
diff --git a/scripts/dnas/post_training.zsh b/scripts/dnas/post_training.zsh
new file mode 100755
index 00000000..38c2d39d
--- /dev/null
+++ b/scripts/dnas/post_training.zsh
@@ -0,0 +1,29 @@
+#!/bin/zsh
+# Run train_rnn on an existing genome file using the c172 training/test files.
+# Reads INPUT_PARAMETERS, OUTPUT_PARAMETERS, BP_ITERS, GENOME, output_dir,
+# crystal and k from the environment.
+offset=1
+
+post_training() {
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset $offset \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $output_dir \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 100 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $crystal \
+        --dnas_k $k
+}
+
+post_training
diff --git a/scripts/dnas/post_training_dnas.zsh b/scripts/dnas/post_training_dnas.zsh
new file mode 100755
index 00000000..1c226178
--- /dev/null
+++ b/scripts/dnas/post_training_dnas.zsh
@@ -0,0 +1,31 @@
+#!/bin/zsh
+# Run train_rnn on an existing genome file, then print the last line of its log.
+# Reads INPUT_PARAMETERS, OUTPUT_PARAMETERS, BP_ITERS, GENOME,
+# OUTPUT_DIRECTORY, CRYSTALIZE_ITERS and k from the environment.
+offset=1
+
+post_training() {
+    echo "genome = $GENOME"
+    Release/rnn_examples/train_rnn \
+        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+        --time_offset $offset \
+        --input_parameter_names ${=INPUT_PARAMETERS} \
+        --output_parameter_names ${=OUTPUT_PARAMETERS} \
+        --bp_iterations $BP_ITERS \
+        --stochastic \
+        --normalize min_max \
+        --genome_file $GENOME \
+        --output_directory $OUTPUT_DIRECTORY \
+        --log_filename post_training.csv \
+        --learning_rate 0.01 \
+        --weight_update adagrad \
+        --train_sequence_length 1000 \
+        --validation_sequence_length 100 \
+        --crystalize_iters $CRYSTALIZE_ITERS \
+        --dnas_k $k
+
+    tail -1 $OUTPUT_DIRECTORY/post_training.csv
+}
+
+post_training
diff --git a/scripts/dnas/posttrain.zsh b/scripts/dnas/posttrain.zsh
new file mode 100644
index 00000000..cc54a2eb
--- /dev/null
+++ b/scripts/dnas/posttrain.zsh
@@ -0,0 +1,3 @@
+#!/bin/zsh
+
+
diff --git a/scripts/dnas/run_examm.zsh b/scripts/dnas/run_examm.zsh
new file mode 100644
index 00000000..77d2893f
--- /dev/null
+++ b/scripts/dnas/run_examm.zsh
@@ -0,0 +1,33 @@
+#!/bin/zsh
+
+# Run one examm_mpi job on the c172 files. Every setting comes from the
+# caller's environment; stop early when one is missing instead of launching
+# mpirun with a malformed command line and writing into results/v0//.
+: ${bp_epoch:?bp_epoch must be set}
+: ${fold:?fold must be set}
+: ${offset:?offset must be set}
+: ${INPUT_PARAMETERS:?INPUT_PARAMETERS must be set}
+: ${output_params:?output_params must be set}
+
+output_dir=results/v0/$bp_epoch/$fold
+mkdir -p $output_dir
+
+mpirun -np 32 Release/mpi/examm_mpi \
+    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+    --time_offset $offset \
+    --input_parameter_names ${=INPUT_PARAMETERS} \
+    --output_parameter_names ${=output_params} \
+    --bp_iterations $bp_epoch \
+    --normalize min_max \
+    --max_recurrent_depth 1 \
+    --output_directory $output_dir \
+    --log_filename fitness.csv \
+    --learning_rate 0.01 \
+    --std_message_level INFO \
+    --file_message_level INFO \
+    --max_genomes 4000 \
+    --island_size 32 \
+    --number_islands 4
+
+touch $output_dir/completed
diff --git a/scripts/dnas/run_experiments.zsh b/scripts/dnas/run_experiments.zsh
new file mode 100755
index 00000000..7dd8e956
--- /dev/null
+++ b/scripts/dnas/run_experiments.zsh
@@ -0,0 +1,9 @@
+#!/bin/zsh
+
+# Run control.zsh and then dnas.zsh.
+# Both are looked up next to this file rather than under the former
+# initial_integration_experiments/ directory, and are started through zsh so
+# they do not need the executable bit.
+# NOTE(review): assumes control.zsh and dnas.zsh sit alongside this script — confirm.
+zsh ${0:A:h}/control.zsh
+zsh ${0:A:h}/dnas.zsh
diff --git a/scripts/dnas/wind.zsh b/scripts/dnas/wind.zsh
new file mode 100644
index 00000000..7e68f482
--- /dev/null
+++ b/scripts/dnas/wind.zsh
@@ -0,0 +1,42 @@
+#!/bin/zsh
+
+# EXAMM sweep over the datasets/2020_wind_engine turbine files: two output
+# parameters x five bp_epoch settings x ten folds.
+INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
+
+offset=1
+
+# Run examm_mpi once for the current output_params / bp_epoch / fold globals.
+run_examm() {
+  # output_params is part of the path: both output sweeps reuse the same
+  # bp_epoch/fold values and would otherwise overwrite each other's logs.
+  output_dir=results/v0/output_$output_params/$bp_epoch/$fold
+  mkdir -p $output_dir
+  mpirun -np 32 Release/mpi/examm_mpi \
+      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
+      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
+      --time_offset $offset \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=output_params} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --max_recurrent_depth 1 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level INFO \
+      --file_message_level INFO \
+      --max_genomes 10000 \
+      --island_size 32 \
+      --number_islands 4
+
+  touch $output_dir/completed
+}
+
+for output_params in "Cm_avg" "P_avg"; do
+  for bp_epoch in 2 4 8 16 32; do
+    for fold in 0 1 2 3 4 5 6 7 8 9; do
+      run_examm
+    done
+  done
+done

From 4c3ebfc64a020a4ec0ae343a1f328dfc14715c64 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@Joshuas-MacBook-Pro.local>
Date: Mon, 19 Feb 2024 14:35:20 -0500
Subject: [PATCH 22/31] removed old files

---
 initial_integration_experiments/analyze.py    | 110 ------------------
 initial_integration_experiments/analyze.zsh   |  12 --
 initial_integration_experiments/aviation.zsh  |  37 ------
 initial_integration_experiments/control.zsh   |  50 --------
 initial_integration_experiments/debug.zsh     |  55 ---------
 initial_integration_experiments/dnas.zsh      |  55 ---------
 .../gp_control.zsh                            |  59 ----------
 .../post_training_dnas.zsh                    |  29 -----
 initial_integration_experiments/posttrain.zsh |   3 -
 initial_integration_experiments/run_examm.zsh |  25 ----
 .../run_experiments.zsh                       |   4 -
 initial_integration_experiments/wind.zsh      |  39 -------
 12 files changed, 478 deletions(-)
 delete mode 100644 initial_integration_experiments/analyze.py
 delete mode 100644 initial_integration_experiments/analyze.zsh
 delete mode 100644 initial_integration_experiments/aviation.zsh
 delete mode 100644 initial_integration_experiments/control.zsh
 delete mode 100755 initial_integration_experiments/debug.zsh
 delete mode 100644 initial_integration_experiments/dnas.zsh
 delete mode 100644 initial_integration_experiments/gp_control.zsh
 delete mode 100755 initial_integration_experiments/post_training_dnas.zsh
 delete mode 100644 initial_integration_experiments/posttrain.zsh
 delete mode 100644 initial_integration_experiments/run_examm.zsh
 delete mode 100755 initial_integration_experiments/run_experiments.zsh
 delete mode 100644 initial_integration_experiments/wind.zsh

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
deleted file mode 100644
index 78d51466..00000000
--- a/initial_integration_experiments/analyze.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas
-
-import numpy as np
-
-import matplotlib.pyplot as plt
-
-fig, subplts = plt.subplots(6, 1)
-
-bprange = [8, 16]
-plts = {k:v for k, v in zip(bprange, subplts)}
-print(plts)
-base = plts[bprange[0]]
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    if k == bprange[0]:
-        continue
-    v.sharey(base)
-    v.sharex(base)
-
-def avg(files, slice_at=-1):
-    r = {}
-    for file in files:
-        x = []
-
-        for fold in range(8):
-            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
-            print(f"{file}/{fold} -> {len(f)}")
-            x.append(f)
-
-
-        enabled_nodes = []
-        enabled_edges = []
-        enabled_rec_edges = []
-
-        bpi_columns = []
-        mse_columns = []
-
-        minlen = 100000000
-
-        for f in x:
-            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-            mse_columns.append(f[' Best Val. MSE'].to_numpy())
-            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-            enabled_edges.append(f[' Enabled Edges'].to_numpy())
-            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-            minlen = min(minlen, len(bpi_columns[-1]))
-
-        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-        edgesmean = np.mean(np.array(enabled_edges), axis=0)
-        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-        print(f"Nodes at end mean: {nodesmean[-1]}")
-        print(f"edges at end mean: {edgesmean[-1]}")
-        print(f"redges at end mean: {redgesmean[-1]}")
-
-
-        bpimean = np.mean(np.array(bpi_columns), axis=0)
-        msemean = np.mean(np.array(mse_columns), axis=0)
-        msestd = np.std(np.array(mse_columns), axis=0)
-        
-        r[file] = {
-            'mean_nodes': nodesmean,
-            'mean_edges': edgesmean,
-            'mean_rec_edges':redgesmean,
-            'bpi': bpimean,
-            'mean_mse': msemean,
-            'std_mse': msestd,
-        }
-    return r
-
-results = {}
-for ci in [64]:
-    results[ci] = {}
-    for bpe in bprange:
-        results[ci][bpe] = {}
-        for k in [1]:
-            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
-            x = avg([f])[f]
-            results[ci][bpe][k] = x
-            print(x)
-
-            print(x['mean_mse'] - x['std_mse'])
-            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
-            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-control_results = {}
-for bp in [8, 16]:
-    key = f"initial_integration_experiments/results/control_v7/{bp}"
-    r = avg([key])[key]
-    control_results[bp] = r
-    print(list(r.keys()))
-    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
-    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
-        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    v.legend(fontsize=12, loc="upper right")
-
-
-plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
deleted file mode 100644
index 5c2876f3..00000000
--- a/initial_integration_experiments/analyze.zsh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/zsh
-#
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
-    for k in 1; do
-      for fold in 0 1 2 3 4 5 6 7; do
-        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
-        tail -1 $output_dir/fitness_log.csv
-      done
-    done
-  done
-done
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
deleted file mode 100644
index 7059da3e..00000000
--- a/initial_integration_experiments/aviation.zsh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-for output_params in "E1_CHT1" "Pitch"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
deleted file mode 100644
index a848302b..00000000
--- a/initial_integration_experiments/control.zsh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
-}
-
-bp_ge=(8 8192 16 4096 32 2048)
-
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
-   done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
-done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
deleted file mode 100755
index ce159c01..00000000
--- a/initial_integration_experiments/debug.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes 8192 \
-      --island_size 32 \
-      --number_islands 4 \
-      --stochastic \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-for crystalize_iters in 128; do
-  for bp_epoch in 8; do
-    for k in 1; do
-      for fold in 0; do
-        run_examm
-      done
- #      wait
- #      for fold in 4 5 6 7; do
- #        run_examm &
- #      done
- #      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
deleted file mode 100644
index 8b525b09..00000000
--- a/initial_integration_experiments/dnas.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-bp_ge=(8 8192 16 4096 32 2048)
-for crystalize_iters in 256; do
-  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-    for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
deleted file mode 100644
index 049e9750..00000000
--- a/initial_integration_experiments/gp_control.zsh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/zsh
-
-offset=1
-MAX_GENOMES=10
-N_ISLANDS=4
-ISLAND_SIZE=32
-
-run_examm() {
-  output_dir=test_results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames ${=training_filenames} \
-      --test_filenames ${=test_filenames} \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names $output_params \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes $MAX_GENOMES \
-      --island_size $ISLAND_SIZE \
-      --number_islands $N_ISLANDS
-
-  touch $output_dir/completed
-}
-
-run_group() {
-  for output_params in $OUTPUTS; do
-    for bp_epoch in 2 4 8 16 32; do
-      for fold in 0 1 2 3 4 5 6 7 8 9; do
-        run_examm
-      done
-    done
-  done
-}
-
-INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
-training_filenames=(datasets/2018_coal/burner_[0-9].csv)
-test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
-OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
-run_group
-
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUTS=("E1_CHT1" "Pitch")
-training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
-run_group
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-OUTPUTS=("Cm_avg" "P_avg")
-training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
-test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
-run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
deleted file mode 100755
index 1c226178..00000000
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/zsh
-offset=1
-
-post_training() {
-
-    echo "genome = $GENOME"
-    Release/rnn_examples/train_rnn \
-        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-        --time_offset 1 \
-        --input_parameter_names ${=INPUT_PARAMETERS} \
-        --output_parameter_names ${=OUTPUT_PARAMETERS} \
-        --bp_iterations $BP_ITERS \
-        --stochastic \
-        --normalize min_max \
-        --genome_file $GENOME \
-        --output_directory $OUTPUT_DIRECTORY \
-        --log_filename post_training.csv \
-        --learning_rate 0.01 \
-        --weight_update adagrad \
-        --train_sequence_length 1000 \
-        --validation_sequence_length 100 \
-        --crystalize_iters $CRYSTALIZE_ITERS \
-        --dnas_k $k
-
-      tail -1 $OUTPUT_DIRECTORY/post_training.csv
-}
-
-post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
deleted file mode 100644
index cc54a2eb..00000000
--- a/initial_integration_experiments/posttrain.zsh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/zsh
-
-
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
deleted file mode 100644
index 77d2893f..00000000
--- a/initial_integration_experiments/run_examm.zsh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/zsh
-
-output_dir=results/v0/$bp_epoch/$fold
-mkdir -p $output_dir
-
-mpirun -np 32 Release/mpi/examm_mpi \
-    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-    --time_offset $offset \
-    --input_parameter_names ${=INPUT_PARAMETERS} \
-    --output_parameter_names ${=output_params} \
-    --bp_iterations $bp_epoch \
-    --normalize min_max \
-    --max_recurrent_depth 1 \
-    --output_directory $output_dir \
-    --log_filename fitness.csv \
-    --learning_rate 0.01 \
-    --std_message_level INFO \
-    --file_message_level INFO \
-    --max_genomes 4000 \
-    --island_size 32 \
-    --number_islands 4
-
-touch $output_dir/completed
-
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
deleted file mode 100755
index 7dd8e956..00000000
--- a/initial_integration_experiments/run_experiments.zsh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh
-
-initial_integration_experiments/control.zsh
-initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
deleted file mode 100644
index 7e68f482..00000000
--- a/initial_integration_experiments/wind.zsh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
-      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-
-for output_params in "Cm_avg" "P_avg"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done

From c0b9e41e02ecdcaa98a501032f466d73ddedd42a Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 14:36:23 -0500
Subject: [PATCH 23/31] removed old files

---
 initial_integration_experiments/analyze.py    | 110 ------------------
 initial_integration_experiments/analyze.zsh   |  12 --
 initial_integration_experiments/aviation.zsh  |  37 ------
 initial_integration_experiments/control.zsh   |  50 --------
 initial_integration_experiments/debug.zsh     |  55 ---------
 initial_integration_experiments/dnas.zsh      |  55 ---------
 .../gp_control.zsh                            |  59 ----------
 .../post_training_dnas.zsh                    |  29 -----
 initial_integration_experiments/posttrain.zsh |   3 -
 initial_integration_experiments/run_examm.zsh |  25 ----
 .../run_experiments.zsh                       |   4 -
 initial_integration_experiments/wind.zsh      |  39 -------
 12 files changed, 478 deletions(-)
 delete mode 100644 initial_integration_experiments/analyze.py
 delete mode 100644 initial_integration_experiments/analyze.zsh
 delete mode 100644 initial_integration_experiments/aviation.zsh
 delete mode 100644 initial_integration_experiments/control.zsh
 delete mode 100755 initial_integration_experiments/debug.zsh
 delete mode 100644 initial_integration_experiments/dnas.zsh
 delete mode 100644 initial_integration_experiments/gp_control.zsh
 delete mode 100755 initial_integration_experiments/post_training_dnas.zsh
 delete mode 100644 initial_integration_experiments/posttrain.zsh
 delete mode 100644 initial_integration_experiments/run_examm.zsh
 delete mode 100755 initial_integration_experiments/run_experiments.zsh
 delete mode 100644 initial_integration_experiments/wind.zsh

diff --git a/initial_integration_experiments/analyze.py b/initial_integration_experiments/analyze.py
deleted file mode 100644
index 78d51466..00000000
--- a/initial_integration_experiments/analyze.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import pandas
-
-import numpy as np
-
-import matplotlib.pyplot as plt
-
-fig, subplts = plt.subplots(6, 1)
-
-bprange = [8, 16]
-plts = {k:v for k, v in zip(bprange, subplts)}
-print(plts)
-base = plts[bprange[0]]
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    if k == bprange[0]:
-        continue
-    v.sharey(base)
-    v.sharex(base)
-
-def avg(files, slice_at=-1):
-    r = {}
-    for file in files:
-        x = []
-
-        for fold in range(8):
-            f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at]
-            print(f"{file}/{fold} -> {len(f)}")
-            x.append(f)
-
-
-        enabled_nodes = []
-        enabled_edges = []
-        enabled_rec_edges = []
-
-        bpi_columns = []
-        mse_columns = []
-
-        minlen = 100000000
-
-        for f in x:
-            bpi_columns.append(f[' Total BP Epochs'].to_numpy())
-            mse_columns.append(f[' Best Val. MSE'].to_numpy())
-            enabled_nodes.append(f[' Enabled Nodes'].to_numpy())
-            enabled_edges.append(f[' Enabled Edges'].to_numpy())
-            enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy())
-
-            minlen = min(minlen, len(bpi_columns[-1]))
-
-        enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes))
-        enabled_edges = list(map(lambda x: x[:minlen], enabled_edges))
-        enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges))
-        bpi_columns = list(map(lambda x: x[:minlen], bpi_columns))
-        mse_columns = list(map(lambda x: x[:minlen], mse_columns))
-
-        nodesmean = np.mean(np.array(enabled_nodes), axis=0)
-        edgesmean = np.mean(np.array(enabled_edges), axis=0)
-        redgesmean = np.mean(np.array(enabled_rec_edges), axis=0)
-        print(f"Nodes at end mean: {nodesmean[-1]}")
-        print(f"edges at end mean: {edgesmean[-1]}")
-        print(f"redges at end mean: {redgesmean[-1]}")
-
-
-        bpimean = np.mean(np.array(bpi_columns), axis=0)
-        msemean = np.mean(np.array(mse_columns), axis=0)
-        msestd = np.std(np.array(mse_columns), axis=0)
-        
-        r[file] = {
-            'mean_nodes': nodesmean,
-            'mean_edges': edgesmean,
-            'mean_rec_edges':redgesmean,
-            'bpi': bpimean,
-            'mean_mse': msemean,
-            'std_mse': msestd,
-        }
-    return r
-
-results = {}
-for ci in [64]:
-    results[ci] = {}
-    for bpe in bprange:
-        results[ci][bpe] = {}
-        for k in [1]:
-            f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/"
-            x = avg([f])[f]
-            results[ci][bpe][k] = x
-            print(x)
-
-            print(x['mean_mse'] - x['std_mse'])
-            g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0]
-            plts[bpe].fill_between(x['bpi'], x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'],
-                alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-control_results = {}
-for bp in [8, 16]:
-    key = f"initial_integration_experiments/results/control_v7/{bp}"
-    r = avg([key])[key]
-    control_results[bp] = r
-    print(list(r.keys()))
-    g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0]
-    plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'],
-        alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0)
-
-
-for k, v in plts.items():
-    v.set_title(f"{k} BPI")
-    v.legend(fontsize=12, loc="upper right")
-
-
-plt.show()
diff --git a/initial_integration_experiments/analyze.zsh b/initial_integration_experiments/analyze.zsh
deleted file mode 100644
index 5c2876f3..00000000
--- a/initial_integration_experiments/analyze.zsh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/zsh
-#
-for crystalize_iters in 64 128 256 512; do
-  for bp_epoch in 8 16 32 64 128; do
-    for k in 1; do
-      for fold in 0 1 2 3 4 5 6 7; do
-        output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold
-        tail -1 $output_dir/fitness_log.csv
-      done
-    done
-  done
-done
diff --git a/initial_integration_experiments/aviation.zsh b/initial_integration_experiments/aviation.zsh
deleted file mode 100644
index 7059da3e..00000000
--- a/initial_integration_experiments/aviation.zsh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-for output_params in "E1_CHT1" "Pitch"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done
diff --git a/initial_integration_experiments/control.zsh b/initial_integration_experiments/control.zsh
deleted file mode 100644
index a848302b..00000000
--- a/initial_integration_experiments/control.zsh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
-}
-
-bp_ge=(8 8192 16 4096 32 2048)
-
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
-   done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
-done
diff --git a/initial_integration_experiments/debug.zsh b/initial_integration_experiments/debug.zsh
deleted file mode 100755
index ce159c01..00000000
--- a/initial_integration_experiments/debug.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes 8192 \
-      --island_size 32 \
-      --number_islands 4 \
-      --stochastic \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-for crystalize_iters in 128; do
-  for bp_epoch in 8; do
-    for k in 1; do
-      for fold in 0; do
-        run_examm
-      done
- #      wait
- #      for fold in 4 5 6 7; do
- #        run_examm &
- #      done
- #      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/dnas.zsh b/initial_integration_experiments/dnas.zsh
deleted file mode 100644
index 8b525b09..00000000
--- a/initial_integration_experiments/dnas.zsh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 4 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-CELL_TYPE='dnas'
-bp_ge=(8 8192 16 4096 32 2048)
-for crystalize_iters in 256; do
-  for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-    for k in 1; do
-      for fold in 0 1 2 3; do
-        run_examm &
-      done
-      wait
-      for fold in 4 5 6 7; do
-        run_examm &
-      done
-      wait
-    done
-  done
-done
diff --git a/initial_integration_experiments/gp_control.zsh b/initial_integration_experiments/gp_control.zsh
deleted file mode 100644
index 049e9750..00000000
--- a/initial_integration_experiments/gp_control.zsh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/zsh
-
-offset=1
-MAX_GENOMES=10
-N_ISLANDS=4
-ISLAND_SIZE=32
-
-run_examm() {
-  output_dir=test_results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames ${=training_filenames} \
-      --test_filenames ${=test_filenames} \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names $output_params \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes $MAX_GENOMES \
-      --island_size $ISLAND_SIZE \
-      --number_islands $N_ISLANDS
-
-  touch $output_dir/completed
-}
-
-run_group() {
-  for output_params in $OUTPUTS; do
-    for bp_epoch in 2 4 8 16 32; do
-      for fold in 0 1 2 3 4 5 6 7 8 9; do
-        run_examm
-      done
-    done
-  done
-}
-
-INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" 
-training_filenames=(datasets/2018_coal/burner_[0-9].csv)
-test_filenames=(datasets/2018_coal/burner_1[0-1].csv)
-OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow")
-run_group
-
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUTS=("E1_CHT1" "Pitch")
-training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv)
-test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv)
-run_group
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-OUTPUTS=("Cm_avg" "P_avg")
-training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv)
-test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv)
-run_group
diff --git a/initial_integration_experiments/post_training_dnas.zsh b/initial_integration_experiments/post_training_dnas.zsh
deleted file mode 100755
index 1c226178..00000000
--- a/initial_integration_experiments/post_training_dnas.zsh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/zsh
-offset=1
-
-post_training() {
-
-    echo "genome = $GENOME"
-    Release/rnn_examples/train_rnn \
-        --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-        --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-        --time_offset 1 \
-        --input_parameter_names ${=INPUT_PARAMETERS} \
-        --output_parameter_names ${=OUTPUT_PARAMETERS} \
-        --bp_iterations $BP_ITERS \
-        --stochastic \
-        --normalize min_max \
-        --genome_file $GENOME \
-        --output_directory $OUTPUT_DIRECTORY \
-        --log_filename post_training.csv \
-        --learning_rate 0.01 \
-        --weight_update adagrad \
-        --train_sequence_length 1000 \
-        --validation_sequence_length 100 \
-        --crystalize_iters $CRYSTALIZE_ITERS \
-        --dnas_k $k
-
-      tail -1 $OUTPUT_DIRECTORY/post_training.csv
-}
-
-post_training
diff --git a/initial_integration_experiments/posttrain.zsh b/initial_integration_experiments/posttrain.zsh
deleted file mode 100644
index cc54a2eb..00000000
--- a/initial_integration_experiments/posttrain.zsh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/zsh
-
-
diff --git a/initial_integration_experiments/run_examm.zsh b/initial_integration_experiments/run_examm.zsh
deleted file mode 100644
index 77d2893f..00000000
--- a/initial_integration_experiments/run_examm.zsh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/zsh
-
-output_dir=results/v0/$bp_epoch/$fold
-mkdir -p $output_dir
-
-mpirun -np 32 Release/mpi/examm_mpi \
-    --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-    --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-    --time_offset $offset \
-    --input_parameter_names ${=INPUT_PARAMETERS} \
-    --output_parameter_names ${=output_params} \
-    --bp_iterations $bp_epoch \
-    --normalize min_max \
-    --max_recurrent_depth 1 \
-    --output_directory $output_dir \
-    --log_filename fitness.csv \
-    --learning_rate 0.01 \
-    --std_message_level INFO \
-    --file_message_level INFO \
-    --max_genomes 4000 \
-    --island_size 32 \
-    --number_islands 4
-
-touch $output_dir/completed
-
diff --git a/initial_integration_experiments/run_experiments.zsh b/initial_integration_experiments/run_experiments.zsh
deleted file mode 100755
index 7dd8e956..00000000
--- a/initial_integration_experiments/run_experiments.zsh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/zsh
-
-initial_integration_experiments/control.zsh
-initial_integration_experiments/dnas.zsh
diff --git a/initial_integration_experiments/wind.zsh b/initial_integration_experiments/wind.zsh
deleted file mode 100644
index 7e68f482..00000000
--- a/initial_integration_experiments/wind.zsh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/zsh
-
-INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg"
-
-
-offset=1
-
-run_examm() {
-  output_dir=results/v0/$bp_epoch/$fold
-  mkdir -p $output_dir
-  mpirun -np 32 Release/mpi/examm_mpi \
-      --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \
-      --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \
-      --time_offset $offset \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=output_params} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --max_recurrent_depth 1 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level INFO \
-      --file_message_level INFO \
-      --max_genomes 10000 \
-      --island_size 32 \
-      --number_islands 4
-
-  touch $output_dir/completed
-}
-
-
-for output_params in "Cm_avg" "P_avg"; do
-  for bp_epoch in 2 4 8 16 32; do
-    for fold in 0 1 2 3 4 5 6 7 8 9; do
-      run_examm
-    done
-  done
-done

From 79df69ab941e0d6783d20ba31d1929624d9601b7 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 14:37:52 -0500
Subject: [PATCH 24/31] Fixed bug caused by accidental paste

---
 examm/island_speciation_strategy.cxx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index d8eaabab..b0a7b5e0 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -347,7 +347,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_repopulating_island(
         Log::fatal("Wrong repopulation method: %s\n", repopulation_method.c_str());
         exit(1);
     }
-    return new_genome17731515;
+    return new_genome;
 }
 
 RNN_Genome* IslandSpeciationStrategy::generate_genome(

From 68752460cb9a79e554212b3f0c9a97305defdfc4 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Mon, 19 Feb 2024 15:59:40 -0500
Subject: [PATCH 25/31] Synchronous EXAMM flag added --synchronous

---
 mpi/examm_mpi.cxx | 75 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx
index 227c3a85..e350be0a 100644
--- a/mpi/examm_mpi.cxx
+++ b/mpi/examm_mpi.cxx
@@ -114,10 +114,62 @@ void receive_terminate_message(int32_t source) {
     MPI_Recv(terminate_message, 1, MPI_INT, source, TERMINATE_TAG, MPI_COMM_WORLD, &status);
 }
 
-void master(int32_t max_rank) {
-    // the "main" id will have already been set by the main function so we do not need to re-set it here
-    Log::debug("MAX int32_t: %d\n", numeric_limits<int32_t>::max());
+void master_sync(int32_t max_rank) {
+    max_rank -= 1;
+    int32_t generation = 0;
+    while (true) {
+    
+        // Wait for N work requests
+        int32_t nreqs = 0;
+        while (nreqs < max_rank) {
+            MPI_Status status;
+            MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
+
+            int32_t source = status.MPI_SOURCE;
+            int32_t tag = status.MPI_TAG;
+            // Log::info("probe returned message from: %d with tag: %d\n", source, tag);
+            
+            if (tag == WORK_REQUEST_TAG) {
+                receive_work_request(source);
+                nreqs++;
+            } else if (tag == GENOME_LENGTH_TAG) {
+                Log::debug("received genome from: %d\n", source);
+                RNN_Genome* genome = receive_genome_from(source);
+                
+                examm->insert_genome(genome);
+
+                // delete the genome as it won't be used again, a copy was inserted
+                delete genome;
+            } else {
+                Log::fatal("ERROR: received message from %d with unknown tag: %d", source, tag);
+                MPI_Abort(MPI_COMM_WORLD, 1);
+            }
+        }
 
+        vector<RNN_Genome *> genomes(max_rank);
+        for (int32_t i = 1; i <= max_rank; i++) {
+            RNN_Genome* genome = examm->generate_genome();
+            if (genome == NULL)
+                break;
+            genomes[i - 1] = genome;
+        }
+
+        if (genomes.size() != max_rank) {
+            break;
+        }
+
+        for (int i = 1; i <= max_rank; i++) {
+            send_genome_to(i, genomes[i - 1]);
+            delete genomes[i - 1];
+        }
+    }
+  
+    for (int i = 1; i <= max_rank; i++) {
+        send_terminate_message(i);
+    }
+}
+
+void master(int32_t max_rank) {
     int32_t terminates_sent = 0;
 
     while (true) {
@@ -134,12 +186,7 @@ void master(int32_t max_rank) {
         if (tag == WORK_REQUEST_TAG) {
             receive_work_request(source);
 
-            // if (transfer_learning_version.compare("v3") == 0 || transfer_learning_version.compare("v1+v3") == 0) {
-            //     seed_stirs = 3;
-            // }
-            examm_mutex.lock();
             RNN_Genome* genome = examm->generate_genome();
-            examm_mutex.unlock();
 
             if (genome == NULL) {  // search was completed if it returns NULL for an individual
                 // send terminate message
@@ -167,9 +214,7 @@ void master(int32_t max_rank) {
             Log::debug("received genome from: %d\n", source);
             RNN_Genome* genome = receive_genome_from(source);
 
-            examm_mutex.lock();
             examm->insert_genome(genome);
-            examm_mutex.unlock();
 
             // delete the genome as it won't be used again, a copy was inserted
             delete genome;
@@ -264,12 +309,20 @@ int main(int argc, char** argv) {
 
     RNN_Genome* seed_genome = get_seed_genome(arguments, time_series_sets, weight_rules);
 
+    bool synchronous = argument_exists(arguments, "--synchronous");
+    Log::warning("synchronous? %d\n", synchronous); 
+
     Log::clear_rank_restriction();
 
     if (rank == 0) {
         write_time_series_to_file(arguments, time_series_sets);
         examm = generate_examm_from_arguments(arguments, time_series_sets, weight_rules, seed_genome);
-        master(max_rank);
+        
+        if (synchronous) {
+            master_sync(max_rank);
+        } else {
+            master(max_rank);
+        }
     } else {
         worker(rank);
     }

From a6006064aaeddb6160bd665d5208a4e135b2f5af Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Fri, 23 Feb 2024 21:47:51 -0500
Subject: [PATCH 26/31] Adding additional log information

---
 examm/examm.cxx                      | 26 ++++++++++++++++++-------
 examm/examm.hxx                      |  4 +++-
 examm/island_speciation_strategy.cxx | 29 +++++++++++++++++++++++-----
 examm/island_speciation_strategy.hxx | 16 ++++++++++-----
 examm/neat_speciation_strategy.cxx   | 10 +++++-----
 examm/neat_speciation_strategy.hxx   | 10 +++++-----
 examm/speciation_strategy.hxx        | 12 +++++++-----
 scripts/dnas/control.zsh             | 21 +++++++++-----------
 8 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/examm/examm.cxx b/examm/examm.cxx
index 1e1c2314..a90034f2 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -95,8 +95,8 @@ void EXAMM::generate_log() {
         Log::info("Generating fitness log\n");
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
-        (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled "
-                       "Edges, Enabled Rec. Edges";
+        (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled"
+                       "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
 
@@ -151,7 +151,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position
     }
 }
 
-void EXAMM::update_log() {
+void EXAMM::update_log(RNN_Genome *genome) {
     if (log_file != NULL) {
         // make sure the log file is still good
         if (!log_file->good()) {
@@ -193,8 +193,12 @@ void EXAMM::update_log() {
         (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds
                     << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << ","
                     << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << ","
-                    << best_genome->get_enabled_recurrent_edge_count()
-                    << speciation_strategy->get_strategy_information_values() << endl;
+                    << best_genome->get_enabled_recurrent_edge_count() << ","
+                    << genome->best_validation_mse << ","
+                    << pre_insert_best_mse << ","
+                    << (int32_t) (last_genome_inserted ? 1 : 0) << ","
+                    << genome->get_number_weights()
+                    << speciation_strategy->get_strategy_information_values(genome) << endl;
         Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }
 }
@@ -243,17 +247,25 @@ bool EXAMM::insert_genome(RNN_Genome* genome) {
 
     // updates EXAMM's mapping of which genomes have been generated by what
     genome->update_generation_map(generated_from_map);
+    pre_insert_best_mse = this->get_best_fitness();
+    
     int32_t insert_position = speciation_strategy->insert_genome(genome);
+    
     // write this genome to disk if it was a new best found genome
     if (insert_position == 0) {
         // genome->normalize_type = normalize_type;
         genome->write_graphviz(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".gv");
         genome->write_to_file(output_directory + "/rnn_genome_" + to_string(genome->get_generation_id()) + ".bin");
     }
+    
+    last_genome_inserted = insert_position >= 0;
+    
     speciation_strategy->print();
+    
     update_op_log_statistics(genome, insert_position);
-    update_log();
-    return insert_position >= 0;
+    update_log(genome);
+
+    return last_genome_inserted;
 }
 
 RNN_Genome* EXAMM::generate_genome() {
diff --git a/examm/examm.hxx b/examm/examm.hxx
index c0c0ee03..a95d8af4 100644
--- a/examm/examm.hxx
+++ b/examm/examm.hxx
@@ -73,6 +73,8 @@ class EXAMM {
     string output_directory;
     ofstream* log_file;
     ofstream* op_log_file;
+    double pre_insert_best_mse = 1000000;
+    bool last_genome_inserted = false;
 
     std::chrono::time_point<std::chrono::system_clock> startClock;
 
@@ -87,7 +89,7 @@ class EXAMM {
     ~EXAMM();
 
     void print();
-    void update_log();
+    void update_log(RNN_Genome *genome);
 
     void set_possible_node_types(vector<string> possible_node_type_strings);
 
diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx
index b0a7b5e0..2e7a91be 100644
--- a/examm/island_speciation_strategy.cxx
+++ b/examm/island_speciation_strategy.cxx
@@ -100,12 +100,12 @@ int32_t IslandSpeciationStrategy::get_evaluated_genomes() const {
     return evaluated_genomes;
 }
 
-RNN_Genome* IslandSpeciationStrategy::get_best_genome() {
+RNN_Genome* IslandSpeciationStrategy::get_best_genome() const {
     // the global_best_genome is updated every time a genome is inserted
     return global_best_genome;
 }
 
-RNN_Genome* IslandSpeciationStrategy::get_worst_genome() {
+RNN_Genome* IslandSpeciationStrategy::get_worst_genome() const {
     int32_t worst_genome_island = -1;
     double worst_fitness = -EXAMM_MAX_DOUBLE;
 
@@ -126,7 +126,7 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() {
     }
 }
 
-double IslandSpeciationStrategy::get_best_fitness() {
+double IslandSpeciationStrategy::get_best_fitness() const {
     RNN_Genome* best_genome = get_best_genome();
     if (best_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -135,7 +135,7 @@ double IslandSpeciationStrategy::get_best_fitness() {
     }
 }
 
-double IslandSpeciationStrategy::get_worst_fitness() {
+double IslandSpeciationStrategy::get_worst_fitness() const {
     RNN_Genome* worst_genome = get_worst_genome();
     if (worst_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -376,6 +376,9 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
     islands[generation_island]->set_latest_generation_id(generated_genomes);
     new_genome->set_group_id(generation_island);
 
+    pair<double, double> perf = {this->get_best_fitness(), this->get_worst_fitness()};
+    genome_performance.emplace(new_genome->generation_id, perf);
+    
     if (current_island->is_initializing()) {
         RNN_Genome* genome_copy = new_genome->copy();
         Log::debug("inserting genome copy!\n");
@@ -386,6 +389,7 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome(
         generation_island = 0;
     }
 
+
     return new_genome;
 }
 
@@ -456,6 +460,7 @@ void IslandSpeciationStrategy::print(string indent) const {
  */
 string IslandSpeciationStrategy::get_strategy_information_headers() const {
     string info_header = "";
+    info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post");
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
         info_header.append(",");
         info_header.append("Island_");
@@ -472,8 +477,33 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const {
 /**
  * Gets speciation strategy information values for logs
  */
-string IslandSpeciationStrategy::get_strategy_information_values() const {
+string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const {
     string info_value = "";
+
+    // Global best / worst fitness recorded when this genome was generated. A genome that
+    // was never registered by generate_genome() has no entry; fall back to EXAMM_MAX_DOUBLE
+    // (the "no genome yet" sentinel returned by get_best_fitness) rather than letting
+    // unordered_map::at() throw std::out_of_range and abort the run while logging.
+    double min_mse_pre = EXAMM_MAX_DOUBLE;
+    double max_mse_pre = EXAMM_MAX_DOUBLE;
+    auto recorded = genome_performance.find(genome->generation_id);
+    if (recorded != genome_performance.end()) {
+        min_mse_pre = recorded->second.first;
+        max_mse_pre = recorded->second.second;
+    }
+    info_value.append(",");
+    info_value.append(to_string(min_mse_pre));
+    info_value.append(",");
+    info_value.append(to_string(max_mse_pre));
+
+    // Keep the fitness values as double; narrowing to float loses precision in the log.
+    double min_mse_post = this->get_best_fitness();
+    double max_mse_post = this->get_worst_fitness();
+    info_value.append(",");
+    info_value.append(to_string(min_mse_post));
+    info_value.append(",");
+    info_value.append(to_string(max_mse_post));
+
     for (int32_t i = 0; i < (int32_t) islands.size(); i++) {
         double best_fitness = islands[i]->get_best_fitness();
         double worst_fitness = islands[i]->get_worst_fitness();
diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx
index 0eed891c..683e2a39 100644
--- a/examm/island_speciation_strategy.hxx
+++ b/examm/island_speciation_strategy.hxx
@@ -62,6 +62,12 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
     vector<Island*> islands;
     RNN_Genome* global_best_genome;
 
+    ofstream *island_log_file;
+    
+    // Maps a genome's generation id to the pair (best fitness, worst fitness) over all islands,
+    // as returned by get_best_fitness() / get_worst_fitness() when the genome was generated.
+    unordered_map<int32_t, pair<double, double>> genome_performance;
+
     // Transfer learning class properties:
 
     bool transfer_learning;
@@ -114,25 +120,25 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    double get_best_fitness();
+    double get_best_fitness() const;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    double get_worst_fitness();
+    double get_worst_fitness() const;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands or NULL if no genomes have yet been inserted
      */
-    RNN_Genome* get_best_genome();
+    RNN_Genome* get_best_genome() const;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands or NULL if no genomes have yet been inserted
      */
-    RNN_Genome* get_worst_genome();
+    RNN_Genome* get_worst_genome() const;
 
     /**
      *  \return true if all the islands are full
@@ -207,7 +213,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    string get_strategy_information_values() const;
+    string get_strategy_information_values(RNN_Genome *genome) const;
 
     /**
      * Island repopulation through two random parents from two seperate islands,
diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx
index 8d5f18ac..e71470e1 100644
--- a/examm/neat_speciation_strategy.cxx
+++ b/examm/neat_speciation_strategy.cxx
@@ -74,7 +74,7 @@ int32_t NeatSpeciationStrategy::get_evaluated_genomes() const {
     return evaluated_genomes;
 }
 
-RNN_Genome* NeatSpeciationStrategy::get_best_genome() {
+RNN_Genome* NeatSpeciationStrategy::get_best_genome() const {
     int32_t best_genome_species = -1;
     double best_fitness = EXAMM_MAX_DOUBLE;
 
@@ -95,7 +95,7 @@ RNN_Genome* NeatSpeciationStrategy::get_best_genome() {
     }
 }
 
-RNN_Genome* NeatSpeciationStrategy::get_worst_genome() {
+RNN_Genome* NeatSpeciationStrategy::get_worst_genome() const {
     int32_t worst_genome_species = -1;
     double worst_fitness = -EXAMM_MAX_DOUBLE;
 
@@ -116,7 +116,7 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() {
     }
 }
 
-double NeatSpeciationStrategy::get_best_fitness() {
+double NeatSpeciationStrategy::get_best_fitness() const {
     RNN_Genome* best_genome = get_best_genome();
     if (best_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -125,7 +125,7 @@ double NeatSpeciationStrategy::get_best_fitness() {
     }
 }
 
-double NeatSpeciationStrategy::get_worst_fitness() {
+double NeatSpeciationStrategy::get_worst_fitness() const {
     RNN_Genome* worst_genome = get_worst_genome();
     if (worst_genome == NULL) {
         return EXAMM_MAX_DOUBLE;
@@ -399,7 +399,7 @@ string NeatSpeciationStrategy::get_strategy_information_headers() const {
 /**
  * Gets speciation strategy information values for logs
  */
-string NeatSpeciationStrategy::get_strategy_information_values() const {
+string NeatSpeciationStrategy::get_strategy_information_values(RNN_Genome *genome) const {
     string info_value = "";
     for (int32_t i = 0; i < (int32_t) Neat_Species.size(); i++) {
         double best_fitness = Neat_Species[i]->get_best_fitness();
diff --git a/examm/neat_speciation_strategy.hxx b/examm/neat_speciation_strategy.hxx
index 3416de03..645aabdd 100644
--- a/examm/neat_speciation_strategy.hxx
+++ b/examm/neat_speciation_strategy.hxx
@@ -64,25 +64,25 @@ class NeatSpeciationStrategy : public SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    double get_best_fitness();
+    double get_best_fitness() const;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    double get_worst_fitness();
+    double get_worst_fitness() const;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands
      */
-    RNN_Genome* get_best_genome();
+    RNN_Genome* get_best_genome() const;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands
      */
-    RNN_Genome* get_worst_genome();
+    RNN_Genome* get_worst_genome() const;
 
     /**
      * Inserts a <b>copy</b> of the genome into this speciation strategy.
@@ -130,7 +130,7 @@ class NeatSpeciationStrategy : public SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    string get_strategy_information_values() const;
+    string get_strategy_information_values(RNN_Genome *genome) const;
 
     RNN_Genome* get_global_best_genome();
 
diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx
index 9d790ab0..2d66f990 100644
--- a/examm/speciation_strategy.hxx
+++ b/examm/speciation_strategy.hxx
@@ -9,6 +9,8 @@ using std::string;
 using std::minstd_rand0;
 using std::uniform_real_distribution;
 
+#include "rnn/rnn_genome.hxx"
+
 class SpeciationStrategy {
    public:
     /**
@@ -25,25 +27,25 @@ class SpeciationStrategy {
      * Gets the fitness of the best genome of all the islands
      * \return the best fitness over all islands
      */
-    virtual double get_best_fitness() = 0;
+    virtual double get_best_fitness() const = 0;
 
     /**
      * Gets the fitness of the worst genome of all the islands
      * \return the worst fitness over all islands
      */
-    virtual double get_worst_fitness() = 0;
+    virtual double get_worst_fitness() const = 0;
 
     /**
      * Gets the best genome of all the islands
      * \return the best genome of all islands
      */
-    virtual RNN_Genome* get_best_genome() = 0;
+    virtual RNN_Genome* get_best_genome() const = 0;
 
     /**
      * Gets the the worst genome of all the islands
      * \return the worst genome of all islands
      */
-    virtual RNN_Genome* get_worst_genome() = 0;
+    virtual RNN_Genome* get_worst_genome() const = 0;
 
     /**
      * Inserts a <b>copy</b> of the genome into this speciation strategy.
@@ -86,7 +88,7 @@ class SpeciationStrategy {
     /**
      * Gets speciation strategy information values for logs
      */
-    virtual string get_strategy_information_values() const = 0;
+    virtual string get_strategy_information_values(RNN_Genome *genome) const = 0;
 
     virtual RNN_Genome* get_global_best_genome() = 0;
     virtual void initialize_population(function<void(int32_t, RNN_Genome*)>& mutate) = 0;
diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh
index a848302b..f3532525 100644
--- a/scripts/dnas/control.zsh
+++ b/scripts/dnas/control.zsh
@@ -6,9 +6,9 @@ OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
 offset=1
 
 run_examm() {
-  output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold
+  output_dir=results/control_v8/$bp_epoch/$fold
   mkdir -p $output_dir
-  mpirun -np 8 Release/mpi/examm_mpi \
+  mpirun -np 14 build/mpi/examm_mpi \
       --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
       --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
       --time_offset $offset \
@@ -25,26 +25,23 @@ run_examm() {
       --output_directory $output_dir \
       --log_filename fitness.csv \
       --learning_rate 0.01 \
-      --std_message_level WARNING \
+      --std_message_level INFO \
       --file_message_level WARNING \
       --crystalize_iters $crystalize_iters \
       --max_genomes $max_genomes \
       --island_size 32 \
-      --number_islands 4
+      --number_islands 4 \
+      --synchronous
 
   # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
   # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh
 }
 
-bp_ge=(8 8192 16 4096 32 2048)
+# bp_ge=(8 8192 16 4096 32 2048)
+bp_ge=(8 8192)
 
 for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-   for fold in 0 1 2 3; do
-     run_examm &
+  for fold in $(seq 0 1); do
+     run_examm
    done
-   wait
-   for fold in 4 5 6 7; do
-     run_examm &
-   done
-   wait
 done

From 72ce5d4a42e6435cff23e5c0984527235f786d7f Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Thu, 29 Feb 2024 15:20:33 -0500
Subject: [PATCH 27/31] Additional log data

---
 examm/examm.cxx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examm/examm.cxx b/examm/examm.cxx
index a90034f2..d0c784b9 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -96,7 +96,7 @@ void EXAMM::generate_log() {
         mkpath(output_directory.c_str(), 0777);
         log_file = new ofstream(output_directory + "/" + "fitness_log.csv");
         (*log_file) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled"
-                       "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters";
+                       "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id";
         (*log_file) << speciation_strategy->get_strategy_information_headers();
         (*log_file) << endl;
 
@@ -197,7 +197,8 @@ void EXAMM::update_log(RNN_Genome *genome) {
                     << genome->best_validation_mse << ","
                     << pre_insert_best_mse << ","
                     << (int32_t) (last_genome_inserted ? 1 : 0) << ","
-                    << genome->get_number_weights()
+                    << genome->get_number_weights() << ","
+                    << genome->get_group_id()  // group id = island the genome was generated for ("Island Id" column)
                     << speciation_strategy->get_strategy_information_values(genome) << endl;
         Log::info("mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), best_genome->get_enabled_recurrent_edge_count());
     }

From 5949736f7bc61d174a7b0465dd104769bcd73781 Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Wed, 6 Mar 2024 15:17:26 -0500
Subject: [PATCH 28/31] Cluster script updates

---
 scripts/dnas/examm_bias_exp.zsh    | 58 ++++++++++++++++++++++++++++++
 scripts/dnas/examm_synchronous.zsh | 55 ++++++++++++++++++++++++++++
 scripts/dnas/mk_jobs.zsh           | 12 ++++---
 3 files changed, 120 insertions(+), 5 deletions(-)
 create mode 100644 scripts/dnas/examm_bias_exp.zsh
 create mode 100644 scripts/dnas/examm_synchronous.zsh

diff --git a/scripts/dnas/examm_bias_exp.zsh b/scripts/dnas/examm_bias_exp.zsh
new file mode 100644
index 00000000..52816f00
--- /dev/null
+++ b/scripts/dnas/examm_bias_exp.zsh
@@ -0,0 +1,58 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=18
+#SBATCH  --exclude theocho
+#SBATCH  --time=48:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_bias_ablation
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=64GB
+
+cd /home/jak5763/exact
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1'
+
+offset=1
+# Launches one EXAMM run (srun -n 18) for the current $fold; bp_epoch, SIZE, crystalize_iters, synchronous and scramble_weights are not defined in this script and must already be set.
+run_examm() {
+  output_dir=results/$synchronous/$scramble_weights/$max_genomes/$fold
+  mkdir -p $output_dir
+  srun -n 18 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types simple ugrnn gru mgu lstm delta \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 10 \
+      --number_islands 10 \
+      --num_mutations 1 \
+      --$synchronous \
+      --$scramble_weights
+}
+# Calls run_examm once per fold, one after another, for folds 0 through 19.
+run_group() {
+  for fold in $(seq 0 19); do
+    run_examm
+  done
+}
+
+let base_genomes=100000
+let max_genomes=$base_genomes/$bp_epoch
+run_group
diff --git a/scripts/dnas/examm_synchronous.zsh b/scripts/dnas/examm_synchronous.zsh
new file mode 100644
index 00000000..1d970272
--- /dev/null
+++ b/scripts/dnas/examm_synchronous.zsh
@@ -0,0 +1,55 @@
+#!/bin/zsh
+
+#SBATCH  --nodes=1
+#SBATCH  --ntasks-per-node=36
+#SBATCH  --exclude theocho
+#SBATCH  --time=23:00:00
+#SBATCH  -A examm
+#SBATCH  --partition=tier3
+#SBATCH  -J examm_dnas_experimental
+#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
+#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
+#SBATCH  --mem=0
+
+INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
+OUTPUT_PARAMETERS='E1_CHT1'
+
+offset=1
+# Launches one synchronous EXAMM run (srun -n 36) for the current $fold; bp_epoch, SIZE and crystalize_iters are not defined in this script and must already be set.
+run_examm() {
+  output_dir=results/synchronous/$max_genomes/$fold
+  mkdir -p $output_dir
+  srun -n 36 Release/mpi/examm_mpi \
+      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
+      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
+      --time_offset $offset \
+      --possible_node_types simple ugrnn gru mgu lstm delta \
+      --input_parameter_names ${=INPUT_PARAMETERS} \
+      --output_parameter_names ${=OUTPUT_PARAMETERS} \
+      --bp_iterations $bp_epoch \
+      --normalize min_max \
+      --num_hidden_layers $SIZE \
+      --hidden_layer_size $SIZE \
+      --max_recurrent_depth 10 \
+      --output_directory $output_dir \
+      --log_filename fitness.csv \
+      --learning_rate 0.01 \
+      --std_message_level WARNING \
+      --file_message_level WARNING \
+      --crystalize_iters $crystalize_iters \
+      --max_genomes $max_genomes \
+      --island_size 10 \
+      --number_islands 10 \
+      --num_mutations 1 \
+      --synchronous
+}
+# Calls run_examm once per fold, one after another, for folds 0 through 9.
+run_group() {
+  for fold in $(seq 0 9); do
+    run_examm
+  done
+}
+
+let base_genomes=100000
+let max_genomes=$base_genomes/$bp_epoch
+run_group
diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh
index 38a5526c..b996883e 100644
--- a/scripts/dnas/mk_jobs.zsh
+++ b/scripts/dnas/mk_jobs.zsh
@@ -1,6 +1,8 @@
-bp_ge=(8 8192 16 4096 32 2048 64 1024)
-for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_cluster.zsh
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_r2_cluster.zsh
-  bp_epoch=$bp_epoch max_genomes=$max_genomes sbatch dnas_control.zsh
+bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200)
+for bp_epoch in $bp; do
+  for synchronous in "async" "synchronous"; do
+    for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do
+      bp_epoch=$bp_epoch synchronous="$synchronous" scramble_weights="$scramble_weights" sbatch examm_bias_exp.zsh
+    done
+  done
 done

From 95277159201b2e0f7b5469223d76794820cd8ccd Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 13 Mar 2024 13:56:00 -0400
Subject: [PATCH 29/31] Remove scripts in root directory

---
 dnas_cluster.zsh | 69 ------------------------------------------------
 dnas_control.zsh | 60 -----------------------------------------
 2 files changed, 129 deletions(-)
 delete mode 100644 dnas_cluster.zsh
 delete mode 100644 dnas_control.zsh

diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh
deleted file mode 100644
index 55823c0c..00000000
--- a/dnas_cluster.zsh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 2 \
-      --use_dnas_seed true \
-      --use_burn_in_bp_epoch \
-      --burn_in_period 1024 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for crystalize_iters in 512; do
-    for k in 1; do
-      for fold in $(seq 0 19); do
-        run_examm
-      done
-    done
-  done
-}
-
-CELL_TYPE='dnas'
-# bp_ge=(8 8192 16 4096 32 2048 64 1024)
-# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-run_group
-# done
diff --git a/dnas_control.zsh b/dnas_control.zsh
deleted file mode 100644
index 88a7c882..00000000
--- a/dnas_control.zsh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 4 \
-      --burn_in_period 1024 \
-      --use_burn_in_bp_epoch
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for k in 1; do
-    for fold in $(seq 0 19); do
-      run_examm
-    done
-  done
-}
-
-run_group

From fbb32b2aa3b2adb282e87022b2394169971eb159 Mon Sep 17 00:00:00 2001
From: Joshua Karns <jkarns275@gmail.com>
Date: Wed, 13 Mar 2024 13:56:17 -0400
Subject: [PATCH 30/31] Remove junk file

---
 key | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 key

diff --git a/key b/key
deleted file mode 100644
index 391a7405..00000000
--- a/key
+++ /dev/null
@@ -1,7 +0,0 @@
-v11 -> burn in schedule with 0.001 lr 4 mut
-v12 -> burn in schedule with 0.01 lr 4 mut
-v13 -> burn in schedule with 0.01 lr and period of 1024 and total genoms 8k 4 mut
-v14 -> burn in schedule with 0.001 lr and period of 1024 and total genomes 8k 4 mut
-v15 -> burn in schedule with 0.001 lr and period of 1024 max genome 8k 2mut
-v16 -> burn in schedule with 0.01 lr and period of 1024 max genome 8k 2mut
-

From efcbf5d348f703b02d244ae83fbd656f3c1efecd Mon Sep 17 00:00:00 2001
From: Joshua Karns <josh@mail.rit.edu>
Date: Wed, 20 Mar 2024 15:39:43 -0400
Subject: [PATCH 31/31] Add flag to disable epigenetic weights

---
 dnas_cluster.zsh         | 69 ----------------------------------------
 dnas_control.zsh         | 60 ----------------------------------
 examm/examm.cxx          |  1 -
 rnn/genome_property.cxx  |  4 +++
 rnn/genome_property.hxx  |  1 +
 scripts/dnas/mk_jobs.zsh |  2 +-
 6 files changed, 6 insertions(+), 131 deletions(-)
 delete mode 100644 dnas_cluster.zsh
 delete mode 100644 dnas_control.zsh

diff --git a/dnas_cluster.zsh b/dnas_cluster.zsh
deleted file mode 100644
index 55823c0c..00000000
--- a/dnas_cluster.zsh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types dnas \
-      --stochastic 1 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --validation_sequence_length 100 \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 2 \
-      --use_dnas_seed true \
-      --use_burn_in_bp_epoch \
-      --burn_in_period 1024 \
-      --dnas_k $k
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for crystalize_iters in 512; do
-    for k in 1; do
-      for fold in $(seq 0 19); do
-        run_examm
-      done
-    done
-  done
-}
-
-CELL_TYPE='dnas'
-# bp_ge=(8 8192 16 4096 32 2048 64 1024)
-# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do
-run_group
-# done
diff --git a/dnas_control.zsh b/dnas_control.zsh
deleted file mode 100644
index 88a7c882..00000000
--- a/dnas_control.zsh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/zsh
-
-#SBATCH  --nodes=1
-#SBATCH  --ntasks-per-node=36
-#SBATCH  --exclude theocho
-#SBATCH  --time=23:00:00
-#SBATCH  -A examm
-#SBATCH  --partition=tier3
-#SBATCH  -J examm_dnas_experimental
-#SBATCH  -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out
-#SBATCH  -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err
-#SBATCH  --mem=0
-
-INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd'
-OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4'
-
-offset=1
-
-run_examm() {
-  output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold
-  mkdir -p $output_dir
-  srun -n 36 Release/mpi/examm_mpi \
-      --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
-      --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
-      --time_offset $offset \
-      --possible_node_types lstm mgu gru ugrnn delta simple \
-      --stochastic 0 \
-      --input_parameter_names ${=INPUT_PARAMETERS} \
-      --output_parameter_names ${=OUTPUT_PARAMETERS} \
-      --bp_iterations $bp_epoch \
-      --normalize min_max \
-      --num_hidden_layers $SIZE \
-      --hidden_layer_size $SIZE \
-      --max_recurrent_depth 10 \
-      --output_directory $output_dir \
-      --log_filename fitness.csv \
-      --learning_rate 0.01 \
-      --std_message_level WARNING \
-      --file_message_level WARNING \
-      --crystalize_iters $crystalize_iters \
-      --max_genomes $max_genomes \
-      --island_size 32 \
-      --number_islands 8 \
-      --num_mutations 4 \
-      --burn_in_period 1024 \
-      --use_burn_in_bp_epoch
-
-  # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) )
-  # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh
-}
-
-run_group() {
-  for k in 1; do
-    for fold in $(seq 0 19); do
-      run_examm
-    done
-  done
-}
-
-run_group
diff --git a/examm/examm.cxx b/examm/examm.cxx
index d0c784b9..6f51fd02 100644
--- a/examm/examm.cxx
+++ b/examm/examm.cxx
@@ -285,7 +285,6 @@ RNN_Genome* EXAMM::generate_genome() {
     RNN_Genome* genome = speciation_strategy->generate_genome(rng_0_1, generator, mutate_function, crossover_function);
 
     genome_property->set_genome_properties(genome);
-    // if (!epigenetic_weights) genome->initialize_randomly();
 
     // this is just a sanity check, can most likely comment out (checking to see
     // if all the paramemters are sane)
diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx
index 09ea1ae8..95b80df1 100644
--- a/rnn/genome_property.cxx
+++ b/rnn/genome_property.cxx
@@ -31,6 +31,9 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector<string
     get_argument(arguments, "--min_recurrent_depth", false, min_recurrent_depth);
     get_argument(arguments, "--max_recurrent_depth", false, max_recurrent_depth);
 
+    bool no_epi = argument_exists(arguments, "--no_epigenetic_weights");
+    use_epigenetic_weights = !no_epi;
+
     use_burn_in_bp_epoch = argument_exists(arguments, "--use_burn_in_bp_epoch");
     get_argument(arguments, "--burn_in_period", false, burn_in_period);
     get_argument(arguments, "--burn_in_cycles", false, max_burn_in_cycles);
@@ -49,6 +52,7 @@ void GenomeProperty::set_genome_properties(RNN_Genome* genome) {
     genome->set_bp_iterations(compute_bp_iterations(genome));
     
     if (use_dropout) genome->enable_dropout(dropout_probability);
+    if (!use_epigenetic_weights) genome->initialize_randomly();
     
     genome->normalize_type = normalize_type;
     genome->set_parameter_names(input_parameter_names, output_parameter_names);
diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx
index 130b26de..b70fbf1e 100644
--- a/rnn/genome_property.hxx
+++ b/rnn/genome_property.hxx
@@ -17,6 +17,7 @@ class GenomeProperty {
     double dropout_probability;
     int32_t min_recurrent_depth;
     int32_t max_recurrent_depth;
+    bool use_epigenetic_weights = true;
 
     bool use_burn_in_bp_epoch;
     int32_t burn_in_period = 2048;
diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh
index b996883e..6adfff8f 100644
--- a/scripts/dnas/mk_jobs.zsh
+++ b/scripts/dnas/mk_jobs.zsh
@@ -1,4 +1,4 @@
-bp=(1 2 3 4 5 10 15 20 30 40 50 100 150 200)
+bp=(1 2 3 4 5 6 7 8 9 10 15 20 25 30 35 40 45 50 100)
 for bp_epoch in $bp; do
   for synchronous in "async" "synchronous"; do
     for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do