diff --git a/CMakeLists.txt b/CMakeLists.txt index cc629b75..65d1f2a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # 2 This line for cluster #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") +# SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 -fsanitize=address") SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3") #SET (CMAKE_CXX_FLAGS " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG") #SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") diff --git a/README.md b/README.md index 7ecc5b2c..b297bbc6 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,6 @@ Update the code format before start a pull request with: ~/exact $ sh scripts/util/format.sh ``` - You may also want to have graphviz installed so you can generate images of the evolved neural networks. EXACT/EXALT/EXAMM will write out evolved genomes in a .gv (graphviz) format for this. 
For example, can generate a pdf from a gv file (assuming graphviz is installed with): ``` diff --git a/common/files.hxx b/common/files.hxx index ac23ff0d..8c4c8a43 100644 --- a/common/files.hxx +++ b/common/files.hxx @@ -1,6 +1,8 @@ #ifndef EXACT_BOINC_COMMON_HXX #define EXACT_BOINC_COMMON_HXX +#include + #include using std::runtime_error; diff --git a/common/log.cxx b/common/log.cxx index 623475e8..6f82e67f 100644 --- a/common/log.cxx +++ b/common/log.cxx @@ -79,11 +79,11 @@ int8_t Log::parse_level_from_string(string level) { void Log::initialize(const vector& arguments) { // TODO: should read these from the CommandLine (to be created) - string std_message_level_str, file_message_level_str; + string std_message_level_str = "INFO", file_message_level_str = "NONE"; - get_argument(arguments, "--std_message_level", true, std_message_level_str); - get_argument(arguments, "--file_message_level", true, file_message_level_str); - get_argument(arguments, "--output_directory", true, output_directory); + get_argument(arguments, "--std_message_level", false, std_message_level_str); + get_argument(arguments, "--file_message_level", false, file_message_level_str); + get_argument(arguments, "--output_directory", false, output_directory); std_message_level = parse_level_from_string(std_message_level_str); file_message_level = parse_level_from_string(file_message_level_str); diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index b4257708..202ddd42 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -189,10 +189,10 @@ void get_train_validation_data( time_series_sets->export_training_series(time_offset, train_inputs, train_outputs); time_series_sets->export_test_series(time_offset, validation_inputs, validation_outputs); - int32_t sequence_length = 0; - if (get_argument(arguments, "--train_sequence_length", false, sequence_length)) { - Log::info("Slicing input training data with time sequence length: %d\n", sequence_length); - 
slice_input_data(train_inputs, train_outputs, sequence_length); + int32_t train_sequence_length = 0; + if (get_argument(arguments, "--train_sequence_length", false, train_sequence_length)) { + Log::info("Slicing input training data with time sequence length: %d\n", train_sequence_length); + slice_input_data(train_inputs, train_outputs, train_sequence_length); } int32_t validation_sequence_length = 0; diff --git a/examm/examm.cxx b/examm/examm.cxx index 6a9b6d89..cf81485d 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -96,8 +96,9 @@ void EXAMM::generate_log() { Log::info("Generating fitness log\n"); mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); - (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled " - "Edges, Enabled Rec. Edges"; + (*log_file + ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" + "Edges,Enabled Rec. Edges,Val. 
MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -152,7 +153,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position } } -void EXAMM::update_log() { +void EXAMM::update_log(RNN_Genome* genome) { if (log_file != NULL) { // make sure the log file is still good if (!log_file->good()) { @@ -183,17 +184,26 @@ void EXAMM::update_log() { } (*op_log_file) << endl; } + RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { best_genome = speciation_strategy->get_global_best_genome(); } + std::chrono::time_point currentClock = std::chrono::system_clock::now(); long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," - << best_genome->get_enabled_recurrent_edge_count() - << speciation_strategy->get_strategy_information_values() << endl; + << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << "," + << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 
1 : 0) << "," + << genome->get_number_weights() << "," << genome->get_generation_id() + << speciation_strategy->get_strategy_information_values(genome) << endl; + Log::info( + "mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, + best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), + best_genome->get_enabled_recurrent_edge_count() + ); } } @@ -246,6 +256,8 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // updates EXAMM's mapping of which genomes have been generated by what genome->update_generation_map(generated_from_map); + pre_insert_best_mse = this->get_best_fitness(); + int32_t insert_position = speciation_strategy->insert_genome(genome); Log::info("insert to speciation strategy complete, at position: %d\n", insert_position); @@ -264,12 +276,14 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { } Log::info("save genome complete\n"); + last_genome_inserted = insert_position >= 0; + speciation_strategy->print(); - Log::info("printed speciation strategy!\n\n"); update_op_log_statistics(genome, insert_position); - update_log(); - return insert_position >= 0; + update_log(genome); + + return last_genome_inserted; } // write function to save genomes to file @@ -303,7 +317,6 @@ RNN_Genome* EXAMM::generate_genome() { RNN_Genome* genome = speciation_strategy->generate_genome(rng_0_1, generator, mutate_function, crossover_function); genome_property->set_genome_properties(genome); - // if (!epigenetic_weights) genome->initialize_randomly(); // this is just a sanity check, can most likely comment out (checking to see // if all the paramemters are sane) @@ -314,7 +327,7 @@ RNN_Genome* EXAMM::generate_genome() { return genome; } -int32_t EXAMM::get_random_node_type() { +node_t EXAMM::get_random_node_type() { return possible_node_types[rng_0_1(generator) * possible_node_types.size()]; } @@ -354,7 +367,8 @@ void EXAMM::mutate(int32_t max_mutations, RNN_Genome* g) { g->assign_reachability(); double rng = 
rng_0_1(generator) * total; - int32_t new_node_type = get_random_node_type(); + node_t new_node_type = get_random_node_type(); + Log::info("%d %d\n", new_node_type, NODE_TYPES.size()); string node_type_str = NODE_TYPES[new_node_type]; Log::debug("rng: %lf, total: %lf, new node type: %d (%s)\n", rng, total, new_node_type, node_type_str.c_str()); @@ -536,8 +550,8 @@ void EXAMM::attempt_edge_insert( exit(1); return; - } else if (child_edges[i]->get_input_innovation_number() == edge->get_input_innovation_number() && - child_edges[i]->get_output_innovation_number() == edge->get_output_innovation_number()) { + } else if (child_edges[i]->get_input_innovation_number() == edge->get_input_innovation_number() + && child_edges[i]->get_output_innovation_number() == edge->get_output_innovation_number()) { Log::debug( "Not inserting edge in crossover operation as there was already an edge with the same input and output " "innovation numbers!\n" @@ -620,10 +634,10 @@ void EXAMM::attempt_recurrent_edge_insert( exit(1); return; - } else if (child_recurrent_edges[i]->get_input_innovation_number() == - recurrent_edge->get_input_innovation_number() && - child_recurrent_edges[i]->get_output_innovation_number() == - recurrent_edge->get_output_innovation_number()) { + } else if (child_recurrent_edges[i]->get_input_innovation_number() + == recurrent_edge->get_input_innovation_number() + && child_recurrent_edges[i]->get_output_innovation_number() + == recurrent_edge->get_output_innovation_number()) { Log::debug( "Not inserting recurrent_edge in crossover operation as there was already an recurrent_edge with the " "same input and output innovation numbers!\n" diff --git a/examm/examm.hxx b/examm/examm.hxx index 3a7288ca..a1e7cc59 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -63,8 +63,8 @@ class EXAMM { double split_node_rate; double merge_node_rate; - vector possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, - MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; + 
vector possible_node_types = {SIMPLE_NODE, JORDAN_NODE, ELMAN_NODE, UGRNN_NODE, + MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; vector op_log_ordering; map inserted_counts; @@ -73,6 +73,8 @@ class EXAMM { string output_directory; ofstream* log_file; ofstream* op_log_file; + double pre_insert_best_mse = 1000000; + bool last_genome_inserted = false; std::chrono::time_point startClock; @@ -89,13 +91,13 @@ class EXAMM { ~EXAMM(); void print(); - void update_log(); + void update_log(RNN_Genome* genome); void set_possible_node_types(vector possible_node_type_strings); uniform_int_distribution get_recurrent_depth_dist(); - int32_t get_random_node_type(); + node_t get_random_node_type(); RNN_Genome* generate_genome(); bool insert_genome(RNN_Genome* genome); diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 9df6bd9c..a2463b2d 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -100,12 +100,12 @@ int32_t IslandSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* IslandSpeciationStrategy::get_best_genome() { +RNN_Genome* IslandSpeciationStrategy::get_best_genome() const { // the global_best_genome is updated every time a genome is inserted return global_best_genome; } -RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { +RNN_Genome* IslandSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_island = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -126,7 +126,7 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { } } -double IslandSpeciationStrategy::get_best_fitness() { +double IslandSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -135,7 +135,7 @@ double IslandSpeciationStrategy::get_best_fitness() { } } -double IslandSpeciationStrategy::get_worst_fitness() { +double IslandSpeciationStrategy::get_worst_fitness() const { 
RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -376,11 +376,15 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( Log::info("Island %d: new genome is still null, regenerating\n", generation_island); new_genome = generate_genome(rng_0_1, generator, mutate, crossover); } + generated_genomes++; new_genome->set_generation_id(generated_genomes); islands[generation_island]->set_latest_generation_id(generated_genomes); new_genome->set_group_id(generation_island); + pair perf = {this->get_best_fitness(), this->get_worst_fitness()}; + genome_performance.emplace(new_genome->generation_id, perf); + if (current_island->is_initializing()) { RNN_Genome* genome_copy = new_genome->copy(); Log::debug("inserting genome copy!\n"); @@ -461,6 +465,7 @@ void IslandSpeciationStrategy::print(string indent) const { */ string IslandSpeciationStrategy::get_strategy_information_headers() const { string info_header = ""; + info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"); for (int32_t i = 0; i < (int32_t) islands.size(); i++) { info_header.append(","); info_header.append("Island_"); @@ -477,8 +482,22 @@ string IslandSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string IslandSpeciationStrategy::get_strategy_information_values() const { +string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const { string info_value = ""; + + auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); + info_value.append(","); + info_value.append(to_string(min_mse_pre)); + info_value.append(","); + info_value.append(to_string(max_mse_pre)); + + float min_mse_post = this->get_best_fitness(); + float max_mse_post = this->get_worst_fitness(); + info_value.append(","); + info_value.append(to_string(min_mse_post)); + info_value.append(","); + info_value.append(to_string(max_mse_post)); + for (int32_t i 
= 0; i < (int32_t) islands.size(); i++) { double best_fitness = islands[i]->get_best_fitness(); double worst_fitness = islands[i]->get_worst_fitness(); @@ -584,8 +603,8 @@ void IslandSpeciationStrategy::set_erased_islands_status() { RNN_Genome* IslandSpeciationStrategy::get_seed_genome() { return seed_genome; } -// write a save entire population function with an input saving function +// write a save entire population function with an input saving function void IslandSpeciationStrategy::save_entire_population(string output_path) { for (int32_t i = 0; i < (int32_t) islands.size(); i++) { islands[i]->save_population(output_path); diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index b3888621..19eff273 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -62,6 +62,12 @@ class IslandSpeciationStrategy : public SpeciationStrategy { vector islands; RNN_Genome* global_best_genome; + ofstream* island_log_file; + + // Maps a genome's generation id to the pair (best fitness, worst fitness) over all islands at + // the time of genome generation. 
+ unordered_map> genome_performance; + // Transfer learning class properties: bool transfer_learning; @@ -114,25 +120,25 @@ class IslandSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands or NULL if no genomes have yet been inserted */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * \return true if all the islands are full @@ -207,7 +213,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome* genome) const; /** * Island repopulation through two random parents from two seperate islands, diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx index 4fdd3d94..e24f6fc1 100644 --- a/examm/neat_speciation_strategy.cxx +++ b/examm/neat_speciation_strategy.cxx @@ -74,7 +74,7 @@ int32_t NeatSpeciationStrategy::get_evaluated_genomes() const { return evaluated_genomes; } -RNN_Genome* NeatSpeciationStrategy::get_best_genome() { +RNN_Genome* NeatSpeciationStrategy::get_best_genome() const { int32_t best_genome_species = -1; double best_fitness = EXAMM_MAX_DOUBLE; @@ -95,7 +95,7 @@ RNN_Genome* NeatSpeciationStrategy::get_best_genome() { } } -RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { +RNN_Genome* 
NeatSpeciationStrategy::get_worst_genome() const { int32_t worst_genome_species = -1; double worst_fitness = -EXAMM_MAX_DOUBLE; @@ -116,7 +116,7 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { } } -double NeatSpeciationStrategy::get_best_fitness() { +double NeatSpeciationStrategy::get_best_fitness() const { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -125,7 +125,7 @@ double NeatSpeciationStrategy::get_best_fitness() { } } -double NeatSpeciationStrategy::get_worst_fitness() { +double NeatSpeciationStrategy::get_worst_fitness() const { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { return EXAMM_MAX_DOUBLE; @@ -399,7 +399,7 @@ string NeatSpeciationStrategy::get_strategy_information_headers() const { /** * Gets speciation strategy information values for logs */ -string NeatSpeciationStrategy::get_strategy_information_values() const { +string NeatSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const { string info_value = ""; for (int32_t i = 0; i < (int32_t) Neat_Species.size(); i++) { double best_fitness = Neat_Species[i]->get_best_fitness(); diff --git a/examm/neat_speciation_strategy.hxx b/examm/neat_speciation_strategy.hxx index 01dc38a2..1cc88ceb 100644 --- a/examm/neat_speciation_strategy.hxx +++ b/examm/neat_speciation_strategy.hxx @@ -64,25 +64,25 @@ class NeatSpeciationStrategy : public SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - double get_best_fitness(); + double get_best_fitness() const; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - double get_worst_fitness(); + double get_worst_fitness() const; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - RNN_Genome* get_best_genome(); + RNN_Genome* get_best_genome() const; /** * Gets the the worst genome of all 
the islands * \return the worst genome of all islands */ - RNN_Genome* get_worst_genome(); + RNN_Genome* get_worst_genome() const; /** * Inserts a copy of the genome into this speciation strategy. @@ -130,7 +130,7 @@ class NeatSpeciationStrategy : public SpeciationStrategy { /** * Gets speciation strategy information values for logs */ - string get_strategy_information_values() const; + string get_strategy_information_values(RNN_Genome* genome) const; RNN_Genome* get_global_best_genome(); diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx index bf8a43d5..713bd216 100644 --- a/examm/speciation_strategy.hxx +++ b/examm/speciation_strategy.hxx @@ -9,6 +9,8 @@ using std::string; using std::minstd_rand0; using std::uniform_real_distribution; +#include "rnn/rnn_genome.hxx" + class SpeciationStrategy { public: /** @@ -25,25 +27,25 @@ class SpeciationStrategy { * Gets the fitness of the best genome of all the islands * \return the best fitness over all islands */ - virtual double get_best_fitness() = 0; + virtual double get_best_fitness() const = 0; /** * Gets the fitness of the worst genome of all the islands * \return the worst fitness over all islands */ - virtual double get_worst_fitness() = 0; + virtual double get_worst_fitness() const = 0; /** * Gets the best genome of all the islands * \return the best genome of all islands */ - virtual RNN_Genome* get_best_genome() = 0; + virtual RNN_Genome* get_best_genome() const = 0; /** * Gets the the worst genome of all the islands * \return the worst genome of all islands */ - virtual RNN_Genome* get_worst_genome() = 0; + virtual RNN_Genome* get_worst_genome() const = 0; /** * Inserts a copy of the genome into this speciation strategy. 
#!/usr/bin/zsh
# Trains fixed-architecture RNNs on the c172 dataset with Release/rnn_examples/train_rnn.
#
# For every network size, one training run per fold is started in the background; the
# script waits for all runs of a size to finish before moving on to the next size.
# Results are written to ground_truth_experiments/results/<cell type>/<size>/<fold>.

# Column names handed to --input_parameter_names / --output_parameter_names.
# Arrays expand to one argument per element, so no explicit word splitting is needed.
input_parameters=(
    AltAGL AltB AltGPS AltMSL BaroA
    E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4
    E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4
    E1_FFlow E1_OilP E1_OilT E1_RPM
    FQtyL FQtyR GndSpd IAS LatAc NormAc OAT
    Pitch Roll TAS VSpd VSpdG WndDr WndSpd
)
output_parameters=(
    E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4
    E1_FFlow E1_OilP E1_OilT E1_RPM
)

time_offset=1
bp_iterations=1000

for size in 1 2 4; do
    for cell_type in dnas; do
        for fold in {0..9}; do
            results_dir="ground_truth_experiments/results/${cell_type}/${size}/${fold}"
            mkdir -p "$results_dir"

            # The same value is used for the number of hidden layers and for their size.
            Release/rnn_examples/train_rnn \
                --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \
                --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \
                --time_offset "$time_offset" \
                --input_parameter_names "${input_parameters[@]}" \
                --output_parameter_names "${output_parameters[@]}" \
                --bp_iterations "$bp_iterations" \
                --stochastic \
                --rnn_type "$cell_type" \
                --normalize min_max \
                --num_hidden_layers "$size" \
                --hidden_layer_size "$size" \
                --random_sequence_length \
                --sequence_length_lower_bound 50 \
                --sequence_length_upper_bound 100 \
                --max_recurrent_depth 1 \
                --weight_update adagrad \
                --output_directory "$results_dir" \
                --log_filename fitness.csv \
                --learning_rate 0.01 \
                --std_message_level ERROR \
                --file_message_level INFO &
        done
    done

    # block until every background run for this size has finished
    wait
done
b/ground_truth_experiments/source_genomes.sh new file mode 100755 index 00000000..1c251134 --- /dev/null +++ b/ground_truth_experiments/source_genomes.sh @@ -0,0 +1,33 @@ +#!/usr/bin/zsh +# This is an example of running EXAMM MPI version on c172 dataset +# +# The c172 dataset is not normalized +# To run datasets that's not normalized, make sure to add arguments: +# --normalize min_max for Min Max normalization, or +# --normalize avg_std_dev for Z-score normalization + +INPUT_PARAMETERS="AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd" +OUTPUT_PARAMETERS="Pitch" + +for i in 0 1 2 3 4 5 6 7 8 9; do + exp_name="ground_truth_experiments/results/source_genomes/$i" + mkdir -p $exp_name + echo $exp_name + mpirun -np 5 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --number_islands 8 \ + --island_size 8 \ + --max_genomes 10000 \ + --bp_iterations 5 \ + --num_mutations 2 \ + --normalize min_max \ + --output_directory $exp_name \ + --possible_node_types simple UGRNN MGU GRU delta LSTM \ + --std_message_level ERROR \ + --file_message_level INFO & +done +wait diff --git a/mpi/examm_mpi.cxx b/mpi/examm_mpi.cxx index 7886d91d..66ee6594 100644 --- a/mpi/examm_mpi.cxx +++ b/mpi/examm_mpi.cxx @@ -114,10 +114,62 @@ void receive_terminate_message(int32_t source) { MPI_Recv(terminate_message, 1, MPI_INT, source, TERMINATE_TAG, MPI_COMM_WORLD, &status); } -void master(int32_t max_rank) { - // the "main" id will have already been set by the main function so we do not need to re-set it here - Log::debug("MAX int32_t: %d\n", numeric_limits::max()); +void master_sync(int32_t max_rank) { + 
max_rank -= 1; + int32_t generation = 0; + while (true) { + // Wait for N work requests + int32_t nreqs = 0; + while (nreqs < max_rank) { + MPI_Status status; + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); + + int32_t source = status.MPI_SOURCE; + int32_t tag = status.MPI_TAG; + // Log::info("probe returned message from: %d with tag: %d\n", source, tag); + + if (tag == WORK_REQUEST_TAG) { + receive_work_request(source); + nreqs++; + } else if (tag == GENOME_LENGTH_TAG) { + Log::debug("received genome from: %d\n", source); + RNN_Genome* genome = receive_genome_from(source); + + examm->insert_genome(genome); + + // delete the genome as it won't be used again, a copy was inserted + delete genome; + } else { + Log::fatal("ERROR: received message from %d with unknown tag: %d", source, tag); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + vector genomes(max_rank); + for (int32_t i = 1; i <= max_rank; i++) { + RNN_Genome* genome = examm->generate_genome(); + if (genome == NULL) { + break; + } + genomes[i - 1] = genome; + } + if (genomes.size() != max_rank) { + break; + } + + for (int i = 1; i <= max_rank; i++) { + send_genome_to(i, genomes[i - 1]); + delete genomes[i - 1]; + } + } + + for (int i = 1; i <= max_rank; i++) { + send_terminate_message(i); + } +} + +void master(int32_t max_rank) { int32_t terminates_sent = 0; while (true) { @@ -134,12 +186,7 @@ void master(int32_t max_rank) { if (tag == WORK_REQUEST_TAG) { receive_work_request(source); - // if (transfer_learning_version.compare("v3") == 0 || transfer_learning_version.compare("v1+v3") == 0) { - // seed_stirs = 3; - // } - examm_mutex.lock(); RNN_Genome* genome = examm->generate_genome(); - examm_mutex.unlock(); if (genome == NULL) { // search was completed if it returns NULL for an individual // send terminate message @@ -167,9 +214,7 @@ void master(int32_t max_rank) { Log::debug("received genome from: %d\n", source); RNN_Genome* genome = receive_genome_from(source); - examm_mutex.lock(); 
examm->insert_genome(genome); - examm_mutex.unlock(); // delete the genome as it won't be used again, a copy was inserted delete genome; @@ -207,9 +252,14 @@ void worker(int32_t rank) { // have each worker write the backproagation to a separate log file string log_id = "genome_" + to_string(genome->get_generation_id()) + "_worker_" + to_string(rank); Log::set_id(log_id); + + vector params; + genome->get_weights(params); + genome->backpropagate_stochastic( training_inputs, training_outputs, validation_inputs, validation_outputs, weight_update_method ); + Log::release_id(log_id); // go back to the worker's log for MPI communication @@ -259,12 +309,20 @@ int main(int argc, char** argv) { RNN_Genome* seed_genome = get_seed_genome(arguments, time_series_sets, weight_rules); + bool synchronous = argument_exists(arguments, "--synchronous"); + Log::warning("synchronous? %d\n", synchronous); + Log::clear_rank_restriction(); if (rank == 0) { write_time_series_to_file(arguments, time_series_sets); examm = generate_examm_from_arguments(arguments, time_series_sets, weight_rules, seed_genome); - master(max_rank); + + if (synchronous) { + master_sync(max_rank); + } else { + master(max_rank); + } } else { worker(rank); } diff --git a/mpi/process_sweep_results.cxx b/mpi/process_sweep_results.cxx index e1f538f2..2f6f2520 100644 --- a/mpi/process_sweep_results.cxx +++ b/mpi/process_sweep_results.cxx @@ -82,8 +82,8 @@ void process_dir(string dir_name, int32_t depth) { // cout << sub_dir_name << ", depth: " << depth << endl; if (depth == 3 && extension_is(sub_dir_name, ".bin")) { - cout << "\tprocessing genome binary '" << sub_dir_name << "' for '" << current_output << "'" - << " and " << current_run_type; + cout << "\tprocessing genome binary '" << sub_dir_name << "' for '" << current_output << "'" << " and " + << current_run_type; RNN_Genome* genome = new RNN_Genome(sub_dir_name); cout << ", fitness: " << genome->get_fitness() << endl; @@ -353,24 +353,13 @@ int main(int argc, char** 
argv) { cout << "\\begin{tabular}{|l|r|r|r|r|r|r|r|r|r|r|r|r|r|r|}" << endl; cout << "\\hline" << endl; - cout << " " - << "& \\multicolumn{4}{|c|}{FF}" - << "& \\multicolumn{4}{|c|}{LSTM}" - << "& \\multicolumn{4}{|c|}{UGRNN}" - << "& \\multicolumn{4}{|c|}{Delta}" - << "& \\multicolumn{4}{|c|}{MGU}" - << "& \\multicolumn{4}{|c|}{GRU}" + cout << " " << "& \\multicolumn{4}{|c|}{FF}" << "& \\multicolumn{4}{|c|}{LSTM}" << "& \\multicolumn{4}{|c|}{UGRNN}" + << "& \\multicolumn{4}{|c|}{Delta}" << "& \\multicolumn{4}{|c|}{MGU}" << "& \\multicolumn{4}{|c|}{GRU}" << "\\\\" << endl; cout << "\\hline" << endl; - cout << "Run Type" - << " & Min & Avg & Max & Corr" - << " & Min & Avg & Max & Corr" - << " & Min & Avg & Max & Corr" - << " & Min & Avg & Max & Corr" - << " & Min & Avg & Max & Corr" - << " & Min & Avg & Max & Corr" - << "\\\\" << endl; + cout << "Run Type" << " & Min & Avg & Max & Corr" << " & Min & Avg & Max & Corr" << " & Min & Avg & Max & Corr" + << " & Min & Avg & Max & Corr" << " & Min & Avg & Max & Corr" << " & Min & Avg & Max & Corr" << "\\\\" << endl; cout << "\\hline" << endl; cout << "\\hline" << endl; diff --git a/mpi/run_statistics.cxx b/mpi/run_statistics.cxx index 183fa540..6ea3f2a5 100644 --- a/mpi/run_statistics.cxx +++ b/mpi/run_statistics.cxx @@ -138,21 +138,13 @@ string RunStatistics::overview_header() { oss << "\\begin{tabular}{|l|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|}" << endl; oss << "\\hline" << endl; - oss << " " - << " & \\multicolumn{3}{|c|}{MSE}" - << " & \\multicolumn{4}{|c|}{Edges}" - << " & \\multicolumn{4}{|c|}{Rec. Edges}" - << " & \\multicolumn{4}{|c|}{Hidden Nodes}" - << "\\\\" << endl; + oss << " " << " & \\multicolumn{3}{|c|}{MSE}" << " & \\multicolumn{4}{|c|}{Edges}" + << " & \\multicolumn{4}{|c|}{Rec. Edges}" << " & \\multicolumn{4}{|c|}{Hidden Nodes}" << "\\\\" << endl; oss << "\\hline" << endl; - oss << "Run Type" - << " & Min & Avg & Max " - << " & Min & Avg & Max & Corr." - << " & Min & Avg & Max & Corr." 
- << " & Min & Avg & Max & Corr." - << "\\\\" << endl; + oss << "Run Type" << " & Min & Avg & Max " << " & Min & Avg & Max & Corr." << " & Min & Avg & Max & Corr." + << " & Min & Avg & Max & Corr." << "\\\\" << endl; oss << "\\hline" << endl; oss << "\\hline" << endl; @@ -194,23 +186,14 @@ string RunStatistics::overview_ff_header() { oss << "\\begin{tabular}{|l|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|}" << endl; oss << "\\hline" << endl; - oss << " " - << " & \\multicolumn{3}{|c|}{MSE}" - << " & \\multicolumn{4}{|c|}{Edges}" - << " & \\multicolumn{4}{|c|}{Rec. Edges}" - << " & \\multicolumn{4}{|c|}{Memory Nodes}" - << " & \\multicolumn{4}{|c|}{FF Nodes}" - << "\\\\" << endl; + oss << " " << " & \\multicolumn{3}{|c|}{MSE}" << " & \\multicolumn{4}{|c|}{Edges}" + << " & \\multicolumn{4}{|c|}{Rec. Edges}" << " & \\multicolumn{4}{|c|}{Memory Nodes}" + << " & \\multicolumn{4}{|c|}{FF Nodes}" << "\\\\" << endl; oss << "\\hline" << endl; - oss << "Run Type" - << " & Min & Avg & Max " - << " & Min & Avg & Max & Corr." - << " & Min & Avg & Max & Corr." - << " & Min & Avg & Max & Corr." - << " & Min & Avg & Max & Corr." - << "\\\\" << endl; + oss << "Run Type" << " & Min & Avg & Max " << " & Min & Avg & Max & Corr." << " & Min & Avg & Max & Corr." + << " & Min & Avg & Max & Corr." << " & Min & Avg & Max & Corr." 
<< "\\\\" << endl; oss << "\\hline" << endl; oss << "\\hline" << endl; diff --git a/rnn/dnas_node.cxx b/rnn/dnas_node.cxx index e51fa179..a54d2f28 100644 --- a/rnn/dnas_node.cxx +++ b/rnn/dnas_node.cxx @@ -1,6 +1,7 @@ #include using std::sort; +#include #include using std::pair; @@ -13,13 +14,16 @@ using std::max; #include "common/log.hxx" #include "dnas_node.hxx" +int32_t DNASNode::CRYSTALLIZATION_THRESHOLD = 1000; +int32_t DNASNode::k = -1; + DNASNode::DNASNode( vector&& _nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter ) : RNN_Node_Interface(_innovation_number, _type, _depth), nodes(_nodes), pi(vector(nodes.size(), 1.0)), - z(vector(nodes.size())), + z(vector(nodes.size(), 0.0)), x(vector(nodes.size())), g(vector(nodes.size())), d_pi(vector(nodes.size())), @@ -49,7 +53,6 @@ DNASNode::DNASNode(const DNASNode& src) : RNN_Node_Interface(src.innovation_numb g = src.g; x = src.x; xtotal = src.xtotal; - tao = src.tao; stochastic = src.stochastic; counter = src.counter; maxi = src.maxi; @@ -88,12 +91,19 @@ void DNASNode::sample_gumbel_softmax(Rng& rng) { x.assign(pi.size(), 0.0); gumbel_noise(rng, g); - calculate_z(); } +double DNASNode::calculate_pi_lr() { + return 0.1; +} + +double DNASNode::calculate_tao() { + return 6.0; +} + void DNASNode::calculate_z() { - tao = max(1.0 / 3.0, 1.0 / (1.0 + (double) counter * 0.05)); + tao = calculate_tao(); xtotal = 0.0; double emax = -10000000; @@ -125,29 +135,50 @@ void DNASNode::calculate_z() { ); double total = 0.0; - for (int32_t i = 0; i < k; i++) { + for (int i = 0; i < k; i++) { total += ps_with_indices[i].second; } - for (int32_t i = 0; i < (int32_t) z.size(); i++) { + for (int i = 0; i < z.size(); i++) { z[i] = 0.0; } - for (int32_t i = 0; i < k; i++) { + for (int i = 0; i < k; i++) { z[ps_with_indices[i].first] = ps_with_indices[i].second / total; } } } +void DNASNode::print_info() { + printf(" "); + int best_pi_idx = 0; + for (int i = 0; i < nodes.size(); i++) { + printf("%-10s & ", 
std::to_string(pi[i]).c_str()); + if (pi[i] > pi[best_pi_idx]) { + best_pi_idx = i; + } + } + printf("\n"); + Log::info("Node types: "); + for (auto node : nodes) { + Log::info_no_header("%d ", node->node_type); + } + Log::info_no_header("\n "); + Log::info("Best node: %i, node type: %d\n", best_pi_idx, nodes[best_pi_idx]->node_type); +} + void DNASNode::reset(int32_t series_length) { - d_pi = vector(pi.size(), 0.0); - d_input = vector(series_length, 0.0); - node_outputs = vector>(series_length, vector(pi.size(), 0.0)); - output_values = vector(series_length, 0.0); - error_values = vector(series_length, 0.0); - inputs_fired = vector(series_length, 0); - outputs_fired = vector(series_length, 0); - input_values = vector(series_length, 0.0); + d_pi.assign(pi.size(), 0.0); + d_input.assign(series_length, 0.0); + node_outputs.clear(); + for (int i = 0; i < series_length; i++) { + node_outputs.emplace_back(pi.size(), 0.0); + } + output_values.assign(series_length, 0.0); + error_values.assign(series_length, 0.0); + inputs_fired.assign(series_length, 0); + outputs_fired.assign(series_length, 0); + input_values.assign(series_length, 0.0); if (counter >= CRYSTALLIZATION_THRESHOLD) { nodes[maxi]->reset(series_length); @@ -178,8 +209,10 @@ void DNASNode::input_fired(int32_t time, double incoming_output) { } if (counter >= CRYSTALLIZATION_THRESHOLD) { + Log::info("%d hmm\n", maxi >= 0); assert(maxi >= 0); + Log::info("%d %d %p\n", maxi, time, nodes[maxi]); nodes[maxi]->input_fired(time, input_values[time]); node_outputs[time][maxi] = nodes[maxi]->output_values[time]; output_values[time] = nodes[maxi]->output_values[time]; @@ -286,6 +319,7 @@ void DNASNode::set_weights(const vector& parameters) { } void DNASNode::get_weights(int32_t& offset, vector& parameters) const { + // int start = offset; // Log::info("pi start %d; ", offset); for (int32_t i = 0; i < (int32_t) pi.size(); i++) { parameters[offset++] = pi[i]; @@ -300,17 +334,15 @@ void DNASNode::set_weights(int32_t& offset, 
const vector& parameters) { // int start = offset; for (int32_t i = 0; i < (int32_t) pi.size(); i++) { pi[i] = parameters[offset++]; + if (pi[i] < 0.01) { + pi[i] = 0.01; + } } - // Log::info("Pi indices: %d-%d\n", start, offset); + for (auto node : nodes) { node->set_weights(offset, parameters); } calculate_z(); - // string s = "Pi = { "; - // for (auto p : pi) { - // s += std::to_string(p) + ", "; - // } - // Log::info("%s }\n", s.c_str()); } void DNASNode::set_pi(const vector& new_pi) { @@ -360,6 +392,7 @@ void DNASNode::get_gradients(vector& gradients) { } else { gradients.assign(get_number_weights(), 0.0); int offset = 0; + for (int32_t i = 0; i < (int32_t) pi.size(); i++) { gradients[offset++] = d_pi[i] * 0.1; } diff --git a/rnn/dnas_node.hxx b/rnn/dnas_node.hxx index bd5a6b2d..bdafc88f 100644 --- a/rnn/dnas_node.hxx +++ b/rnn/dnas_node.hxx @@ -22,8 +22,6 @@ using std::unique_ptr; #include "rnn_node.hxx" #include "rnn_node_interface.hxx" -#define CRYSTALLIZATION_THRESHOLD 1000 - class DNASNode : public RNN_Node_Interface { private: template @@ -58,13 +56,9 @@ class DNASNode : public RNN_Node_Interface { // A vector to put gumbel noise into; just to avoid re-allocation vector noise; - // Temperature used when drawing samples from Gumbel-Softmax(pi) - double tao = 1.0; int32_t counter = 0; int32_t maxi = -1; - - // if > 0, then the samples will be forced to be K-hot (K non-zero values that sum to one) - int32_t k = 1; + double tao; // Whether to re-sample the gumbel softmax distribution when resetting the node. 
// Can be set externally using DNASNode::set_stochastic @@ -73,6 +67,9 @@ class DNASNode : public RNN_Node_Interface { vector> node_outputs; public: + static int32_t CRYSTALLIZATION_THRESHOLD; + static int32_t k; + DNASNode( vector&& nodes, int32_t _innovation_number, int32_t _type, double _depth, int32_t counter = -1 @@ -83,6 +80,8 @@ class DNASNode : public RNN_Node_Interface { template void sample_gumbel_softmax(Rng& rng); void calculate_z(); + double calculate_tao(); + double calculate_pi_lr(); virtual void initialize_lamarckian( minstd_rand0& generator, NormalDistribution& normal_distribution, double mu, double sigma @@ -110,6 +109,8 @@ class DNASNode : public RNN_Node_Interface { virtual void reset(int32_t _series_length); virtual void write_to_stream(ostream& out); + void print_info(); + virtual RNN_Node_Interface* copy() const; void set_stochastic(bool stochastic); diff --git a/rnn/generate_nn.cxx b/rnn/generate_nn.cxx index 91231a01..ba656a3e 100644 --- a/rnn/generate_nn.cxx +++ b/rnn/generate_nn.cxx @@ -9,11 +9,10 @@ using std::string; using std::vector; /* - * node_kind is the type of memory cell (e.g. LSTM, UGRNN) * innovation_counter - reference to an integer used to keep track if innovation numbers. it will be incremented once. 
*/ -RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_counter, double depth) { - switch (node_kind) { +RNN_Node_Interface* create_hidden_node(node_t node_type, int32_t& innovation_counter, double depth) { + switch (node_type) { case SIMPLE_NODE: return new RNN_Node(++innovation_counter, HIDDEN_LAYER, depth, SIMPLE_NODE); case JORDAN_NODE: @@ -36,9 +35,6 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co return new ENAS_DAG_Node(++innovation_counter, HIDDEN_LAYER, depth); case RANDOM_DAG_NODE: return new RANDOM_DAG_Node(++innovation_counter, HIDDEN_LAYER, depth); - case DNAS_NODE: - Log::fatal("You shouldn't be creating DNAS nodes using generate_nn::create_hidden_node.\n"); - exit(1); case SIN_NODE: return new SIN_Node(++innovation_counter, HIDDEN_LAYER, depth); case SUM_NODE: @@ -55,7 +51,7 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co return new MULTIPLY_Node(++innovation_counter, HIDDEN_LAYER, depth); default: Log::fatal( - "If you are seeing this, an invalid node_kind was used to create a node (node_kind = %d\n", node_kind + "If you are seeing this, an invalid node_type was used to create a node (node_type = %d\n", node_type ); exit(1); } @@ -64,7 +60,7 @@ RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_co return nullptr; } -DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types) { +DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types) { vector nodes(node_types.size()); if (node_types.size() == 0) { @@ -150,7 +146,7 @@ RNN_Genome* create_nn( RNN_Genome* create_dnas_nn( const vector& input_parameter_names, int32_t number_hidden_layers, int32_t number_hidden_nodes, - const vector& output_parameter_names, int32_t max_recurrent_depth, vector& node_types, + const vector& output_parameter_names, int32_t max_recurrent_depth, vector& node_types, WeightRules* 
weight_rules ) { auto f = [&](int32_t& innovation_counter, double depth) -> RNN_Node_Interface* { @@ -199,13 +195,21 @@ RNN_Genome* get_seed_genome( ); Log::info("Finished transfering seed genome\n"); } else { - if (seed_genome == NULL) { + bool use_dnas_seed = argument_exists(arguments, "--use_dnas_seed"); + + if (!use_dnas_seed) { seed_genome = create_ff( time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, weight_rules ); seed_genome->initialize_randomly(); Log::info("Generated seed genome, seed genome is minimal\n"); + } else { + vector node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; + seed_genome = create_dnas_nn( + time_series_sets->get_input_parameter_names(), 0, 0, time_series_sets->get_output_parameter_names(), 0, + node_types, weight_rules + ); } } diff --git a/rnn/generate_nn.hxx b/rnn/generate_nn.hxx index 3614497b..0ebd0d2a 100644 --- a/rnn/generate_nn.hxx +++ b/rnn/generate_nn.hxx @@ -5,6 +5,9 @@ #include using std::string; +#include +using std::unordered_map; + #include using std::vector; @@ -36,7 +39,8 @@ template NodeT* create_hidden_memory_cell(int32_t& innovation_counter, double depth) { return new NodeT(++innovation_counter, HIDDEN_LAYER, depth); } -RNN_Node_Interface* create_hidden_node(int32_t node_kind, int32_t& innovation_counter, double depth); + +RNN_Node_Interface* create_hidden_node(node_t node_type, int32_t& innovation_counter, double depth); RNN_Genome* create_nn( const vector& input_parameter_names, int32_t number_hidden_layers, int32_t number_hidden_nodes, @@ -44,7 +48,7 @@ RNN_Genome* create_nn( std::function make_node, WeightRules* weight_rules ); -template +template RNN_Genome* create_simple_nn( const vector& input_parameter_names, int32_t number_hidden_layers, int32_t number_hidden_nodes, const vector& output_parameter_names, int32_t max_recurrent_depth, WeightRules* weight_rules @@ -95,11 +99,11 @@ RNN_Genome* create_memory_cell_nn( #define 
create_inverse(...) create_memory_cell_nn(__VA_ARGS__) #define create_multiply(...) create_memory_cell_nn(__VA_ARGS__) -DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types); +DNASNode* create_dnas_node(int32_t& innovation_counter, double depth, const vector& node_types); RNN_Genome* create_dnas_nn( const vector& input_parameter_names, int32_t number_hidden_layers, int32_t number_hidden_nodes, - const vector& output_parameter_names, int32_t max_recurrent_depth, vector& node_types, + const vector& output_parameter_names, int32_t max_recurrent_depth, vector& node_types, WeightRules* weight_rules ); diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx index 6bf061b9..90403dfa 100644 --- a/rnn/genome_property.cxx +++ b/rnn/genome_property.cxx @@ -10,6 +10,22 @@ GenomeProperty::GenomeProperty() { max_recurrent_depth = 10; } +int32_t GenomeProperty::compute_bp_iterations(RNN_Genome* genome) { + if (use_burn_in_bp_epoch) { + int32_t n = genome->generation_id / burn_in_period; + n = n > max_burn_in_cycles ? 
max_burn_in_cycles : n; + + float epochs = bp_epochs_start; + for (int i = 0; i < n; i++) { + epochs *= burn_in_ratio; + } + + return (int32_t) epochs; + } else { + return bp_iterations; + } +} + void GenomeProperty::generate_genome_property_from_arguments(const vector& arguments) { get_argument(arguments, "--bp_iterations", true, bp_iterations); use_dropout = get_argument(arguments, "--dropout_probability", false, dropout_probability); @@ -17,6 +33,15 @@ void GenomeProperty::generate_genome_property_from_arguments(const vectorset_bp_iterations(bp_iterations); - if (use_dropout) { - genome->enable_dropout(dropout_probability); - } + genome->set_bp_iterations(compute_bp_iterations(genome)); + + if (use_dropout) genome->enable_dropout(dropout_probability); + if (!use_epigenetic_weights) genome->initialize_randomly(); + genome->normalize_type = normalize_type; genome->set_parameter_names(input_parameter_names, output_parameter_names); genome->set_normalize_bounds(normalize_type, normalize_mins, normalize_maxs, normalize_avgs, normalize_std_devs); @@ -48,4 +74,4 @@ void GenomeProperty::get_time_series_parameters(TimeSeriesSets* time_series_sets uniform_int_distribution GenomeProperty::get_recurrent_depth_dist() { return uniform_int_distribution(this->min_recurrent_depth, this->max_recurrent_depth); -} \ No newline at end of file +} diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx index 7d220ff6..0ebc8b40 100644 --- a/rnn/genome_property.hxx +++ b/rnn/genome_property.hxx @@ -17,6 +17,13 @@ class GenomeProperty { double dropout_probability; int32_t min_recurrent_depth; int32_t max_recurrent_depth; + bool use_epigenetic_weights = true; + + bool use_burn_in_bp_epoch; + int32_t burn_in_period = 2048; + int32_t max_burn_in_cycles = 4; + double bp_epochs_start = 0.5; + double burn_in_ratio = 2.0; // TimeSeriesSets *time_series_sets; int32_t number_inputs; @@ -30,12 +37,16 @@ class GenomeProperty { map normalize_avgs; map normalize_std_devs; + int32_t 
compute_bp_iterations(RNN_Genome* genome); + public: GenomeProperty(); + void generate_genome_property_from_arguments(const vector& arguments); void set_genome_properties(RNN_Genome* genome); void get_time_series_parameters(TimeSeriesSets* time_series_sets); + uniform_int_distribution get_recurrent_depth_dist(); }; -#endif \ No newline at end of file +#endif diff --git a/rnn/rnn_edge.cxx b/rnn/rnn_edge.cxx index 51b53254..ca63dc65 100644 --- a/rnn/rnn_edge.cxx +++ b/rnn/rnn_edge.cxx @@ -93,7 +93,8 @@ RNN_Edge* RNN_Edge::copy(const vector new_nodes) { } void RNN_Edge::propagate_forward(int32_t time) { - if (input_node->inputs_fired[time] != input_node->total_inputs) { + if (input_node->inputs_fired[time] != input_node->total_inputs || time < 0 + || time >= input_node->output_values.size()) { Log::fatal( "ERROR! propagate forward called on edge %d where input_node->inputs_fired[%d] (%d) != total_inputs (%d)\n", innovation_number, time, input_node->inputs_fired[time], input_node->total_inputs @@ -105,7 +106,6 @@ void RNN_Edge::propagate_forward(int32_t time) { exit(1); } - // Log::debug("input_node %p %d\n", input_node, input_node->output_values.size()); double output = input_node->output_values[time] * weight; // Log::debug("propagating forward at time %d from %d to %d, value: %lf, input: %lf, weight: %lf\n", time, diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index d506307f..d0887808 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -44,6 +44,9 @@ using std::to_string; #include using std::vector; +#include +using std::move; + #include using std::unordered_map; @@ -68,7 +71,7 @@ using std::map; #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" -vector dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; +vector dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; string parse_fitness(double fitness) { if (fitness == EXAMM_MAX_DOUBLE) { @@ -243,8 +246,7 @@ string 
RNN_Genome::print_statistics_header() { oss << std::left << setw(12) << "MSE" << setw(12) << "MAE" << setw(12) << "Edges" << setw(12) << "Rec Edges" << setw(12) << "Simple" << setw(12) << "Jordan" << setw(12) << "Elman" << setw(12) << "UGRNN" << setw(12) << "MGU" << setw(12) << "GRU" << setw(12) << "Delta" << setw(12) << "LSTM" << setw(12) << "ENARC" << setw(12) - << "ENAS_DAG" << setw(12) << "RANDOM_DAG" << setw(12) << "Total" - << "Generated"; + << "ENAS_DAG" << setw(12) << "RANDOM_DAG" << setw(12) << "Total" << "Generated"; return oss.str(); } @@ -258,8 +260,8 @@ string RNN_Genome::print_statistics() { << get_node_count_str(MGU_NODE) << setw(12) << get_node_count_str(GRU_NODE) << setw(12) << get_node_count_str(DELTA_NODE) << setw(12) << get_node_count_str(LSTM_NODE) << setw(12) << get_node_count_str(ENARC_NODE) << setw(12) << get_node_count_str(ENAS_DAG_NODE) << setw(12) - << get_node_count_str(RANDOM_DAG_NODE) << setw(12) << get_node_count_str(-1) //-1 does all nodes - << generated_by_string(); + << get_node_count_str(RANDOM_DAG_NODE) << setw(12) << get_enabled_node_count() << " (" << get_node_count() + << ")" << generated_by_string(); return oss.str(); } @@ -291,7 +293,7 @@ string RNN_Genome::get_edge_count_str(bool recurrent) { return oss.str(); } -string RNN_Genome::get_node_count_str(int32_t node_type) { +string RNN_Genome::get_node_count_str(node_t node_type) { ostringstream oss; if (node_type < 0) { oss << get_enabled_node_count() << " (" << get_node_count() << ")"; @@ -318,7 +320,7 @@ int32_t RNN_Genome::get_enabled_node_count() { return count; } -int32_t RNN_Genome::get_enabled_node_count(int32_t node_type) { +int32_t RNN_Genome::get_enabled_node_count(node_t node_type) { int32_t count = 0; for (int32_t i = 0; i < (int32_t) nodes.size(); i++) { @@ -334,7 +336,7 @@ int32_t RNN_Genome::get_node_count() { return (int32_t) nodes.size(); } -int32_t RNN_Genome::get_node_count(int32_t node_type) { +int32_t RNN_Genome::get_node_count(node_t node_type) { int32_t 
count = 0; for (int32_t i = 0; i < (int32_t) nodes.size(); i++) { @@ -1655,7 +1657,7 @@ void RNN_Genome::get_mu_sigma(const vector& p, double& mu, double& sigma } RNN_Node_Interface* RNN_Genome::create_node( - double mu, double sigma, int32_t node_type, int32_t& node_innovation_count, double depth + double mu, double sigma, node_t node_type, int32_t& node_innovation_count, double depth ) { RNN_Node_Interface* n = NULL; WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -2005,7 +2007,7 @@ bool RNN_Genome::enable_edge() { } bool RNN_Genome::split_edge( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ) { Log::trace("\tattempting to split an edge!\n"); @@ -2383,7 +2385,7 @@ bool RNN_Genome::connect_node_to_hid_nodes( /* ################# ################# ################# */ bool RNN_Genome::add_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ) { Log::trace("\tattempting to add a node!\n"); @@ -2538,7 +2540,7 @@ bool RNN_Genome::disable_node() { } bool RNN_Genome::split_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ) { Log::trace("\tattempting to split a node!\n"); @@ -2760,7 +2762,7 @@ bool RNN_Genome::split_node( } bool RNN_Genome::merge_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, + double mu, double sigma, node_t node_type, uniform_int_distribution 
dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ) { Log::trace("\tattempting to merge a node!\n"); @@ -3188,16 +3190,18 @@ void RNN_Genome::read_from_array(char* array, int32_t length) { } RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { - int32_t innovation_number, layer_type, node_type; + int32_t innovation_number, layer_type, inode_type; double depth; bool enabled; bin_istream.read((char*) &innovation_number, sizeof(int32_t)); bin_istream.read((char*) &layer_type, sizeof(int32_t)); - bin_istream.read((char*) &node_type, sizeof(int32_t)); + bin_istream.read((char*) &inode_type, sizeof(int32_t)); bin_istream.read((char*) &depth, sizeof(double)); bin_istream.read((char*) &enabled, sizeof(bool)); + node_t node_type = (node_t) inode_type; + string parameter_name; read_binary_string(bin_istream, parameter_name, "parameter_name"); Log::debug( @@ -3205,77 +3209,81 @@ RNN_Node_Interface* RNN_Genome::read_node_from_stream(istream& bin_istream) { ); RNN_Node_Interface* node = nullptr; - if (node_type == LSTM_NODE) { - node = new LSTM_Node(innovation_number, layer_type, depth); - } else if (node_type == DELTA_NODE) { - node = new Delta_Node(innovation_number, layer_type, depth); - } else if (node_type == GRU_NODE) { - node = new GRU_Node(innovation_number, layer_type, depth); - } else if (node_type == ENARC_NODE) { - node = new ENARC_Node(innovation_number, layer_type, depth); - } else if (node_type == ENAS_DAG_NODE) { - node = new ENAS_DAG_Node(innovation_number, layer_type, depth); - } else if (node_type == RANDOM_DAG_NODE) { - node = new RANDOM_DAG_Node(innovation_number, layer_type, depth); - } else if (node_type == MGU_NODE) { - node = new MGU_Node(innovation_number, layer_type, depth); - } else if (node_type == UGRNN_NODE) { - node = new UGRNN_Node(innovation_number, layer_type, depth); - } else if (node_type == SIMPLE_NODE || node_type == JORDAN_NODE || node_type == ELMAN_NODE) { - if (layer_type == HIDDEN_LAYER) { - 
node = new RNN_Node(innovation_number, layer_type, depth, node_type); - } else { - node = new RNN_Node(innovation_number, layer_type, depth, node_type, parameter_name); - } - } else if (node_type == DNAS_NODE) { - int32_t n_nodes; - bin_istream.read((char*) &n_nodes, sizeof(int32_t)); - - int32_t counter; - bin_istream.read((char*) &counter, sizeof(int32_t)); - vector pi(n_nodes, 0.0); - bin_istream.read((char*) &pi[0], sizeof(double) * n_nodes); - - vector nodes(n_nodes, nullptr); - for (int i = 0; i < n_nodes; i++) { - nodes[i] = RNN_Genome::read_node_from_stream(bin_istream); - } - - DNASNode* dnas_node = new DNASNode(move(nodes), innovation_number, layer_type, depth, counter); - dnas_node->set_pi(pi); - node = (RNN_Node_Interface*) dnas_node; - } else if (node_type == SIN_NODE) { - node = new SIN_Node(innovation_number, layer_type, depth); - } else if (node_type == SUM_NODE) { - node = new SUM_Node(innovation_number, layer_type, depth); - } else if (node_type == COS_NODE) { - node = new COS_Node(innovation_number, layer_type, depth); - } else if (node_type == TANH_NODE) { - node = new TANH_Node(innovation_number, layer_type, depth); - } else if (node_type == SIGMOID_NODE) { - node = new SIGMOID_Node(innovation_number, layer_type, depth); - } else if (node_type == INVERSE_NODE) { - node = new INVERSE_Node(innovation_number, layer_type, depth); - } else if (node_type == MULTIPLY_NODE) { - node = new MULTIPLY_Node(innovation_number, layer_type, depth); - } else { - Log::fatal("Error reading node from stream, unknown node_type: %d\n", node_type); - exit(1); + switch (node_type) { + case SIMPLE_NODE: + case JORDAN_NODE: + case ELMAN_NODE: + if (layer_type == HIDDEN_LAYER) { + node = new RNN_Node(innovation_number, layer_type, depth, node_type); + } else { + node = new RNN_Node(innovation_number, layer_type, depth, node_type, parameter_name); + } + break; + + case DNAS_NODE: { + int32_t n_nodes; + bin_istream.read((char*) &n_nodes, sizeof(int32_t)); + + int32_t 
counter; + bin_istream.read((char*) &counter, sizeof(int32_t)); + vector pi(n_nodes, 0.0); + bin_istream.read((char*) &pi[0], sizeof(double) * n_nodes); + + vector nodes(n_nodes, nullptr); + for (int i = 0; i < n_nodes; i++) { + nodes[i] = RNN_Genome::read_node_from_stream(bin_istream); + } + + DNASNode* dnas_node = new DNASNode(std::move(nodes), innovation_number, layer_type, depth, counter); + dnas_node->set_pi(pi); + node = (RNN_Node_Interface*) dnas_node; + break; + } + + default: + int32_t dummy_counter = 0; + node = create_hidden_node(node_type, dummy_counter, depth); + node->innovation_number = innovation_number; } node->enabled = enabled; return node; } + +#define MAGIC 0xFA + +#define read_magic(place) \ + { \ + uint8_t boo = MAGIC; \ + bin_istream.read((char*) &boo, sizeof(uint8_t)); \ + if (boo != MAGIC) { \ + Log::error("ERROR IN SERIALIZING - FAILED TO READ MAGIC at %d; %x != %x\n", place, boo, MAGIC); \ + exit(-1); \ + } \ + } + +#define write_magic() \ + { \ + uint8_t xxmagic = MAGIC; \ + bin_ostream.write((char*) &xxmagic, sizeof(uint8_t)); \ + } + void RNN_Genome::read_from_stream(istream& bin_istream) { Log::debug("READING GENOME FROM STREAM\n"); + read_magic(__LINE__); + bin_istream.read((char*) &generation_id, sizeof(int32_t)); bin_istream.read((char*) &group_id, sizeof(int32_t)); bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); + read_magic(__LINE__); + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); + read_magic(__LINE__); + WeightType weight_initialize = WeightType::NONE; WeightType weight_inheritance = WeightType::NONE; WeightType mutated_component_weight = WeightType::NONE; @@ -3284,6 +3292,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &weight_inheritance, sizeof(int32_t)); bin_istream.read((char*) &mutated_component_weight, sizeof(int32_t)); + read_magic(__LINE__); + weight_rules = new WeightRules(); 
weight_rules->set_weight_initialize_method(weight_initialize); weight_rules->set_weight_inheritance_method(weight_inheritance); @@ -3305,8 +3315,10 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generator_iss(generator_str); generator_iss >> generator; - string rng_0_1_str; - read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); + read_magic(__LINE__); + + // string rng_0_1_str; + // read_binary_string(bin_istream, rng_0_1_str, "rng_0_1"); // So for some reason this was serialized incorrectly for some genomes, // but the value should always be the same so we really don't need to de-serialize it anways and can just // assign it a constant value @@ -3320,6 +3332,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream generated_by_map_iss(generated_by_map_str); read_map(generated_by_map_iss, generated_by_map); + read_magic(__LINE__); + bin_istream.read((char*) &best_validation_mse, sizeof(double)); bin_istream.read((char*) &best_validation_mae, sizeof(double)); @@ -3331,6 +3345,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { initial_parameters.assign(initial_parameters_v, initial_parameters_v + n_initial_parameters); delete[] initial_parameters_v; + read_magic(__LINE__); + int32_t n_best_parameters; bin_istream.read((char*) &n_best_parameters, sizeof(int32_t)); Log::debug("reading %d best parameters.\n", n_best_parameters); @@ -3339,6 +3355,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { best_parameters.assign(best_parameters_v, best_parameters_v + n_best_parameters); delete[] best_parameters_v; + read_magic(__LINE__); + input_parameter_names.clear(); int32_t n_input_parameter_names; bin_istream.read((char*) &n_input_parameter_names, sizeof(int32_t)); @@ -3349,6 +3367,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { input_parameter_names.push_back(input_parameter_name); } + read_magic(__LINE__); + output_parameter_names.clear(); int32_t n_output_parameter_names; 
bin_istream.read((char*) &n_output_parameter_names, sizeof(int32_t)); @@ -3359,6 +3379,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { output_parameter_names.push_back(output_parameter_name); } + read_magic(__LINE__); + int32_t n_nodes; bin_istream.read((char*) &n_nodes, sizeof(int32_t)); Log::debug("reading %d nodes.\n", n_nodes); @@ -3366,6 +3388,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { nodes.clear(); for (int32_t i = 0; i < n_nodes; i++) { nodes.push_back(RNN_Genome::read_node_from_stream(bin_istream)); + read_magic(__LINE__); } int32_t n_edges; @@ -3392,6 +3415,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); edge->enabled = enabled; edges.push_back(edge); + read_magic(__LINE__); } int32_t n_recurrent_edges; @@ -3423,6 +3447,7 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { // innovation_list.push_back(innovation_number); recurrent_edge->enabled = enabled; recurrent_edges.push_back(recurrent_edge); + read_magic(__LINE__); } read_binary_string(bin_istream, normalize_type, "normalize_type"); @@ -3447,6 +3472,8 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { istringstream normalize_std_devs_iss(normalize_std_devs_str); read_map(normalize_std_devs_iss, normalize_std_devs); + read_magic(__LINE__); + assign_reachability(); } @@ -3470,13 +3497,20 @@ void RNN_Genome::write_to_file(string bin_filename) { void RNN_Genome::write_to_stream(ostream& bin_ostream) { Log::debug("WRITING GENOME TO STREAM\n"); + + write_magic(); + bin_ostream.write((char*) &generation_id, sizeof(int32_t)); bin_ostream.write((char*) &group_id, sizeof(int32_t)); bin_ostream.write((char*) &bp_iterations, sizeof(int32_t)); + write_magic(); + bin_ostream.write((char*) &use_dropout, sizeof(bool)); bin_ostream.write((char*) &dropout_probability, sizeof(double)); + write_magic(); + WeightType weight_initialize = weight_rules->get_weight_initialize_method(); WeightType 
weight_inheritance = weight_rules->get_weight_inheritance_method(); WeightType mutated_component_weight = weight_rules->get_mutated_components_weight_method(); @@ -3484,6 +3518,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &weight_inheritance, sizeof(int32_t)); bin_ostream.write((char*) &mutated_component_weight, sizeof(int32_t)); + write_magic(); + Log::debug("generation_id: %d\n", generation_id); Log::debug("bp_iterations: %d\n", bp_iterations); @@ -3501,16 +3537,20 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { string generator_str = generator_oss.str(); write_binary_string(bin_ostream, generator_str, "generator"); - ostringstream rng_0_1_oss; - rng_0_1_oss << rng_0_1; - string rng_0_1_str = rng_0_1_oss.str(); - write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); + write_magic(); + + // ostringstream rng_0_1_oss; + // rng_0_1_oss << rng_0_1; + // string rng_0_1_str = rng_0_1_oss.str(); + // write_binary_string(bin_ostream, rng_0_1_str, "rng_0_1"); ostringstream generated_by_map_oss; write_map(generated_by_map_oss, generated_by_map); string generated_by_map_str = generated_by_map_oss.str(); write_binary_string(bin_ostream, generated_by_map_str, "generated_by_map"); + write_magic(); + bin_ostream.write((char*) &best_validation_mse, sizeof(double)); bin_ostream.write((char*) &best_validation_mae, sizeof(double)); @@ -3519,18 +3559,24 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &n_initial_parameters, sizeof(int32_t)); bin_ostream.write((char*) &initial_parameters[0], sizeof(double) * initial_parameters.size()); + write_magic(); + int32_t n_best_parameters = (int32_t) best_parameters.size(); bin_ostream.write((char*) &n_best_parameters, sizeof(int32_t)); if (n_best_parameters) { bin_ostream.write((char*) &best_parameters[0], sizeof(double) * best_parameters.size()); } + write_magic(); + int32_t n_input_parameter_names = (int32_t) input_parameter_names.size(); 
bin_ostream.write((char*) &n_input_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) input_parameter_names.size(); i++) { write_binary_string(bin_ostream, input_parameter_names[i], "input_parameter_names[" + std::to_string(i) + "]"); } + write_magic(); + int32_t n_output_parameter_names = (int32_t) output_parameter_names.size(); bin_ostream.write((char*) &n_output_parameter_names, sizeof(int32_t)); for (int32_t i = 0; i < (int32_t) output_parameter_names.size(); i++) { @@ -3539,6 +3585,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); } + write_magic(); + int32_t n_nodes = (int32_t) nodes.size(); bin_ostream.write((char*) &n_nodes, sizeof(int32_t)); Log::debug("writing %d nodes.\n", n_nodes); @@ -3549,6 +3597,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { nodes[i]->depth, nodes[i]->parameter_name.c_str() ); nodes[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_edges = (int32_t) edges.size(); @@ -3561,6 +3610,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { edges[i]->output_innovation_number ); edges[i]->write_to_stream(bin_ostream); + write_magic(); } int32_t n_recurrent_edges = (int32_t) recurrent_edges.size(); @@ -3574,6 +3624,7 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { ); recurrent_edges[i]->write_to_stream(bin_ostream); + write_magic(); } write_binary_string(bin_ostream, normalize_type, "normalize_type"); @@ -3597,6 +3648,8 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { write_map(normalize_std_devs_oss, normalize_std_devs); string normalize_std_devs_str = normalize_std_devs_oss.str(); write_binary_string(bin_ostream, normalize_std_devs_str, "normalize_std_devs"); + + write_magic(); } void RNN_Genome::update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count) { @@ -4161,7 +4214,8 @@ void RNN_Genome::write_equations(ostream& outstream) { Log::fatal("ERROR: output_node not correct type\n"); exit(1); } - } else if 
(innovation_to_inputs_fired[output_node->innovation_number] > 1 && innovation_to_inputs_fired[output_node->innovation_number] < output_node->total_inputs){ + } else if (innovation_to_inputs_fired[output_node->innovation_number] > 1 + && innovation_to_inputs_fired[output_node->innovation_number] < output_node->total_inputs) { if (output_node->node_type == MULTIPLY_NODE) { current_output_equation += " * " + input_equation; } else { @@ -4248,7 +4302,8 @@ void RNN_Genome::write_equations(ostream& outstream) { Log::fatal("ERROR: output_node not correct type"); exit(1); } - } else if (innovation_to_inputs_fired[output_node->innovation_number] > 1 && innovation_to_inputs_fired[output_node->innovation_number] < output_node->total_inputs){ + } else if (innovation_to_inputs_fired[output_node->innovation_number] > 1 + && innovation_to_inputs_fired[output_node->innovation_number] < output_node->total_inputs) { if (output_node->node_type == MULTIPLY_NODE) { current_output_equation += " * " + input_equation; } else { @@ -4291,8 +4346,8 @@ void RNN_Genome::write_equations(ostream& outstream) { // outstream << "is_reachable: " << nodes[i]->is_reachable() << endl; outstream << endl; } else if (nodes[i]->layer_type == OUTPUT_LAYER && nodes[i]->is_reachable()) { - outstream << innovation_to_label[nodes[i]->innovation_number] << "(t + 1)" - << " = " << innovation_to_equation[nodes[i]->innovation_number] << endl; + outstream << innovation_to_label[nodes[i]->innovation_number] << "(t + 1)" << " = " + << innovation_to_equation[nodes[i]->innovation_number] << endl; outstream << endl; } } diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index 717b5257..56977e76 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -32,7 +32,7 @@ using std::vector; // mysql can't handle the max float value for some reason #define EXAMM_MAX_DOUBLE 10000000 -extern vector dnas_node_types; +extern vector dnas_node_types; string parse_fitness(double fitness); @@ -111,7 +111,7 @@ class RNN_Genome { 
string generated_by_string(); string get_edge_count_str(bool recurrent); - string get_node_count_str(int32_t node_type); + string get_node_count_str(node_t node_type); const map* get_generated_by_map(); @@ -119,8 +119,8 @@ class RNN_Genome { int32_t get_enabled_edge_count(); int32_t get_enabled_recurrent_edge_count(); - int32_t get_enabled_node_count(int32_t node_type); - int32_t get_node_count(int32_t node_type); + int32_t get_enabled_node_count(node_t node_type); + int32_t get_node_count(node_t node_type); int32_t get_enabled_node_count(); int32_t get_node_count(); @@ -241,7 +241,7 @@ class RNN_Genome { bool outputs_unreachable(); RNN_Node_Interface* create_node( - double mu, double sigma, int32_t node_type, int32_t& node_innovation_count, double depth + double mu, double sigma, node_t node_type, int32_t& node_innovation_count, double depth ); bool attempt_edge_insert( @@ -266,23 +266,23 @@ class RNN_Genome { bool disable_edge(); bool enable_edge(); bool split_edge( - double mu, double sigma, int32_t node_type, uniform_int_distribution rec_depth_dist, + double mu, double sigma, node_t node_type, uniform_int_distribution rec_depth_dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ); bool add_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ); bool enable_node(); bool disable_node(); bool split_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ); bool merge_node( - double mu, double sigma, int32_t node_type, uniform_int_distribution dist, + double mu, double sigma, node_t node_type, uniform_int_distribution dist, int32_t& edge_innovation_count, int32_t& node_innovation_count ); @@ -327,6 +327,10 @@ class 
RNN_Genome { ); vector pick_possible_nodes(int32_t layer_type, bool not_all_hidden, string node_type); + const vector& get_nodes() { + return this->nodes; + } + void update_innovation_counts(int32_t& node_innovation_count, int32_t& edge_innovation_count); vector get_innovation_list(); diff --git a/rnn/rnn_node.cxx b/rnn/rnn_node.cxx index b6f34344..e0270e2a 100644 --- a/rnn/rnn_node.cxx +++ b/rnn/rnn_node.cxx @@ -5,7 +5,7 @@ using std::vector; #include "common/log.hxx" #include "rnn_node.hxx" -RNN_Node::RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _depth, int32_t _node_type) +RNN_Node::RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _depth, node_t _node_type) : RNN_Node_Interface(_innovation_number, _layer_type, _depth), bias(0) { // node type will be simple, jordan or elman node_type = _node_type; @@ -13,7 +13,7 @@ RNN_Node::RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _dept } RNN_Node::RNN_Node( - int32_t _innovation_number, int32_t _layer_type, double _depth, int32_t _node_type, string _parameter_name + int32_t _innovation_number, int32_t _layer_type, double _depth, node_t _node_type, string _parameter_name ) : RNN_Node_Interface(_innovation_number, _layer_type, _depth, _parameter_name), bias(0) { // node type will be simple, jordan or elman @@ -91,6 +91,8 @@ void RNN_Node::try_update_deltas(int32_t time) { outputs_fired[time], total_outputs ); exit(1); + } else if (time >= d_input.size() || time < 0) { + Log::fatal("invalid time %d\n", time); } d_input[time] *= ld_output[time]; diff --git a/rnn/rnn_node.hxx b/rnn/rnn_node.hxx index 0f6bf741..3bfefa1b 100644 --- a/rnn/rnn_node.hxx +++ b/rnn/rnn_node.hxx @@ -15,12 +15,10 @@ class RNN_Node : public RNN_Node_Interface { public: // constructor for hidden nodes - RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _depth, int32_t _node_type); + RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _depth, node_t _node_type); // 
constructor for input and output nodes - RNN_Node( - int32_t _innovation_number, int32_t _layer_type, double _depth, int32_t _node_type, string _parameter_name - ); + RNN_Node(int32_t _innovation_number, int32_t _layer_type, double _depth, node_t _node_type, string _parameter_name); ~RNN_Node(); void initialize_lamarckian( diff --git a/rnn/rnn_node_interface.cxx b/rnn/rnn_node_interface.cxx index 5030aa88..7f3d061b 100644 --- a/rnn/rnn_node_interface.cxx +++ b/rnn/rnn_node_interface.cxx @@ -8,14 +8,18 @@ using std::ostream; #include using std::string; +#include +using std::vector; + #include "common/log.hxx" #include "rnn/rnn_genome.hxx" #include "rnn_node_interface.hxx" -extern const string NODE_TYPES[] = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", "delta", - "LSTM", "ENARC", "ENAS_DAG", "rdag", "dnas", "sin", "sum", - "cos", "tanh", "sigmoid", "inverse", "multiply"}; -extern const unordered_map string_to_node_type = { +const vector NODE_TYPES = {"simple", "jordan", "elman", "UGRNN", "MGU", "GRU", "delta", + "LSTM", "ENARC", "ENAS_DAG", "rdag", "dnas", "sin", "sum", + "cos", "tanh", "sigmoid", "inverse", "multiply"}; + +extern const unordered_map string_to_node_type = { { "simple", SIMPLE_NODE}, { "jordan", JORDAN_NODE}, { "elman", ELMAN_NODE}, @@ -37,9 +41,9 @@ extern const unordered_map string_to_node_type = { {"multiply", MULTIPLY_NODE}, }; -extern const int32_t NUMBER_NODE_TYPES = string_to_node_type.size(); +extern const int32_t NUMBER_NODE_TYPES = NODE_TYPES.size(); -int32_t node_type_from_string(string& node_type) { +node_t node_type_from_string(string& node_type) { std::transform(node_type.begin(), node_type.end(), node_type.begin(), [](unsigned char c) { return std::tolower(c); }); diff --git a/rnn/rnn_node_interface.hxx b/rnn/rnn_node_interface.hxx index 2ee56875..2782a87b 100644 --- a/rnn/rnn_node_interface.hxx +++ b/rnn/rnn_node_interface.hxx @@ -26,32 +26,33 @@ class RNN; #define HIDDEN_LAYER 1 #define OUTPUT_LAYER 2 -extern const string 
NODE_TYPES[]; -extern const unordered_map string_to_node_type; -extern const int32_t NUMBER_NODE_TYPES; -int32_t node_type_from_string(string& node_type); - -#define SIMPLE_NODE 0 -#define JORDAN_NODE 1 -#define ELMAN_NODE 2 -#define UGRNN_NODE 3 -#define MGU_NODE 4 -#define GRU_NODE 5 -#define DELTA_NODE 6 -#define LSTM_NODE 7 -#define ENARC_NODE 8 -#define ENAS_DAG_NODE 9 -#define RANDOM_DAG_NODE 10 -#define DNAS_NODE 11 -#define SIN_NODE 12 -#define SUM_NODE 13 -#define COS_NODE 14 -#define TANH_NODE 15 -#define SIGMOID_NODE 16 -#define INVERSE_NODE 17 -#define MULTIPLY_NODE 18 - -int32_t node_type_from_string(string& node_type); +extern const vector NODE_TYPES; + +enum node_t : int32_t { + SIMPLE_NODE = 0, + JORDAN_NODE = 1, + ELMAN_NODE = 2, + UGRNN_NODE = 3, + MGU_NODE = 4, + GRU_NODE = 5, + DELTA_NODE = 6, + LSTM_NODE = 7, + ENARC_NODE = 8, + ENAS_DAG_NODE = 9, + RANDOM_DAG_NODE = 10, + DNAS_NODE = 11, + SIN_NODE = 12, + SUM_NODE = 13, + COS_NODE = 14, + TANH_NODE = 15, + SIGMOID_NODE = 16, + INVERSE_NODE = 17, + MULTIPLY_NODE = 18, +}; + +node_t node_type_from_string(string& node_type); + +extern const unordered_map string_to_node_type; double sigmoid(double value); double sigmoid_derivative(double value); @@ -69,7 +70,7 @@ class RNN_Node_Interface { public: int32_t innovation_number; int32_t layer_type; - int32_t node_type; + node_t node_type; double depth; diff --git a/rnn_examples/CMakeLists.txt b/rnn_examples/CMakeLists.txt index 2bfda532..f5e294c6 100644 --- a/rnn_examples/CMakeLists.txt +++ b/rnn_examples/CMakeLists.txt @@ -16,3 +16,6 @@ target_link_libraries(evaluate_rnns_multi_offset examm_strategy exact_common exa add_executable(rnn_statistics rnn_statistics.cxx) target_link_libraries(rnn_statistics examm_strategy exact_common exact_time_series exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) +add_executable(dnas_info dnas_info.cxx) +target_link_libraries(dnas_info examm_strategy exact_common exact_time_series 
exact_weights examm_nn ${MPI_LIBRARIES} ${MPI_EXTRA} ${MYSQL_LIBRARIES} pthread) + diff --git a/rnn_examples/dnas_info.cxx b/rnn_examples/dnas_info.cxx new file mode 100644 index 00000000..fac60c84 --- /dev/null +++ b/rnn_examples/dnas_info.cxx @@ -0,0 +1,96 @@ +#include +#include +using std::getline; +using std::ifstream; +using std::ofstream; + +#include +using std::minstd_rand0; +using std::uniform_real_distribution; + +#include +using std::string; + +#include +using std::vector; + +#include "common/arguments.hxx" +#include "common/files.hxx" +#include "common/log.hxx" +#include "rnn/generate_nn.hxx" +#include "rnn/gru_node.hxx" +#include "rnn/lstm_node.hxx" +#include "rnn/rnn_edge.hxx" +#include "rnn/rnn_genome.hxx" +#include "rnn/rnn_node.hxx" +#include "rnn/rnn_node_interface.hxx" +#include "time_series/time_series.hxx" +#include "weights/weight_rules.hxx" +#include "weights/weight_update.hxx" + +vector > > training_inputs; +vector > > training_outputs; +vector > > test_inputs; +vector > > test_outputs; + +bool random_sequence_length; +int32_t sequence_length_lower_bound = 30; +int32_t sequence_length_upper_bound = 100; + +RNN_Genome* genome; +RNN* rnn; +WeightUpdate* weight_update_method; +int32_t bp_iterations; +bool using_dropout; +double dropout_probability; + +ofstream* log_file; +string output_directory; + +double objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double error = 0.0; + + for (int32_t i = 0; i < (int32_t) training_inputs.size(); i++) { + error += rnn->prediction_mae(training_inputs[i], training_outputs[i], false, true, 0.0); + } + + return -error; +} + +double test_objective_function(const vector& parameters) { + rnn->set_weights(parameters); + + double total_error = 0.0; + + for (int32_t i = 0; i < (int32_t) test_inputs.size(); i++) { + double error = rnn->prediction_mse(test_inputs[i], test_outputs[i], false, true, 0.0); + total_error += error; + + Log::info("output for series[%d]: %lf\n", i, error); + } 
+ + return -total_error; +} + +int main(int argc, char** argv) { + vector arguments = vector(argv, argv + argc); + + Log::initialize(arguments); + Log::set_id("main"); + + string filename; + get_argument(arguments, "--filename", true, filename); + + RNN_Genome genome(filename); + + for (auto node : genome.get_nodes()) { + if (DNASNode* d = dynamic_cast(node)) { + std::cout << "'" << filename << "': "; + d->print_info(); + } + } + + Log::release_id("main"); +} diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index ffdf8999..c790b112 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -17,6 +17,7 @@ using std::vector; #include "common/arguments.hxx" #include "common/files.hxx" #include "common/log.hxx" +#include "common/process_arguments.hxx" #include "rnn/generate_nn.hxx" #include "rnn/gru_node.hxx" #include "rnn/lstm_node.hxx" @@ -81,25 +82,24 @@ int main(int argc, char** argv) { Log::set_id("main"); TimeSeriesSets* time_series_sets = TimeSeriesSets::generate_from_arguments(arguments); + get_train_validation_data( + arguments, time_series_sets, training_inputs, training_outputs, test_inputs, test_outputs + ); + + int32_t crystallization_threshold = 1000; + get_argument(arguments, "--crystalize_iters", false, crystallization_threshold); + DNASNode::CRYSTALLIZATION_THRESHOLD = crystallization_threshold; - int32_t time_offset = 1; - get_argument(arguments, "--time_offset", true, time_offset); + int32_t k = -1; + get_argument(arguments, "--dnas_k", false, k); + DNASNode::k = k; - time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); - time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); + // time_series_sets->export_training_series(time_offset, training_inputs, training_outputs); + // time_series_sets->export_test_series(time_offset, test_inputs, test_outputs); int number_inputs = time_series_sets->get_number_inputs(); // int number_outputs = 
time_series_sets->get_number_outputs(); - string rnn_type; - get_argument(arguments, "--rnn_type", true, rnn_type); - - int32_t num_hidden_layers; - get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); - - int32_t max_recurrent_depth; - get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); - WeightRules* weight_rules = new WeightRules(arguments); weight_update_method = new WeightUpdate(); @@ -108,74 +108,109 @@ int main(int argc, char** argv) { vector input_parameter_names = time_series_sets->get_input_parameter_names(); vector output_parameter_names = time_series_sets->get_output_parameter_names(); - RNN_Genome* genome; - Log::info("RNN TYPE = %s\n", rnn_type.c_str()); - if (rnn_type == "lstm") { - genome = create_lstm( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "gru") { - genome = create_gru( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "delta") { - genome = create_delta( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "mgu") { - genome = create_mgu( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ugrnn") { - genome = create_ugrnn( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - - } else if (rnn_type == "ff") { - genome = create_ff( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); + string genome_file; + get_argument(arguments, "--genome_file", false, genome_file); + Log::info("RNN_GENOME = <%s> \n", genome_file.c_str()); - } else if (rnn_type == "jordan") { 
- genome = create_jordan( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); + RNN_Genome* genome; - } else if (rnn_type == "elman") { - genome = create_elman( - input_parameter_names, num_hidden_layers, number_inputs, output_parameter_names, max_recurrent_depth, - weight_rules - ); - } else if (rnn_type == "dnas") { - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE}; - genome = create_dnas_nn( - input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, - weight_rules - ); + if (genome_file.size() != 0) { + genome = new RNN_Genome(genome_file); + Log::info("best weights: { "); + for (double& d : genome->get_best_parameters()) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); + + vector params; + genome->get_weights(params); + Log::info("current weights: { "); + for (double& d : params) { + Log::info_no_header("%f, ", d); + } + Log::info("}\n"); } else { - Log::fatal("ERROR: incorrect rnn type\n"); - Log::fatal("Possibilities are:\n"); - Log::fatal(" lstm\n"); - Log::fatal(" gru\n"); - Log::fatal(" ff\n"); - Log::fatal(" jordan\n"); - Log::fatal(" elman\n"); - exit(1); + string rnn_type; + get_argument(arguments, "--rnn_type", true, rnn_type); + + Log::info("RNN TYPE = %s\n", rnn_type.c_str()); + + int32_t num_hidden_layers; + get_argument(arguments, "--num_hidden_layers", true, num_hidden_layers); + + int32_t max_recurrent_depth; + get_argument(arguments, "--max_recurrent_depth", true, max_recurrent_depth); + + int32_t hidden_layer_size = number_inputs; + get_argument(arguments, "--hidden_layer_size", false, hidden_layer_size); + + if (rnn_type == "lstm") { + genome = create_lstm( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "gru") { + genome = create_gru( + input_parameter_names, num_hidden_layers, 
hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "delta") { + genome = create_delta( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "mgu") { + genome = create_mgu( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "ugrnn") { + genome = create_ugrnn( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "ff") { + genome = create_ff( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "jordan") { + genome = create_jordan( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + + } else if (rnn_type == "elman") { + genome = create_elman( + input_parameter_names, num_hidden_layers, hidden_layer_size, output_parameter_names, + max_recurrent_depth, weight_rules + ); + } else if (rnn_type == "dnas") { + vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, DELTA_NODE, UGRNN_NODE}; + genome = create_dnas_nn( + input_parameter_names, num_hidden_layers, 1, output_parameter_names, max_recurrent_depth, node_types, + weight_rules + ); + } else { + Log::fatal("ERROR: incorrect rnn type %s\n", rnn_type.c_str()); + Log::fatal("Possibilities are:\n"); + Log::fatal(" lstm\n"); + Log::fatal(" gru\n"); + Log::fatal(" ff\n"); + Log::fatal(" jordan\n"); + Log::fatal(" elman\n"); + exit(1); + } } get_argument(arguments, "--bp_iterations", true, bp_iterations); - genome->set_bp_iterations(bp_iterations); + genome->set_bp_iterations(bp_iterations + genome->get_bp_iterations()); get_argument(arguments, "--output_directory", true, 
output_directory); if (output_directory != "") { @@ -208,7 +243,7 @@ int main(int argc, char** argv) { using_dropout = false; - genome->initialize_randomly(); + genome->set_weights(genome->get_best_parameters()); double learning_rate = 0.001; get_argument(arguments, "--learning_rate", false, learning_rate); @@ -232,6 +267,8 @@ int main(int argc, char** argv) { genome->get_weights(best_parameters); rnn->set_weights(best_parameters); + genome->write_to_file(output_directory + "/output_genome.bin"); + Log::info("TRAINING ERRORS:\n"); Log::info("MSE: %lf\n", genome->get_mse(best_parameters, training_inputs, training_outputs)); Log::info("MAE: %lf\n", genome->get_mae(best_parameters, training_inputs, training_outputs)); diff --git a/rnn_tests/test_dnas_gradients.cxx b/rnn_tests/test_dnas_gradients.cxx index 98078193..df917349 100644 --- a/rnn_tests/test_dnas_gradients.cxx +++ b/rnn_tests/test_dnas_gradients.cxx @@ -49,7 +49,7 @@ int main(int argc, char** argv) { WeightRules* weight_rules = new WeightRules(); weight_rules->initialize_from_args(arguments); - vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, JORDAN_NODE, ELMAN_NODE, DELTA_NODE}; + vector node_types = {SIMPLE_NODE, LSTM_NODE, GRU_NODE, MGU_NODE, JORDAN_NODE, ELMAN_NODE, DELTA_NODE}; for (int32_t max_recurrent_depth = 1; max_recurrent_depth <= 5; max_recurrent_depth++) { Log::info("testing with max recurrent depth: %d\n", max_recurrent_depth); diff --git a/scripts/dnas/analyze.py b/scripts/dnas/analyze.py new file mode 100644 index 00000000..78d51466 --- /dev/null +++ b/scripts/dnas/analyze.py @@ -0,0 +1,110 @@ +import pandas + +import numpy as np + +import matplotlib.pyplot as plt + +fig, subplts = plt.subplots(6, 1) + +bprange = [8, 16] +plts = {k:v for k, v in zip(bprange, subplts)} +print(plts) +base = plts[bprange[0]] + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + if k == bprange[0]: + continue + v.sharey(base) + v.sharex(base) + +def avg(files, slice_at=-1): + r = {} + for file 
in files: + x = [] + + for fold in range(8): + f = pandas.read_csv(f"{file}/{fold}/fitness_log.csv")[:slice_at] + print(f"{file}/{fold} -> {len(f)}") + x.append(f) + + + enabled_nodes = [] + enabled_edges = [] + enabled_rec_edges = [] + + bpi_columns = [] + mse_columns = [] + + minlen = 100000000 + + for f in x: + bpi_columns.append(f[' Total BP Epochs'].to_numpy()) + mse_columns.append(f[' Best Val. MSE'].to_numpy()) + enabled_nodes.append(f[' Enabled Nodes'].to_numpy()) + enabled_edges.append(f[' Enabled Edges'].to_numpy()) + enabled_rec_edges.append(f[' Enabled Rec. Edges'].to_numpy()) + + minlen = min(minlen, len(bpi_columns[-1])) + + enabled_nodes = list(map(lambda x: x[:minlen], enabled_nodes)) + enabled_edges = list(map(lambda x: x[:minlen], enabled_edges)) + enabled_rec_edges = list(map(lambda x: x[:minlen], enabled_rec_edges)) + bpi_columns = list(map(lambda x: x[:minlen], bpi_columns)) + mse_columns = list(map(lambda x: x[:minlen], mse_columns)) + + nodesmean = np.mean(np.array(enabled_nodes), axis=0) + edgesmean = np.mean(np.array(enabled_edges), axis=0) + redgesmean = np.mean(np.array(enabled_rec_edges), axis=0) + print(f"Nodes at end mean: {nodesmean[-1]}") + print(f"edges at end mean: {edgesmean[-1]}") + print(f"redges at end mean: {redgesmean[-1]}") + + + bpimean = np.mean(np.array(bpi_columns), axis=0) + msemean = np.mean(np.array(mse_columns), axis=0) + msestd = np.std(np.array(mse_columns), axis=0) + + r[file] = { + 'mean_nodes': nodesmean, + 'mean_edges': edgesmean, + 'mean_rec_edges':redgesmean, + 'bpi': bpimean, + 'mean_mse': msemean, + 'std_mse': msestd, + } + return r + +results = {} +for ci in [64]: + results[ci] = {} + for bpe in bprange: + results[ci][bpe] = {} + for k in [1]: + f = f"initial_integration_experiments/results/v7/{ci}/{bpe}/{k}/" + x = avg([f])[f] + results[ci][bpe][k] = x + print(x) + + print(x['mean_mse'] - x['std_mse']) + g = plts[bpe].plot(x['bpi'], x['mean_mse'], label=f"ci={ci}")[0] + plts[bpe].fill_between(x['bpi'], 
x['mean_mse'] - x['std_mse'], x['mean_mse'] + x['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + +control_results = {} +for bp in [8, 16]: + key = f"initial_integration_experiments/results/control_v7/{bp}" + r = avg([key])[key] + control_results[bp] = r + print(list(r.keys())) + g = plts[bp].plot(r['bpi'], r['mean_mse'], label=f"control")[0] + plts[bp].fill_between(r['bpi'], r['mean_mse'] - r['std_mse'], r['mean_mse'] + r['std_mse'], + alpha=0.2, edgecolor=g.get_color(), facecolor=g.get_color(), linewidth=0) + + +for k, v in plts.items(): + v.set_title(f"{k} BPI") + v.legend(fontsize=12, loc="upper right") + + +plt.show() diff --git a/scripts/dnas/analyze.zsh b/scripts/dnas/analyze.zsh new file mode 100644 index 00000000..5c2876f3 --- /dev/null +++ b/scripts/dnas/analyze.zsh @@ -0,0 +1,12 @@ +#!/usr/bin/zsh +# +for crystalize_iters in 64 128 256 512; do + for bp_epoch in 8 16 32 64 128; do + for k in 1; do + for fold in 0 1 2 3 4 5 6 7; do + output_dir=initial_integration_experiments/results/v2/$crystalize_iters/$bp_epoch/$k/$fold + tail -1 $output_dir/fitness_log.csv + done + done + done +done diff --git a/scripts/dnas/aviation.zsh b/scripts/dnas/aviation.zsh new file mode 100644 index 00000000..7059da3e --- /dev/null +++ b/scripts/dnas/aviation.zsh @@ -0,0 +1,37 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch 
\ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + +for output_params in "E1_CHT1" "Pitch"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/scripts/dnas/coal_dnas_control.zsh b/scripts/dnas/coal_dnas_control.zsh new file mode 100644 index 00000000..9543cc09 --- /dev/null +++ b/scripts/dnas/coal_dnas_control.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +let np=8 +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/dnas_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8 16 32 64 128) +nfolds=20 +MAX_GENOMES=4000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/coal_gp.zsh b/scripts/dnas/coal_gp.zsh new file mode 100644 index 00000000..c1318793 --- /dev/null +++ b/scripts/dnas/coal_gp.zsh @@ -0,0 +1,22 @@ +#!/bin/zsh + +let np=8 +#SBATCH --ntasks=8 +#SBATCH --exclude theocho +#SBATCH --time=8-00:00:00 +#SBATCH -A examm +#SBATCH --partition=TIER +#SBATCH -J examm_coal_gp_control +#SBATCH -o /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/gp_control/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +source lib.zsh + +output_dir_prefix=/home/jak5763/exact/results/gp_control +bp_epoch_set=(8) +nfolds=20 +MAX_GENOMES=10000 +ISLAND_SIZE=10 +N_ISLANDS=10 +coal diff --git a/scripts/dnas/control.zsh b/scripts/dnas/control.zsh new file mode 100644 index 00000000..f3532525 --- /dev/null +++ 
b/scripts/dnas/control.zsh @@ -0,0 +1,47 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 14 build/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 \ + --synchronous + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +# bp_ge=(8 8192 16 4096 32 2048) +bp_ge=(8 8192) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in $(seq 0 1); do + run_examm + done +done diff --git a/scripts/dnas/control_cluster.zsh b/scripts/dnas/control_cluster.zsh new file mode 100644 index 00000000..a848302b --- /dev/null +++ b/scripts/dnas/control_cluster.zsh @@ -0,0 +1,50 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 
E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v8/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 8 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k initial_integration_experiments/post_training_dnas.zsh +} + +bp_ge=(8 8192 16 4096 32 2048) + +for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait +done diff --git a/scripts/dnas/debug.zsh b/scripts/dnas/debug.zsh new file mode 100755 index 00000000..ce159c01 --- /dev/null +++ b/scripts/dnas/debug.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG 
WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/debug/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 63 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes 8192 \ + --island_size 32 \ + --number_islands 4 \ + --stochastic \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +for crystalize_iters in 128; do + for bp_epoch in 8; do + for k in 1; do + for fold in 0; do + run_examm + done + # wait + # for fold in 4 5 6 7; do + # run_examm & + # done + # wait + done + done +done diff --git a/scripts/dnas/dnas.zsh b/scripts/dnas/dnas.zsh new file mode 100644 index 00000000..8b525b09 --- /dev/null +++ b/scripts/dnas/dnas.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + 
+offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v8/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + mpirun -np 8 --use-hwthread-cpus Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 4 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +CELL_TYPE='dnas' +bp_ge=(8 8192 16 4096 32 2048) +for crystalize_iters in 256; do + for bp_epoch max_genomes in "${(@kv)bp_ge}"; do + for k in 1; do + for fold in 0 1 2 3; do + run_examm & + done + wait + for fold in 4 5 6 7; do + run_examm & + done + wait + done + done +done diff --git a/scripts/dnas/dnas_cluster.zsh b/scripts/dnas/dnas_cluster.zsh new file mode 100644 index 00000000..55823c0c --- /dev/null +++ b/scripts/dnas/dnas_cluster.zsh @@ -0,0 +1,69 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e 
/home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v16/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 2 \ + --use_dnas_seed true \ + --use_burn_in_bp_epoch \ + --burn_in_period 1024 \ + --dnas_k $k + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 512; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/dnas_control.zsh 
b/scripts/dnas/dnas_control.zsh new file mode 100644 index 00000000..88a7c882 --- /dev/null +++ b/scripts/dnas/dnas_control.zsh @@ -0,0 +1,60 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results_control/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results_control/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/control_v13.1/$bp_epoch/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types lstm mgu gru ugrnn delta simple \ + --stochastic 0 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 32 \ + --number_islands 8 \ + --num_mutations 4 \ + --burn_in_period 1024 \ + --use_burn_in_bp_epoch + + # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k 
./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done +} + +run_group diff --git a/scripts/dnas/dnas_r2_cluster.zsh b/scripts/dnas/dnas_r2_cluster.zsh new file mode 100644 index 00000000..a8bce387 --- /dev/null +++ b/scripts/dnas/dnas_r2_cluster.zsh @@ -0,0 +1,67 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4' + +offset=1 + +run_examm() { + output_dir=initial_integration_experiments/results/v9/$crystalize_iters/$bp_epoch/$k/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types dnas \ + --stochastic 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --validation_sequence_length 100 \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.001 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 16 \ + --number_islands 8 \ + --num_mutations 4 \ + --use_dnas_seed true \ + --dnas_k $k + 
+ # best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + # BP_ITERS=$crystalize_iters CRYSTALIZE_ITERS=$crystalize_iters GENOME=$best_genome_file OUTPUT_DIRECTORY=$output_dir k=$k ./initial_integration_experiments/post_training_dnas.zsh +} + +run_group() { + for crystalize_iters in 1000000; do + for k in 1; do + for fold in $(seq 0 19); do + run_examm + done + done + done +} + +CELL_TYPE='dnas' +# bp_ge=(8 8192 16 4096 32 2048 64 1024) +# for bp_epoch max_genomes in "${(@kv)bp_ge}"; do +run_group +# done diff --git a/scripts/dnas/examm_bias_exp.zsh b/scripts/dnas/examm_bias_exp.zsh new file mode 100644 index 00000000..52816f00 --- /dev/null +++ b/scripts/dnas/examm_bias_exp.zsh @@ -0,0 +1,58 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=18 +#SBATCH --exclude theocho +#SBATCH --time=48:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_bias_ablation +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=64GB + +cd /home/jak5763/exact + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/$synchronous/$scramble_weights/$max_genomes/$fold + mkdir -p $output_dir + srun -n 18 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + 
--log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + --file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --$synchronous \ + --$scramble_weights +} + +run_group() { + for fold in $(seq 0 19); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/examm_synchronous.zsh b/scripts/dnas/examm_synchronous.zsh new file mode 100644 index 00000000..1d970272 --- /dev/null +++ b/scripts/dnas/examm_synchronous.zsh @@ -0,0 +1,55 @@ +#!/bin/zsh + +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=36 +#SBATCH --exclude theocho +#SBATCH --time=23:00:00 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -J examm_dnas_experimental +#SBATCH -o /home/jak5763/exact/results/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/results/slurm_out/%x.%j.err +#SBATCH --mem=0 + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUT_PARAMETERS='E1_CHT1' + +offset=1 + +run_examm() { + output_dir=results/synchronous/$max_genomes/$fold + mkdir -p $output_dir + srun -n 36 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --possible_node_types simple ugrnn gru mgu lstm delta \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --num_hidden_layers $SIZE \ + --hidden_layer_size $SIZE \ + --max_recurrent_depth 10 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level WARNING \ + 
--file_message_level WARNING \ + --crystalize_iters $crystalize_iters \ + --max_genomes $max_genomes \ + --island_size 10 \ + --number_islands 10 \ + --num_mutations 1 \ + --synchronous +} + +run_group() { + for fold in $(seq 0 9); do + run_examm + done +} + +let base_genomes=100000 +let max_genomes=$base_genomes/$bp_epoch +run_group diff --git a/scripts/dnas/experiment.zsh b/scripts/dnas/experiment.zsh new file mode 100755 index 00000000..32a1db55 --- /dev/null +++ b/scripts/dnas/experiment.zsh @@ -0,0 +1,34 @@ +#!/bin/zsh +#SBATCH -n 1 +#SBATCH -A examm +#SBATCH --partition=tier3 +#SBATCH -o /home/jak5763/exact/aistats/slurm_out/%x.%j.out +#SBATCH -e /home/jak5763/exact/aistats/slurm_out/%x.%j.err +#SBATCH --mem=10G + +spack load gcc +spack load openmpi +spack load /5aoa7oi +spack load /dd7nzzh + +for i in $(seq 0 19); do + export i=$i + export output_dir=/home/jak5763/exact/aistats/$control/maxt$maxt/crystal$crystal/bp$bp/$i + + if [ "$control" = "control" ]; then + node_types="simple UGRNN MGU GRU delta LSTM" + else + node_types="DNAS" + fi + + echo $node_types $control + + export node_types=$node_types + + # ./run_examm.zsh + + best_genome_file=( $output_dir/rnn_genome_*.bin([-1]) ) + export BP_ITERS=1 + export GENOME=$best_genome_file + ./post_training.zsh +done diff --git a/scripts/dnas/gp_control.zsh b/scripts/dnas/gp_control.zsh new file mode 100644 index 00000000..049e9750 --- /dev/null +++ b/scripts/dnas/gp_control.zsh @@ -0,0 +1,59 @@ +#!/bin/zsh + +offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=test_results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + 
--log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done + done +} + +INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" +training_filenames=(datasets/2018_coal/burner_[0-9].csv) +test_filenames=(datasets/2018_coal/burner_1[0-1].csv) +OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") +run_group + + +INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +OUTPUTS=("E1_CHT1" "Pitch") +training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) +test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) +run_group + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" +OUTPUTS=("Cm_avg" "P_avg") +training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) +test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) +run_group diff --git a/scripts/dnas/lib.zsh b/scripts/dnas/lib.zsh new file mode 100644 index 00000000..49ebc581 --- /dev/null +++ b/scripts/dnas/lib.zsh @@ -0,0 +1,65 @@ +#!/bin/zsh + 
+offset=1 +MAX_GENOMES=10 +N_ISLANDS=4 +ISLAND_SIZE=32 + +run_examm() { + output_dir=$output_dir_prefix/bp_$bp_epoch/output_$output_params/$fold + mkdir -p $output_dir + echo srun -n $np Release/mpi/examm_mpi \ + --training_filenames ${=training_filenames} \ + --test_filenames ${=test_filenames} \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names $output_params \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes $MAX_GENOMES \ + --island_size $ISLAND_SIZE \ + --number_islands $N_ISLANDS + + touch $output_dir/completed +} + +run_group() { + for output_params in $OUTPUTS; do + for bp_epoch in $bp_epoch_set; do + for fold in $(seq 1 $nfolds); do + run_examm + done + done + done +} + +coal() { + INPUT_PARAMETERS="Conditioner_Inlet_Temp Conditioner_Outlet_Temp Coal_Feeder_Rate Primary_Air_Flow Primary_Air_Split System_Secondary_Air_Flow_Total Secondary_Air_Flow Secondary_Air_Split Tertiary_Air_Split Total_Comb_Air_Flow Supp_Fuel_Flow Main_Flm_Int" + training_filenames=(datasets/2018_coal/burner_[0-9].csv) + test_filenames=(datasets/2018_coal/burner_1[0-1].csv) + OUTPUTS=("Main_Flm_Int" "Supp_Fuel_Flow") + run_group +} + +aviation() { + INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' + OUTPUTS=("E1_CHT1" "Pitch") + training_filenames=(datasets/2019_ngafid_transfer/c172_file_[1-9].csv) + test_filenames=(datasets/2019_ngafid_transfer/c172_file_1[0-2].csv) + run_group +} + +wind() { + INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg 
Nu_avg Rbt_avg" + OUTPUTS=("Cm_avg" "P_avg") + training_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv) + test_filenames=(datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv) + run_group +} + diff --git a/scripts/dnas/mk_jobs.zsh b/scripts/dnas/mk_jobs.zsh new file mode 100644 index 00000000..6adfff8f --- /dev/null +++ b/scripts/dnas/mk_jobs.zsh @@ -0,0 +1,8 @@ +bp=(1 2 3 4 5 6 7 8 9 10 15 20 25 30 35 40 45 50 100) +for bp_epoch in $bp; do + for synchronous in "async" "synchronous"; do + for scramble_weights in "epigenetic_weights" "no_epigenetic_weights"; do + bp_epoch=$bp_epoch synchronous="$synchronous" scramble_weights="$scramble_weights" sbatch examm_bias_exp.zsh + done + done +done diff --git a/scripts/dnas/populate_queue.zsh b/scripts/dnas/populate_queue.zsh new file mode 100755 index 00000000..43a09dbb --- /dev/null +++ b/scripts/dnas/populate_queue.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +export INPUT_PARAMETERS='AltAGL AltB AltGPS AltMSL BaroA E1_CHT1 E1_CHT2 E1_CHT3 E1_CHT4 E1_EGT1 E1_EGT2 E1_EGT3 E1_EGT4 E1_FFlow E1_OilP E1_OilT E1_RPM FQtyL FQtyR GndSpd IAS LatAc NormAc OAT Pitch Roll TAS VSpd VSpdG WndDr WndSpd' +export OUTPUT_PARAMETERS='E1_EGT1' + +export offset=1 +export k=1 + +push_job() { + export maxt=$maxt + export crystal=$crystal + export bp=$bp + export control=$control + sbatch -J $control.maxt$maxt.cr$crystal.bp$bp ./experiment.zsh + +} + +export control="exp" +for maxt in 1.66 1.33 1.0; do + for crystal in 64 128 256; do + for bp in 4 8 16; do + push_job + done + done +done + +export control="control" +for bp in 4 8 16; do + push_job +done diff --git a/scripts/dnas/post_training.zsh b/scripts/dnas/post_training.zsh new file mode 100755 index 00000000..38c2d39d --- /dev/null +++ b/scripts/dnas/post_training.zsh @@ -0,0 +1,28 @@ 
+#!/usr/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $output_dir \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 100 \ + --validation_sequence_length 100 \ + --crystalize_iters $crystal \ + --dnas_k $k + +} + +post_training diff --git a/scripts/dnas/post_training_dnas.zsh b/scripts/dnas/post_training_dnas.zsh new file mode 100755 index 00000000..1c226178 --- /dev/null +++ b/scripts/dnas/post_training_dnas.zsh @@ -0,0 +1,29 @@ +#!/bin/zsh +offset=1 + +post_training() { + + echo "genome = $GENOME" + Release/rnn_examples/train_rnn \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset 1 \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=OUTPUT_PARAMETERS} \ + --bp_iterations $BP_ITERS \ + --stochastic \ + --normalize min_max \ + --genome_file $GENOME \ + --output_directory $OUTPUT_DIRECTORY \ + --log_filename post_training.csv \ + --learning_rate 0.01 \ + --weight_update adagrad \ + --train_sequence_length 1000 \ + --validation_sequence_length 100 \ + --crystalize_iters $CRYSTALIZE_ITERS \ + --dnas_k $k + + tail -1 $OUTPUT_DIRECTORY/post_training.csv +} + +post_training diff --git a/scripts/dnas/posttrain.zsh b/scripts/dnas/posttrain.zsh new file mode 100644 index 00000000..cc54a2eb --- /dev/null +++ b/scripts/dnas/posttrain.zsh @@ -0,0 +1,3 @@ +#!/bin/zsh + + diff --git a/scripts/dnas/run_examm.zsh 
b/scripts/dnas/run_examm.zsh new file mode 100644 index 00000000..77d2893f --- /dev/null +++ b/scripts/dnas/run_examm.zsh @@ -0,0 +1,25 @@ +#!/bin/zsh + +output_dir=results/v0/$bp_epoch/$fold +mkdir -p $output_dir + +mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2019_ngafid_transfer/c172_file_[1-9].csv \ + --test_filenames datasets/2019_ngafid_transfer/c172_file_1[0-2].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 4000 \ + --island_size 32 \ + --number_islands 4 + +touch $output_dir/completed + diff --git a/scripts/dnas/run_experiments.zsh b/scripts/dnas/run_experiments.zsh new file mode 100755 index 00000000..7dd8e956 --- /dev/null +++ b/scripts/dnas/run_experiments.zsh @@ -0,0 +1,4 @@ +#!/bin/zsh + +initial_integration_experiments/control.zsh +initial_integration_experiments/dnas.zsh diff --git a/scripts/dnas/wind.zsh b/scripts/dnas/wind.zsh new file mode 100644 index 00000000..7e68f482 --- /dev/null +++ b/scripts/dnas/wind.zsh @@ -0,0 +1,39 @@ +#!/bin/zsh + +INPUT_PARAMETERS="Ba_avg Rt_avg DCs_avg Cm_avg P_avg S_avg Cosphi_avg Db1t_avg Db2t_avg Dst_avg Gb1t_avg Gb2t_avg Git_avg Gost_avg Ya_avg Yt_avg Ws_avg Wa_avg Ot_avg Nf_avg Nu_avg Rbt_avg" + + +offset=1 + +run_examm() { + output_dir=results/v0/$bp_epoch/$fold + mkdir -p $output_dir + mpirun -np 32 Release/mpi/examm_mpi \ + --training_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_[1-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_1[0-9].csv datasets/2020_wind_engine/turbine_R80711_2017-2020_2[0-4].csv \ + --test_filenames datasets/2020_wind_engine/turbine_R80711_2017-2020_2[5-9].csv 
datasets/2020_wind_engine/turbine_R80711_2017-2020_3[0-1].csv \ + --time_offset $offset \ + --input_parameter_names ${=INPUT_PARAMETERS} \ + --output_parameter_names ${=output_params} \ + --bp_iterations $bp_epoch \ + --normalize min_max \ + --max_recurrent_depth 1 \ + --output_directory $output_dir \ + --log_filename fitness.csv \ + --learning_rate 0.01 \ + --std_message_level INFO \ + --file_message_level INFO \ + --max_genomes 10000 \ + --island_size 32 \ + --number_islands 4 + + touch $output_dir/completed +} + + +for output_params in "Cm_avg" "P_avg"; do + for bp_epoch in 2 4 8 16 32; do + for fold in 0 1 2 3 4 5 6 7 8 9; do + run_examm + done + done +done diff --git a/time_series/time_series.cxx b/time_series/time_series.cxx index 38184b27..1f736a76 100644 --- a/time_series/time_series.cxx +++ b/time_series/time_series.cxx @@ -472,7 +472,7 @@ void TimeSeriesSet::export_time_series( if (time_offset == 0) { for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = 0; j < number_rows; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } @@ -480,7 +480,7 @@ void TimeSeriesSet::export_time_series( // output data, ignore the first N values for (int32_t i = 0; i < (int32_t) requested_fields.size(); i++) { for (int32_t j = time_offset; j < number_rows; j++) { - data[i][j - time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j - time_offset] = time_series.at(requested_fields[i])->get_value(j); } } @@ -492,13 +492,13 @@ void TimeSeriesSet::export_time_series( Log::debug("doing shift for field: '%s'\n", requested_fields[i].c_str()); // shift the shifted fields to the same as the output, not the input for (int32_t j = -time_offset; j < number_rows; j++) { - data[i][j + time_offset] = time_series[requested_fields[i]]->get_value(j); + data[i][j + time_offset] = time_series.at(requested_fields[i])->get_value(j); // Log::info("\tdata[%d][%d]: 
%lf\n", i, j + time_offset, data[i][j + time_offset]); } } else { Log::debug("not doing shift for field: '%s'\n", requested_fields[i].c_str()); for (int32_t j = 0; j < number_rows + time_offset; j++) { - data[i][j] = time_series[requested_fields[i]]->get_value(j); + data[i][j] = time_series.at(requested_fields[i])->get_value(j); } } } @@ -796,8 +796,8 @@ TimeSeriesSets* TimeSeriesSets::generate_from_arguments(const vector& ar get_argument_vector(arguments, "--parameters", true, p); tss->parse_parameters_string(p); - } else if (argument_exists(arguments, "--input_parameter_names") && - argument_exists(arguments, "--output_parameter_names")) { + } else if (argument_exists(arguments, "--input_parameter_names") + && argument_exists(arguments, "--output_parameter_names")) { get_argument_vector(arguments, "--input_parameter_names", true, tss->input_parameter_names); get_argument_vector(arguments, "--output_parameter_names", true, tss->output_parameter_names);