diff --git a/examm/examm.cxx b/examm/examm.cxx index a09f85e7..abd67e35 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -1,5 +1,6 @@ #include using std::sort; +using std::random_shuffle; #include #include @@ -41,6 +42,7 @@ using std::to_string; #include "rnn/rnn_node.hxx" #include "rnn/ugrnn_node.hxx" #include "speciation_strategy.hxx" +#include "weights/weight_update.hxx" EXAMM::~EXAMM() { delete weight_rules; @@ -99,6 +101,11 @@ void EXAMM::generate_log() { (*log_file) << "Inserted Genomes, Total BP Epochs, Time, Best Val. MAE, Best Val. MSE, Enabled Nodes, Enabled " "Edges, Enabled Rec. Edges"; (*log_file) << speciation_strategy->get_strategy_information_headers(); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + (*log_file) << ", learning_rate, best_learning_rate, worst_learning_rate, global_min_learning_rate, global_max_learning_rate"; + (*log_file) << ", epsilon, best_epsilon, worst_epsilon, beta1, best_beta1, worst_beta1, beta2, best_beta2, worst_beta2"; + } (*log_file) << endl; if (generate_op_log) { @@ -152,7 +159,7 @@ void EXAMM::update_op_log_statistics(RNN_Genome* genome, int32_t insert_position } } -void EXAMM::update_log() { +void EXAMM::update_log(double _learning_rate, double _epsilon, double _beta1, double _beta2) { if (log_file != NULL) { // make sure the log file is still good if (!log_file->good()) { @@ -183,17 +190,42 @@ void EXAMM::update_log() { } (*op_log_file) << endl; } + RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { best_genome = speciation_strategy->get_global_best_genome(); } + + RNN_Genome* worst_genome = get_worst_genome(); + if (worst_genome == NULL) { + worst_genome = speciation_strategy->get_worst_genome(); + } + std::chrono::time_point currentClock = std::chrono::system_clock::now(); long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << 
total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() - << speciation_strategy->get_strategy_information_values() << endl; + << speciation_strategy->get_strategy_information_values(); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + (*log_file) << ',' << _learning_rate << "," + << best_genome->get_learning_rate() << "," + << worst_genome->get_learning_rate() << "," + << speciation_strategy->get_min_learning_rate() << "," + << speciation_strategy->get_max_learning_rate() << "," + << _epsilon << "," + << best_genome->get_epsilon() << "," + << worst_genome->get_epsilon() << "," + << _beta1 << "," + << best_genome->get_beta1() << "," + << worst_genome->get_beta1() << "," + << _beta2 << "," + << best_genome->get_beta2() << "," + << worst_genome->get_beta2(); + } + (*log_file) << endl; } } @@ -260,7 +292,16 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { Log::info("save genome complete\n"); update_op_log_statistics(genome, insert_position); - update_log(); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + double learning_rate = genome->get_learning_rate(); + double epsilon = genome->get_epsilon(); + double beta1 = genome->get_beta1(); + double beta2 = genome->get_beta2(); + update_log(learning_rate, epsilon, beta1, beta2); + } else { + update_log(); + } return insert_position >= 0; } @@ -274,7 +315,71 @@ void EXAMM::save_genome(RNN_Genome* genome, string genome_name = "rnn_genome") { genome->write_to_file(output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".bin"); } +vector > EXAMM::get_genome_information(int simplex_count) { + + vector > genome_information; + + Log::debug("AT: Simplex count 
for genome information = %d\n",simplex_count); + + Log::debug("AT: Number of islands = %d\n",speciation_strategy->get_islands_size()); + + int current_island_index = speciation_strategy->get_generation_island(); + + Log::debug("AT: Current Island Number = %d\n", current_island_index); + + Island* current_island = speciation_strategy->get_island_at_index(current_island_index); + + Log::debug("AT: Current Island size = %d\n",current_island->size()); + + vector<int32_t> island_indices(speciation_strategy->get_islands_size()); + Log::debug("AT: Island Indices before shuffling: "); + for(int32_t i=0; i<speciation_strategy->get_islands_size(); i++) { + island_indices[i] = i; + Log::debug(" %d ",island_indices[i]); + } + Log::debug("\n"); + + std::shuffle(island_indices.begin(), island_indices.end(), generator); + + Log::debug("AT: Island Indices after shuffling: "); + for(int32_t i=0; i<speciation_strategy->get_islands_size(); i++) { + Log::debug(" %d ",island_indices[i]); + } + Log::debug("\n"); + + for(int32_t i=0; i<simplex_count; i++) { + Island* random_island = speciation_strategy->get_island_at_index(island_indices[i]); + RNN_Genome *current_genome = random_island->get_best_genome(); + Log::debug("AT: get_learning_rate = %lg\n",current_genome->get_learning_rate()); + Log::debug("AT: get_best_validation_mse = %lg\n",current_genome->get_best_validation_mse()); + Log::debug("AT: get_epsilon = %lg\n",current_genome->get_epsilon()); + Log::debug("AT: get_beta1 = %lg\n",current_genome->get_beta1()); + Log::debug("AT: get_beta2 = %lg\n",current_genome->get_beta2()); + + vector<double> per_genome_information; + per_genome_information.push_back(current_genome->get_learning_rate()); + per_genome_information.push_back(current_genome->get_best_validation_mse()); + per_genome_information.push_back(current_genome->get_epsilon()); + per_genome_information.push_back(current_genome->get_beta1()); + per_genome_information.push_back(current_genome->get_beta2()); + genome_information.push_back(per_genome_information); + } + + return genome_information; + +} + RNN_Genome* 
EXAMM::generate_genome() { + + vector> genome_information; + double tuned_learning_rate; + double tuned_epsilon; + double tuned_beta1; + double tuned_beta2; + WeightUpdate* weight_update_method; + weight_update_method = new WeightUpdate(); + if (speciation_strategy->get_evaluated_genomes() > max_genomes) { RNN_Genome* global_best_genome = speciation_strategy->get_global_best_genome(); save_genome(global_best_genome, "global_best_genome"); @@ -295,6 +400,64 @@ RNN_Genome* EXAMM::generate_genome() { RNN_Genome* genome = speciation_strategy->generate_genome(rng_0_1, generator, mutate_function, crossover_function); genome_property->set_genome_properties(genome); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + if (speciation_strategy->islands_full() != true ) { + + tuned_learning_rate = weight_update_method->generate_initial_learning_rate(); + tuned_epsilon = weight_update_method->generate_initial_epsilon(); + tuned_beta1 = weight_update_method->generate_initial_beta1(); + tuned_beta2 = weight_update_method->generate_initial_beta2(); + + // Convert the vector to a string + stringstream ss_island; + for (int i = 0; i < speciation_strategy->get_islands_size(); i++) { + for (int j = 0; j < speciation_strategy->get_island_at_index(i)->size(); j++) { + RNN_Genome *current_genome = speciation_strategy->get_island_at_index(i)->get_genome_at(j); + if (current_genome->get_learning_rate()>1) { + ss_island << "i:" << i << " , j:" << j << " , lr: " << current_genome->get_learning_rate() << " , bvmse:" << current_genome->get_best_validation_mse() << " ~ "; + } + } + ss_island << endl; + } + string island_informationString = ss_island.str(); + + Log::debug("AT: Island not full = %s\n",island_informationString.c_str()); + + } else { + // Convert the vector to a string + stringstream ss_island; + for (int i = 0; i < speciation_strategy->get_islands_size(); i++) { + for (int j = 0; j < 
speciation_strategy->get_island_at_index(i)->size(); j++) { + RNN_Genome *current_genome = speciation_strategy->get_island_at_index(i)->get_genome_at(j); + if (current_genome->get_learning_rate()>1) { + ss_island << "i:" << i << " , j:" << j << " , lr: " << current_genome->get_learning_rate() << " , bvmse:" << current_genome->get_best_validation_mse() << " ~ "; + } + } + ss_island << endl; + } + string island_informationString = ss_island.str(); + + Log::debug("AT: Island is full before get_genome_information = %s\n",island_informationString.c_str()); + int simplex_count = genome_property->get_simplex_count(); + Log::debug("AT: Simplex Count inside of generate_genome = %d\n",simplex_count); + genome_information = get_genome_information(simplex_count); + tuned_learning_rate = weight_update_method->generate_simplex_learning_rate(genome_information, simplex_count); + tuned_epsilon = weight_update_method->generate_simplex_epsilon(genome_information, simplex_count); + tuned_beta1 = weight_update_method->generate_simplex_beta1(genome_information, simplex_count); + tuned_beta2 = weight_update_method->generate_simplex_beta2(genome_information, simplex_count); + } + + genome->set_learning_rate(tuned_learning_rate); + genome->set_epsilon(tuned_epsilon); + genome->set_beta1(tuned_beta1); + genome->set_beta2(tuned_beta2); + Log::debug("AT: genome Learning Rate after set = %lg\n",genome->get_learning_rate()); + Log::debug("AT: genome epsilon after set = %lg\n",genome->get_epsilon()); + Log::debug("AT: genome beta1 after set = %lg\n",genome->get_beta1()); + Log::debug("AT: genome beta2 after set = %lg\n",genome->get_beta2()); + } + // if (!epigenetic_weights) genome->initialize_randomly(); // this is just a sanity check, can most likely comment out (checking to see diff --git a/examm/examm.hxx b/examm/examm.hxx index 3a7288ca..633c7514 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -89,7 +89,7 @@ class EXAMM { ~EXAMM(); void print(); - void update_log(); + void 
update_log(double _learning_rate = 0.0, double _epsilon = 0.0, double _beta1 = 0.0, double _beta2 = 0.0); void set_possible_node_types(vector possible_node_type_strings); @@ -97,6 +97,8 @@ class EXAMM { int32_t get_random_node_type(); + vector<vector<double>> get_genome_information(int simplex_count); + RNN_Genome* generate_genome(); bool insert_genome(RNN_Genome* genome); diff --git a/examm/island.cxx b/examm/island.cxx index 6d8b0b5f..9cbb807c 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -71,6 +71,32 @@ double Island::get_worst_fitness() { } } +double Island::get_min_learning_rate() { + double min_learning_rate = 1.0; + if (genomes.size() > 0) { + for (int32_t i = 0; i < (int32_t) genomes.size(); i++) { + double genome_learning_rate = genomes[i]->get_learning_rate(); + if (genome_learning_rate < min_learning_rate) { + min_learning_rate = genome_learning_rate; + } + } + } + return min_learning_rate; +} + +double Island::get_max_learning_rate() { + double max_learning_rate = 0.0; + if (genomes.size() > 0) { + for (int32_t i = 0; i < (int32_t) genomes.size(); i++) { + double genome_learning_rate = genomes[i]->get_learning_rate(); + if (genome_learning_rate > max_learning_rate) { + max_learning_rate = genome_learning_rate; + } + } + } + return max_learning_rate; +} + int32_t Island::get_max_size() { return (int32_t) max_size; } @@ -472,3 +498,8 @@ void Island::save_population(string output_path) { genome->write_to_file(output_path + "/island_" + to_string(id) + "_genome_" + to_string(i) + ".bin"); } } + +RNN_Genome* Island::get_genome_at(int32_t _index) { + if (_index < 0 || _index >= (int32_t) genomes.size()) return NULL; + else return genomes[_index]; +} diff --git a/examm/island.hxx b/examm/island.hxx index c75921aa..70d3b27b 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -88,6 +88,18 @@ class Island { */ RNN_Genome* get_worst_genome(); + /** + * Gets the minimum learning rate in the island + * \return the minimum learning rate in the island or 1.0 if no genomes have yet been inserted + 
*/ + double get_min_learning_rate(); + + /** + * Gets the the maximum learning rate in the island + * \return the maximum learning rate in the island or NULL if no genomes have yet been inserted + */ + double get_max_learning_rate(); + /** * Returns the maximum number of genomes the island can hold * @@ -95,6 +107,14 @@ class Island { */ int32_t get_max_size(); + /** + * Returns the genome at a given index in the island. + * + * \param _index is the index of required genome in the island + * \return genome at a given index in the island + */ + RNN_Genome *get_genome_at(int32_t _index); + /** * Returns the size of the island * diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index 9df6bd9c..8b0cd881 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -17,6 +17,7 @@ using std::string; #include "examm.hxx" #include "island_speciation_strategy.hxx" #include "rnn/rnn_genome.hxx" +#include "weights/weight_update.hxx" /** * @@ -126,6 +127,36 @@ RNN_Genome* IslandSpeciationStrategy::get_worst_genome() { } } +double IslandSpeciationStrategy::get_min_learning_rate() { + double min_learning_rate = 1.0; + + for (int32_t i = 0; i < (int32_t) islands.size(); i++) { + if (islands[i]->size() > 0) { + double island_min_learning_rate = islands[i]->get_min_learning_rate(); + if (island_min_learning_rate < min_learning_rate) { + min_learning_rate = island_min_learning_rate; + } + } + } + + return min_learning_rate; +} + +double IslandSpeciationStrategy::get_max_learning_rate() { + double max_learning_rate = 0.0; + + for (int32_t i = 0; i < (int32_t) islands.size(); i++) { + if (islands[i]->size() > 0) { + double island_max_learning_rate = islands[i]->get_max_learning_rate(); + if (island_max_learning_rate > max_learning_rate) { + max_learning_rate = island_max_learning_rate; + } + } + } + + return max_learning_rate; +} + double IslandSpeciationStrategy::get_best_fitness() { RNN_Genome* best_genome = 
get_best_genome(); if (best_genome == NULL) { @@ -363,18 +394,74 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( Log::debug("getting island: %d\n", generation_island); Island* current_island = islands[generation_island]; RNN_Genome* new_genome = NULL; + double tuned_learning_rate; + double tuned_epsilon; + double tuned_beta1; + double tuned_beta2; + + WeightUpdate* weight_update_method; + weight_update_method = new WeightUpdate(); if (current_island->is_initializing()) { // islands could start with full of mutated seed genomes, it can be used with or without transfer learning new_genome = generate_for_initializing_island(rng_0_1, generator, mutate); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + tuned_learning_rate = weight_update_method->generate_initial_learning_rate(); + tuned_epsilon = weight_update_method->generate_initial_epsilon(); + tuned_beta1 = weight_update_method->generate_initial_beta1(); + tuned_beta2 = weight_update_method->generate_initial_beta2(); + new_genome->set_learning_rate(tuned_learning_rate); + new_genome->set_epsilon(tuned_epsilon); + new_genome->set_beta1(tuned_beta1); + new_genome->set_beta2(tuned_beta2); + Log::debug("AT: Speciation strategy genome Learning Rate after set = %lg\n",new_genome->get_learning_rate()); + Log::debug("AT: Speciation strategy genome epsilon after set = %lg\n",new_genome->get_epsilon()); + Log::debug("AT: Speciation strategy genome beta1 after set = %lg\n",new_genome->get_beta1()); + Log::debug("AT: Speciation strategy genome beta2 after set = %lg\n",new_genome->get_beta2()); + } } else if (current_island->is_full()) { + Log::debug("AT: Speciation strategy island full\n"); new_genome = generate_for_filled_island(rng_0_1, generator, mutate, crossover); } else if (current_island->is_repopulating()) { new_genome = generate_for_repopulating_island(rng_0_1, generator, mutate, crossover); + Log::debug("AT: SHO is used = 
%s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + + tuned_learning_rate = weight_update_method->generate_initial_learning_rate(); + tuned_epsilon = weight_update_method->generate_initial_epsilon(); + tuned_beta1 = weight_update_method->generate_initial_beta1(); + tuned_beta2 = weight_update_method->generate_initial_beta2(); + new_genome->set_learning_rate(tuned_learning_rate); + new_genome->set_epsilon(tuned_epsilon); + new_genome->set_beta1(tuned_beta1); + new_genome->set_beta2(tuned_beta2); + Log::debug("AT: Speciation strategy is_repopulating Learning Rate after set = %lg\n",new_genome->get_learning_rate()); + Log::debug("AT: Speciation strategy is_repopulating epsilon after set = %lg\n",new_genome->get_epsilon()); + Log::debug("AT: Speciation strategy is_repopulating beta1 after set = %lg\n",new_genome->get_beta1()); + Log::debug("AT: Speciation strategy is_repopulating beta2 after set = %lg\n",new_genome->get_beta2()); + } } if (new_genome == NULL) { Log::info("Island %d: new genome is still null, regenerating\n", generation_island); new_genome = generate_genome(rng_0_1, generator, mutate, crossover); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + + tuned_learning_rate = weight_update_method->generate_initial_learning_rate(); + tuned_epsilon = weight_update_method->generate_initial_epsilon(); + tuned_beta1 = weight_update_method->generate_initial_beta1(); + tuned_beta2 = weight_update_method->generate_initial_beta2(); + new_genome->set_learning_rate(tuned_learning_rate); + new_genome->set_epsilon(tuned_epsilon); + new_genome->set_beta1(tuned_beta1); + new_genome->set_beta2(tuned_beta2); + Log::debug("AT: Speciation strategy null Learning Rate after set = %lg\n",new_genome->get_learning_rate()); + Log::debug("AT: Speciation strategy null epsilon after set = %lg\n",new_genome->get_epsilon()); + Log::debug("AT: Speciation strategy null beta1 after set = 
%lg\n",new_genome->get_beta1()); + Log::debug("AT: Speciation strategy null beta2 after set = %lg\n",new_genome->get_beta2()); + + } } generated_genomes++; new_genome->set_generation_id(generated_genomes); @@ -385,6 +472,11 @@ RNN_Genome* IslandSpeciationStrategy::generate_genome( RNN_Genome* genome_copy = new_genome->copy(); Log::debug("inserting genome copy!\n"); insert_genome(genome_copy); + Log::debug("AT: Speciation strategy genome_copy Learning Rate after insert_genome = %lg\n",genome_copy->get_learning_rate()); + Log::debug("AT: Speciation strategy genome_copy epsilon after insert_genome = %lg\n",new_genome->get_epsilon()); + Log::debug("AT: Speciation strategy genome_copy beta1 after insert_genome = %lg\n",new_genome->get_beta1()); + Log::debug("AT: Speciation strategy genome_copy beta2 after insert_genome = %lg\n",new_genome->get_beta2()); + } generation_island++; if (generation_island >= (int32_t) islands.size()) { @@ -591,3 +683,15 @@ void IslandSpeciationStrategy::save_entire_population(string output_path) { islands[i]->save_population(output_path); } } + +int32_t IslandSpeciationStrategy::get_islands_size() const { + return islands.size(); +} + +Island* IslandSpeciationStrategy::get_island_at_index(int32_t index) const { + return islands[index]; +} + +int32_t IslandSpeciationStrategy::get_generation_island() const { + return generation_island; +} \ No newline at end of file diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index b3888621..550e210c 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -134,6 +134,18 @@ class IslandSpeciationStrategy : public SpeciationStrategy { */ RNN_Genome* get_worst_genome(); + /** + * Gets the the minimum learning rate of all the islands + * \return the minimum learning rate of all islands or NULL if no genomes have yet been inserted + */ + double get_min_learning_rate(); + + /** + * Gets the the maximum learning rate of all the 
islands + * \return the maximum learning rate of all islands or 0.0 if no genomes have yet been inserted + */ + double get_max_learning_rate(); + /** * \return true if all the islands are full */ @@ -233,6 +245,27 @@ class IslandSpeciationStrategy : public SpeciationStrategy { void repopulate(); void save_entire_population(string output_path); + + /** + * Get the number of islands + * + * \return the number of islands + */ + int32_t get_islands_size() const; + + /** + * Get the island at a given index + * \param index index of required island + * \return the island at given index + */ + Island* get_island_at_index(int32_t index) const; + + /** + * Get the current island index + * + * \return the index of current island + */ + int32_t get_generation_island() const; }; #endif diff --git a/examm/neat_speciation_strategy.cxx b/examm/neat_speciation_strategy.cxx index 4fdd3d94..034bdcb3 100644 --- a/examm/neat_speciation_strategy.cxx +++ b/examm/neat_speciation_strategy.cxx @@ -116,6 +116,14 @@ RNN_Genome* NeatSpeciationStrategy::get_worst_genome() { } } +double NeatSpeciationStrategy::get_min_learning_rate() { + return 1.0; +} + +double NeatSpeciationStrategy::get_max_learning_rate() { + return 0.0; +} + double NeatSpeciationStrategy::get_best_fitness() { RNN_Genome* best_genome = get_best_genome(); if (best_genome == NULL) { @@ -562,4 +570,18 @@ void NeatSpeciationStrategy::initialize_population(functioncopy of the genome into this speciation strategy. 
* @@ -148,6 +160,32 @@ class NeatSpeciationStrategy : public SpeciationStrategy { void initialize_population(function& mutate); RNN_Genome* get_seed_genome(); void save_entire_population(string output_path); + + /** + * \return true if all the islands are full + */ + bool islands_full() const; + + /** + * Get the number of islands + * + * \return the number of island + */ + int32_t get_islands_size() const; + + /** + * Get the island at a given index + * \param index index of required island + * \return the island at given index + */ + Island* get_island_at_index(int32_t index) const; + + /** + * Get the current island index + * + * \return the index of current island + */ + int32_t get_generation_island() const; }; #endif diff --git a/examm/speciation_strategy.hxx b/examm/speciation_strategy.hxx index bf8a43d5..93b3944b 100644 --- a/examm/speciation_strategy.hxx +++ b/examm/speciation_strategy.hxx @@ -9,6 +9,8 @@ using std::string; using std::minstd_rand0; using std::uniform_real_distribution; +#include "island.hxx" + class SpeciationStrategy { public: /** @@ -45,6 +47,18 @@ class SpeciationStrategy { */ virtual RNN_Genome* get_worst_genome() = 0; + /** + * Gets the the minimum learning rate of all the islands + * \return the minimum learning rate of all islands or NULL if no genomes have yet been inserted + */ + virtual double get_min_learning_rate() = 0; + + /** + * Gets the the maximum learning rate of all the islands + * \return the maximum learning rate of all islands or NULL if no genomes have yet been inserted + */ + virtual double get_max_learning_rate() = 0; + /** * Inserts a copy of the genome into this speciation strategy. 
* @@ -92,6 +106,32 @@ class SpeciationStrategy { virtual void initialize_population(function& mutate) = 0; virtual RNN_Genome* get_seed_genome() = 0; virtual void save_entire_population(string output_path) = 0; + + /** + * \return true if all the islands are full + */ + virtual bool islands_full() const = 0; + + /** + * Get the number of islands + * + * \return the number of island + */ + virtual int32_t get_islands_size() const = 0; + + /** + * Get the island at a given index + * \param index index of required island + * \return the island at given index + */ + virtual Island* get_island_at_index(int32_t index) const = 0; + + /** + * Get the current island index + * + * \return the index of current island + */ + virtual int32_t get_generation_island() const = 0; }; #endif diff --git a/rnn/genome_property.cxx b/rnn/genome_property.cxx index 6bf061b9..894a2da5 100644 --- a/rnn/genome_property.cxx +++ b/rnn/genome_property.cxx @@ -8,6 +8,7 @@ GenomeProperty::GenomeProperty() { dropout_probability = 0.0; min_recurrent_depth = 1; max_recurrent_depth = 10; + simplex_count = 6; } void GenomeProperty::generate_genome_property_from_arguments(const vector& arguments) { @@ -17,6 +18,9 @@ void GenomeProperty::generate_genome_property_from_arguments(const vector GenomeProperty::get_recurrent_depth_dist() { return uniform_int_distribution(this->min_recurrent_depth, this->max_recurrent_depth); +} + +int32_t GenomeProperty::get_simplex_count() { + return simplex_count; } \ No newline at end of file diff --git a/rnn/genome_property.hxx b/rnn/genome_property.hxx index 7d220ff6..22d143e7 100644 --- a/rnn/genome_property.hxx +++ b/rnn/genome_property.hxx @@ -17,6 +17,7 @@ class GenomeProperty { double dropout_probability; int32_t min_recurrent_depth; int32_t max_recurrent_depth; + int32_t simplex_count; // TimeSeriesSets *time_series_sets; int32_t number_inputs; @@ -36,6 +37,7 @@ class GenomeProperty { void set_genome_properties(RNN_Genome* genome); void 
get_time_series_parameters(TimeSeriesSets* time_series_sets); uniform_int_distribution get_recurrent_depth_dist(); + int32_t get_simplex_count(); }; #endif \ No newline at end of file diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index 80005b27..55038f05 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -67,6 +67,7 @@ using std::map; #include "rnn_node.hxx" #include "time_series/time_series.hxx" #include "ugrnn_node.hxx" +#include "weights/weight_update.hxx" vector dnas_node_types = {SIMPLE_NODE, UGRNN_NODE, MGU_NODE, GRU_NODE, DELTA_NODE, LSTM_NODE}; @@ -174,6 +175,16 @@ RNN_Genome* RNN_Genome::copy() { other->group_id = group_id; other->bp_iterations = bp_iterations; other->generation_id = generation_id; + + // SHO tuned hyperparameters of Genome Copy + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + other->learning_rate = learning_rate; + other->epsilon = epsilon; + other->beta1 = beta1; + other->beta2 = beta2; + } + // other->learning_rate = learning_rate; // other->adapt_learning_rate = adapt_learning_rate; // other->use_reset_weights = use_reset_weights; @@ -548,6 +559,39 @@ void RNN_Genome::set_weights(const vector& parameters) { } } +// Definition of Getters and Setters for SHO tuned hyperparameters +double RNN_Genome::get_learning_rate() { + return learning_rate; +} + +void RNN_Genome::set_learning_rate(double _learning_rate) { + learning_rate = _learning_rate; +} + +double RNN_Genome::get_epsilon() { + return epsilon; +} + +void RNN_Genome::set_epsilon(double _epsilon) { + epsilon = _epsilon; +} + +double RNN_Genome::get_beta1() { + return beta1; +} + +void RNN_Genome::set_beta1(double _beta1) { + beta1 = _beta1; +} + +double RNN_Genome::get_beta2() { + return beta2; +} + +void RNN_Genome::set_beta2(double _beta2) { + beta2 = _beta2; +} + int32_t RNN_Genome::get_number_inputs() { int32_t number_inputs = 0; @@ -1052,7 +1096,17 @@ void RNN_Genome::backpropagate( (*output_log) << 
iteration << " " << mse << " " << validation_mse << " " << best_validation_mse << endl; } weight_update_method->norm_gradients(analytic_gradient, norm); - weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + // Adding SHO tuned hyperparameters to the backpropagate weight update process. + Log::debug("AT: backpropagate LR = %lg\n",learning_rate); + Log::debug("AT: backpropagate epsilon = %lg\n",epsilon); + Log::debug("AT: backpropagate beta1 = %lg\n",beta1); + Log::debug("AT: backpropagate beta2 = %lg\n",beta2); + weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration, learning_rate, epsilon, beta1, beta2); + } else { + weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration); + } Log::info( "iteration %10d, mse: %10lf, v_mse: %10lf, bv_mse: %10lf, norm: %lf", iteration, mse, validation_mse, best_validation_mse, norm @@ -1151,7 +1205,17 @@ void RNN_Genome::backpropagate_stochastic( avg_norm += norm; weight_update_method->norm_gradients(analytic_gradient, norm); - weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + // Adding SHO tuned hyperparameters to the stochastic backpropagate weight update process. 
+ Log::debug("AT: backpropagate_stochastic LR = %lg\n",learning_rate); + Log::debug("AT: backpropagate_stochastic epsilon = %lg\n",epsilon); + Log::debug("AT: backpropagate_stochastic beta1 = %lg\n",beta1); + Log::debug("AT: backpropagate_stochastic beta2 = %lg\n",beta2); + weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration, learning_rate, epsilon, beta1, beta2); + } else { + weight_update_method->update_weights(parameters, velocity, prev_velocity, analytic_gradient, iteration); + } } this->set_weights(parameters); double training_mse = get_mse(parameters, inputs, outputs); @@ -3350,6 +3414,15 @@ void RNN_Genome::read_from_stream(istream& bin_istream) { bin_istream.read((char*) &group_id, sizeof(int32_t)); bin_istream.read((char*) &bp_iterations, sizeof(int32_t)); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + // Reading the SHO tuned hyperparameters from input stream + bin_istream.read((char*) &learning_rate, sizeof(double)); + bin_istream.read((char*) &epsilon, sizeof(double)); + bin_istream.read((char*) &beta1, sizeof(double)); + bin_istream.read((char*) &beta2, sizeof(double)); + } + bin_istream.read((char*) &use_dropout, sizeof(bool)); bin_istream.read((char*) &dropout_probability, sizeof(double)); @@ -3551,6 +3624,15 @@ void RNN_Genome::write_to_stream(ostream& bin_ostream) { bin_ostream.write((char*) &group_id, sizeof(int32_t)); bin_ostream.write((char*) &bp_iterations, sizeof(int32_t)); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + // Writing the SHO tuned hyperparameters to output stream + bin_ostream.write((char*) &learning_rate, sizeof(double)); + bin_ostream.write((char*) &epsilon, sizeof(double)); + bin_ostream.write((char*) &beta1, sizeof(double)); + bin_ostream.write((char*) &beta2, sizeof(double)); + } + bin_ostream.write((char*) &use_dropout, sizeof(bool)); 
bin_ostream.write((char*) &dropout_probability, sizeof(double)); diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index 81fad3eb..ee38b1a0 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -58,6 +58,13 @@ class RNN_Genome { double best_validation_mse; double best_validation_mae; + + // Parameters to be tuned with SHO (Learning Rate and Weight Update) + double learning_rate; + double epsilon; + double beta1; + double beta2; + vector best_parameters; minstd_rand0 generator; @@ -158,6 +165,19 @@ class RNN_Genome { void get_weights(vector& parameters); void set_weights(const vector& parameters); + // Declaration of Getters and Setters for SHO tuned hyperparameters + double get_learning_rate(); + void set_learning_rate(double _learning_rate); + + double get_epsilon(); + void set_epsilon(double _epsilon); + + double get_beta1(); + void set_beta1(double _beta1); + + double get_beta2(); + void set_beta2(double _beta2); + int32_t get_number_weights(); int32_t get_number_inputs(); int32_t get_number_outputs(); diff --git a/weights/weight_update.cxx b/weights/weight_update.cxx index 9ec48132..cca95d1b 100644 --- a/weights/weight_update.cxx +++ b/weights/weight_update.cxx @@ -4,9 +4,10 @@ #include "common/arguments.hxx" #include "common/log.hxx" +#define EXAMM_MAX_DOUBLE 10000000 WeightUpdate::WeightUpdate() { - // By default use RMSProp weight update + // By default use ADAM weight update momentum = 0.9; weight_update_method = ADAM; epsilon = 1e-8; @@ -19,13 +20,66 @@ WeightUpdate::WeightUpdate() { low_threshold = 0.05; use_high_norm = true; use_low_norm = true; + + int32_t seed = std::chrono::system_clock::now().time_since_epoch().count(); + generator = minstd_rand0(seed); + rng_0_1 = uniform_real_distribution(0.0, 1.0); + + // Set the ranges of the SHO tuned hyperparameters + learning_rate_min = 0.00001; + learning_rate_max = 0.3; + + epsilon_min = 1e-9; + epsilon_max = 1e-7; + + beta1_min = 0.88; + beta1_max = 0.93; + + beta2_min = 0.95; + beta2_max = 0.999; 
+ + // Set the endpoints of the distribution from which we have to set the initial SHO hyperparameters + rng_ilr_min = uniform_real_distribution(0.001, 0.01); + rng_ilr_max = uniform_real_distribution(0.011, 0.05); + + rng_ieps_min = uniform_real_distribution(1e-9, 1e-8); + rng_ieps_max = uniform_real_distribution(1e-8, 1e-7); + + rng_ib1_min = uniform_real_distribution(0.88, 0.905); + rng_ib1_max = uniform_real_distribution(0.906, 0.93); + + rng_ib2_min = uniform_real_distribution(0.95, 0.9745); + rng_ib2_max = uniform_real_distribution(0.9746, 0.999); + + // Set the values of the initial SHO hyperparameters from the above set distributions + initial_learning_rate_min = rng_ilr_min(generator); + initial_learning_rate_max = rng_ilr_max(generator); + + initial_epsilon_min = rng_ieps_min(generator); + initial_epsilon_max = rng_ieps_max(generator); + + initial_beta1_min = rng_ib1_min(generator); + initial_beta1_max = rng_ib1_max(generator); + + initial_beta2_min = rng_ib2_min(generator); + initial_beta2_max = rng_ib2_max(generator); } +bool WeightUpdate::use_SHO = false; + WeightUpdate::WeightUpdate(const vector& arguments) : WeightUpdate() { generate_from_arguments(arguments); } void WeightUpdate::generate_from_arguments(const vector& arguments) { + Log::info("In weight_update.cxx, checking if Simplex method (SHO) is to be used\n"); + if (argument_exists(arguments, "--use_SHO")) { + WeightUpdate::use_SHO = true; + Log::debug("AT: SHO is used for this execution"); + } else { + Log::debug("AT: SHO is not used for this execution"); + } + Log::info("Getting infomation on weight update methods for backprop\n"); if (argument_exists(arguments, "--weight_update")) { string weight_update_method_string; @@ -75,22 +129,22 @@ void WeightUpdate::generate_from_arguments(const vector& arguments) { void WeightUpdate::update_weights( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate, double _epsilon, 
double _beta1, double _beta2 ) { if (weight_update_method == VANILLA) { - vanilla_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + vanilla_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else if (weight_update_method == MOMENTUM) { - momentum_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + momentum_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else if (weight_update_method == NESTEROV) { - nesterov_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + nesterov_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else if (weight_update_method == ADAGRAD) { - adagrad_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + adagrad_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else if (weight_update_method == RMSPROP) { - rmsprop_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + rmsprop_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else if (weight_update_method == ADAM) { - adam_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + adam_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate, _epsilon, _beta1, _beta2); } else if (weight_update_method == ADAM_BIAS) { - adam_bias_weight_update(parameters, velocity, prev_velocity, gradient, epoch); + adam_bias_weight_update(parameters, velocity, prev_velocity, gradient, epoch, _learning_rate); } else { Log::fatal( "Unrecognized weight update method's enom number: %d, this should never happen!\n", weight_update_method @@ -101,22 +155,32 @@ void WeightUpdate::update_weights( void WeightUpdate::vanilla_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::trace("Doing weight update with 
method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { - parameters[i] -= learning_rate * gradient[i]; + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + parameters[i] -= _learning_rate * gradient[i]; + } else { + parameters[i] -= learning_rate * gradient[i]; + } gradient_clip(parameters[i]); } } void WeightUpdate::momentum_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::trace("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { - velocity[i] = momentum * velocity[i] - learning_rate * gradient[i]; + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + velocity[i] = momentum * velocity[i] - _learning_rate * gradient[i]; + } else { + velocity[i] = momentum * velocity[i] - learning_rate * gradient[i]; + } parameters[i] += velocity[i]; gradient_clip(parameters[i]); } @@ -124,12 +188,17 @@ void WeightUpdate::momentum_weight_update( void WeightUpdate::nesterov_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::info("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { prev_velocity[i] = velocity[i]; - velocity[i] = momentum * velocity[i] - learning_rate * gradient[i]; + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + velocity[i] = momentum * velocity[i] - _learning_rate * gradient[i]; + } else { + velocity[i] = momentum * velocity[i] - learning_rate * gradient[i]; + } parameters[i] += -momentum 
* prev_velocity[i] + (1 + momentum) * velocity[i]; gradient_clip(parameters[i]); } @@ -137,47 +206,64 @@ void WeightUpdate::nesterov_weight_update( void WeightUpdate::adagrad_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::trace("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { // here the velocity is the "cache" in Adagrad velocity[i] += gradient[i] * gradient[i]; - parameters[i] += -learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + parameters[i] += -_learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + } else { + parameters[i] += -learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + } gradient_clip(parameters[i]); } } void WeightUpdate::rmsprop_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::trace("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { // here the velocity is the "cache" in RMSProp velocity[i] = decay_rate * velocity[i] + (1 - decay_rate) * gradient[i] * gradient[i]; - parameters[i] += -learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + parameters[i] += -_learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + } else { + parameters[i] += -learning_rate * gradient[i] / (sqrt(velocity[i]) + epsilon); + } gradient_clip(parameters[i]); } } void WeightUpdate::adam_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - 
int32_t epoch + int32_t epoch, double _learning_rate, double _epsilon, double _beta1, double _beta2 ) { Log::trace("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { // here the velocity is the "v" in adam, the prev_velocity is "m" in adam - prev_velocity[i] = beta1 * prev_velocity[i] + (1 - beta1) * gradient[i]; - velocity[i] = beta2 * velocity[i] + (1 - beta2) * (gradient[i] * gradient[i]); - parameters[i] += -learning_rate * prev_velocity[i] / (sqrt(velocity[i]) + epsilon); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if (WeightUpdate::use_SHO) { + prev_velocity[i] = _beta1 * prev_velocity[i] + (1 - _beta1) * gradient[i]; + velocity[i] = _beta2 * velocity[i] + (1 - _beta2) * (gradient[i] * gradient[i]); + parameters[i] += -_learning_rate * prev_velocity[i] / (sqrt(velocity[i]) + _epsilon); + } else { + prev_velocity[i] = beta1 * prev_velocity[i] + (1 - beta1) * gradient[i]; + velocity[i] = beta2 * velocity[i] + (1 - beta2) * (gradient[i] * gradient[i]); + parameters[i] += -learning_rate * prev_velocity[i] / (sqrt(velocity[i]) + epsilon); + } gradient_clip(parameters[i]); } } void WeightUpdate::adam_bias_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ) { Log::trace("Doing weight update with method: %s \n", WEIGHT_UPDATE_METHOD_STRING[weight_update_method].c_str()); for (int32_t i = 0; i < (int32_t) parameters.size(); i++) { @@ -186,7 +272,12 @@ void WeightUpdate::adam_bias_weight_update( double mt = prev_velocity[i] / (1 - pow(beta1, epoch)); velocity[i] = beta2 * velocity[i] + (1 - beta2) * (gradient[i] * gradient[i]); double vt = velocity[i] / (1 - pow(beta2, epoch)); - parameters[i] += -learning_rate * mt / (sqrt(vt) + epsilon); + Log::debug("AT: SHO is used = %s\n",WeightUpdate::use_SHO?"true":"false"); + if 
(WeightUpdate::use_SHO) { + parameters[i] += -_learning_rate * mt / (sqrt(vt) + epsilon); + } else { + parameters[i] += -learning_rate * mt / (sqrt(vt) + epsilon); + } gradient_clip(parameters[i]); } } @@ -199,10 +290,23 @@ void WeightUpdate::gradient_clip(double& parameter) { } } +// Definition of Getters for SHO tuned hyperparameters double WeightUpdate::get_learning_rate() { return learning_rate; } +double WeightUpdate::get_epsilon() { + return epsilon; +} + +double WeightUpdate::get_beta1() { + return beta1; +} + +double WeightUpdate::get_beta2() { + return beta2; +} + double WeightUpdate::get_low_threshold() { return low_threshold; } @@ -211,10 +315,23 @@ double WeightUpdate::get_high_threshold() { return high_threshold; } +// Definition of Setters for SHO tuned hyperparameters void WeightUpdate::set_learning_rate(double _learning_rate) { learning_rate = _learning_rate; } +void WeightUpdate::set_epsilon(double _epsilon) { + epsilon = _epsilon; +} + +void WeightUpdate::set_beta1(double _beta1) { + beta1 = _beta1; +} + +void WeightUpdate::set_beta2(double _beta2) { + beta2 = _beta2; +} + void WeightUpdate::disable_high_threshold() { use_high_norm = false; } @@ -261,3 +378,287 @@ void WeightUpdate::norm_gradients(vector& analytic_gradient, double norm } } } + + +// Definition of functions for generating SHO tuned hyperparameters + +double WeightUpdate::generate_simplex_learning_rate(vector > genome_information, int simplex_count) { + + stringstream ss; + for (int i = 0; i < genome_information.size(); i++) { + for (int j = 0; j < genome_information[i].size(); j++) { + ss << genome_information[i][j] << " "; + } + ss << endl; + } + string genome_informationString = ss.str(); + + Log::debug("AT: genome_information = %s\n",genome_informationString.c_str()); + + double tuned_learning_rate; + double best_learning_rate; + double avg_learning_rate = 0; + + double best_fitness = EXAMM_MAX_DOUBLE; + Log::debug("AT: Simplex Count for Learning Rate = %d\n",simplex_count); + + 
for(int i=0; i learning_rate_max) tuned_learning_rate = learning_rate_max; + Log::debug("AT: learning rate finally = %lg\n",tuned_learning_rate); + + return tuned_learning_rate; +} + +double WeightUpdate::generate_simplex_epsilon(vector > genome_information, int simplex_count) { + + stringstream ss; + for (int i = 0; i < genome_information.size(); i++) { + for (int j = 0; j < genome_information[i].size(); j++) { + ss << genome_information[i][j] << " "; + } + ss << endl; + } + string genome_informationString = ss.str(); + + Log::debug("AT: genome_information = %s\n",genome_informationString.c_str()); + + double tuned_epsilon; + double best_epsilon; + double avg_epsilon = 0; + + double best_fitness = EXAMM_MAX_DOUBLE; + Log::debug("AT: Simplex Count for Epsilon = %d\n",simplex_count); + + for(int i=0; i epsilon_max) tuned_epsilon = epsilon_max; + Log::debug("AT: Epsilon finally = %lg\n",tuned_epsilon); + + return tuned_epsilon; +} + + +double WeightUpdate::generate_simplex_beta1(vector > genome_information, int simplex_count) { + + stringstream ss; + for (int i = 0; i < genome_information.size(); i++) { + for (int j = 0; j < genome_information[i].size(); j++) { + ss << genome_information[i][j] << " "; + } + ss << endl; + } + string genome_informationString = ss.str(); + + Log::debug("AT: genome_information = %s\n",genome_informationString.c_str()); + + double tuned_beta1; + double best_beta1; + double avg_beta1 = 0; + + double best_fitness = EXAMM_MAX_DOUBLE; + Log::debug("AT: Simplex Count for Beta1 = %d\n",simplex_count); + + for(int i=0; i beta1_max) tuned_beta1 = beta1_max; + Log::debug("AT: beta1 finally = %lg\n",tuned_beta1); + + return tuned_beta1; +} + +double WeightUpdate::generate_simplex_beta2(vector > genome_information, int simplex_count) { + + stringstream ss; + for (int i = 0; i < genome_information.size(); i++) { + for (int j = 0; j < genome_information[i].size(); j++) { + ss << genome_information[i][j] << " "; + } + ss << endl; + } + string 
genome_informationString = ss.str(); + + Log::debug("AT: genome_information = %s\n",genome_informationString.c_str()); + + double tuned_beta2; + double best_beta2; + double avg_beta2 = 0; + + double best_fitness = EXAMM_MAX_DOUBLE; + Log::debug("AT: Simplex Count for Beta2 = %d\n",simplex_count); + + for(int i=0; i beta2_max) tuned_beta2 = beta2_max; + Log::debug("AT: beta2 finally = %lg\n",tuned_beta2); + + return tuned_beta2; +} + +// Definition of functions for generating SHO initial hyperparameters + +double WeightUpdate::generate_initial_learning_rate() { + double tuned_learning_rate; + + tuned_learning_rate = (rng_0_1(generator) * (initial_learning_rate_max - initial_learning_rate_min)) + initial_learning_rate_min; + + Log::debug("AT: Generated Initial learning_rate = %lg\n",tuned_learning_rate); + return tuned_learning_rate; +} + +double WeightUpdate::generate_initial_epsilon() { + double tuned_epsilon; + + tuned_epsilon = (rng_0_1(generator) * (initial_epsilon_max - initial_epsilon_min)) + initial_epsilon_min; + + Log::debug("AT: Generated Initial epsilon = %lg\n",tuned_epsilon); + return tuned_epsilon; +} + +double WeightUpdate::generate_initial_beta1() { + double tuned_beta1; + + tuned_beta1 = (rng_0_1(generator) * (initial_beta1_max - initial_beta1_min)) + initial_beta1_min; + + Log::debug("AT: Generated Initial beta1 = %lg\n",tuned_beta1); + return tuned_beta1; +} + +double WeightUpdate::generate_initial_beta2() { + double tuned_beta2; + + tuned_beta2 = (rng_0_1(generator) * (initial_beta2_max - initial_beta2_min)) + initial_beta2_min; + + Log::debug("AT: Generated Initial beta2 = %lg\n",tuned_beta2); + return tuned_beta2; +} diff --git a/weights/weight_update.hxx b/weights/weight_update.hxx index ca2b4c6e..b68f23df 100644 --- a/weights/weight_update.hxx +++ b/weights/weight_update.hxx @@ -7,6 +7,10 @@ using std::string; #include using std::vector; +#include +using std::minstd_rand0; +using std::uniform_real_distribution; + #include 
"common/arguments.hxx" enum WeightUpdateMethod { VANILLA = 0, MOMENTUM = 1, NESTEROV = 2, ADAGRAD = 3, RMSPROP = 4, ADAM = 5, ADAM_BIAS = 6 }; @@ -44,60 +48,124 @@ class WeightUpdate { double high_threshold; bool use_low_norm; double low_threshold; + + minstd_rand0 generator; + uniform_real_distribution rng_0_1; + + // Declaring variables for keeping track of ranges of SHO tuned hyperparameters + double learning_rate_min; + double learning_rate_max; + + double epsilon_min; + double epsilon_max; + + double beta1_min; + double beta1_max; + + double beta2_min; + double beta2_max; + + // Initial SHO hyperparameter range values to be pulled from uniform real distribution + uniform_real_distribution rng_ilr_min; + uniform_real_distribution rng_ilr_max; + uniform_real_distribution rng_ieps_min; + uniform_real_distribution rng_ieps_max; + uniform_real_distribution rng_ib1_min; + uniform_real_distribution rng_ib1_max; + uniform_real_distribution rng_ib2_min; + uniform_real_distribution rng_ib2_max; + + // Declaring variables for keeping track of ranges of initial SHO hyperparameters + double initial_learning_rate_min; + double initial_learning_rate_max; + + double initial_epsilon_min; + double initial_epsilon_max; + + double initial_beta1_min; + double initial_beta1_max; + + double initial_beta2_min; + double initial_beta2_max; public: + static bool use_SHO; WeightUpdate(); explicit WeightUpdate(const vector& arguments); void generate_from_arguments(const vector& arguments); + // Add the optional SHO hyperparameters to each of the declaration of weight update methods void update_weights( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate=NULL, double _epsilon=NULL, double _beta1=NULL, double _beta2=NULL ); void vanilla_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void 
momentum_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void nesterov_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void adagrad_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void rmsprop_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void adam_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate, double _epsilon, double _beta1, double _beta2 ); void adam_bias_weight_update( vector& parameters, vector& velocity, vector& prev_velocity, vector& gradient, - int32_t epoch + int32_t epoch, double _learning_rate ); void gradient_clip(double& parameter); + // Declaration of Setters for SHO tuned hyperparameters void set_learning_rate(double _learning_rate); + void set_epsilon(double _epsilon); + void set_beta1(double _beta1); + void set_beta2(double _beta2); + void disable_high_threshold(); void enable_high_threshold(double _high_threshold); void disable_low_threshold(); void enable_low_threshold(double _low_threshold); + // Declaration of Getters for SHO tuned hyperparameters double get_learning_rate(); + double get_epsilon(); + double get_beta1(); + double get_beta2(); + double get_low_threshold(); double get_high_threshold(); double get_norm(vector& analytic_gradient); void norm_gradients(vector& analytic_gradient, double norm); + + // Declaration of functions for generating SHO tuned hyperparameters + double generate_simplex_learning_rate(vector > genome_information, int simplex_count); + double generate_simplex_epsilon(vector > genome_information, int 
simplex_count); + double generate_simplex_beta1(vector > genome_information, int simplex_count); + double generate_simplex_beta2(vector > genome_information, int simplex_count); + + // Declaration of functions for generating SHO initial hyperparameters + double generate_initial_learning_rate(); + double generate_initial_epsilon(); + double generate_initial_beta1(); + double generate_initial_beta2(); + }; #endif