diff --git a/CMakeLists.txt b/CMakeLists.txt index 65d1f2a8..f51b9f51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,11 +27,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) #SET (CMAKE_CXX_FLAGS "-std=gnu++17 -Wall -O3 -funroll-loops -msse3 -fno-omit-frame-pointer -D_GLIBCXX_DEBUG") # SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3 -fsanitize=address") -SET (CMAKE_CXX_FLAGS " -Wall -O3 -funroll-loops -msse3") +SET (CMAKE_CXX_FLAGS " -Wall -march=native -O3") #SET (CMAKE_CXX_FLAGS " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG") #SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g") #SET (CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -Os -DNDEBUG") -SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O4 -funroll-loops -DNDEBUG") +SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG") #SET (CMAKE_SHARED_LINKER_FLAGS " -Wall -O1 -funroll-loops -msse3 -g -fsanitize=address -fno-omit-frame-pointer -shared-libasan -DGLIBCXX_DEBUG") diff --git a/common/process_arguments.cxx b/common/process_arguments.cxx index 202ddd42..fe2a49f0 100644 --- a/common/process_arguments.cxx +++ b/common/process_arguments.cxx @@ -1,3 +1,4 @@ +#include #include using std::string; @@ -120,12 +121,15 @@ IslandSpeciationStrategy* generate_island_speciation_strategy_from_arguments( get_argument(arguments, "--seed_stirs", false, seed_stirs); bool start_filled = argument_exists(arguments, "--start_filled"); bool tl_epigenetic_weights = argument_exists(arguments, "--tl_epigenetic_weights"); + unique_ptr annealing_policy = AnnealingPolicy::from_arguments(arguments); + string output_directory = ""; + get_argument(arguments, "--output_directory", false, output_directory); IslandSpeciationStrategy* island_strategy = new IslandSpeciationStrategy( - number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, seed_genome, + number_islands, island_size, mutation_rate, intra_island_co_rate, inter_island_co_rate, output_directory, seed_genome, island_ranking_method, repopulation_method, extinction_event_generation_number, num_mutations, islands_to_exterminate, max_genomes, repeat_extinction, start_filled, transfer_learning, - transfer_learning_version, seed_stirs, tl_epigenetic_weights + transfer_learning_version, tl_epigenetic_weights, annealing_policy ); return island_strategy; diff --git a/examm/CMakeLists.txt b/examm/CMakeLists.txt index d5c532f9..2f5942b7 100644 --- a/examm/CMakeLists.txt +++ b/examm/CMakeLists.txt @@ -1 +1 @@ -add_library(examm_strategy examm.cxx species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx) +add_library(examm_strategy examm.cxx species.cxx island.cxx island_speciation_strategy.cxx species.cxx neat_speciation_strategy.cxx annealing.cxx) diff --git a/examm/annealing.cxx b/examm/annealing.cxx new file mode 100644 index 00000000..cdbb4558 --- /dev/null +++ b/examm/annealing.cxx @@ -0,0 +1,106 @@ +#include "annealing.hxx" + +#include +#include + +#include "common/arguments.hxx" +#include "common/log.hxx" + +unique_ptr AnnealingPolicy::from_arguments(const vector& arguments) { + string type; + get_argument(arguments, "--annealing_policy", false, type); + Log::info("Annealing policy = %s\n", type.c_str()); + if (type == "linear") { + return unique_ptr(new LinearAnnealingPolicy(arguments)); + } else if (type == "inv_exp") { + return unique_ptr(new InvExpAnnealingPolicy(arguments)); + } else if (type == "sin") { + return unique_ptr(new SinAnnealingPolicy(arguments)); + } else { + Log::info("Using default annealing policy\n"); + return make_unique(); + } +} + +double AnnealingPolicy::get_temperature(int32_t genome_number) { + return 0.0; +} + +double AnnealingPolicy::operator()(int32_t genome_number, double population_worst_cost, double candidate_cost) { + double temperature = get_temperature(genome_number); + + if (fpclassify(temperature) == FP_ZERO || temperature < 0) { + return 0.0; + } + + if (population_worst_cost > candidate_cost) + return 1.0; + + population_worst_cost = sqrt(population_worst_cost); + candidate_cost = sqrt(candidate_cost); + + double denom = population_worst_cost + candidate_cost; + double relative_cost = population_worst_cost / denom - candidate_cost / denom; + + // exp((eworst - ecandidate) / T) + double de = exp(-(candidate_cost / (population_worst_cost * temperature))); +} + +LinearAnnealingPolicy::LinearAnnealingPolicy( + double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes +) + : start_value(start_value), end_value(end_value), start_genomes(start_genomes), interp_genomes(interp_genomes) { +} + +LinearAnnealingPolicy::LinearAnnealingPolicy(const vector& arguments) { + get_argument(arguments, "--linear_start_value", true, start_value); + get_argument(arguments, "--linear_end_value", true, end_value); + get_argument(arguments, "--linear_start_genomes", true, start_genomes); + get_argument(arguments, "--linear_interp_genomes", true, interp_genomes); +} + +double LinearAnnealingPolicy::get_temperature(int32_t genome_number) { + if (genome_number <= start_genomes) { + return start_value; + } else if (genome_number <= interp_genomes + start_genomes) { + double weight = (double) (genome_number - (interp_genomes + start_genomes)) / (double) interp_genomes; + return weight * end_value + (1 - weight) * start_value; + } else { + return end_value; + } +} + +InvExpAnnealingPolicy::InvExpAnnealingPolicy(double decay_factor) : decay_factor(decay_factor) { +} +InvExpAnnealingPolicy::InvExpAnnealingPolicy(const vector& arguments) { + get_argument(arguments, "--exp_decay_factor", true, decay_factor); +} + +double InvExpAnnealingPolicy::get_temperature(int32_t genome_number) { + return std::pow(1. + genome_number, -decay_factor); +} + +SinAnnealingPolicy::SinAnnealingPolicy(double period, double min_p, double max_p) + : period(period), min_p(min_p), max_p(max_p) { + if (min_p > max_p) { + std::swap(min_p, max_p); + } + + if (min_p > 1.0 || min_p < 0.0) { + throw "Invalid min_p supplied to SinAnnealingPolicyConstructor"; + } + if (max_p > 1.0 || max_p < 0.0) { + throw "Invalid max_p supplied to SinAnnealingPolicyConstructor"; + } +} +SinAnnealingPolicy::SinAnnealingPolicy(const vector& arguments) { + get_argument(arguments, "--sin_min_p", true, min_p); + get_argument(arguments, "--sin_max_p", true, max_p); + get_argument(arguments, "--sin_period", true, period); +} + +double SinAnnealingPolicy::get_temperature(int32_t genome_number) { + double range = max_p - min_p; + + return (max_p + min_p) / 2. + range / 2. * std::sin(2. * M_PI * genome_number / period); +} diff --git a/examm/annealing.hxx b/examm/annealing.hxx new file mode 100644 index 00000000..5736dcf1 --- /dev/null +++ b/examm/annealing.hxx @@ -0,0 +1,67 @@ +#include + +#include +using std::unique_ptr; + +#include +using std::string; + +#include +using std::vector; + +struct AnnealingPolicy { + static unique_ptr from_arguments(const vector& arguments); + + /** + * Compute the probability to be used during genome insertion. + * This represents the probability of inserting the genome, even if it + * has a fitness value that is worse than the worst member in the population. + */ + virtual double get_temperature(int32_t genome_number); + + double operator()(int32_t genome_number, double population_worst_cost, double candidate_cost); +}; + +/** + * Interpolate between two values for a set number of genomes. + * The `start_value` will be returned for `start_genomes`, + * then a linear interpolation of `start_value` and `end_value` for + * `interp_genomes`. Then, `end_value` is given indefinitely. + */ +class LinearAnnealingPolicy : public AnnealingPolicy { + double start_value, end_value; + int32_t start_genomes, interp_genomes; + + public: + LinearAnnealingPolicy(double start_value, double end_value, int32_t start_genomes, int32_t interp_genomes); + LinearAnnealingPolicy(const vector& arguments); + + double get_temperature(int32_t genome_number) override; +}; + +/** + * Calculates p by simply computing `genome_number^(-decay_factor). + **/ +class InvExpAnnealingPolicy : public AnnealingPolicy { + double decay_factor; + + public: + InvExpAnnealingPolicy(double decay_factor); + InvExpAnnealingPolicy(const vector& arguments); + + double get_temperature(int32_t genome_number) override; +}; + +/** + * Computes `p` as a value falling on a sinusoidal curve with the supplied period. + * a `min_p` and a `max_p` specify the range of the curve. + **/ +class SinAnnealingPolicy : public AnnealingPolicy { + double period, min_p, max_p; + + public: + SinAnnealingPolicy(double period, double min_p, double max_p); + SinAnnealingPolicy(const vector& arguments); + + double get_temperature(int32_t genome_number) override; +}; diff --git a/examm/examm.cxx b/examm/examm.cxx index 0c76c500..100908c2 100644 --- a/examm/examm.cxx +++ b/examm/examm.cxx @@ -97,8 +97,9 @@ void EXAMM::generate_log() { mkpath(output_directory.c_str(), 0777); log_file = new ofstream(output_directory + "/" + "fitness_log.csv"); (*log_file - ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Enabled Nodes,Enabled" - "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Trainable Parameters,Island Id"; + ) << "Inserted Genomes,Total BP Epochs,Time,Best Val. MAE,Best Val. MSE,Trainable Parameters,Enabled " + "Nodes,Enabled" + "Edges,Enabled Rec. Edges,Val. MSE,Pre-Insert MSE,Genome Inserted,Genome Trainable Parameters,Island Id"; (*log_file) << speciation_strategy->get_strategy_information_headers(); (*log_file) << endl; @@ -194,11 +195,12 @@ void EXAMM::update_log(RNN_Genome* genome) { long milliseconds = std::chrono::duration_cast(currentClock - startClock).count(); (*log_file) << speciation_strategy->get_evaluated_genomes() << "," << total_bp_epochs << "," << milliseconds << "," << best_genome->best_validation_mae << "," << best_genome->best_validation_mse << "," - << best_genome->get_enabled_node_count() << "," << best_genome->get_enabled_edge_count() << "," - << best_genome->get_enabled_recurrent_edge_count() << "," << genome->best_validation_mse << "," - << pre_insert_best_mse << "," << (int32_t) (last_genome_inserted ? 1 : 0) << "," - << genome->get_number_weights() << "," << genome->get_generation_id() - << speciation_strategy->get_strategy_information_values(genome) << endl; + << best_genome->get_number_weights() << "," << best_genome->get_enabled_node_count() << "," + << best_genome->get_enabled_edge_count() << "," << best_genome->get_enabled_recurrent_edge_count() + << "," << genome->best_validation_mse << "," << pre_insert_best_mse << "," + << (int32_t) (last_genome_inserted ? 1 : 0) << "," << genome->get_number_weights() << "," + << genome->get_generation_id() << speciation_strategy->get_strategy_information_values(genome) + << endl; Log::info( "mse: %f node count: %d edge count: %d rec edges: %d\n", best_genome->best_validation_mse, best_genome->get_enabled_node_count(), best_genome->get_enabled_edge_count(), @@ -263,16 +265,18 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // write this genome to disk if it was a new best found genome if (save_genome_option.compare("all_best_genomes") == 0) { - Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size()); - for (int i = 0; i < 20 && i < save_genome_option.size(); i++) { - cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl; - } + // Log::info("save genome option compared, save genome option size: %d!\n", save_genome_option.size()); + // for (int i = 0; i < 20 && i < save_genome_option.size(); i++) { + // cout << "save_genome_option[" << i << "]: " << save_genome_option[i] << endl; + // } if (insert_position == 0) { Log::info("saving genome!"); save_genome(genome, "rnn_genome"); Log::info("saved genome!"); } + } else if (save_genome_option.compare("all") == 0) { + save_genome(genome, "rnn_genome"); } Log::info("save genome complete\n"); @@ -288,6 +292,14 @@ bool EXAMM::insert_genome(RNN_Genome* genome) { // write function to save genomes to file void EXAMM::save_genome(RNN_Genome* genome, string genome_name = "rnn_genome") { + if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { + // need to set the weights for non-initial genomes so we + // can generate a proper graphviz file + vector best_parameters = genome->get_best_parameters(); + genome->set_weights(best_parameters); + Log::info("set genome parameters to best\n"); + } + genome->write_graphviz(output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".gv"); ofstream equations_filestream( output_directory + "/" + genome_name + "_" + to_string(genome->get_generation_id()) + ".txt" diff --git a/examm/examm.hxx b/examm/examm.hxx index a1e7cc59..3ec90bd9 100644 --- a/examm/examm.hxx +++ b/examm/examm.hxx @@ -70,7 +70,7 @@ class EXAMM { map inserted_counts; map generated_counts; - string output_directory; + const string output_directory; ofstream* log_file; ofstream* op_log_file; double pre_insert_best_mse = 1000000; @@ -78,8 +78,7 @@ class EXAMM { std::chrono::time_point startClock; - string genome_file_name; - string save_genome_option; + const string save_genome_option; public: EXAMM( diff --git a/examm/island.cxx b/examm/island.cxx index 6d8b0b5f..2cb054c9 100644 --- a/examm/island.cxx +++ b/examm/island.cxx @@ -1,22 +1,12 @@ #include -using std::lower_bound; -using std::sort; +#include using std::upper_bound; #include -using std::setw; - -#include -using std::minstd_rand0; -using std::uniform_real_distribution; - #include using std::string; using std::to_string; -#include -using std::unordered_map; - #include using std::vector; @@ -24,17 +14,25 @@ using std::vector; #include "island.hxx" #include "rnn/rnn_genome.hxx" -Island::Island(int32_t _id, int32_t _max_size) - : id(_id), max_size(_max_size), status(Island::INITIALIZING), erase_again(0), erased(false) { +Island::Island( + int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy +) + : id(id), max_size(max_size), genomes(genomes), annealing_policy(annealing_policy), status(status) { + using namespace std::chrono; + long long t = time_point_cast(system_clock::now()).time_since_epoch().count(); + generator = mt19937_64(t + 1123 * id + 12334 * max_size); + + for (int i = 0; i < 100; i++) { + generate_canonical(generator); + } } -Island::Island(int32_t _id, vector _genomes) - : id(_id), - max_size((int32_t) _genomes.size()), - genomes(_genomes), - status(Island::FILLED), - erase_again(0), - erased(false) { +Island::Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy) + : Island(id, max_size, vector(), Island::INITIALIZING, annealing_policy) { +} + +Island::Island(int32_t id, vector genomes, AnnealingPolicy& annealing_policy) + : Island(id, genomes.size(), genomes, Island::FILLED, annealing_policy) { } RNN_Genome* Island::get_best_genome() { @@ -62,6 +60,14 @@ double Island::get_best_fitness() { } } +double Island::get_best_all_time_fitness() { + if (all_time_local_best) { + return all_time_local_best->get_fitness(); + } else { + return EXAMM_MAX_DOUBLE; + } +} + double Island::get_worst_fitness() { RNN_Genome* worst_genome = get_worst_genome(); if (worst_genome == NULL) { @@ -147,6 +153,25 @@ int32_t Island::insert_genome(RNN_Genome* genome) { double new_fitness = genome->get_fitness(); Log::info("inserting genome with fitness: %s to island %d\n", parse_fitness(genome->get_fitness()).c_str(), id); + // Only do simulated annealing if the island is full + // This will with a probability prescribed by the annealing policy (a function of genome number) randomly accept + // genomes by deleting a random member of the population. + + if (is_full()) { + double p = annealing_policy(genome->get_generation_id(), get_worst_genome()->get_fitness(), genome->get_fitness()); + Log::info("Annealing policy p = %f\n", p); + + if (uniform_real_distribution<>(0.0, 1.0)(generator) < p) { + int32_t index = uniform_real_distribution<>(0., 1.)(generator) * genomes.size(); + + Log::info("Simulated annealing triggered - deleting a random genome %d\n", index); + + RNN_Genome* victim = genomes[index]; + genomes.erase(genomes.begin() + index); + structure_set.erase(victim); + } + } + // discard the genome if the island is full and it's fitness is worse than the worst in thte population if (is_full() && new_fitness > get_worst_fitness()) { Log::debug( @@ -154,116 +179,38 @@ int32_t Island::insert_genome(RNN_Genome* genome) { genomes.back()->get_fitness() ); do_population_check(__LINE__, initial_size); - return false; + return -1; } // check and see if the structural hash of the genome is in the // set of hashes for this population Log::info("getting structural hash\n"); - string structural_hash = genome->get_structural_hash(); - if (structure_map.count(structural_hash) > 0) { - vector& potential_matches = structure_map.find(structural_hash)->second; - Log::debug( - "potential duplicate for hash '%s', had %d potential matches.\n", structural_hash.c_str(), - potential_matches.size() - ); - - for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) { - Log::debug( - "on potential match %d of %d\n", potential_match - potential_matches.begin(), potential_matches.size() - ); - if ((*potential_match)->equals(genome)) { - if ((*potential_match)->get_fitness() > new_fitness) { - Log::debug( - "REPLACING DUPLICATE GENOME, fitness of genome in search: %s, new fitness: %s\n", - parse_fitness((*potential_match)->get_fitness()).c_str(), - parse_fitness(genome->get_fitness()).c_str() - ); - // we have an exact match for this genome in the island and its fitness is worse - // than the genome we're trying to remove, so remove the duplicate it from the genomes - // as well from the potential matches vector - - auto duplicate_genome_iterator = - lower_bound(genomes.begin(), genomes.end(), *potential_match, sort_genomes_by_fitness()); - bool found = false; - for (; duplicate_genome_iterator != genomes.end(); duplicate_genome_iterator++) { - Log::debug( - "duplicate_genome_iterator: %p, (*potential_match): %p\n", (*duplicate_genome_iterator), - (*potential_match) - ); - if ((*duplicate_genome_iterator) == (*potential_match)) { - found = true; - break; - } - } - if (!found) { - Log::fatal( - "ERROR: could not find duplicate genome even though its structural hash was in the island, " - "this should never happen!\n" - ); - exit(1); - } - Log::debug( - "potential_match->get_fitness(): %lf, duplicate_genome_iterator->get_fitness(): %lf, " - "new_fitness: %lf\n", - (*potential_match)->get_fitness(), (*duplicate_genome_iterator)->get_fitness(), new_fitness - ); - int32_t duplicate_genome_index = duplicate_genome_iterator - genomes.begin(); - Log::debug("duplicate_genome_index: %d\n", duplicate_genome_index); - // int32_t test_index = contains(genome); - // Log::info("test_index: %d\n", test_index); - RNN_Genome* duplicate = genomes[duplicate_genome_index]; - // Log::info("duplicate.equals(potential_match)? %d\n", duplicate->equals(*potential_match)); - genomes.erase(genomes.begin() + duplicate_genome_index); - Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size()); - - // erase the potential match from the structure map as well - // returns an iterator to next element after the deleted one so - // we don't need to increment it - potential_match = potential_matches.erase(potential_match); - delete duplicate; - - Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size()); - Log::debug( - "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(), - structure_map[structural_hash].size() - ); - if (potential_matches.size() == 0) { - Log::debug( - "deleting the potential_matches vector for hash '%s' because it was empty.\n", - structural_hash.c_str() - ); - structure_map.erase(structural_hash); - break; // break because this vector is now empty and deleted - } - } else { - Log::info( - "Island %d: island already contains a duplicate genome with a better fitness! not inserting.\n", - id - ); - do_population_check(__LINE__, initial_size); - return -1; - } - } else { - // increment potential match because we didn't delete an entry (or return from the method) - potential_match++; - } + auto duplicate_it = structure_set.find(genome); + + bool duplicate_exists = duplicate_it != structure_set.end(); + if (duplicate_exists) { + RNN_Genome* duplicate = *duplicate_it; + // TODO: Add annealment here + if (duplicate->get_fitness() > genome->get_fitness()) { + genomes.erase(std::find(genomes.begin(), genomes.end(), duplicate)); } } // inorder insert the new individual RNN_Genome* copy = genome->copy(); + copy->set_generation_id(genome->get_generation_id()); + vector best = copy->get_best_parameters(); if (best.size() != 0) { copy->set_weights(best); } - copy->set_generation_id(genome->get_generation_id()); - Log::debug("created copy to insert to island: %d\n", copy->get_group_id()); + auto index_iterator = upper_bound(genomes.begin(), genomes.end(), copy, sort_genomes_by_fitness()); int32_t insert_index = index_iterator - genomes.begin(); - Log::debug("inserting genome at index: %d\n", insert_index); + Log::info("inserting genome at index: %d\n", insert_index); if (insert_index >= max_size) { + // For simulated annealing: if this is true, then we should remove a random member of the population to insert. // if we're going to insert this at the back of the population // its just going to get removed anyways, so we can delete // it and report it was not inserted. @@ -274,24 +221,14 @@ int32_t Island::insert_genome(RNN_Genome* genome) { } genomes.insert(index_iterator, copy); - // calculate the index the genome was inseretd at from the iterator - - structural_hash = copy->get_structural_hash(); - // add the genome to the vector for this structural hash - structure_map[structural_hash].push_back(copy); - Log::debug("adding to structure_map[%s] : %p\n", structural_hash.c_str(), ©); + structure_set.insert(copy); if (insert_index == 0) { // this was a new best genome for this island - Log::info("Island %d: new best fitness found!\n", id); - if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { - // need to set the weights for non-initial genomes so we - // can generate a proper graphviz file - vector best_parameters = genome->get_best_parameters(); - genome->set_weights(best_parameters); - Log::info("set genome parameters to best\n"); + if (!all_time_local_best || all_time_local_best->get_fitness() > genome->get_fitness()) { + all_time_local_best = unique_ptr(genome->copy()); } } @@ -309,51 +246,7 @@ int32_t Island::insert_genome(RNN_Genome* genome) { Log::debug("deleting worst genome\n"); RNN_Genome* worst = genomes.back(); genomes.pop_back(); - structural_hash = worst->get_structural_hash(); - - vector& potential_matches = structure_map.find(structural_hash)->second; - - bool found = false; - for (auto potential_match = potential_matches.begin(); potential_match != potential_matches.end();) { - // make sure the addresses of the pointers are the same - Log::debug( - "checking to remove worst from structure_map - &worst: %p, &(*potential_match): %p\n", worst, - (*potential_match) - ); - if ((*potential_match) == worst) { - found = true; - Log::debug("potential_matches.size() before erase: %d\n", potential_matches.size()); - - // erase the potential match from the structure map as well - potential_match = potential_matches.erase(potential_match); - - Log::debug("potential_matches.size() after erase: %d\n", potential_matches.size()); - Log::debug( - "structure_map[%s].size() after erase: %d\n", structural_hash.c_str(), - structure_map[structural_hash].size() - ); - - // clean up the structure_map if no genomes in the population have this hash - if (potential_matches.size() == 0) { - Log::debug( - "deleting the potential_matches vector for hash '%s' because it was empty.\n", - structural_hash.c_str() - ); - structure_map.erase(structural_hash); - break; - } - } else { - potential_match++; - } - } - - if (!found) { - Log::debug( - "could not erase from structure_map[%s], genome not found! This should never happen.\n", - structural_hash.c_str() - ); - exit(1); - } + structure_set.erase(worst); delete worst; } @@ -382,24 +275,19 @@ void Island::print(string indent) { } void Island::erase_island() { - erased_generation_id = latest_generation_id; + structure_set.clear(); + for (int32_t i = 0; i < (int32_t) genomes.size(); i++) { delete genomes[i]; } + genomes.clear(); + erased = true; erase_again = 5; - Log::debug("Worst island size after erased: %d\n", genomes.size()); - - if (genomes.size() != 0) { - Log::error("The worst island is not fully erased!\n"); - } -} + erased_generation_id = latest_generation_id; -void Island::erase_structure_map() { - Log::debug("Erasing the structure map in the worst performing island\n"); - structure_map.clear(); - Log::debug("after erase structure map size is %d\n", structure_map.size()); + Log::debug("Worst island size after erased: %d\n", genomes.size()); } int32_t Island::get_erased_generation_id() { diff --git a/examm/island.hxx b/examm/island.hxx index c75921aa..707bf9db 100644 --- a/examm/island.hxx +++ b/examm/island.hxx @@ -2,22 +2,23 @@ #define EXAMM_ISLAND_STRATEGY_HXX #include -using std::sort; -using std::upper_bound; - #include +#include using std::function; #include using std::minstd_rand0; +using std::mt19937_64; using std::uniform_real_distribution; #include using std::string; #include -using std::unordered_map; +#include +using std::unordered_set; +#include "annealing.hxx" #include "rnn/rnn_genome.hxx" class Island { @@ -35,30 +36,49 @@ class Island { */ vector genomes; - unordered_map> structure_map; + /** + * If we are using simulated annealing, then the genomes vector may not contain the best genome we have discovered. + * Keep an additional clone of the best genome here for logging. + **/ + unique_ptr all_time_local_best; + + /** + * A set of the genomes this island contains (one entry per genome in Island::genomes. + * These are hashed by their structure: the nodes, edges, and their innovation numbers. Weights are not considered. + **/ + unordered_set structure_set; + + mt19937_64 generator; + + AnnealingPolicy& annealing_policy; + int32_t status; /**> The status of this island (either Island:INITIALIZING, Island::FILLED or Island::REPOPULATING */ - int32_t erase_again; /**< a flag to track if this islands has been erased */ - bool erased; /**< a flag to track if this islands has been erased */ + int32_t erase_again = 0; /**< a flag to track if this islands has been erased */ + bool erased = false; /**< a flag to track if this islands has been erased */ public: const static int32_t INITIALIZING = 0; /**< status flag for if the island is initializing. */ const static int32_t FILLED = 1; /**< status flag for if the island is filled. */ const static int32_t REPOPULATING = 2; /**< status flag for if the island is repopulating. */ + Island( + int32_t id, int32_t max_size, vector genomes, int32_t status, AnnealingPolicy& annealing_policy + ); + /** * Initializes an island with a given max size. * * \param max_size is the maximum number of genomes in the island. */ - Island(int32_t id, int32_t max_size); + Island(int32_t id, int32_t max_size, AnnealingPolicy& annealing_policy); /** * Initializes an island filled the supplied genomes. The size of the island will be the size * of the supplied genome vector. The island status is set to filled. */ - Island(int32_t id, vector genomes); + Island(int32_t id, vector genomes, AnnealingPolicy& annealing_policy); /** * Returns the fitness of the best genome in the island @@ -67,6 +87,11 @@ class Island { */ double get_best_fitness(); + /** + * Returns the best fitness ever obtains by any genome in this island - even if that genome has been removed. + **/ + double get_best_all_time_fitness(); + /** * Returns the fitness of the worst genome in the island * @@ -172,8 +197,6 @@ class Island { */ void erase_island(); - void erase_structure_map(); - /** * returns the get_erased_generation_id. */ diff --git a/examm/island_speciation_strategy.cxx b/examm/island_speciation_strategy.cxx index a2463b2d..e05b95d1 100644 --- a/examm/island_speciation_strategy.cxx +++ b/examm/island_speciation_strategy.cxx @@ -1,9 +1,12 @@ #include +#include using std::function; #include // #include +#include +using std::stringstream; #include @@ -23,10 +26,11 @@ using std::string; */ IslandSpeciationStrategy::IslandSpeciationStrategy( int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate, double _intra_island_crossover_rate, - double _inter_island_crossover_rate, RNN_Genome* _seed_genome, string _island_ranking_method, + double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, bool _start_filled, - bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs, bool _tl_epigenetic_weights + bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights, + unique_ptr& annealing_policy ) : generation_island(0), number_of_islands(_number_of_islands), @@ -34,6 +38,7 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( mutation_rate(_mutation_rate), intra_island_crossover_rate(_intra_island_crossover_rate), inter_island_crossover_rate(_inter_island_crossover_rate), + output_directory(output_directory), generated_genomes(0), evaluated_genomes(0), seed_genome(_seed_genome), @@ -47,8 +52,8 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( start_filled(_start_filled), transfer_learning(_transfer_learning), transfer_learning_version(_transfer_learning_version), - seed_stirs(_seed_stirs), - tl_epigenetic_weights(_tl_epigenetic_weights) { + tl_epigenetic_weights(_tl_epigenetic_weights), + annealing_policy(std::move(annealing_policy)) { double rate_sum = mutation_rate + intra_island_crossover_rate + inter_island_crossover_rate; if (rate_sum != 1.0) { mutation_rate = mutation_rate / rate_sum; @@ -78,15 +83,14 @@ IslandSpeciationStrategy::IslandSpeciationStrategy( if (transfer_learning) { Log::info("Transfer learning version is %s\n", transfer_learning_version.c_str()); - Log::info("Apply seed stirs: %d\n", seed_stirs); } } void IslandSpeciationStrategy::initialize_population(function& mutate) { for (int32_t i = 0; i < number_of_islands; i++) { - Island* new_island = new Island(i, max_island_size); + Island* new_island = new Island(i, max_island_size, *annealing_policy); if (start_filled) { - new_island->fill_with_mutated_genomes(seed_genome, seed_stirs, tl_epigenetic_weights, mutate); + new_island->fill_with_mutated_genomes(seed_genome, num_mutations, tl_epigenetic_weights, mutate); } islands.push_back(new_island); } @@ -186,14 +190,14 @@ int32_t IslandSpeciationStrategy::insert_genome(RNN_Genome* genome) { Log::fatal("ERROR: island[%d] is null!\n", island); } int32_t insert_position = islands[island]->insert_genome(genome); - Log::info("Island %d: Insert position was: %d\n", insert_position); + Log::info("Island %d: Insert position was: %d\n", island, insert_position); if (insert_position == 0) { - if (new_global_best) { - return 0; - } else { - return 1; - } + stringstream ss; + ss << output_directory << "/island_" << island << "_best.bin"; + genome->write_to_file(ss.str()); + + return insert_position != 0; } else { return insert_position; // will be -1 if not inserted, or > 0 if not the global best } @@ -228,7 +232,6 @@ void IslandSpeciationStrategy::repopulate() { if (rank[i] >= 0) { Log::info("found island: %d is the worst island \n", rank[0]); islands[rank[i]]->erase_island(); - islands[rank[i]]->erase_structure_map(); islands[rank[i]]->set_status(Island::REPOPULATING); } else { Log::error("Didn't find the worst island!"); @@ -287,14 +290,6 @@ RNN_Genome* IslandSpeciationStrategy::generate_for_initializing_island( new_genome = seed_genome->copy(); new_genome->initialize_randomly(); - bool stir_seed_genome = false; - if (stir_seed_genome) { - Log::info("Stir the seed genome with %d mutations\n", seed_stirs); - mutate(seed_stirs, new_genome); - if (!tl_epigenetic_weights) { - new_genome->initialize_randomly(); - } - } } else { Log::info("Island %d: island is initializing but not empty, mutating a random genome\n", generation_island); while (new_genome == NULL) { @@ -464,49 +459,38 @@ void IslandSpeciationStrategy::print(string indent) const { * Gets speciation strategy information headers for logs */ string IslandSpeciationStrategy::get_strategy_information_headers() const { + stringstream oss; + string info_header = ""; - info_header.append(",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"); + oss << ",mse_min_pre,mse_max_pre,mse_min_post,mse_max_post"; for (int32_t i = 0; i < (int32_t) islands.size(); i++) { - info_header.append(","); - info_header.append("Island_"); - info_header.append(to_string(i)); - info_header.append("_best_fitness"); - info_header.append(","); - info_header.append("Island_"); - info_header.append(to_string(i)); - info_header.append("_worst_fitness"); - } - return info_header; + oss << ",Island_" << i << "_best_fitness" << ",Island_" << i << "_wort_fitness" << ",Island_" << i + << "_all_time_best"; + } + + return oss.str(); } /** * Gets speciation strategy information values for logs */ string IslandSpeciationStrategy::get_strategy_information_values(RNN_Genome* genome) const { - string info_value = ""; - + stringstream oss; auto& [min_mse_pre, max_mse_pre] = genome_performance.at(genome->generation_id); - info_value.append(","); - info_value.append(to_string(min_mse_pre)); - info_value.append(","); - info_value.append(to_string(max_mse_pre)); + oss << "," << min_mse_pre << "," << max_mse_pre; float min_mse_post = this->get_best_fitness(); float max_mse_post = this->get_worst_fitness(); - info_value.append(","); - info_value.append(to_string(min_mse_post)); - info_value.append(","); - info_value.append(to_string(max_mse_post)); + oss << "," << min_mse_post << "," << max_mse_post; for (int32_t i = 0; i < (int32_t) islands.size(); i++) { double best_fitness = islands[i]->get_best_fitness(); double worst_fitness = islands[i]->get_worst_fitness(); - info_value.append(","); - info_value.append(to_string(best_fitness)); - info_value.append(","); - info_value.append(to_string(worst_fitness)); + double all_time_best = islands[i]->get_best_all_time_fitness(); + oss << "," << best_fitness << "," << worst_fitness << "," << all_time_best; } - return info_value; + + return oss.str(); } RNN_Genome* IslandSpeciationStrategy::parents_repopulation( diff --git a/examm/island_speciation_strategy.hxx b/examm/island_speciation_strategy.hxx index 19eff273..bd32d507 100644 --- a/examm/island_speciation_strategy.hxx +++ b/examm/island_speciation_strategy.hxx @@ -37,6 +37,7 @@ class IslandSpeciationStrategy : public SpeciationStrategy { RNN_Genome* seed_genome; /**< keep a reference to the seed genome so we can re-use it across islands and not duplicate innovation numbers. */ + string output_directory; string island_ranking_method; /**< The method used to find the worst island in population */ string repopulation_method; /**< The method used to repopulate the island after being erased */ @@ -72,9 +73,10 @@ class IslandSpeciationStrategy : public SpeciationStrategy { bool transfer_learning; string transfer_learning_version; - int32_t seed_stirs; bool tl_epigenetic_weights; + unique_ptr annealing_policy; + public: // static void register_command_line_arguments(); // static IslandSpeciationStrategy* generate_from_command_line(); @@ -87,11 +89,11 @@ class IslandSpeciationStrategy : public SpeciationStrategy { */ IslandSpeciationStrategy( int32_t _number_of_islands, int32_t _max_island_size, double _mutation_rate, - double _intra_island_crossover_rate, double _inter_island_crossover_rate, RNN_Genome* _seed_genome, + double _intra_island_crossover_rate, double _inter_island_crossover_rate, string output_directory, RNN_Genome* _seed_genome, string _island_ranking_method, string _repopulation_method, int32_t _extinction_event_generation_number, int32_t _num_mutations, int32_t _islands_to_exterminate, int32_t _max_genomes, bool _repeat_extinction, - bool _start_filled, bool _transfer_learning, string _transfer_learning_version, int32_t _seed_stirs, - bool _tl_epigenetic_weights + bool _start_filled, bool _transfer_learning, string _transfer_learning_version, bool _tl_epigenetic_weights, + unique_ptr& annealing_policy ); // /** diff --git a/examm/species.cxx b/examm/species.cxx index 1e650ec7..9081b203 100644 --- a/examm/species.cxx +++ b/examm/species.cxx @@ -130,12 +130,6 @@ int32_t Species::insert_genome(RNN_Genome* genome) { if (insert_index == 0) { // this was a new best genome for this island Log::info("new best fitness for island: %d!\n", id); - if (genome->get_fitness() != EXAMM_MAX_DOUBLE) { - // need to set the weights for non-initial genomes so we - // can generate a proper graphviz file - vector best_parameters = genome->get_best_parameters(); - genome->set_weights(best_parameters); - } species_not_improving_count = 0; } else { species_not_improving_count++; @@ -233,4 +227,4 @@ int32_t Species::get_species_not_improving_count() { void Species::set_species_not_improving_count(int32_t count) { species_not_improving_count = count; -} \ No newline at end of file +} diff --git a/rnn/rnn_genome.cxx b/rnn/rnn_genome.cxx index d0887808..47bd934f 100644 --- a/rnn/rnn_genome.cxx +++ b/rnn/rnn_genome.cxx @@ -1337,7 +1337,7 @@ bool RNN_Genome::has_node_with_innovation(int32_t innovation_number) const { return false; } -bool RNN_Genome::equals(RNN_Genome* other) { +bool RNN_Genome::equals(const RNN_Genome* other) const { if (nodes.size() != other->nodes.size()) { return false; } @@ -1369,6 +1369,19 @@ bool RNN_Genome::equals(RNN_Genome* other) { return true; } +bool RNN_Genome::operator==(const RNN_Genome& other) const { + return other.equals(this); +} + +size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome* genome) const { + return this->operator()(*genome); +} + +size_t RNN_Genome::StructuralHash::operator()(const RNN_Genome& genome) const { + std::hash hasher; + return hasher(genome.get_structural_hash()); +} + void RNN_Genome::assign_reachability() { Log::trace("assigning reachability!\n"); Log::trace("%6d nodes, %6d edges, %6d recurrent edges\n", nodes.size(), edges.size(), recurrent_edges.size()); diff --git a/rnn/rnn_genome.hxx b/rnn/rnn_genome.hxx index 56977e76..c3584e51 100644 --- a/rnn/rnn_genome.hxx +++ b/rnn/rnn_genome.hxx @@ -295,7 +295,17 @@ class RNN_Genome { */ bool has_node_with_innovation(int32_t innovation_number) const; - bool equals(RNN_Genome* other); + bool equals(const RNN_Genome* other) const; + bool operator==(const RNN_Genome& other) const; + + /** + * Hash function implementation. + * Based on the hash code of the structural hash. + * */ + struct StructuralHash { + size_t operator()(const RNN_Genome& other) const; + size_t operator()(const RNN_Genome* other) const; + }; string get_color(double weight, bool is_recurrent); void write_graphviz(string filename); diff --git a/rnn_examples/train_rnn.cxx b/rnn_examples/train_rnn.cxx index c790b112..a452fe1a 100644 --- a/rnn_examples/train_rnn.cxx +++ b/rnn_examples/train_rnn.cxx @@ -116,19 +116,8 @@ int main(int argc, char** argv) { if (genome_file.size() != 0) { genome = new RNN_Genome(genome_file); - Log::info("best weights: { "); - for (double& d : genome->get_best_parameters()) { - Log::info_no_header("%f, ", d); - } - Log::info("}\n"); - - vector params; - genome->get_weights(params); - Log::info("current weights: { "); - for (double& d : params) { - Log::info_no_header("%f, ", d); - } - Log::info("}\n"); + genome->set_weights(genome->get_best_parameters()); + Log::info("Number of weights = %d\n", genome->get_number_weights()); } else { string rnn_type; get_argument(arguments, "--rnn_type", true, rnn_type); @@ -222,6 +211,9 @@ int main(int argc, char** argv) { genome->set_log_filename(output_directory + "/" + log_filename); } + string output_genome_name = "output_genome.bin"; + get_argument(arguments, "--output_genome_name", false, output_genome_name); + genome->set_parameter_names( time_series_sets->get_input_parameter_names(), time_series_sets->get_output_parameter_names() );