From a8aca576dd633524d2f7c52a77c7b2176e42cc8f Mon Sep 17 00:00:00 2001 From: maximdu Date: Fri, 5 Dec 2025 13:26:00 +0300 Subject: [PATCH 1/3] added FitnessHistory class --- sampo/scheduler/genetic/utils.py | 110 +++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/sampo/scheduler/genetic/utils.py b/sampo/scheduler/genetic/utils.py index abd6406b..3023db22 100644 --- a/sampo/scheduler/genetic/utils.py +++ b/sampo/scheduler/genetic/utils.py @@ -134,3 +134,113 @@ def create_toolbox_using_cached_chromosomes(wg: WorkGraph, sgs_type, only_lft_initialization, is_multiobjective) + + +class FitnessHistory: + """ + Class to track fitness and other stats for genetic algorithm + * fitness function is assumed to stay the same during evolution + * if you want to change the function, feel free to create another object + """ + + def __init__(self): + # [generation1, generation2, ...] + self.fitness_history = [] + # optional, comments about how this generation was created + self.notes = [] + + def update_history(self, population: list, note: str = ""): + fitness_values = [i.fitness.values for i in population] + self.fitness_history.append(fitness_values) + self.notes.append(note) + + def get_agg_functions_for_fitness(self): + means, medians, stds = [], [], [] + for fitness_values in self.fitness_history: + means.append(np.mean(fitness_values, axis=0)) + medians.append(np.median(fitness_values, axis=0)) + stds.append(np.std(fitness_values, axis=0)) + return np.array(means), np.array(medians), np.array(stds) + + def get_uniqueness_scores(self): + """ + Calculate uniqueness of fitness values in population: + how many genomes with the same fitness are in the population + higher score = more fitness values are unique + """ + uniqueness_scores = [ + len(set(fitness_values)) / len(fitness_values) + for fitness_values in self.fitness_history + ] + + # round values for readability + uniqueness_scores = [round(score, 4) for score in uniqueness_scores] + return 
uniqueness_scores + + def get_generation_shifts(self): + """ + Calculate how much the population has changed compared to previous generation + """ + # for keeping len(generation_shifts) == len(dataframe) + generation_shifts = [0] + n_generations = len(self.fitness_history) + for i in range(1, n_generations): + old_fitness = self.fitness_history[i-1] + new_fitness = self.fitness_history[i] + + n_fresh_fitness = 0 + for f in new_fitness: + if f not in old_fitness: + n_fresh_fitness += 1 + ratio = n_fresh_fitness / len(new_fitness) + generation_shifts.append(ratio) + + # round values for readability + generation_shifts = [round(score, 4) for score in generation_shifts] + return generation_shifts + + def create_fitness_raw_df(self) -> pd.DataFrame: + df = pd.DataFrame(self.fitness_history) + df["iteration"] = list(range(len(self.fitness_history))) + df["notes"] = self.notes + return df + + def create_fitness_info_df(self) -> pd.DataFrame: + iteration = list(range(len(self.fitness_history))) + means, medians, stds = self.get_agg_functions_for_fitness() + uniqueness_scores = self.get_uniqueness_scores() + generation_shifts = self.get_generation_shifts() + + df = pd.DataFrame({ + "iteration": iteration, + "notes": self.notes, + "uniqueness_scores": uniqueness_scores, + "generation_shifts": generation_shifts, + }) + + n_fitness_objectives = means.shape[1] + for i in range(n_fitness_objectives): + df[f"mean_{i}"] = means[:, i] + df[f"median_{i}"] = medians[:, i] + df[f"std_{i}"] = stds[:, i] + return df + + def write_fitness_raw(self, path=None): + """Save current fitness raw values to CSV file""" + if not path: + return + try: + df = self.create_fitness_raw_df() + df.to_csv(path, index=False) + except Exception as e: + print(f"Error occured when trying to write fitness history (raw): {e}") + + def write_fitness_stats(self, path=None): + """Save current fitness stats to CSV file""" + if not path: + return + try: + df = self.create_fitness_info_df() + df.to_csv(path, 
index=False) + except Exception as e: + print(f"Error occured when trying to write fitness history (stats): {e}") From 17e7faf7b4e247f07ea369f8cc4a040efdf4d5d8 Mon Sep 17 00:00:00 2001 From: maximdu Date: Fri, 5 Dec 2025 13:32:49 +0300 Subject: [PATCH 2/3] added saving with fitness_history --- sampo/scheduler/genetic/schedule_builder.py | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sampo/scheduler/genetic/schedule_builder.py b/sampo/scheduler/genetic/schedule_builder.py index e1eb839c..1490b45a 100644 --- a/sampo/scheduler/genetic/schedule_builder.py +++ b/sampo/scheduler/genetic/schedule_builder.py @@ -9,6 +9,7 @@ from sampo.scheduler.genetic.converter import convert_schedule_to_chromosome, ScheduleGenerationScheme from sampo.scheduler.genetic.operators import init_toolbox, ChromosomeType, FitnessFunction, TimeFitness from sampo.scheduler.genetic.utils import prepare_optimized_data_structures +from sampo.scheduler.genetic.utils import FitnessHistory from sampo.scheduler.timeline.base import Timeline from sampo.schemas.contractor import Contractor from sampo.schemas.graph import GraphNode, WorkGraph @@ -112,14 +113,16 @@ def build_schedules(wg: WorkGraph, optimize_resources: bool = False, deadline: Time | None = None, only_lft_initialization: bool = False, - is_multiobjective: bool = False) \ + is_multiobjective: bool = False, + fitness_raw_path: str | None = None, + fitness_stats_path: str | None = None) \ -> list[tuple[ScheduleWorkDict, Time, Timeline, list[GraphNode]]]: return build_schedules_with_cache(wg, contractors, population_size, generation_number, mutpb_order, mutpb_res, mutpb_zones, init_schedules, rand, spec, weights, pop, landscape, fitness_object, fitness_weights, work_estimator, sgs_type, assigned_parent_time, timeline, time_border, max_plateau_steps, optimize_resources, - deadline, only_lft_initialization, is_multiobjective)[0] + deadline, only_lft_initialization, is_multiobjective, fitness_raw_path, 
fitness_stats_path)[0] def build_schedules_with_cache(wg: WorkGraph, @@ -146,7 +149,9 @@ def build_schedules_with_cache(wg: WorkGraph, optimize_resources: bool = False, deadline: Time | None = None, only_lft_initialization: bool = False, - is_multiobjective: bool = False) \ + is_multiobjective: bool = False, + fitness_raw_path: str | None = None, + fitness_stats_path: str | None = None) \ -> tuple[list[tuple[ScheduleWorkDict, Time, Timeline, list[GraphNode]]], list[ChromosomeType]]: """ Genetic algorithm. @@ -193,6 +198,7 @@ def build_schedules_with_cache(wg: WorkGraph, evaluation_start = time.time() + fitness_history = FitnessHistory() hof = tools.ParetoFront(similar=compare_individuals) # map to each individual fitness function @@ -203,7 +209,7 @@ def build_schedules_with_cache(wg: WorkGraph, for ind, fit in zip(pop, fitness): ind.fitness.values = fit - hof.update(pop) + hof.update(pop); fitness_history.update_history(pop, note="first generation") best_fitness = hof[0].fitness.values SAMPO.logger.info(f'First population evaluation took {evaluation_time * 1000} ms') @@ -235,7 +241,7 @@ def build_schedules_with_cache(wg: WorkGraph, # renewing population pop += offspring pop = toolbox.select(pop) - hof.update(pop) + hof.update(pop); fitness_history.update_history(pop, note="genetic update") prev_best_fitness = best_fitness best_fitness = hof[0].fitness.values @@ -282,7 +288,7 @@ def build_schedules_with_cache(wg: WorkGraph, evaluation_time += time.time() - evaluation_start - hof.update(pop) + hof.update(pop); fitness_history.update_history(pop, note="first deadline population") if best_fitness[0] <= deadline: # Optimizing resources @@ -326,7 +332,7 @@ def build_schedules_with_cache(wg: WorkGraph, # renewing population pop += offspring pop = toolbox.select(pop) - hof.update(pop) + hof.update(pop); fitness_history.update_history(pop, note="genetic deadline update") prev_best_fitness = best_fitness best_fitness = hof[0].fitness.values @@ -338,6 +344,9 @@ def 
build_schedules_with_cache(wg: WorkGraph, SAMPO.logger.info(f'Generations processing took {(time.time() - start) * 1000} ms') SAMPO.logger.info(f'Full genetic processing took {(time.time() - global_start) * 1000} ms') SAMPO.logger.info(f'Evaluation time: {evaluation_time * 1000}') + # save fitness history + fitness_history.write_fitness_raw(path=fitness_raw_path) + fitness_history.write_fitness_stats(path=fitness_stats_path) best_chromosomes = [chromosome for chromosome in hof] From 3ea9e21a2f7fd61324e87062c233d82071f4967d Mon Sep 17 00:00:00 2001 From: maximdu Date: Fri, 5 Dec 2025 13:35:28 +0300 Subject: [PATCH 3/3] added path for saving fitness --- sampo/scheduler/genetic/base.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sampo/scheduler/genetic/base.py b/sampo/scheduler/genetic/base.py index 4712f733..dbd43a40 100644 --- a/sampo/scheduler/genetic/base.py +++ b/sampo/scheduler/genetic/base.py @@ -56,7 +56,9 @@ def __init__(self, # for optimization on one criteria set False is_multiobjective: bool = False, # for experiments with classic RCPSP formulation (initialize population with LFT) - only_lft_initialization: bool = False): + only_lft_initialization: bool = False, + fitness_raw_path: str | None = None, + fitness_stats_path: str | None = None): super().__init__(scheduler_type=scheduler_type, resource_optimizer=resource_optimizer, work_estimator=work_estimator) @@ -75,6 +77,8 @@ def __init__(self, self._is_multiobjective = is_multiobjective self._weights = weights self._only_lft_initialization = only_lft_initialization + self.fitness_raw_path = fitness_raw_path + self.fitness_stats_path = fitness_stats_path self._time_border = None self._max_plateau_steps = None @@ -248,7 +252,9 @@ def upgrade_pop(self, self._optimize_resources, deadline, self._only_lft_initialization, - self._is_multiobjective) + self._is_multiobjective, + self.fitness_raw_path, + self.fitness_stats_path) return new_pop def schedule_with_cache(self, @@ 
-303,7 +309,9 @@ def schedule_with_cache(self, self._optimize_resources, deadline, self._only_lft_initialization, - self._is_multiobjective) + self._is_multiobjective, + self.fitness_raw_path, + self.fitness_stats_path) schedules = [ (Schedule.from_scheduled_works(scheduled_works.values(), wg), schedule_start_time, timeline, order_nodes) for scheduled_works, schedule_start_time, timeline, order_nodes in schedules]