diff --git a/examples/fitness_history.ipynb b/examples/fitness_history.ipynb new file mode 100644 index 00000000..ae3d3fef --- /dev/null +++ b/examples/fitness_history.ipynb @@ -0,0 +1,242 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6ad37843-2d46-41a7-8637-f7786eb7f72c", + "metadata": {}, + "outputs": [], + "source": [ + "from sampo.generator.base import SimpleSynthetic\n", + "from sampo.generator.environment.contractor_by_wg import get_contractor_by_wg\n", + "from sampo.scheduler.genetic.base import GeneticScheduler\n", + "from sampo.scheduler.genetic.operators import TimeAndResourcesFitness" + ] + }, + { + "cell_type": "markdown", + "id": "505ef085-4abb-48f5-9c22-cfc8211b8da0", + "metadata": {}, + "source": [ + "## Set parameters and generate synthetic graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af3dc563-fe56-4fea-96ef-30fb0f7552a2", + "metadata": {}, + "outputs": [], + "source": [ + "graph_size = 250\n", + "seed = 123\n", + "\n", + "size_of_population = 100\n", + "number_of_generation = 20\n", + "\n", + "mutate_order = 0.02\n", + "mutate_resources = 0.02\n", + "\n", + "fitness_constructor = TimeAndResourcesFitness()\n", + "fitness_weights = (-1, -1)\n", + "is_multiobjective = True\n", + "optimize_resources = True\n", + "\n", + "save_history_to = \"./history_test.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b8c8da3-f19f-4c74-ac90-db27ef775be0", + "metadata": {}, + "outputs": [], + "source": [ + "ss = SimpleSynthetic(seed)\n", + "wg = ss.work_graph(bottom_border=graph_size)\n", + "contractors = [get_contractor_by_wg(wg)]\n", + "print(f\"Generated graph with size: {wg.vertex_count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "d2add0c6-3b37-4a17-8af2-96340635b277", + "metadata": {}, + "source": [ + "## Use the genetic algorithm and save history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"00a68d26-7d3b-4eb6-8395-247c0962c86d", + "metadata": {}, + "outputs": [], + "source": [ + "genetic_algorithm = GeneticScheduler(\n", + " number_of_generation=number_of_generation,\n", + " size_of_population=size_of_population,\n", + " \n", + " mutate_order=mutate_order,\n", + " mutate_resources=mutate_resources,\n", + " \n", + " fitness_constructor=fitness_constructor,\n", + " fitness_weights=fitness_weights,\n", + " is_multiobjective=is_multiobjective,\n", + " optimize_resources=optimize_resources,\n", + " \n", + " seed=seed,\n", + " save_history_to=save_history_to\n", + ")\n", + "genetic_result = genetic_algorithm.schedule(wg, contractors)" + ] + }, + { + "cell_type": "markdown", + "id": "0a2019d6-dd49-4150-9c25-2cac2560063c", + "metadata": {}, + "source": [ + "## Get summary of the evolution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73095ca6-9433-4270-abf5-f8beae4cb90d", + "metadata": {}, + "outputs": [], + "source": [ + "from sampo.scheduler.utils.fitness_history import FitnessHistorySummary\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import plotly.express as px\n", + "import plotly.io as pio\n", + "pio.templates.default = \"plotly_dark\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6aefd90-e8ee-4ccb-9451-0ff5ba850792", + "metadata": {}, + "outputs": [], + "source": [ + "# Load history from file\n", + "summary = FitnessHistorySummary(save_history_to)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dadc10b3-9171-4b81-96cd-7faace7bd868", + "metadata": {}, + "outputs": [], + "source": [ + "population_means = np.array(summary.get_fitness_means())\n", + "\n", + "fig = px.line(x=population_means[:, 0], y=population_means[:, 1], markers=True)\n", + "fig.update_layout(height=700, width=700)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea8885bf-def2-47a5-97e5-4cd176b7f4ee", + "metadata": {}, + "outputs": [], 
+ "source": [ + "pareto_front_means = np.array(summary.get_fitness_means(only_pareto=True))\n", + "\n", + "fig = px.line(x=pareto_front_means[:, 0], y=pareto_front_means[:, 1], markers=True)\n", + "fig.update_layout(height=700, width=700)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "626ca043-54be-449d-a682-aef6abbad455", + "metadata": {}, + "outputs": [], + "source": [ + "pareto_ratios = summary.get_pareto_to_population_ratios()\n", + "\n", + "fig = px.line(y=pareto_ratios, markers=True)\n", + "fig.update_layout(height=500, width=1000)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a36746bb-867d-4f56-a691-ff373ed8e29c", + "metadata": {}, + "outputs": [], + "source": [ + "population_shifts = summary.get_generation_shifts()\n", + "pareto_front_shifts = summary.get_generation_shifts(only_pareto=True)\n", + "\n", + "fig = px.line(y=[population_shifts, pareto_front_shifts], markers=True)\n", + "fig.data[1].line.color = \"white\"\n", + "fig.update_layout(height=500, width=1000, showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88d5aeb0-0af3-4002-9be4-6e090c6c0fd2", + "metadata": {}, + "outputs": [], + "source": [ + "population_uniqueness = summary.get_uniqueness_scores()\n", + "pareto_uniqueness = summary.get_uniqueness_scores(only_pareto=True)\n", + "\n", + "fig = px.line(y=[population_uniqueness, pareto_uniqueness], markers=True)\n", + "fig.data[1].line.color = \"white\"\n", + "fig.update_layout(height=500, width=1000, showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3564e9ca-a9fd-43df-9a80-cbd2c74426e4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9bf8b8c-9ffd-4e0d-ae76-cdc4dbbf86c0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sampo/scheduler/genetic/base.py b/sampo/scheduler/genetic/base.py index 4712f733..050d2a63 100644 --- a/sampo/scheduler/genetic/base.py +++ b/sampo/scheduler/genetic/base.py @@ -56,7 +56,10 @@ def __init__(self, # for optimization on one criteria set False is_multiobjective: bool = False, # for experiments with classic RCPSP formulation (initialize population with LFT) - only_lft_initialization: bool = False): + only_lft_initialization: bool = False, + # where to save history of fitness values + # saving instead of returning to avoid breaking modules + save_history_to: str | None = None): super().__init__(scheduler_type=scheduler_type, resource_optimizer=resource_optimizer, work_estimator=work_estimator) @@ -75,6 +78,7 @@ def __init__(self, self._is_multiobjective = is_multiobjective self._weights = weights self._only_lft_initialization = only_lft_initialization + self.save_history_to = save_history_to self._time_border = None self._max_plateau_steps = None @@ -248,7 +252,8 @@ def upgrade_pop(self, self._optimize_resources, deadline, self._only_lft_initialization, - self._is_multiobjective) + self._is_multiobjective, + self.save_history_to) return new_pop def schedule_with_cache(self, @@ -303,7 +308,8 @@ def schedule_with_cache(self, self._optimize_resources, deadline, self._only_lft_initialization, - self._is_multiobjective) + self._is_multiobjective, + self.save_history_to) schedules = [ (Schedule.from_scheduled_works(scheduled_works.values(), wg), schedule_start_time, timeline, order_nodes) for scheduled_works, schedule_start_time, timeline, order_nodes in 
schedules] diff --git a/sampo/scheduler/genetic/schedule_builder.py b/sampo/scheduler/genetic/schedule_builder.py index e1eb839c..6b5ef6db 100644 --- a/sampo/scheduler/genetic/schedule_builder.py +++ b/sampo/scheduler/genetic/schedule_builder.py @@ -17,6 +17,7 @@ from sampo.schemas.schedule_spec import ScheduleSpec from sampo.schemas.time import Time from sampo.schemas.time_estimator import WorkTimeEstimator, DefaultWorkEstimator +from sampo.scheduler.utils.fitness_history import FitnessHistory def create_toolbox(wg: WorkGraph, @@ -112,14 +113,16 @@ def build_schedules(wg: WorkGraph, optimize_resources: bool = False, deadline: Time | None = None, only_lft_initialization: bool = False, - is_multiobjective: bool = False) \ + is_multiobjective: bool = False, + save_history_to: str | None = None) \ -> list[tuple[ScheduleWorkDict, Time, Timeline, list[GraphNode]]]: return build_schedules_with_cache(wg, contractors, population_size, generation_number, mutpb_order, mutpb_res, mutpb_zones, init_schedules, rand, spec, weights, pop, landscape, fitness_object, fitness_weights, work_estimator, sgs_type, assigned_parent_time, timeline, time_border, max_plateau_steps, optimize_resources, - deadline, only_lft_initialization, is_multiobjective)[0] + deadline, only_lft_initialization, is_multiobjective, + save_history_to)[0] def build_schedules_with_cache(wg: WorkGraph, @@ -146,7 +149,8 @@ def build_schedules_with_cache(wg: WorkGraph, optimize_resources: bool = False, deadline: Time | None = None, only_lft_initialization: bool = False, - is_multiobjective: bool = False) \ + is_multiobjective: bool = False, + save_history_to: str | None = None) \ -> tuple[list[tuple[ScheduleWorkDict, Time, Timeline, list[GraphNode]]], list[ChromosomeType]]: """ Genetic algorithm. 
@@ -194,6 +198,7 @@ def build_schedules_with_cache(wg: WorkGraph, evaluation_start = time.time() hof = tools.ParetoFront(similar=compare_individuals) + fitness_history = FitnessHistory() # map to each individual fitness function fitness = SAMPO.backend.compute_chromosomes(fitness_f, pop) @@ -204,6 +209,7 @@ def build_schedules_with_cache(wg: WorkGraph, ind.fitness.values = fit hof.update(pop) + fitness_history.update(pop, hof, pop, comment="first generation") best_fitness = hof[0].fitness.values SAMPO.logger.info(f'First population evaluation took {evaluation_time * 1000} ms') @@ -236,6 +242,7 @@ def build_schedules_with_cache(wg: WorkGraph, pop += offspring pop = toolbox.select(pop) hof.update(pop) + fitness_history.update(pop, hof, offspring, comment="genetic update") prev_best_fitness = best_fitness best_fitness = hof[0].fitness.values @@ -283,6 +290,7 @@ def build_schedules_with_cache(wg: WorkGraph, evaluation_time += time.time() - evaluation_start hof.update(pop) + fitness_history.update(pop, hof, pop, comment="first deadline population") if best_fitness[0] <= deadline: # Optimizing resources @@ -327,6 +335,7 @@ def build_schedules_with_cache(wg: WorkGraph, pop += offspring pop = toolbox.select(pop) hof.update(pop) + fitness_history.update(pop, hof, offspring, comment="genetic deadline update") prev_best_fitness = best_fitness best_fitness = hof[0].fitness.values @@ -338,6 +347,9 @@ def build_schedules_with_cache(wg: WorkGraph, SAMPO.logger.info(f'Generations processing took {(time.time() - start) * 1000} ms') SAMPO.logger.info(f'Full genetic processing took {(time.time() - global_start) * 1000} ms') SAMPO.logger.info(f'Evaluation time: {evaluation_time * 1000}') + # save fitness history + if save_history_to: + fitness_history.save_to_json(path=save_history_to) best_chromosomes = [chromosome for chromosome in hof] diff --git a/sampo/scheduler/utils/fitness_history.py b/sampo/scheduler/utils/fitness_history.py new file mode 100644 index 00000000..3b7d803c --- 
"""Recording and summarising fitness values produced during an evolutionary run."""
import json
import numpy as np
from itertools import pairwise
from typing import Any, Iterable
from sampo.base import SAMPO


def _safe_ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


class FitnessHistory:
    """Recording fitness values during evolution.

    Every call to :meth:`update` appends one entry to ``history``. An entry is
    a dict with the keys ``population_fitness``, ``pareto_front_fitness``,
    ``offsprings_fitness`` and ``comment``.
    """

    def __init__(self):
        self.history: list[dict[str, Any]] = []

    def update(self,
               population: Iterable = (),
               # while you could get pareto-front from fitness later,
               # it is easier to save fitness values from hall-of-fame
               pareto_front: Iterable = (),
               # currently, history is updated only after genetic selection,
               # so offsprings were generated for this population, not from it
               offsprings: Iterable = (),
               # comments about how current generation was created
               # or if there are any changes that need attention
               comment: str = "") -> None:
        """Append the fitness values of one generation to the history.

        :param population: individuals of the current population
        :param pareto_front: individuals of the current pareto-front (hall-of-fame)
        :param offsprings: individuals produced for this generation
        :param comment: free-form note about how this generation was created

        Each individual must expose ``fitness.values``. The defaults are
        immutable empty tuples, so no default object is shared between calls.
        """
        self.history.append({
            "population_fitness": [i.fitness.values for i in population],
            "pareto_front_fitness": [i.fitness.values for i in pareto_front],
            "offsprings_fitness": [i.fitness.values for i in offsprings],
            "comment": comment
        })

    # JSON format is more flexible
    # try: except: is just in case
    def save_to_json(self, path: str) -> None:
        """Save current fitness history to JSON file.

        Saving is best-effort: a failure is logged and swallowed, so a problem
        with the history file does not propagate to the caller.

        :param path: where to write the JSON file
        """
        try:
            # serialise before opening the file: opening with "w" truncates it,
            # so a serialisation error must not happen after that point
            payload = json.dumps(self.history)
            with open(path, "w", encoding="utf-8") as json_file:
                json_file.write(payload)
            SAMPO.logger.info(f"Saved history to: {path}")
        except Exception as e:
            # NOTE(review): failures are reported at info level; consider a
            # warning level if the project logger provides one
            SAMPO.logger.info(f"Error while saving history to {path}: {e}")


class FitnessHistorySummary:
    """Functions for creating summary of evolution.

    Reads a JSON file with the structure written by
    :meth:`FitnessHistory.save_to_json` and splits it into per-generation
    lists of fitness values.
    """

    def __init__(self, path: str):
        """Load the history stored at ``path``.

        :param path: JSON file holding a list of generation entries
        :raises OSError: if the file cannot be opened
        :raises json.JSONDecodeError: if the file is not valid JSON
        :raises KeyError: if a generation entry lacks an expected key
        """
        with open(path, "r", encoding="utf-8") as json_file:
            history = json.load(json_file)

        self.population_history = [generation["population_fitness"] for generation in history]
        self.pareto_front_history = [generation["pareto_front_fitness"] for generation in history]
        self.offsprings_history = [generation["offsprings_fitness"] for generation in history]
        self.comments = [generation["comment"] for generation in history]

    # is the mean fitness of population improving?
    # mean might be better than median, outliers matter
    def get_fitness_means(self, only_pareto: bool = False, only_offsprings: bool = False) -> list[np.ndarray]:
        """Mean fitness in population for each generation and objective.

        :param only_pareto: use the pareto-front instead of the whole population
        :param only_offsprings: use the offsprings instead of the whole population;
            ignored when ``only_pareto`` is also set
        :return: one array of per-objective means for every generation
        """
        if only_pareto:
            data = self.pareto_front_history
        elif only_offsprings:
            data = self.offsprings_history
        else:
            data = self.population_history

        return [
            np.mean(fitness_values, axis=0)
            for fitness_values in data
        ]

    # be careful if selection method is not truncation selection
    # higher shifts != better algorithm, but generally higher is better
    def get_generation_shifts(self, only_pareto: bool = False) -> list[float]:
        """How much the population has changed compared to previous generation.

        :param only_pareto: compare pareto-fronts instead of whole populations
        :return: for every pair of consecutive generations, the share of fitness
            values of the newer one that are absent from the older one;
            NaN when the newer generation is empty
        """
        data = self.pareto_front_history if only_pareto else self.population_history
        shifts = []
        for old_fitness, new_fitness in pairwise(data):
            # fitness values are loaded from JSON as lists; convert them to
            # tuples so the previous generation can be looked up through a set
            known = set(map(tuple, old_fitness))
            changed = sum(1 for value in new_fitness if tuple(value) not in known)
            shifts.append(_safe_ratio(changed, len(new_fitness)))
        return shifts

    # how "stretched" are the fitness values near the pareto-front
    # higher ratio generally means more diverse fitness values
    def get_pareto_to_population_ratios(self) -> list[float]:
        """What part of the population is in pareto front.

        :return: pareto-front size divided by population size for every
            generation; NaN when the population is empty
        """
        data = zip(self.population_history, self.pareto_front_history)
        return [
            _safe_ratio(len(pareto_front_fitness), len(population_fitness))
            for population_fitness, pareto_front_fitness in data
        ]

    # it might be better to remove phenotype duplicates:
    # https://www.ac.tuwien.ac.at/files/pub/raidl-99c.pdf
    def get_uniqueness_scores(self, only_pareto: bool = False) -> list[float]:
        """Uniqueness of fitness values in population.

        :param only_pareto: score pareto-fronts instead of whole populations
        :return: number of distinct fitness values divided by the number of
            fitness values for every generation; NaN when a generation is empty
        """
        data = self.pareto_front_history if only_pareto else self.population_history
        return [
            _safe_ratio(len(set(map(tuple, fitness_values))), len(fitness_values))
            for fitness_values in data
        ]