diff --git a/.gitignore b/.gitignore index cfe054a..aa6549a 100644 --- a/.gitignore +++ b/.gitignore @@ -176,4 +176,5 @@ BBOB*.zip /stn/ /run/ /setup/ -.vscode/ \ No newline at end of file +.vscode/ +.python-version diff --git a/data.csv b/data.csv new file mode 100644 index 0000000..5d43c3f --- /dev/null +++ b/data.csv @@ -0,0 +1,101 @@ +,id,fitness,name,description,configspace,generation,feedback,error,parent_ids,operator,metadata,task_prompt,method_name,problem_name,seed,_id,cummax_fitness,eval +0,18084962-51ed-4376-8b09-caa9e3c69568,5.955078389147237,AutoCorrCandidate,"Generates a nearly-optimal non-negative function by leveraging a piecewise-constant structure with optimized heights over a central region, tapering to zero at the edges to reduce autocorrelation.",,0,"C1 ratio = 5.95508, best known = 1.5053",,[],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,0,5.955078389147237,1 +1,43b5aacb-4de5-45a9-b846-f2331c03b92f,2.108060230292329,AutoCorrCandidate,"Refines the piecewise-constant function by optimizing the heights of the central, tapering, and edge regions to minimize autocorrelation.",,1,"C1 ratio = 2.10806, best known = 1.5053",,['18084962-51ed-4376-8b09-caa9e3c69568'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,1,5.955078389147237,2 +2,f053dcc7-b434-43ac-a0d0-0167c7569ed2,2.244990298343852,AutoCorrCandidate,"Optimizes a piecewise-constant function with central flat, tapered, and constant-height edge regions, incorporating a dynamically adjusted sigmoid taper and refined parameter bounds to further minimize autocorrelation.",,2,"C1 ratio = 2.24499, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,2,5.955078389147237,3 +3,dc80fb23-aa09-4b26-9a7a-1c0f535b08a9,2.14625421730855,AutoCorrCandidate,"Optimizes a piecewise-constant function with a central flat region, cosine tapers, and optimized edge values using a more robust optimizer and a refined parameterization, adding a small constant offset to avoid zero values, and using a larger number of iterations.",,3,"C1 ratio = 2.14625, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,3,5.955078389147237,4 +4,7e44f5a8-b458-4b51-9e44-87cbd6e3307f,0.0,AutoCorrCandidate,Optimizes a piecewise-constant function with an added Gaussian component and refined tapering to minimize autocorrelation.,,4,calc-error Integral ∫f must be > 0 for C1,calc-failed,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,4,5.955078389147237,5 +5,d236ef84-7086-4204-882b-7151cddc3692,2.072361474351445,AutoCorrCandidate,"Optimizes a piecewise-quadratic function with a central flat region, tapering quadratic sections, and constant edges to minimize autocorrelation.",,5,"C1 ratio = 2.07236, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,5,5.955078389147237,6 +6,617268ca-e233-47c4-9a64-e6889dc5560b,2.000000000000122,AutoCorrCandidate,"Optimizes a piecewise-cubic function with smooth transitions between regions by directly controlling the function values at key points, aiming for a flatter autocorrelation peak.",,6,"C1 ratio = 2, best known = 1.5053",,['d236ef84-7086-4204-882b-7151cddc3692'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,6,5.955078389147237,7 +7,01594200-c881-458f-b751-38ebd030cf6c,10.0,AutoCorrCandidate,"Optimizes a raised cosine function with adjustable parameters for a flatter autocorrelation peak, simplifying the parameter space for faster convergence.",,7,"C1 ratio = 10, best known = 1.5053",,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,7,10.0,8 +8,06529ca8-7515-41b3-bfca-120341bfc009,2.000000000000957,AutoCorrCandidate,Optimize a piecewise-quadratic function with a central flat region and parabolic tapers to minimize autocorrelation peak relative to integrated function square.,,8,"C1 ratio = 2, best known = 1.5053",,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,8,10.0,9 +9,f2400a0c-cca8-4bb5-9498-b669eb35e167,0.0,AutoCorrCandidate,Optimizes a piecewise-linear function with a central flat region and linearly decaying edges to minimize the autocorrelation peak relative to the integral squared.,,9,"exec-error could not broadcast input array from shape (240,) into shape (120,)",exec-failed,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,9,10.0,10 diff --git a/iohblade/plots.py b/iohblade/plots.py index 0847e83..18b7c14 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -4,12 +4,11 @@ import os from collections import Counter -import plotly.graph_objects as go - import jsonlines import matplotlib.pyplot as plt import numpy as np import pandas as pd +import plotly.graph_objects as go import seaborn as sns from scipy.stats import ttest_ind from sklearn.decomposition import PCA @@ -459,6 +458,140 @@ def plotly_code_evolution( return fig +def code_diff_chain( + run_data: pd.DataFrame, solution_id: str +) -> list[dict[str, pd.Series | str]]: + """Return diffs along the lineage of ``solution_id``. + + The function follows the first parent of each solution until the root is + reached. For every parent-child pair a unified diff of their code is + produced. + + Args: + run_data: DataFrame containing at least ``id``, ``parent_ids`` and + ``code`` columns. ``name``, ``generation`` and ``fitness`` are + optional but will be preserved if present. + solution_id: Identifier of the final solution. + + Returns: + A list of dictionaries ordered from the first ancestor to + ``solution_id``. Each dictionary has ``parent`` and ``child`` keys with + the respective rows and a ``diff`` key containing a unified diff string. + """ + + data = run_data.copy() + data["parent_ids"] = data["parent_ids"].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) else x + ) + data = data.set_index("id", drop=False) + if solution_id not in data.index: + raise ValueError(f"Unknown solution_id: {solution_id}") + + chain: list[dict[str, pd.Series | str]] = [] + current = solution_id + + while True: + row = data.loc[current] + parents = row["parent_ids"] + if not parents: + break + parent_id = parents[0] + parent_row = data.loc[parent_id] + parent_code = parent_row["code"] + current_code = row["code"] + parent_lines = parent_code.splitlines() + current_lines = current_code.splitlines() + diff_lines = difflib.unified_diff( + parent_lines, + current_lines, + fromfile=str(parent_id), + tofile=str(current), + lineterm="", + n=max(len(parent_lines), len(current_lines)), + ) + chain.append( + {"parent": parent_row, "child": row, "diff": "\n".join(diff_lines)} + ) + current = parent_id + + chain.reverse() + return chain + + +def get_code_lineage(run_data: pd.DataFrame, solution_id: str) -> list[pd.Series]: + """Return lineage of an individual with id ``solution_id``, across generation. + + The function follows the first parent of each solution until the root is + reached. Generating a chin from first generation to the last generation. + Args: + run_data: DataFrame containing at least ``id``, ``parent_ids`` and + ``code`` columns. ``name``, ``generation`` and ``fitness`` are + optional but will be preserved if present. + solution_id: Identifier of the final solution. + + Returns: + A list of pd.Series `rows` that present individual lineage in ascending order, oldest -> newest. + """ + data = run_data.copy() + data["parent_ids"] = data["parent_ids"].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) else x + ) + data = data.set_index("id", drop=False) + if solution_id not in data.index: + raise ValueError(f"Unknown solution_id: {solution_id}") + + lineage: list[pd.Series] = [] + id = solution_id + while id: + try: + parent = data.loc[data["id"] == id].iloc[0] + except: + parent = None + if parent is not None: + lineage.append(parent) + pid = parent["parent_ids"] + if pid: + id = pid[0] + else: + id = None + return lineage[::-1] + + +def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: + """Print the code diff chain for ``solution_id``.""" + + chain = code_diff_chain(run_data, solution_id) + if not chain: + return + root = chain[0]["parent"] + print( + "Initial {p} (gen {pg}, fit {pf})".format( + p=root.get("name", root["id"]), + pg=root.get("generation", "?"), + pf=root.get("fitness", "?"), + ) + ) + print(root["code"]) + print() + for step, entry in enumerate(chain, start=1): + parent = entry["parent"] + child = entry["child"] + diff = entry["diff"] + print( + "Step {step}: {p} (gen {pg}, fit {pf}) -> {c} (gen {cg}, fit {cf})".format( + step=step, + p=parent.get("name", parent["id"]), + pg=parent.get("generation", "?"), + pf=parent.get("fitness", "?"), + c=child.get("name", child["id"]), + cg=child.get("generation", "?"), + cf=child.get("fitness", "?"), + ) + ) + print(diff) + print() + + def plot_boxplot_fitness( logger: ExperimentLogger, y_label="Fitness", x_label="Method", problems=None ): diff --git a/iohblade/webapp.py b/iohblade/webapp.py index dadd10b..1309a0e 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -1,25 +1,42 @@ +import difflib import json import os +import re import subprocess import time +import urllib from pathlib import Path +import jsonlines import matplotlib import pandas as pd -import plotly.graph_objects as go import plotly.express as px -import jsonlines +import plotly.graph_objects as go import streamlit as st -import urllib +from st_diff_viewer import diff_viewer -from iohblade.plots import CEG_FEATURES, CEG_FEATURE_LABELS, plotly_code_evolution +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import PythonLexer from iohblade.assets import LOGO_DARK_B64, LOGO_LIGHT_B64 from iohblade.loggers import ExperimentLogger +from iohblade.plots import ( + CEG_FEATURE_LABELS, + CEG_FEATURES, + code_diff_chain, + plotly_code_evolution, + get_code_lineage, +) LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" +if "index" not in st.session_state: + st.session_state.index = 0 +if "max_index" not in st.session_state: + st.session_state.max_index = 0 + def convergence_dataframe(logger: ExperimentLogger) -> pd.DataFrame: methods, problems = logger.get_methods_problems() @@ -55,6 +72,35 @@ def _rgba(color: str, alpha: float) -> str: return color +def _highlight_code(code: str, *, wrap: bool = False) -> str: + formatter = HtmlFormatter(nowrap=True, noclasses=True) + html = highlight(code, PythonLexer(), formatter) + # html = re.sub(r'" + html + "" + ) + return html + + +def _diff_to_html(old: str, new: str) -> str: + diff = difflib.ndiff(old.splitlines(), new.splitlines()) + lines = [] + for line in diff: + tag, text = line[:2], line[2:] + if tag == "+ ": + cls = "background-color:MediumSeaGreen;" + elif tag == "- ": + cls = "background-color:Tomato;" + elif tag == "? ": + continue + else: + cls = "context" + lines.append(f'{_highlight_code(text)}') + return "
".join(lines) + + def plotly_convergence(df: pd.DataFrame, aggregate: bool = False) -> go.Figure: fig = go.Figure() palette = px.colors.qualitative.Plotly # or px.colors.qualitative.D3 @@ -152,6 +198,16 @@ def read_progress(exp_dir): return None +def up_index(): + if st.session_state.index < st.session_state.max_index - 1: + st.session_state.index += 1 + + +def down_index(): + if st.session_state.index > 0: + st.session_state.index -= 1 + + def run() -> None: st.set_page_config( page_title="BLADE Experiment Browser", @@ -293,6 +349,49 @@ def run() -> None: else: st.write("No data for selected run.") + if not run_df.empty: + st.markdown("#### Code Diff Chain") + solutions = run_df.to_dict("records") + best_idx = max( + range(len(solutions)), + key=lambda i: solutions[i].get("fitness", float("-inf")), + ) + solution_choice = st.selectbox( + "Solution", + solutions, + index=best_idx, + key="diff_chain_solution", + format_func=lambda x: ( + f"{x.get('name', x['id'])} (gen {x.get('generation', '?')})" + f" | fit {x.get('fitness', 'n/a')}" + ), + ) + selected_sol = solution_choice["id"] + global index, max_index + index = 0 + + if st.button("Show Diff Chain"): + lineage = get_code_lineage(run_df, selected_sol) + if len(lineage) >= 2: + tabs = st.tabs( + list( + map( + lambda x: f"{x['name']}-{x['generation']}", + lineage[1:], + ) + ) + ) + for index, tab_data in enumerate(tabs): + with tab_data: + diff_viewer( + lineage[index]["code"], + lineage[index + 1]["code"], + left_title=f"{lineage[index]['name']}, gen: {lineage[index]['generation']}, Fitness: {lineage[index]['fitness'] : 0.3f}", + right_title=f"{lineage[index + 1]['name']}, gen: {lineage[index + 1]['generation']}, Fitness: {lineage[index + 1]['fitness']: 0.3f}", + ) + else: + st.write("No parent chain found.") + st.markdown("#### Top Solutions") runs = logger.get_data() for m in method_sel: @@ -345,7 +444,10 @@ def run() -> None: def main() -> None: - subprocess.run(["streamlit", "run", str(Path(__file__))], check=True) + subprocess.run( + ["streamlit", "run", str(Path(__file__)), "--server.fileWatcherType", "none"], + check=True, + ) if __name__ == "__main__": diff --git a/tests/test_plots.py b/tests/test_plots.py index 074ebf0..ff9a12f 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import plotly.graph_objects as go + import iohblade # Adjust imports to match your actual package structure @@ -230,6 +231,29 @@ def test_plotly_code_evolution_xaxis_order(): assert list(marker_trace.x) == [1, 2, 3] +def test_code_diff_chain_produces_diffs(): + df = pd.DataFrame( + { + "id": ["0", "1", "2"], + "parent_ids": ["[]", '["0"]', '["1"]'], + "fitness": [0.0, 0.1, 0.2], + "generation": [0, 1, 2], + "name": ["zero", "one", "two"], + "code": [ + "print('zero')", + "print('one')", + "print('two')", + ], + } + ) + chain = iohblade.plots.code_diff_chain(df, "2") + assert len(chain) == 2 + first = chain[0] + assert first["parent"]["id"] == "0" and first["child"]["id"] == "1" + assert first["child"]["generation"] == 1 + assert "-print('zero')" in first["diff"] + + def test_plot_boxplot_fitness(mock_logger): # The code references the "fitness" column, so we just run it: plot_boxplot_fitness(