From 354fe8d68fad962295e67be7ea2be5478cc35bcf Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Tue, 2 Sep 2025 21:57:43 +0200 Subject: [PATCH 1/8] Add code diff chain visualization --- iohblade/plots.py | 66 +++++++++++++++++++++++++++++++++++++++++++-- iohblade/webapp.py | 31 +++++++++++++++++---- tests/test_plots.py | 21 +++++++++++++++ 3 files changed, 111 insertions(+), 7 deletions(-) diff --git a/iohblade/plots.py b/iohblade/plots.py index 0847e83..f62d876 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -4,12 +4,11 @@ import os from collections import Counter -import plotly.graph_objects as go - import jsonlines import matplotlib.pyplot as plt import numpy as np import pandas as pd +import plotly.graph_objects as go import seaborn as sns from scipy.stats import ttest_ind from sklearn.decomposition import PCA @@ -459,6 +458,69 @@ def plotly_code_evolution( return fig +def code_diff_chain( + run_data: pd.DataFrame, solution_id: str +) -> list[tuple[str, str, str]]: + """Return diffs along the lineage of ``solution_id``. + + The function follows the first parent of each solution until the root is + reached. For every parent-child pair a unified diff of their code is + produced. + + Args: + run_data: DataFrame containing at least ``id``, ``parent_ids`` and + ``code`` columns. + solution_id: Identifier of the final solution. + + Returns: + A list of ``(parent_id, child_id, diff)`` tuples ordered from the first + ancestor to ``solution_id``. + """ + + data = run_data.copy() + data["parent_ids"] = data["parent_ids"].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) else x + ) + data = data.set_index("id") + if solution_id not in data.index: + raise ValueError(f"Unknown solution_id: {solution_id}") + + chain: list[tuple[str, str, str]] = [] + current = solution_id + + while True: + row = data.loc[current] + parents = row["parent_ids"] + if not parents: + break + parent = parents[0] + parent_code = data.loc[parent, "code"] + current_code = row["code"] + diff_lines = difflib.unified_diff( + parent_code.splitlines(), + current_code.splitlines(), + fromfile=str(parent), + tofile=str(current), + lineterm="", + ) + chain.append((parent, current, "\n".join(diff_lines))) + current = parent + + chain.reverse() + return chain + + +def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: + """Print the code diff chain for ``solution_id``.""" + + for step, (parent, child, diff) in enumerate( + code_diff_chain(run_data, solution_id), start=1 + ): + print(f"Step {step}: {parent} -> {child}") + print(diff) + print() + + def plot_boxplot_fitness( logger: ExperimentLogger, y_label="Fitness", x_label="Method", problems=None ): diff --git a/iohblade/webapp.py b/iohblade/webapp.py index dadd10b..233bafd 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -2,20 +2,24 @@ import os import subprocess import time +import urllib from pathlib import Path +import jsonlines import matplotlib import pandas as pd -import plotly.graph_objects as go import plotly.express as px -import jsonlines +import plotly.graph_objects as go import streamlit as st -import urllib - -from iohblade.plots import CEG_FEATURES, CEG_FEATURE_LABELS, plotly_code_evolution from iohblade.assets import LOGO_DARK_B64, LOGO_LIGHT_B64 from iohblade.loggers import ExperimentLogger +from iohblade.plots import ( + CEG_FEATURE_LABELS, + CEG_FEATURES, + code_diff_chain, + plotly_code_evolution, +) LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" @@ -293,6 +297,23 @@ def run() -> None: else: st.write("No data for selected run.") + if not run_df.empty: + st.markdown("#### Code Diff Chain") + solutions = run_df["id"].tolist() + best_sol = run_df.loc[run_df["fitness"].idxmax(), "id"] + sol_index = solutions.index(best_sol) if best_sol in solutions else 0 + selected_sol = st.selectbox( + "Solution ID", solutions, index=sol_index, key="diff_chain_solution" + ) + if st.button("Show Diff Chain", key="show_diff_chain"): + diffs = code_diff_chain(run_df, selected_sol) + if diffs: + for parent, child, diff in diffs: + st.markdown(f"**{parent} -> {child}**") + st.code(diff) + else: + st.write("No parent chain found.") + st.markdown("#### Top Solutions") runs = logger.get_data() for m in method_sel: diff --git a/tests/test_plots.py b/tests/test_plots.py index 074ebf0..1121550 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import plotly.graph_objects as go + import iohblade # Adjust imports to match your actual package structure @@ -230,6 +231,26 @@ def test_plotly_code_evolution_xaxis_order(): assert list(marker_trace.x) == [1, 2, 3] +def test_code_diff_chain_produces_diffs(): + df = pd.DataFrame( + { + "id": ["0", "1", "2"], + "parent_ids": ["[]", '["0"]', '["1"]'], + "fitness": [0.0, 0.1, 0.2], + "code": [ + "print('zero')", + "print('one')", + "print('two')", + ], + } + ) + chain = iohblade.plots.code_diff_chain(df, "2") + assert len(chain) == 2 + first_parent, first_child, first_diff = chain[0] + assert first_parent == "0" and first_child == "1" + assert "-print('zero')" in first_diff + + def test_plot_boxplot_fitness(mock_logger): # The code references the "fitness" column, so we just run it: plot_boxplot_fitness( From a23f4227f48edb21cd68aeaf2af18173e3479f41 Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Tue, 2 Sep 2025 22:10:44 +0200 Subject: [PATCH 2/8] Improve diff chain visualization --- iohblade/plots.py | 46 ++++++++++++++++++++----------- iohblade/webapp.py | 66 +++++++++++++++++++++++++++++++++++++++------ tests/test_plots.py | 9 ++++--- 3 files changed, 95 insertions(+), 26 deletions(-) diff --git a/iohblade/plots.py b/iohblade/plots.py index f62d876..4e97bae 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -460,7 +460,7 @@ def plotly_code_evolution( def code_diff_chain( run_data: pd.DataFrame, solution_id: str -) -> list[tuple[str, str, str]]: +) -> list[dict[str, pd.Series | str]]: """Return diffs along the lineage of ``solution_id``. The function follows the first parent of each solution until the root is @@ -469,23 +469,25 @@ def code_diff_chain( Args: run_data: DataFrame containing at least ``id``, ``parent_ids`` and - ``code`` columns. + ``code`` columns. ``name``, ``generation`` and ``fitness`` are + optional but will be preserved if present. solution_id: Identifier of the final solution. Returns: - A list of ``(parent_id, child_id, diff)`` tuples ordered from the first - ancestor to ``solution_id``. + A list of dictionaries ordered from the first ancestor to + ``solution_id``. Each dictionary has ``parent`` and ``child`` keys with + the respective rows and a ``diff`` key containing a unified diff string. """ data = run_data.copy() data["parent_ids"] = data["parent_ids"].apply( lambda x: ast.literal_eval(x) if isinstance(x, str) else x ) - data = data.set_index("id") + data = data.set_index("id", drop=False) if solution_id not in data.index: raise ValueError(f"Unknown solution_id: {solution_id}") - chain: list[tuple[str, str, str]] = [] + chain: list[dict[str, pd.Series | str]] = [] current = solution_id while True: @@ -493,18 +495,21 @@ def code_diff_chain( parents = row["parent_ids"] if not parents: break - parent = parents[0] - parent_code = data.loc[parent, "code"] + parent_id = parents[0] + parent_row = data.loc[parent_id] + parent_code = parent_row["code"] current_code = row["code"] diff_lines = difflib.unified_diff( parent_code.splitlines(), current_code.splitlines(), - fromfile=str(parent), + fromfile=str(parent_id), tofile=str(current), lineterm="", ) - chain.append((parent, current, "\n".join(diff_lines))) - current = parent + chain.append( + {"parent": parent_row, "child": row, "diff": "\n".join(diff_lines)} + ) + current = parent_id chain.reverse() return chain @@ -513,10 +518,21 @@ def code_diff_chain( def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: """Print the code diff chain for ``solution_id``.""" - for step, (parent, child, diff) in enumerate( - code_diff_chain(run_data, solution_id), start=1 - ): - print(f"Step {step}: {parent} -> {child}") + for step, entry in enumerate(code_diff_chain(run_data, solution_id), start=1): + parent = entry["parent"] + child = entry["child"] + diff = entry["diff"] + print( + "Step {step}: {p} (gen {pg}, fit {pf}) -> {c} (gen {cg}, fit {cf})".format( + step=step, + p=parent.get("name", parent["id"]), + pg=parent.get("generation", "?"), + pf=parent.get("fitness", "?"), + c=child.get("name", child["id"]), + cg=child.get("generation", "?"), + cf=child.get("fitness", "?"), + ) + ) print(diff) print() diff --git a/iohblade/webapp.py b/iohblade/webapp.py index 233bafd..e5eb990 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -1,3 +1,4 @@ +import html import json import os import subprocess @@ -59,6 +60,26 @@ def _rgba(color: str, alpha: float) -> str: return color +def _diff_to_html(diff: str) -> str: + """Render a unified diff string with GitHub-like coloring.""" + + lines = diff.splitlines() + html_lines = [] + for line in lines: + esc = html.escape(line) + if line.startswith("+++") or line.startswith("---"): + html_lines.append(f"{esc}") + elif line.startswith("+"): + html_lines.append(f"{esc}") + elif line.startswith("-"): + html_lines.append(f"{esc}") + elif line.startswith("@@"): + html_lines.append(f"{esc}") + else: + html_lines.append(esc) + return "
".join(html_lines) + + def plotly_convergence(df: pd.DataFrame, aggregate: bool = False) -> go.Figure: fig = go.Figure() palette = px.colors.qualitative.Plotly # or px.colors.qualitative.D3 @@ -299,18 +320,47 @@ def run() -> None: if not run_df.empty: st.markdown("#### Code Diff Chain") - solutions = run_df["id"].tolist() - best_sol = run_df.loc[run_df["fitness"].idxmax(), "id"] - sol_index = solutions.index(best_sol) if best_sol in solutions else 0 - selected_sol = st.selectbox( - "Solution ID", solutions, index=sol_index, key="diff_chain_solution" + solutions = run_df.to_dict("records") + best_idx = max( + range(len(solutions)), + key=lambda i: solutions[i].get("fitness", float("-inf")), ) + selected = st.selectbox( + "Solution", + solutions, + index=best_idx, + key="diff_chain_solution", + format_func=lambda x: ( + f"{x.get('name', x['id'])} (gen {x.get('generation', '?')})" + f" | fit {x.get('fitness', 'n/a')}" + ), + ) + selected_sol = selected["id"] if st.button("Show Diff Chain", key="show_diff_chain"): diffs = code_diff_chain(run_df, selected_sol) if diffs: - for parent, child, diff in diffs: - st.markdown(f"**{parent} -> {child}**") - st.code(diff) + cards = "
" + for entry in diffs: + parent = entry["parent"] + child = entry["child"] + header = ( + f"{parent.get('name', parent['id'])} " + f"(gen {parent.get('generation', '?')}, " + f"fit {parent.get('fitness', 'n/a')}) → " + f"{child.get('name', child['id'])} " + f"(gen {child.get('generation', '?')}, " + f"fit {child.get('fitness', 'n/a')})" + ) + diff_html = _diff_to_html(entry["diff"]) + cards += ( + "
" + f"
{header}
" + f"
{diff_html}
" + "
" + ) + cards += "
" + st.markdown(cards, unsafe_allow_html=True) else: st.write("No parent chain found.") diff --git a/tests/test_plots.py b/tests/test_plots.py index 1121550..ff9a12f 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -237,6 +237,8 @@ def test_code_diff_chain_produces_diffs(): "id": ["0", "1", "2"], "parent_ids": ["[]", '["0"]', '["1"]'], "fitness": [0.0, 0.1, 0.2], + "generation": [0, 1, 2], + "name": ["zero", "one", "two"], "code": [ "print('zero')", "print('one')", @@ -246,9 +248,10 @@ def test_code_diff_chain_produces_diffs(): ) chain = iohblade.plots.code_diff_chain(df, "2") assert len(chain) == 2 - first_parent, first_child, first_diff = chain[0] - assert first_parent == "0" and first_child == "1" - assert "-print('zero')" in first_diff + first = chain[0] + assert first["parent"]["id"] == "0" and first["child"]["id"] == "1" + assert first["child"]["generation"] == 1 + assert "-print('zero')" in first["diff"] def test_plot_boxplot_fitness(mock_logger): From 10443ab9dd04521067b1e138ca980b1931fa905b Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Tue, 2 Sep 2025 22:49:27 +0200 Subject: [PATCH 3/8] Enhance diff chain rendering --- iohblade/plots.py | 22 ++++++++++++++--- iohblade/webapp.py | 61 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/iohblade/plots.py b/iohblade/plots.py index 4e97bae..e3bf6ff 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -499,12 +499,15 @@ def code_diff_chain( parent_row = data.loc[parent_id] parent_code = parent_row["code"] current_code = row["code"] + parent_lines = parent_code.splitlines() + current_lines = current_code.splitlines() diff_lines = difflib.unified_diff( - parent_code.splitlines(), - current_code.splitlines(), + parent_lines, + current_lines, fromfile=str(parent_id), tofile=str(current), lineterm="", + n=max(len(parent_lines), len(current_lines)), ) chain.append( {"parent": parent_row, "child": row, "diff": "\n".join(diff_lines)} @@ -518,7 +521,20 @@ def code_diff_chain( def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: """Print the code diff chain for ``solution_id``.""" - for step, entry in enumerate(code_diff_chain(run_data, solution_id), start=1): + chain = code_diff_chain(run_data, solution_id) + if not chain: + return + root = chain[0]["parent"] + print( + "Initial {p} (gen {pg}, fit {pf})".format( + p=root.get("name", root["id"]), + pg=root.get("generation", "?"), + pf=root.get("fitness", "?"), + ) + ) + print(root["code"]) + print() + for step, entry in enumerate(chain, start=1): parent = entry["parent"] child = entry["child"] diff = entry["diff"] diff --git a/iohblade/webapp.py b/iohblade/webapp.py index e5eb990..178f4b5 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -1,4 +1,3 @@ -import html import json import os import subprocess @@ -12,6 +11,9 @@ import plotly.express as px import plotly.graph_objects as go import streamlit as st +from pygments import highlight +from pygments.formatters import HtmlFormatter +from pygments.lexers import PythonLexer from iohblade.assets import LOGO_DARK_B64, LOGO_LIGHT_B64 from iohblade.loggers import ExperimentLogger @@ -60,24 +62,38 @@ def _rgba(color: str, alpha: float) -> str: return color +_PY_LEXER = PythonLexer() +_HTML_FMT = HtmlFormatter(nowrap=True, style="default", noclasses=True) + + +def _highlight_code(code: str) -> str: + """Return ``code`` highlighted as HTML.""" + + return highlight(code, _PY_LEXER, _HTML_FMT).rstrip() + + def _diff_to_html(diff: str) -> str: - """Render a unified diff string with GitHub-like coloring.""" + """Render a unified diff string with GitHub-like coloring and syntax highlighting.""" lines = diff.splitlines() html_lines = [] for line in lines: - esc = html.escape(line) - if line.startswith("+++") or line.startswith("---"): - html_lines.append(f"{esc}") - elif line.startswith("+"): - html_lines.append(f"{esc}") - elif line.startswith("-"): - html_lines.append(f"{esc}") - elif line.startswith("@@"): - html_lines.append(f"{esc}") + if line.startswith("+++") or line.startswith("---") or line.startswith("@@"): + continue + tag = line[:1] + content = line[1:] if tag in {"+", "-", " "} else line + highlighted = _highlight_code(content) + if tag == "+": + html_lines.append( + f"+{highlighted}" + ) + elif tag == "-": + html_lines.append( + f"-{highlighted}" + ) else: - html_lines.append(esc) - return "
".join(html_lines) + html_lines.append(f"{highlighted}") + return "\n".join(html_lines) def plotly_convergence(df: pd.DataFrame, aggregate: bool = False) -> go.Figure: @@ -325,7 +341,7 @@ def run() -> None: range(len(solutions)), key=lambda i: solutions[i].get("fitness", float("-inf")), ) - selected = st.selectbox( + solution_choice = st.selectbox( "Solution", solutions, index=best_idx, @@ -335,11 +351,24 @@ def run() -> None: f" | fit {x.get('fitness', 'n/a')}" ), ) - selected_sol = selected["id"] + selected_sol = solution_choice["id"] if st.button("Show Diff Chain", key="show_diff_chain"): diffs = code_diff_chain(run_df, selected_sol) if diffs: - cards = "
" + root = diffs[0]["parent"] + root_header = ( + f"{root.get('name', root['id'])} " + f"(gen {root.get('generation', '?')}, fit {root.get('fitness', 'n/a')})" + ) + root_html = _highlight_code(root["code"]) + cards = ( + "
" + "
" + f"
{root_header}
" + f"
{root_html}
" + "
" + ) for entry in diffs: parent = entry["parent"] child = entry["child"] From 0b2663da485f39221bdf37c2a9ddcb9c6c2ddb08 Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Wed, 3 Sep 2025 19:49:33 +0200 Subject: [PATCH 4/8] Ensure HTML spans preserve whitespace (#55) --- iohblade/webapp.py | 51 +++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/iohblade/webapp.py b/iohblade/webapp.py index 178f4b5..22c8a30 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -1,5 +1,7 @@ +import difflib import json import os +import re import subprocess import time import urllib @@ -17,6 +19,7 @@ from iohblade.assets import LOGO_DARK_B64, LOGO_LIGHT_B64 from iohblade.loggers import ExperimentLogger + from iohblade.plots import ( CEG_FEATURE_LABELS, CEG_FEATURES, @@ -24,6 +27,7 @@ plotly_code_evolution, ) + LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" @@ -62,38 +66,29 @@ def _rgba(color: str, alpha: float) -> str: return color -_PY_LEXER = PythonLexer() -_HTML_FMT = HtmlFormatter(nowrap=True, style="default", noclasses=True) - def _highlight_code(code: str) -> str: - """Return ``code`` highlighted as HTML.""" - - return highlight(code, _PY_LEXER, _HTML_FMT).rstrip() - - -def _diff_to_html(diff: str) -> str: - """Render a unified diff string with GitHub-like coloring and syntax highlighting.""" - - lines = diff.splitlines() - html_lines = [] - for line in lines: - if line.startswith("+++") or line.startswith("---") or line.startswith("@@"): + formatter = HtmlFormatter(nowrap=True, noclasses=True) + html = highlight(code, PythonLexer(), formatter) + return re.sub(r' str: + diff = difflib.ndiff(old.splitlines(), new.splitlines()) + lines = [] + for line in diff: + tag, text = line[:2], line[2:] + if tag == "+ ": + cls = "added" + elif tag == "- ": + cls = "removed" + elif tag == "? ": continue - tag = line[:1] - content = line[1:] if tag in {"+", "-", " "} else line - highlighted = _highlight_code(content) - if tag == "+": - html_lines.append( - f"+{highlighted}" - ) - elif tag == "-": - html_lines.append( - f"-{highlighted}" - ) else: - html_lines.append(f"{highlighted}") - return "\n".join(html_lines) + cls = "context" + lines.append(f'{_highlight_code(text)}') + return "
".join(lines) + def plotly_convergence(df: pd.DataFrame, aggregate: bool = False) -> go.Figure: From 7d42fcc2b92d1e7b7fec5627b4fb341ca57f5771 Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Wed, 3 Sep 2025 19:59:08 +0200 Subject: [PATCH 5/8] Fix solution card code highlighting (#57) --- iohblade/webapp.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/iohblade/webapp.py b/iohblade/webapp.py index 22c8a30..c576765 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -19,7 +19,6 @@ from iohblade.assets import LOGO_DARK_B64, LOGO_LIGHT_B64 from iohblade.loggers import ExperimentLogger - from iohblade.plots import ( CEG_FEATURE_LABELS, CEG_FEATURES, @@ -27,7 +26,6 @@ plotly_code_evolution, ) - LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" @@ -66,11 +64,16 @@ def _rgba(color: str, alpha: float) -> str: return color - -def _highlight_code(code: str) -> str: +def _highlight_code(code: str, *, wrap: bool = False) -> str: formatter = HtmlFormatter(nowrap=True, noclasses=True) html = highlight(code, PythonLexer(), formatter) - return re.sub(r'" + html + "" + ) + return html def _diff_to_html(old: str, new: str) -> str: @@ -90,7 +93,6 @@ def _diff_to_html(old: str, new: str) -> str: return "
".join(lines) - def plotly_convergence(df: pd.DataFrame, aggregate: bool = False) -> go.Figure: fig = go.Figure() palette = px.colors.qualitative.Plotly # or px.colors.qualitative.D3 @@ -355,13 +357,13 @@ def run() -> None: f"{root.get('name', root['id'])} " f"(gen {root.get('generation', '?')}, fit {root.get('fitness', 'n/a')})" ) - root_html = _highlight_code(root["code"]) + root_html = _highlight_code(root["code"], wrap=True) cards = ( "
" "
" f"
{root_header}
" - f"
{root_html}
" + f"{root_html}" "
" ) for entry in diffs: From 86250b0c63e4cf972ec4fe7af73bb47df7303dca Mon Sep 17 00:00:00 2001 From: Niki van Stein Date: Wed, 3 Sep 2025 18:05:36 +0000 Subject: [PATCH 6/8] Does not work as expected yet. --- iohblade/webapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iohblade/webapp.py b/iohblade/webapp.py index c576765..2dbe904 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -377,7 +377,7 @@ def run() -> None: f"(gen {child.get('generation', '?')}, " f"fit {child.get('fitness', 'n/a')})" ) - diff_html = _diff_to_html(entry["diff"]) + diff_html = _diff_to_html(parent["code"], child["code"]) cards += ( "
" @@ -442,7 +442,7 @@ def run() -> None: def main() -> None: - subprocess.run(["streamlit", "run", str(Path(__file__))], check=True) + subprocess.run(["streamlit", "run", str(Path(__file__)), "--server.fileWatcherType", "none"], check=True) if __name__ == "__main__": From 31ac02e3e5cd7fa95d1976b51f4c91b059b31a06 Mon Sep 17 00:00:00 2001 From: Ananta Shahane Date: Tue, 14 Oct 2025 15:35:45 +0200 Subject: [PATCH 7/8] single comparison working. --- .gitignore | 3 +- data.csv | 101 +++++++++++++++++++++++++++++++++++++++++++++ iohblade/plots.py | 41 ++++++++++++++++++ iohblade/webapp.py | 79 +++++++++++++++++------------------ 4 files changed, 182 insertions(+), 42 deletions(-) create mode 100644 data.csv diff --git a/.gitignore b/.gitignore index cfe054a..aa6549a 100644 --- a/.gitignore +++ b/.gitignore @@ -176,4 +176,5 @@ BBOB*.zip /stn/ /run/ /setup/ -.vscode/ \ No newline at end of file +.vscode/ +.python-version diff --git a/data.csv b/data.csv new file mode 100644 index 0000000..5d43c3f --- /dev/null +++ b/data.csv @@ -0,0 +1,101 @@ +,id,fitness,name,description,configspace,generation,feedback,error,parent_ids,operator,metadata,task_prompt,method_name,problem_name,seed,_id,cummax_fitness,eval +0,18084962-51ed-4376-8b09-caa9e3c69568,5.955078389147237,AutoCorrCandidate,"Generates a nearly-optimal non-negative function by leveraging a piecewise-constant structure with optimized heights over a central region, tapering to zero at the edges to reduce autocorrelation.",,0,"C1 ratio = 5.95508, best known = 1.5053",,[],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,0,5.955078389147237,1 +1,43b5aacb-4de5-45a9-b846-f2331c03b92f,2.108060230292329,AutoCorrCandidate,"Refines the piecewise-constant function by optimizing the heights of the central, tapering, and edge regions to minimize autocorrelation.",,1,"C1 ratio = 2.10806, best known = 1.5053",,['18084962-51ed-4376-8b09-caa9e3c69568'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,1,5.955078389147237,2 +2,f053dcc7-b434-43ac-a0d0-0167c7569ed2,2.244990298343852,AutoCorrCandidate,"Optimizes a piecewise-constant function with central flat, tapered, and constant-height edge regions, incorporating a dynamically adjusted sigmoid taper and refined parameter bounds to further minimize autocorrelation.",,2,"C1 ratio = 2.24499, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,2,5.955078389147237,3 +3,dc80fb23-aa09-4b26-9a7a-1c0f535b08a9,2.14625421730855,AutoCorrCandidate,"Optimizes a piecewise-constant function with a central flat region, cosine tapers, and optimized edge values using a more robust optimizer and a refined parameterization, adding a small constant offset to avoid zero values, and using a larger number of iterations.",,3,"C1 ratio = 2.14625, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,3,5.955078389147237,4 +4,7e44f5a8-b458-4b51-9e44-87cbd6e3307f,0.0,AutoCorrCandidate,Optimizes a piecewise-constant function with an added Gaussian component and refined tapering to minimize autocorrelation.,,4,calc-error Integral ∫f must be > 0 for C1,calc-failed,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,4,5.955078389147237,5 +5,d236ef84-7086-4204-882b-7151cddc3692,2.072361474351445,AutoCorrCandidate,"Optimizes a piecewise-quadratic function with a central flat region, tapering quadratic sections, and constant edges to minimize autocorrelation.",,5,"C1 ratio = 2.07236, best known = 1.5053",,['43b5aacb-4de5-45a9-b846-f2331c03b92f'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,5,5.955078389147237,6 +6,617268ca-e233-47c4-9a64-e6889dc5560b,2.000000000000122,AutoCorrCandidate,"Optimizes a piecewise-cubic function with smooth transitions between regions by directly controlling the function values at key points, aiming for a flatter autocorrelation peak.",,6,"C1 ratio = 2, best known = 1.5053",,['d236ef84-7086-4204-882b-7151cddc3692'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,6,5.955078389147237,7 +7,01594200-c881-458f-b751-38ebd030cf6c,10.0,AutoCorrCandidate,"Optimizes a raised cosine function with adjustable parameters for a flatter autocorrelation peak, simplifying the parameter space for faster convergence.",,7,"C1 ratio = 10, best known = 1.5053",,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,7,10.0,8 +8,06529ca8-7515-41b3-bfca-120341bfc009,2.000000000000957,AutoCorrCandidate,Optimize a piecewise-quadratic function with a central flat region and parabolic tapers to minimize autocorrelation peak relative to integrated function square.,,8,"C1 ratio = 2, best known = 1.5053",,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,8,10.0,9 +9,f2400a0c-cca8-4bb5-9498-b669eb35e167,0.0,AutoCorrCandidate,Optimizes a piecewise-linear function with a central flat region and linearly decaying edges to minimize the autocorrelation peak relative to the integral squared.,,9,"exec-error could not broadcast input array from shape (240,) into shape (120,)",exec-failed,['617268ca-e233-47c4-9a64-e6889dc5560b'],,{}," + +Write a python class with function `__call__`, that returns a list of floats f of length N. +- Where N is number of bins over [-1/4, 1/4] with discretization of dx = 0.5 / N. +- Auto-convolution of `g = dx * conv(f, f, mode=""full"")`, where g lies in range [-1/2, 1/2]. +- Optimise for objective of minimize max_t (f*f)(t) / (∫ f)^2, where all entries in the list f must be greater than or equal to 0 and do not normalise the f, scaling does not change the score. +- Symmetry or piecewise-constant structure is allowed if helpful. +- Set N = 600 as default. + +",LLaMEA,auto_corr_ineq_1,0,9,10.0,10 diff --git a/iohblade/plots.py b/iohblade/plots.py index e3bf6ff..ad6d0a2 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -518,6 +518,47 @@ def code_diff_chain( return chain +def get_code_lineage(run_data: pd.DataFrame, solution_id: str +) -> list[pd.Series]: + """Return lineage of an individual with id ``solution_id``, across generation. + + The function follows the first parent of each solution until the root is + reached. Generating a chin from first generation to the last generation. + Args: + run_data: DataFrame containing at least ``id``, ``parent_ids`` and + ``code`` columns. ``name``, ``generation`` and ``fitness`` are + optional but will be preserved if present. + solution_id: Identifier of the final solution. + + Returns: + A list of pd.Series `rows` that present individual lineage in ascending order, oldest -> newest. + """ + data = run_data.copy() + data["parent_ids"] = data["parent_ids"].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) else x + ) + data = data.set_index("id", drop=False) + if solution_id not in data.index: + raise ValueError(f"Unknown solution_id: {solution_id}") + + lineage: list[pd.Series] = [] + id = solution_id + while id: + try: + parent = data.loc[data["id"] == id].iloc[0] + except: + parent = None + if parent is not None: + lineage.append( + parent + ) + pid = parent["parent_ids"] + if pid: + id = pid[0] + else: + id = None + return lineage[::-1] + def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: """Print the code diff chain for ``solution_id``.""" diff --git a/iohblade/webapp.py b/iohblade/webapp.py index 2dbe904..f1e0c1d 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -13,6 +13,8 @@ import plotly.express as px import plotly.graph_objects as go import streamlit as st +from st_diff_viewer import diff_viewer + from pygments import highlight from pygments.formatters import HtmlFormatter from pygments.lexers import PythonLexer @@ -24,11 +26,15 @@ CEG_FEATURES, code_diff_chain, plotly_code_evolution, + get_code_lineage ) LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" +index = 0 +max_index = 0 + def convergence_dataframe(logger: ExperimentLogger) -> pd.DataFrame: methods, problems = logger.get_methods_problems() @@ -67,7 +73,7 @@ def _rgba(color: str, alpha: float) -> str: def _highlight_code(code: str, *, wrap: bool = False) -> str: formatter = HtmlFormatter(nowrap=True, noclasses=True) html = highlight(code, PythonLexer(), formatter) - html = re.sub(r'{_highlight_code(text)}') + lines.append(f'{_highlight_code(text)}') return "
".join(lines) @@ -189,6 +195,17 @@ def read_progress(exp_dir): return json.load(f) return None +def up_index(): + global index, max_index + if max_index - index > 1: + index += 1 + + +def down_index(): + global index + if index > 0: + index -= 1 + def run() -> None: st.set_page_config( @@ -349,44 +366,24 @@ def run() -> None: ), ) selected_sol = solution_choice["id"] - if st.button("Show Diff Chain", key="show_diff_chain"): + global index, max_index + index = 0 + + if st.button("Show Diff Chain"): diffs = code_diff_chain(run_df, selected_sol) - if diffs: - root = diffs[0]["parent"] - root_header = ( - f"{root.get('name', root['id'])} " - f"(gen {root.get('generation', '?')}, fit {root.get('fitness', 'n/a')})" - ) - root_html = _highlight_code(root["code"], wrap=True) - cards = ( - "
" - "
" - f"
{root_header}
" - f"{root_html}" - "
" - ) - for entry in diffs: - parent = entry["parent"] - child = entry["child"] - header = ( - f"{parent.get('name', parent['id'])} " - f"(gen {parent.get('generation', '?')}, " - f"fit {parent.get('fitness', 'n/a')}) → " - f"{child.get('name', child['id'])} " - f"(gen {child.get('generation', '?')}, " - f"fit {child.get('fitness', 'n/a')})" - ) - diff_html = _diff_to_html(parent["code"], child["code"]) - cards += ( - "
" - f"
{header}
" - f"
{diff_html}
" - "
" - ) - cards += "
" - st.markdown(cards, unsafe_allow_html=True) + lineage = get_code_lineage(run_df, selected_sol) + max_index = len(lineage) - 1 + index = 0 + if st.button("prev", key="down_index"): + pass + if st.button("next", key="up_index"): + pass + if len(lineage) >= 2: + diff_viewer(lineage[index]["code"], + lineage[index + 1]["code"], + left_title=f"{lineage[index]['name']}, gen: {lineage[index]['generation']}, Fitness: {lineage[index]['fitness'] : 0.3f}", + right_title=f"{lineage[index + 1]['name']}, gen: {lineage[index + 1]['generation']}, Fitness: {lineage[index + 1]['fitness']: 0.3f}" + ) else: st.write("No parent chain found.") From dd9e21d0b75773b9f87e387610c55b29036eada8 Mon Sep 17 00:00:00 2001 From: Ananta Shahane Date: Tue, 14 Oct 2025 16:28:10 +0200 Subject: [PATCH 8/8] Working diff viewer. --- iohblade/plots.py | 8 +++----- iohblade/webapp.py | 48 +++++++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/iohblade/plots.py b/iohblade/plots.py index ad6d0a2..18b7c14 100644 --- a/iohblade/plots.py +++ b/iohblade/plots.py @@ -518,8 +518,7 @@ def code_diff_chain( return chain -def get_code_lineage(run_data: pd.DataFrame, solution_id: str -) -> list[pd.Series]: +def get_code_lineage(run_data: pd.DataFrame, solution_id: str) -> list[pd.Series]: """Return lineage of an individual with id ``solution_id``, across generation. The function follows the first parent of each solution until the root is @@ -549,9 +548,7 @@ def get_code_lineage(run_data: pd.DataFrame, solution_id: str except: parent = None if parent is not None: - lineage.append( - parent - ) + lineage.append(parent) pid = parent["parent_ids"] if pid: id = pid[0] @@ -559,6 +556,7 @@ def get_code_lineage(run_data: pd.DataFrame, solution_id: str id = None return lineage[::-1] + def print_code_diff_chain(run_data: pd.DataFrame, solution_id: str) -> None: """Print the code diff chain for ``solution_id``.""" diff --git a/iohblade/webapp.py b/iohblade/webapp.py index f1e0c1d..1309a0e 100644 --- a/iohblade/webapp.py +++ b/iohblade/webapp.py @@ -26,14 +26,16 @@ CEG_FEATURES, code_diff_chain, plotly_code_evolution, - get_code_lineage + get_code_lineage, ) LOGO_LIGHT = f"data:image/png;base64,{LOGO_LIGHT_B64}" LOGO_DARK = f"data:image/png;base64,{LOGO_DARK_B64}" -index = 0 -max_index = 0 +if "index" not in st.session_state: + st.session_state.index = 0 +if "max_index" not in st.session_state: + st.session_state.max_index = 0 def convergence_dataframe(logger: ExperimentLogger) -> pd.DataFrame: @@ -195,16 +197,15 @@ def read_progress(exp_dir): return json.load(f) return None + def up_index(): - global index, max_index - if max_index - index > 1: - index += 1 + if st.session_state.index < st.session_state.max_index - 1: + st.session_state.index += 1 def down_index(): - global index - if index > 0: - index -= 1 + if st.session_state.index > 0: + st.session_state.index -= 1 def run() -> None: @@ -370,20 +371,24 @@ def run() -> None: index = 0 if st.button("Show Diff Chain"): - diffs = code_diff_chain(run_df, selected_sol) lineage = get_code_lineage(run_df, selected_sol) - max_index = len(lineage) - 1 - index = 0 - if st.button("prev", key="down_index"): - pass - if st.button("next", key="up_index"): - pass if len(lineage) >= 2: - diff_viewer(lineage[index]["code"], + tabs = st.tabs( + list( + map( + lambda x: f"{x['name']}-{x['generation']}", + lineage[1:], + ) + ) + ) + for index, tab_data in enumerate(tabs): + with tab_data: + diff_viewer( + lineage[index]["code"], lineage[index + 1]["code"], left_title=f"{lineage[index]['name']}, gen: {lineage[index]['generation']}, Fitness: {lineage[index]['fitness'] : 0.3f}", - right_title=f"{lineage[index + 1]['name']}, gen: {lineage[index + 1]['generation']}, Fitness: {lineage[index + 1]['fitness']: 0.3f}" - ) + right_title=f"{lineage[index + 1]['name']}, gen: {lineage[index + 1]['generation']}, Fitness: {lineage[index + 1]['fitness']: 0.3f}", + ) else: st.write("No parent chain found.") @@ -439,7 +444,10 @@ def run() -> None: def main() -> None: - subprocess.run(["streamlit", "run", str(Path(__file__)), "--server.fileWatcherType", "none"], check=True) + subprocess.run( + ["streamlit", "run", str(Path(__file__)), "--server.fileWatcherType", "none"], + check=True, + ) if __name__ == "__main__":