diff --git a/docs/source/developer_guide/add_benchmarks.rst b/docs/source/developer_guide/add_benchmarks.rst index 8b609b145..ac91ae7fb 100644 --- a/docs/source/developer_guide/add_benchmarks.rst +++ b/docs/source/developer_guide/add_benchmarks.rst @@ -501,3 +501,36 @@ Building those components and their interactivity should become increasingly aut but less standard plots/interactions will need setting up. For now, please contact us to help with this process. + +Framework credit tags ++++++++++++++++++++++ + +If a benchmark comes from an external benchmarking framework (for example, +MLIP Arena), add a framework credit tag as follows: + +1. Add/update the framework entry in ``ml_peg/app/utils/frameworks.yml``. + +.. code-block:: yaml + + mlip_arena: + label: MLIP Arena + color: "#0f766e" + text_color: "#ecfeff" + url: "https://huggingface.co/spaces/atomind/mlip-arena" + logo: "https://.../logo.svg" + +2. Set ``framework_id`` in the benchmark app constructor. + +.. code-block:: python3 + + return SomeBenchmarkApp( + name="SomeBenchmark", + ..., + framework_id="mlip_arena", + ) + +That is all that is required. The benchmark header badge and the framework tab +(added for each framework other than ``ml_peg``) are populated automatically from this metadata. + +The ``url`` and ``logo`` fields are optional; ``label``, ``color`` and ``text_color`` are required. ``logo`` can point to a remote image URL or a local +Dash asset path such as ``assets/frameworks/my_framework_logo.png``. diff --git a/ml_peg/app/base_app.py b/ml_peg/app/base_app.py index f4c0ccd00..0f380f5f1 100644 --- a/ml_peg/app/base_app.py +++ b/ml_peg/app/base_app.py @@ -10,6 +10,7 @@ from ml_peg.app.utils.build_components import build_test_layout from ml_peg.app.utils.load import rebuild_table +from ml_peg.app.utils.utils import normalize_framework_id class BaseApp(ABC): @@ -28,6 +29,9 @@ class BaseApp(ABC): List of other Dash components to add to app. docs_url URL for online documentation. Default is None. + framework_id + Framework identifier used for benchmark attribution tags. Default is + ``"ml_peg"``. 
""" def __init__( @@ -37,6 +41,7 @@ def __init__( table_path: Path, extra_components: list[Component], docs_url: str | None = None, + framework_id: str = "ml_peg", ): """ Initiaise class. @@ -53,12 +58,15 @@ def __init__( List of other Dash components to add to app. docs_url URL to online documentation. Default is None. + framework_id + Framework identifier used for benchmark attribution tags. """ self.name = name self.description = description self.table_path = table_path self.extra_components = extra_components self.docs_url = docs_url + self.framework_id = normalize_framework_id(framework_id) self.table_id = f"{self.name}-table" self.table = rebuild_table( self.table_path, id=self.table_id, description=description @@ -80,6 +88,7 @@ def build_layout(self) -> Div: name=self.name, description=self.description, docs_url=self.docs_url, + framework_id=self.framework_id, table=self.table, thresholds=getattr(self.table, "thresholds", None), extra_components=self.extra_components, diff --git a/ml_peg/app/build_app.py b/ml_peg/app/build_app.py index 6867dc10f..e0ad8ec56 100644 --- a/ml_peg/app/build_app.py +++ b/ml_peg/app/build_app.py @@ -8,7 +8,7 @@ from dash import Dash, Input, Output, callback from dash.dash_table import DataTable from dash.dcc import Loading, Store, Tab, Tabs -from dash.html import H1, H3, Div +from dash.html import H1, H3, Div, Img, Span from yaml import safe_load from ml_peg.analysis.utils.utils import calc_table_scores, get_table_style @@ -27,6 +27,7 @@ from ml_peg.app.utils.utils import ( build_level_of_theory_warnings, calculate_column_widths, + get_framework_config, load_model_registry_configs, sig_fig_format, ) @@ -39,7 +40,11 @@ def get_all_tests( category: str = "*", -) -> tuple[dict[str, dict[str, list[Div]]], dict[str, dict[str, DataTable]]]: +) -> tuple[ + dict[str, dict[str, list[Div]]], + dict[str, dict[str, DataTable]], + dict[str, dict[str, str]], +]: """ Get layout and register callbacks for all categories. 
@@ -50,8 +55,8 @@ def get_all_tests( Returns ------- - tuple[dict[str, dict[str, list[Div]]], dict[str, dict[str, DataTable]]] - Layouts and tables for all categories. + tuple + Layouts, tables, and framework IDs for all categories. """ # Find Python files e.g. app_OC157.py in mlip_tesing.app module. # We will get the category from the parent's parent directory @@ -59,6 +64,7 @@ def get_all_tests( tests = APP_ROOT.glob(f"{category}/*/app*.py") layouts = {} tables = {} + frameworks = {} # Build all layouts, and register all callbacks to main app. for test in tests: @@ -75,8 +81,10 @@ def get_all_tests( if category_name not in layouts: layouts[category_name] = {} tables[category_name] = {} + frameworks[category_name] = {} layouts[category_name][test_app.name] = test_app.layout tables[category_name][test_app.name] = test_app.table + frameworks[category_name][test_app.name] = test_app.framework_id except FileNotFoundError as err: warnings.warn( f"Unable to load layout for {test_name} in {category_name} category. " @@ -96,13 +104,19 @@ def get_all_tests( ) continue - return layouts, tables + return layouts, tables, frameworks def build_category( all_layouts: dict[str, dict[str, list[Div]]], all_tables: dict[str, dict[str, DataTable]], -) -> tuple[dict[str, list[Div]], dict[str, DataTable]]: + all_frameworks: dict[str, dict[str, str]], +) -> tuple[ + dict[str, dict[str, object]], + dict[str, DataTable], + dict[str, float], + set[str], +]: """ Build category layouts and summary tables. @@ -112,16 +126,19 @@ def build_category( Layouts of all tests, grouped by category. all_tables Tables for all tests, grouped by category. + all_frameworks + Framework IDs for all tests, grouped by category. Returns ------- - tuple[dict[str, list[Div]], dict[str, DataTable]] - Dictionary of category layouts, and dictionary of category summary tables. + tuple + Category view metadata, category summary tables, category weights, and all + discovered framework IDs. 
""" - # Take all tables in category, build new table, and set layout - category_layouts = {} + category_views = {} category_tables = {} category_weights = {} + framework_ids: set[str] = set() # `category` corresponds to the category's directory name # We will use the loaded `category_title` for IDs/dictionary keys returned @@ -160,33 +177,25 @@ def build_category( column_widths=getattr(summary_table, "column_widths", None), ) - # Build full layout with summary table, weight controls, and test layouts - category_layouts[category_title] = Div( - [ - H1(category_title), - H3(category_descrip), - summary_table, - Store( - id=f"{category_title}-summary-table-computed-store", - storage_type="session", - data=summary_table.data, - ), - weight_components, - Div( - [ - Div( - style={ - "width": "100%", - "height": "1px", - "backgroundColor": "#a7adb3", - } - ), - ], - style={"margin": "32px 0 24px"}, - ), - Div([all_layouts[category][test] for test in all_layouts[category]]), - ] - ) + test_entries = [] + for test_name in all_layouts[category]: + framework_id = all_frameworks[category][test_name] + framework_ids.add(framework_id) + test_entries.append( + { + "name": test_name, + "framework_id": framework_id, + "layout": all_layouts[category][test_name], + } + ) + + category_views[category_title] = { + "title": category_title, + "description": category_descrip, + "summary_table": summary_table, + "weight_components": weight_components, + "tests": test_entries, + } # Register benchmark table -> category table callbacks # Category summary table columns add "Score" to name for clarity @@ -198,7 +207,146 @@ def build_category( model_name_map=getattr(benchmark_table, "model_name_map", None), ) - return category_layouts, category_tables, category_weights + return category_views, category_tables, category_weights, framework_ids + + +def build_category_tab_layout( + category_view: dict[str, object], +) -> Div: + """ + Build category tab layout. 
+ + Parameters + ---------- + category_view + Category metadata including summary table, controls, and benchmark layouts. + + Returns + ------- + Div + Category tab layout. + """ + category_title = category_view["title"] + category_description = category_view["description"] + summary_table = category_view["summary_table"] + weight_components = category_view["weight_components"] + tests = category_view["tests"] + benchmark_section = Div([test["layout"] for test in tests]) + + return Div( + [ + H1(category_title), + H3(category_description), + summary_table, + Store( + id=f"{category_title}-summary-table-computed-store", + storage_type="session", + data=summary_table.data, + ), + weight_components, + Div( + [ + Div( + style={ + "width": "100%", + "height": "1px", + "backgroundColor": "#a7adb3", + } + ), + ], + style={"margin": "32px 0 24px"}, + ), + benchmark_section, + ] + ) + + +def build_framework_tab_views( + category_views: dict[str, dict[str, object]], + framework_ids: set[str], +) -> dict[str, dict[str, object]]: + """ + Build framework-focused tab metadata for non-ML-PEG frameworks. + + Parameters + ---------- + category_views + Category metadata including benchmark layout components. + framework_ids + All framework IDs discovered from benchmark apps. + + Returns + ------- + dict[str, dict[str, object]] + Mapping of framework ID to grouped benchmark layouts by category. 
+ """ + framework_views: dict[str, dict[str, object]] = {} + for framework_id in sorted(framework_ids): + if framework_id == "ml_peg": + continue + + category_groups = [] + for category_name, category_view in category_views.items(): + tests = [ + test["layout"] + for test in category_view["tests"] + if test["framework_id"] == framework_id + ] + if tests: + category_groups.append({"category": category_name, "tests": tests}) + + if category_groups: + config = get_framework_config(framework_id) + framework_views[framework_id] = { + "framework_id": framework_id, + "label": config["label"], + "logo": config.get("logo"), + "category_groups": category_groups, + } + return framework_views + + +def build_framework_tab_layout(framework_view: dict[str, object]) -> Div: + """ + Build a framework-focused tab containing duplicate benchmark sections. + + Parameters + ---------- + framework_view + Framework tab metadata with grouped benchmark layouts by category. + + Returns + ------- + Div + Framework tab layout. + """ + framework_label = framework_view["label"] + category_groups = framework_view["category_groups"] + + sections = [] + for group in category_groups: + sections.append(H3(group["category"], style={"marginTop": "26px"})) + sections.append(Div(group["tests"])) + + return Div( + [ + H1(f"{framework_label} Benchmarks"), + Div( + ( + "These benchmark sections are duplicates of the category tabs for " + "easier collection. Benchmark controls and weights stay in sync." 
+ ), + style={ + "fontSize": "13px", + "fontStyle": "italic", + "color": "#64748b", + "marginTop": "8px", + "marginBottom": "8px", + }, + ), + *sections, + ] + ) def build_summary_table( @@ -254,7 +402,7 @@ def build_summary_table( row[category_col] = summary_data[mlip].get(category_col, None) data.append(row) - data = calc_table_scores(data) + data = calc_table_scores(data, weights=weights) columns_headers = ("MLIP",) + tuple(key + " Score" for key in tables) + ("Score",) @@ -353,7 +501,8 @@ def build_summary_table( def build_tabs( full_app: Dash, - layouts: dict[str, list[Div]], + category_views: dict[str, dict[str, object]], + framework_tab_views: dict[str, dict[str, object]], summary_table: DataTable, weight_components: Div, ) -> None: @@ -364,16 +513,55 @@ def build_tabs( ---------- full_app Full application with all sub-apps. - layouts - Layouts for all tabs. + category_views + Category metadata required to render tab content. + framework_tab_views + Framework tab metadata for additional non-ML-PEG frameworks. summary_table Summary table with score from each category. weight_components Weight sliders, text boxes and reset button. 
""" - all_tabs = [Tab(label="Summary", value="summary-tab", id="summary-tab")] + [ - Tab(label=category_name, value=category_name) for category_name in layouts - ] + framework_tabs = [] + for framework_id in sorted(framework_tab_views): + framework_view = framework_tab_views[framework_id] + tab_label: str | Div = framework_view["label"] + logo = framework_view.get("logo") + if isinstance(logo, str) and logo: + tab_label = Div( + [ + Img( + src=logo, + alt=f"{framework_view['label']} logo", + style={ + "width": "14px", + "height": "14px", + "borderRadius": "50%", + "objectFit": "cover", + }, + ), + Span(framework_view["label"]), + ], + style={ + "display": "inline-flex", + "alignItems": "center", + "gap": "6px", + }, + ) + framework_tabs.append( + Tab( + label=tab_label, + value=f"framework-{framework_id}", + ) + ) + all_tabs = ( + [Tab(label="Summary", value="summary-tab", id="summary-tab")] + + [ + Tab(label=category_name, value=category_name) + for category_name in category_views + ] + + framework_tabs + ) tabs_layout = [ build_onboarding_modal(), @@ -412,8 +600,11 @@ def build_tabs( style={"display": "flex", "flexDirection": "column", "minHeight": "100vh"}, ) - @callback(Output("tabs-content", "children"), Input("all-tabs", "value")) - def select_tab(tab) -> Div: + @callback( + Output("tabs-content", "children"), + Input("all-tabs", "value"), + ) + def select_tab(tab: str) -> Div: """ Select tab contents to be displayed. @@ -440,7 +631,10 @@ def select_tab(tab) -> Div: build_faqs(), ] ) - return Div([layouts[tab]]) + if tab.startswith("framework-"): + framework_id = tab.removeprefix("framework-") + return Div([build_framework_tab_layout(framework_tab_views[framework_id])]) + return Div([build_category_tab_layout(category_views[tab])]) def build_full_app(full_app: Dash, category: str = "*") -> None: @@ -455,13 +649,16 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: Category to build app for. Default is `*`, corresponding to all categories. 
""" # Get layouts and tables for each test, grouped by categories - all_layouts, all_tables = get_all_tests(category=category) + all_layouts, all_tables, all_frameworks = get_all_tests(category=category) if not all_layouts: raise ValueError("No tests were built successfully") # Combine tests into categories and create category summary - cat_layouts, cat_tables, cat_weights = build_category(all_layouts, all_tables) + cat_views, cat_tables, cat_weights, framework_ids = build_category( + all_layouts, all_tables, all_frameworks + ) + framework_tab_views = build_framework_tab_views(cat_views, framework_ids) # Build overall summary table summary_table = build_summary_table(cat_tables, weights=cat_weights) weight_components = build_weight_components( @@ -470,5 +667,11 @@ def build_full_app(full_app: Dash, category: str = "*") -> None: column_widths=summary_table.column_widths, ) # Build summary and category tabs - build_tabs(full_app, cat_layouts, summary_table, weight_components) + build_tabs( + full_app, + cat_views, + framework_tab_views, + summary_table, + weight_components, + ) register_onboarding_callbacks() diff --git a/ml_peg/app/utils/build_components.py b/ml_peg/app/utils/build_components.py index d622aa74d..8dfea94c0 100644 --- a/ml_peg/app/utils/build_components.py +++ b/ml_peg/app/utils/build_components.py @@ -21,7 +21,7 @@ register_summary_table_callbacks, register_weight_callbacks, ) -from ml_peg.app.utils.utils import calculate_column_widths +from ml_peg.app.utils.utils import calculate_column_widths, get_framework_config def grid_template_from_widths( @@ -522,9 +522,78 @@ def build_footer() -> html.Footer: ) +def build_framework_badge(framework_id: str) -> Component: + """ + Build a visual framework attribution badge. + + Parameters + ---------- + framework_id + Framework identifier for the benchmark. + + Returns + ------- + Component + Styled badge, wrapped as a link when framework docs URL is configured. 
+ """ + config = get_framework_config(framework_id) + label = config["label"] + color = config["color"] + text_color = config["text_color"] + logo = config.get("logo") + url = config.get("url") + + badge_style = { + "display": "inline-flex", + "alignItems": "center", + "padding": "2px 8px", + "borderRadius": "999px", + "fontSize": "11px", + "fontWeight": "600", + "letterSpacing": "0.02em", + "textTransform": "uppercase", + "backgroundColor": color, + "color": text_color, + "lineHeight": "1.8", + } + + badge_children: list[Component] = [] + if logo: + badge_children.append( + html.Img( + src=logo, + alt=f"{label} logo", + style={ + "width": "14px", + "height": "14px", + "borderRadius": "50%", + "objectFit": "cover", + }, + ) + ) + badge_children.append(html.Span(label)) + badge = html.Span( + badge_children, + style={ + **badge_style, + "gap": "6px", + }, + ) + if url: + return html.A( + badge, + href=url, + target="_blank", + style={"textDecoration": "none"}, + title=f"Open {label} website", + ) + return badge + + def build_test_layout( name: str, description: str, + framework_id: str, table: DataTable, extra_components: list[Component] | None = None, docs_url: str | None = None, @@ -540,6 +609,8 @@ def build_test_layout( Name of test. description Description of test. + framework_id + Framework identifier used to render attribution badge. table Dash Table with metric results. Can include a `weights` attribute to be used by `build_weight_components`. @@ -561,7 +632,18 @@ def build_test_layout( Layout for test layout. 
""" layout_contents = [ - H2(name, style={"color": "black"}), + Div( + [ + H2(name, style={"color": "black", "margin": "0"}), + build_framework_badge(framework_id), + ], + style={ + "display": "flex", + "alignItems": "center", + "flexWrap": "wrap", + "gap": "10px", + }, + ), H3(description), ] diff --git a/ml_peg/app/utils/frameworks.yml b/ml_peg/app/utils/frameworks.yml new file mode 100644 index 000000000..0d4f9a20a --- /dev/null +++ b/ml_peg/app/utils/frameworks.yml @@ -0,0 +1,12 @@ +ml_peg: + label: ML-PEG + color: "#334155" + text_color: "#ffffff" + url: "https://github.com/ddmms/ml-peg" + +mlip_arena: + label: MLIP Arena + color: "#0f766e" + text_color: "#ecfeff" + url: "https://huggingface.co/spaces/atomind/mlip-arena" + logo: "https://huggingface.co/front/assets/huggingface_logo-noborder.svg" diff --git a/ml_peg/app/utils/utils.py b/ml_peg/app/utils/utils.py index 1b408c3df..59b01d713 100644 --- a/ml_peg/app/utils/utils.py +++ b/ml_peg/app/utils/utils.py @@ -5,7 +5,8 @@ from collections.abc import Mapping, MutableMapping, Sequence from functools import lru_cache import json -from typing import Any, TypedDict +from pathlib import Path +from typing import Any, NotRequired, TypedDict import dash.dash_table.Format as TableFormat import yaml @@ -24,6 +25,16 @@ class ThresholdEntry(TypedDict): Thresholds = dict[str, ThresholdEntry] +class FrameworkEntry(TypedDict): + """Style and link metadata for benchmark framework attribution badges.""" + + label: str + color: str + text_color: str + url: NotRequired[str] + logo: NotRequired[str] + + def calculate_column_widths( columns: list[str], widths: dict[str, float] | None = None, @@ -820,3 +831,98 @@ def load_model_registry_configs() -> dict[str, Any]: except FileNotFoundError: pass return {} + + +def normalize_framework_id(framework_id: str) -> str: + """ + Normalize framework identifiers. + + Parameters + ---------- + framework_id + Raw framework identifier from app metadata. 
+ + Returns + ------- + str + Normalized framework identifier. + """ + cleaned = framework_id.strip() + if not cleaned: + raise ValueError("Framework identifiers must be non-empty strings.") + return cleaned + + +def load_framework_registry() -> dict[str, FrameworkEntry]: + """ + Load framework badge metadata from ``frameworks.yml``. + + Returns + ------- + dict[str, FrameworkEntry] + Mapping of framework IDs to display configuration. + """ + registry: dict[str, FrameworkEntry] = {} + config_path = Path(__file__).with_name("frameworks.yml") + with config_path.open(encoding="utf8") as handle: + loaded = yaml.safe_load(handle) + + if not isinstance(loaded, dict): + raise ValueError("frameworks.yml must map framework IDs to config entries.") + + for framework_id, raw_entry in loaded.items(): + normalized_id = normalize_framework_id(framework_id) + if not isinstance(raw_entry, dict): + raise ValueError( + f"frameworks.yml entry for '{normalized_id}' must be a dictionary." + ) + try: + label = raw_entry["label"].strip() + color = raw_entry["color"].strip() + text_color = raw_entry["text_color"].strip() + except (KeyError, AttributeError) as exc: + raise ValueError( + f"frameworks.yml entry for '{normalized_id}' must include string " + "'label', 'color', and 'text_color' fields." + ) from exc + if not label or not color or not text_color: + raise ValueError( + f"frameworks.yml entry for '{normalized_id}' contains empty " + "'label', 'color', or 'text_color' values." 
+ ) + + registry_entry: FrameworkEntry = { + "label": label, + "color": color, + "text_color": text_color, + } + + url = raw_entry.get("url") + if isinstance(url, str) and url.strip(): + registry_entry["url"] = url.strip() + logo = raw_entry.get("logo") + if isinstance(logo, str) and logo.strip(): + registry_entry["logo"] = logo.strip() + + registry[normalized_id] = registry_entry + + return registry + + +def get_framework_config(framework_id: str) -> FrameworkEntry: + """ + Resolve framework metadata for badge and filter rendering. + + Parameters + ---------- + framework_id + Framework identifier from benchmark app metadata. + + Returns + ------- + FrameworkEntry + Style, label, and optional URL metadata for the framework. + """ + normalized_id = normalize_framework_id(framework_id) + registry = load_framework_registry() + return registry[normalized_id]