From b542fd380ea0cfe6403c12cf9d5196adc99ceb6c Mon Sep 17 00:00:00 2001
From: Leon Wehrhan
Date: Tue, 10 Mar 2026 18:36:55 +0100
Subject: [PATCH 1/4] feat: calc folmsbee

---
 .../conformers/Folmsbee/calc_Folmsbee.py      | 101 ++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py

diff --git a/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py b/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
new file mode 100644
index 000000000..1350ebb0f
--- /dev/null
+++ b/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
@@ -0,0 +1,101 @@
+"""
+Compute the Folmsbee dataset of molecular conformers.
+
+Assessing conformer energies using electronic structure and
+machine learning methods
+
+Dakota Folmsbee, Geoffrey Hutchison
+International Journal of Quantum Chemistry 2020 121 (1) e26381
+DOI: 10.1002/qua.26381
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from ase import Atoms, units
+from ase.io import write
+import pytest
+from tqdm import tqdm
+
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+KCAL_TO_EV = units.kcal / units.mol
+EV_TO_KCAL = 1 / KCAL_TO_EV
+
+OUT_PATH = Path(__file__).parent / "outputs"
+
+
+def get_relative_energies(energies: list[float], ref_idx: int) -> list[float]:
+    """
+    Get energies relative to reference.
+
+    Parameters
+    ----------
+    energies
+        List of energy values.
+    ref_idx
+        Index of reference energy.
+
+    Returns
+    -------
+    list[float]
+        Energies relative to the reference conformer.
+    """
+    return [x - energies[ref_idx] for x in energies]
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_folmsbee(mlip: tuple[str, Any]) -> None:
+    """
+    Benchmark the Folmsbee dataset.
+
+    Parameters
+    ----------
+    mlip
+        Name of model used and model to get calculator.
+    """
+    model_name, model = mlip
+    # Use double precision
+    model.default_dtype = "float64"
+    calc = model.get_calculator()
+    # Add D3 calculator for this test
+    calc = model.add_d3_calculator(calc)
+
+    data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
+    out_path = OUT_PATH / model_name
+
+    with open(data_path, encoding="utf-8") as f:
+        data = json.load(f)
+    progress = tqdm(total=len(data))
+    for structure_data in data:
+        structure_name = structure_data["molecule_name"]
+        conformers = []
+        model_energies = []
+        raw_energies = structure_data["dft_energy_profile"]
+        ref_min_conformer_idx = raw_energies.index(min(raw_energies))
+        ref_energies = get_relative_energies(raw_energies, ref_min_conformer_idx)
+
+        for i, conf_positions in enumerate(structure_data["conformer_coordinates"]):
+            conf_atoms = Atoms(
+                positions=conf_positions, symbols=structure_data["atom_symbols"]
+            )
+            conf_atoms.calc = calc
+            conf_atoms.info.update({"charge": 0, "spin": 1})
+            conf_atoms.info["ref_rel_energy"] = ref_energies[i]
+
+            conformers.append(conf_atoms)
+            model_energies.append(conf_atoms.get_potential_energy())
+
+        model_energies = get_relative_energies(model_energies, ref_min_conformer_idx)
+        out_path.mkdir(parents=True, exist_ok=True)
+        for i, conf_atoms in enumerate(conformers):
+            conf_atoms.info["model_rel_energy"] = model_energies[i]
+            write(out_path / f"{structure_name}_conf{i}.xyz", conf_atoms)
+
+        progress.update()

From e82bc26021c34bf73e3d77b9744b70fd6915b241 Mon Sep 17 00:00:00 2001
From: Leon Wehrhan
Date: Tue, 10 Mar 2026 18:39:46 +0100
Subject: [PATCH 2/4] fix: units

---
 ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py b/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
index 1350ebb0f..75143c3bc 100644
--- a/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
+++ b/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
@@ -26,7 +26,6 @@
 MODELS = load_models(current_models)
 
 KCAL_TO_EV = units.kcal / units.mol
-EV_TO_KCAL = 1 / KCAL_TO_EV
 
 OUT_PATH = Path(__file__).parent / "outputs"
 
@@ -80,6 +79,7 @@ def test_folmsbee(mlip: tuple[str, Any]) -> None:
         raw_energies = structure_data["dft_energy_profile"]
         ref_min_conformer_idx = raw_energies.index(min(raw_energies))
         ref_energies = get_relative_energies(raw_energies, ref_min_conformer_idx)
+        ref_energies = [e * KCAL_TO_EV for e in ref_energies]  # kcal/mol -> eV
 
         for i, conf_positions in enumerate(structure_data["conformer_coordinates"]):
             conf_atoms = Atoms(

From cd4e9da685c9b6065e104d52d4bc16774cea8257 Mon Sep 17 00:00:00 2001
From: Leon Wehrhan
Date: Fri, 13 Mar 2026 18:11:57 +0100
Subject: [PATCH 3/4] feat: add analysis script

---
 .../conformers/Folmsbee/analyse_Folmsbee.py   | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py

diff --git a/ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py b/ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py
new file mode 100644
index 000000000..20a5b9549
--- /dev/null
+++ b/ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py
@@ -0,0 +1,134 @@
+"""Analyse Folmsbee benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import (
+    build_dispersion_name_map,
+    load_metrics_config,
+    mae,
+)
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS)
+
+EV_TO_KCAL = units.mol / units.kcal
+CALC_PATH = CALCS_ROOT / "conformers" / "Folmsbee" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def labels() -> list:
+    """
+    Get list of system names.
+
+    Returns
+    -------
+    list
+        Stems of the first model's ``.xyz`` outputs, or ``[]`` without models.
+    """
+    first_model = next(iter(MODELS), None)
+    if first_model is None:
+        return []
+    return [path.stem for path in sorted((CALC_PATH / first_model).glob("*.xyz"))]
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_folmsbee.json",
+    title="Energies",
+    x_label="Predicted energy / kcal/mol",
+    y_label="Reference energy / kcal/mol",
+    hoverdata={
+        "Labels": labels(),
+    },
+)
+def conformer_energies() -> dict[str, list]:
+    """
+    Get conformer energies for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Reference and predicted relative conformer energies in kcal/mol.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        for label in labels():
+            atoms = read(CALC_PATH / model_name / f"{label}.xyz")
+
+            results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL)
+            if not ref_stored:
+                results["ref"].append(atoms.info["ref_rel_energy"] * EV_TO_KCAL)
+
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{label}.xyz", atoms)
+        ref_stored = True  # reference energies are only collected from the first model
+    return results
+
+
+@pytest.fixture
+def get_mae(conformer_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for conformer energies.
+
+    Parameters
+    ----------
+    conformer_energies
+        Dictionary of reference and predicted conformer energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Mean absolute error of the conformer energies for each model.
+    """
+    results = {}
+    for model_name in MODELS:
+        results[model_name] = mae(
+            conformer_energies["ref"], conformer_energies[model_name]
+        )
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "folmsbee_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=DISPERSION_NAME_MAP,
+)
+def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    get_mae
+        Mean absolute errors for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": get_mae,
+    }

From e03bf30f698cf008ca2c7b87a1cb837e30cb69ae Mon Sep 17 00:00:00 2001
From: joehart2001
Date: Wed, 18 Mar 2026 14:45:22 +0000
Subject: [PATCH 4/4] add flomsbee app and metrics.yml

---
 .../analysis/conformers/Folmsbee/metrics.yml  |  7 ++
 .../app/conformers/Folmsbee/app_Folmsbee.py   | 90 +++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 ml_peg/analysis/conformers/Folmsbee/metrics.yml
 create mode 100644 ml_peg/app/conformers/Folmsbee/app_Folmsbee.py

diff --git a/ml_peg/analysis/conformers/Folmsbee/metrics.yml b/ml_peg/analysis/conformers/Folmsbee/metrics.yml
new file mode 100644
index 000000000..604e94f56
--- /dev/null
+++ b/ml_peg/analysis/conformers/Folmsbee/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+  MAE:
+    good: 0.0
+    bad: 20.0
+    unit: kcal/mol
+    tooltip: Mean Absolute Error for all systems of the relative energy to the lowest energy conformer.
+    level_of_theory: DLPNO-CCSD(T)
diff --git a/ml_peg/app/conformers/Folmsbee/app_Folmsbee.py b/ml_peg/app/conformers/Folmsbee/app_Folmsbee.py
new file mode 100644
index 000000000..0f49c1dd2
--- /dev/null
+++ b/ml_peg/app/conformers/Folmsbee/app_Folmsbee.py
@@ -0,0 +1,90 @@
+"""Run Folmsbee conformer benchmark app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "Folmsbee"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/conformers.html#folmsbee"
+)
+DATA_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"
+
+
+class FolmsbeeApp(BaseApp):
+    """Folmsbee conformer benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks linking the table, scatter plot and structures."""
+        scatter = read_plot(
+            DATA_PATH / "figure_folmsbee.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        model_dir = DATA_PATH / MODELS[0]  # structures come from the first model only
+        if model_dir.exists():
+            labels = sorted([f.stem for f in model_dir.glob("*.xyz")])
+            structs = [
+                f"assets/conformers/Folmsbee/{MODELS[0]}/{label}.xyz"
+                for label in labels
+            ]
+        else:
+            structs = []
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> FolmsbeeApp:
+    """
+    Get Folmsbee benchmark app layout and callback registration.
+
+    Returns
+    -------
+    FolmsbeeApp
+        Benchmark layout and callback registration.
+    """
+    return FolmsbeeApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Performance in predicting relative conformer energies for "
+            "drug-like molecules. "
+            "Reference data from DLPNO-CCSD(T) calculations."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "folmsbee_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":  # standalone launch of this benchmark app on port 8066
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+    benchmark_app = get_app()
+    full_app.layout = benchmark_app.layout
+    benchmark_app.register_callbacks()
+    full_app.run(port=8066, debug=True)