From 5e1876cb4e0902d47f637bad75b3562ec079513a Mon Sep 17 00:00:00 2001 From: Mattia Perrone Date: Thu, 5 Mar 2026 18:11:17 +0100 Subject: [PATCH] Add superacid HF/SbF5 density benchmark Ref #379 --- .../analyse_HF_SbF5_density.py | 202 ++++++++++++++++++ .../superacids/HF_SbF5_density/metrics.yml | 7 + .../HF_SbF5_density/app_HF_SbF5_density.py | 74 +++++++ ml_peg/app/superacids/superacids.yml | 2 + ml_peg/calcs/superacids/.gitignore | 5 + .../HF_SbF5_density/calc_HF_SbF5_density.py | 134 ++++++++++++ 6 files changed, 424 insertions(+) create mode 100644 ml_peg/analysis/superacids/HF_SbF5_density/analyse_HF_SbF5_density.py create mode 100644 ml_peg/analysis/superacids/HF_SbF5_density/metrics.yml create mode 100644 ml_peg/app/superacids/HF_SbF5_density/app_HF_SbF5_density.py create mode 100644 ml_peg/app/superacids/superacids.yml create mode 100644 ml_peg/calcs/superacids/.gitignore create mode 100644 ml_peg/calcs/superacids/HF_SbF5_density/calc_HF_SbF5_density.py diff --git a/ml_peg/analysis/superacids/HF_SbF5_density/analyse_HF_SbF5_density.py b/ml_peg/analysis/superacids/HF_SbF5_density/analyse_HF_SbF5_density.py new file mode 100644 index 000000000..f71e01e5e --- /dev/null +++ b/ml_peg/analysis/superacids/HF_SbF5_density/analyse_HF_SbF5_density.py @@ -0,0 +1,202 @@ +"""Analyse HF/SbF5 density benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import read +import numpy as np +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +D3_MODEL_NAMES = build_d3_name_map(MODELS) +CALC_PATH = CALCS_ROOT / "superacids" / "HF_SbF5_density" / "outputs" +OUT_PATH = APP_ROOT / "data" / "superacids" / "HF_SbF5_density" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# Experimental reference densities +REF_DENSITIES = { + "X_0": 0.989, + "X_10": 1.677, + "X_100": 3.141, +} + + +# amu to g conversion factor +AMU_TO_G = 1.66053906660e-24 # g per amu +A3_TO_CM3 = 1e-24 + + +def compute_density_from_volume_dat(volume_path: Path, atoms_path: Path) -> float: + """ + Compute average density from volume.dat and atomic masses. + + Parameters + ---------- + volume_path + Path to volume.dat file (columns: step, volume_A3). + atoms_path + Path to any xyz file for this system (to get atomic masses). + + Returns + ------- + float + Average density in g/cm³. + """ + # Read total mass from atoms + atoms = read(atoms_path) + total_mass_amu = np.sum(atoms.get_masses()) + + # Read volume time series, skip header + data = np.loadtxt(volume_path, comments="#") + # Take second half as production (discard equilibration) + n_points = len(data) + production = data[n_points // 2 :, 1] # column 1 = volume in ų + avg_volume = np.mean(production) + + return (total_mass_amu * AMU_TO_G) / (avg_volume * A3_TO_CM3) + + +def get_system_names() -> list[str]: + """ + Get list of system names. + + Returns + ------- + list[str] + List of system names from output directories. + """ + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if model_dir.exists(): + systems = sorted([d.name for d in model_dir.iterdir() if d.is_dir()]) + if systems: + return systems + return [] + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_density.json", + title="HF/SbF5 Mixture Densities", + x_label="Predicted density / g/cm³", + y_label="Experimental density / g/cm³", + hoverdata={ + "System": get_system_names(), + }, +) +def densities() -> dict[str, list]: + """ + Get predicted and reference densities for all systems. + + Returns + ------- + dict[str, list] + Dictionary of reference and predicted densities. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + + if not model_dir.exists(): + continue + + systems = sorted([d.name for d in model_dir.iterdir() if d.is_dir()]) + if not systems: + continue + + for system in systems: + system_dir = model_dir / system + volume_path = system_dir / "volume.dat" + atoms_path = system_dir / f"{system}.xyz" + + if not volume_path.exists() or not atoms_path.exists(): + continue + + density = compute_density_from_volume_dat(volume_path, atoms_path) + results[model_name].append(density) + + if not ref_stored: + results["ref"].append(REF_DENSITIES[system]) + + ref_stored = True + + return results + + +@pytest.fixture +def density_errors(densities) -> dict[str, float]: + """ + Get mean absolute percentage error for densities. + + Parameters + ---------- + densities + Dictionary of reference and predicted densities. + + Returns + ------- + dict[str, float] + Dictionary of density MAPE for all models. + """ + results = {} + for model_name in MODELS: + if densities[model_name]: + preds = np.array(densities[model_name]) + refs = np.array(densities["ref"]) + mape = np.mean(np.abs(preds - refs) / refs) * 100 # in % + results[model_name] = mape + else: + results[model_name] = None + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "hf_sbf5_density_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(density_errors: dict[str, float]) -> dict[str, dict]: + """ + Get all HF/SbF5 density metrics. + + Parameters + ---------- + density_errors + Mean absolute errors for all systems. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAPE": density_errors, + } + + +def test_hf_sbf5_density(metrics: dict[str, dict]) -> None: + """ + Run HF/SbF5 density test. + + Parameters + ---------- + metrics + All HF/SbF5 density metrics. + """ + return diff --git a/ml_peg/analysis/superacids/HF_SbF5_density/metrics.yml b/ml_peg/analysis/superacids/HF_SbF5_density/metrics.yml new file mode 100644 index 000000000..17d0a19e6 --- /dev/null +++ b/ml_peg/analysis/superacids/HF_SbF5_density/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAPE: + good: 0.0 + bad: 10.0 + unit: "%" + tooltip: "Mean Absolute Percentage Error in liquid density vs experiment" + level_of_theory: Experiment diff --git a/ml_peg/app/superacids/HF_SbF5_density/app_HF_SbF5_density.py b/ml_peg/app/superacids/HF_SbF5_density/app_HF_SbF5_density.py new file mode 100644 index 000000000..da33025e8 --- /dev/null +++ b/ml_peg/app/superacids/HF_SbF5_density/app_HF_SbF5_density.py @@ -0,0 +1,74 @@ +"""Run HF/SbF5 density app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "HF/SbF5 Mixture Densities" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" + "superacids.html#hf-sbf5-mixture-densities" +) +DATA_PATH = APP_ROOT / "data" / "superacids" / "HF_SbF5_density" + + +class HFSbF5DensityApp(BaseApp): + """HF/SbF5 density benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_density.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAPE": scatter}, + ) + + +def get_app() -> HFSbF5DensityApp: + """ + Get HF/SbF5 density benchmark app layout and callback registration. + + Returns + ------- + HFSbF5DensityApp + Benchmark layout and callback registration. + """ + return HFSbF5DensityApp( + name=BENCHMARK_NAME, + description=("Liquid densities of HF/SbF5 mixtures at varying compositions."), + docs_url=DOCS_URL, + table_path=DATA_PATH / "hf_sbf5_density_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + app = get_app() + full_app.layout = app.layout + app.register_callbacks() + + # Run app + full_app.run(port=8056, debug=True) diff --git a/ml_peg/app/superacids/superacids.yml b/ml_peg/app/superacids/superacids.yml new file mode 100644 index 000000000..8be4e2189 --- /dev/null +++ b/ml_peg/app/superacids/superacids.yml @@ -0,0 +1,2 @@ +title: Superacids +description: Properties of HF/SbF5 superacid systems diff --git a/ml_peg/calcs/superacids/.gitignore b/ml_peg/calcs/superacids/.gitignore new file mode 100644 index 000000000..dbe8840f4 --- /dev/null +++ b/ml_peg/calcs/superacids/.gitignore @@ -0,0 +1,5 @@ +outputs/ +*.xyz +*.dat +*.log +__pycache__/ diff --git a/ml_peg/calcs/superacids/HF_SbF5_density/calc_HF_SbF5_density.py b/ml_peg/calcs/superacids/HF_SbF5_density/calc_HF_SbF5_density.py new file mode 100644 index 000000000..a81601e61 --- /dev/null +++ b/ml_peg/calcs/superacids/HF_SbF5_density/calc_HF_SbF5_density.py @@ -0,0 +1,134 @@ +"""Run calculations for HF/SbF5 tests.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase import units +from ase.io import read, write +from ase.md.logger import MDLogger +from ase.md.nose_hoover_chain import IsotropicMTKNPT +from ase.md.velocitydistribution import ( + MaxwellBoltzmannDistribution, + Stationary, + ZeroRotation, +) +from ase.optimize import FIRE +import pytest + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Unit conversion +EV_TO_KJ_PER_MOL = units.mol / units.kJ + + +# Simulation parameters +TEMPERATURE_K = 288.6 +PRESSURE_ATM = 1 +DT_FS = 0.5 # DT in femtoseconds + +N_MIN_STEPS = 100 # maximum minimization steps +N_NPT_STEPS = 200000 # NPT production steps + +OUT_FREQ = 1000 + + +# conversions +ATM_TO_GPA = 1.01325e-4 # 1 atm = 0.000101325 GPa +PRESSURE_AU = PRESSURE_ATM * ATM_TO_GPA * units.GPa +DT = 0.5 * units.fs +TDAMP = 100 * DT_FS * units.fs +PDAMP = 1000 * DT_FS * units.fs + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_hf_sbf5_density(mlip: tuple[str, Any]) -> None: + """ + Run HF/SbF5 mixture density test. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + """ + model_name, model = mlip + calc = model.get_calculator() + + # Add D3 calculator for this test + calc = model.add_d3_calculator(calc) + + # download dataset + hf_sbf5_density_dir = ( + download_s3_data( + key="inputs/superacids/HF_SbF5_density/HF_SbF5_density.zip", + filename="HF_SbF5_density.zip", + ) + / "HF_SbF5_density" + ) + + with open(hf_sbf5_density_dir / "list") as f: + systems = f.read().splitlines() + + for system in systems: + print(f"simulando {system} with model {model_name}") + + atoms = read(hf_sbf5_density_dir / system / "start.xyz") + atoms.calc = calc + + write_dir = OUT_PATH / model_name / system + write_dir.mkdir(parents=True, exist_ok=True) + + # MINIMIZATIOn + opt = FIRE(atoms, logfile=str(write_dir / "opt.log")) + opt.run(fmax=0.05, steps=N_MIN_STEPS) + write(write_dir / "minimised.xyz", atoms) + + MaxwellBoltzmannDistribution(atoms, temperature_K=TEMPERATURE_K) + Stationary(atoms) + ZeroRotation(atoms) + + dyn = IsotropicMTKNPT( + atoms=atoms, + timestep=DT, + temperature_K=TEMPERATURE_K, + pressure_au=PRESSURE_AU, + tdamp=TDAMP, + pdamp=PDAMP, + ) + + dyn.attach( + MDLogger(dyn, atoms, str(write_dir / "md.log"), header=True, mode="w"), + interval=OUT_FREQ, + ) + + # Volume logger: step and volume only + vol_file = open(write_dir / "volume.dat", "w") + vol_file.write("# step volume_A3\n") + + def write_volume(_dyn=dyn, _atoms=atoms, _f=vol_file) -> None: + step = _dyn.nsteps + vol = _atoms.get_volume() + _f.write(f"{step} {vol:.6f}\n") + _f.flush() + + write_volume() # step 0 + dyn.attach(write_volume, interval=OUT_FREQ) + + # Run NPT + dyn.run(N_NPT_STEPS) + + vol_file.close() + + # Save final structure + atoms.info["system"] = system + write(write_dir / f"{system}.xyz", atoms) + + print(f" {system} done")