From 18834215b15216e6cbe31de997906889aa4ae3d5 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 2 Feb 2026 22:25:59 +0100 Subject: [PATCH 01/22] add high pressure benchmark --- .../analyse_high_pressure_relaxation.py | 378 ++++++++++++++++++ .../high_pressure_relaxation/metrics.yml | 140 +++++++ .../app_high_pressure_relaxation.py | 82 ++++ .../calc_high_pressure_relaxation.py | 252 ++++++++++++ .../mace-mpa-0_first5.csv | 36 ++ ...est_high_pressure_relaxation_regression.py | 74 ++++ 6 files changed, 962 insertions(+) create mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py create mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml create mode 100644 ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py create mode 100644 ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py create mode 100644 tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv create mode 100644 tests/test_high_pressure_relaxation_regression.py diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py new file mode 100644 index 000000000..742a22c97 --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -0,0 +1,378 @@ +"""Analyse high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import pytest +from sklearn.metrics import mean_absolute_error + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "bulk_crystal" / "high_pressure_relaxation" / "outputs" +OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + +# Pressure conditions +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + +# Generate colors using viridis colorscale +PRESSURE_COLORS = px.colors.sample_colorscale("viridis", len(PRESSURES)) + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def mae(ref: list, pred: list) -> float: + """ + Calculate Mean Absolute Error. + + Parameters + ---------- + ref + Reference values. + pred + Predicted values. + + Returns + ------- + float + Mean absolute error. + """ + return mean_absolute_error(ref, pred) + + +def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: + """ + Load results for a specific model and pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + pd.DataFrame + Results dataframe or empty dataframe if not found. + """ + csv_path = CALC_PATH / model_name / f"results_{pressure_label}.csv" + if csv_path.exists(): + return pd.read_csv(csv_path) + return pd.DataFrame() + + +def get_converged_data_for_pressure( + model_name: str, pressure_label: str +) -> tuple[list, list, list, list]: + """ + Get converged volume and energy data for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + tuple[list, list, list, list] + Ref volumes, pred volumes, ref energies, pred energies for converged structures. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return [], [], [], [] + + # Filter converged structures and remove NaN values + df_conv = df[df["converged"]].copy() + df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) + + return ( + df_conv["ref_volume_per_atom"].tolist(), + df_conv["pred_volume_per_atom"].tolist(), + df_conv["ref_energy_per_atom"].tolist(), + df_conv["pred_energy_per_atom"].tolist(), + ) + + +def get_convergence_rate_for_pressure( + model_name: str, pressure_label: str +) -> float | None: + """ + Get convergence rate for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + float | None + Convergence rate (%) or None if no data. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return None + return (df["converged"].sum() / len(df)) * 100 + + +def create_pressure_colored_parity_plot( + data_getter: str, + title: str, + x_label: str, + y_label: str, + filename: Path, +) -> None: + """ + Create a parity plot with different colors for each pressure. + + Parameters + ---------- + data_getter + Either "volume" or "energy" to select which data to plot. + title + Plot title. + x_label + X-axis label. + y_label + Y-axis label. + filename + Path to save the plot JSON. + """ + fig = go.Figure() + + # Add a trace for each pressure + for pressure, pressure_label, color in zip( + PRESSURES, PRESSURE_LABELS, PRESSURE_COLORS, strict=False + ): + ref_values = [] + pred_values = [] + + # Collect data from all models for this pressure + for model_name in MODELS: + if data_getter == "volume": + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_vol) + pred_values.extend(pred_vol) + else: # energy + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_energy) + pred_values.extend(pred_energy) + + if ref_values and pred_values: + fig.add_trace( + go.Scatter( + x=pred_values, + y=ref_values, + name=f"{pressure} GPa", + mode="markers", + marker={"color": color, "size": 6, "opacity": 0.7}, + hovertemplate=( + f"{pressure} GPa
" + "Pred: %{x:.4f}
" + "Ref: %{y:.4f}
" + "" + ), + ) + ) + + # Add diagonal line (y=x) + all_values = [] + for trace in fig.data: + all_values.extend(trace.x) + all_values.extend(trace.y) + + if all_values: + min_val = min(all_values) + max_val = max(all_values) + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + name="y=x", + line={"color": "gray", "dash": "dash"}, + showlegend=True, + ) + ) + + fig.update_layout( + title=title, + xaxis_title=x_label, + yaxis_title=y_label, + legend_title="Pressure", + hovermode="closest", + ) + + # Save to JSON + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + json.dump(fig.to_dict(), f) + + +@pytest.fixture +def volume_predictions() -> None: + """Create volume parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="volume", + title="Volume per atom", + x_label="Predicted volume / ų/atom", + y_label="Reference volume / ų/atom", + filename=OUT_PATH / "figure_volume.json", + ) + + +@pytest.fixture +def energy_predictions() -> None: + """Create energy parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="energy", + title="Energy per atom", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / "figure_energy.json", + ) + + +@pytest.fixture +def volume_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for volume predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Volume MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_vol and pred_vol: + results[pressure_key][model_name] = mae(ref_vol, pred_vol) + return results + + +@pytest.fixture +def energy_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for energy predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Energy MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_energy and pred_energy: + results[pressure_key][model_name] = mae(ref_energy, pred_energy) + return results + + +@pytest.fixture +def convergence_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate convergence rate at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: convergence_rate}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Convergence ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + conv_rate = get_convergence_rate_for_pressure(model_name, pressure_label) + if conv_rate is not None: + results[pressure_key][model_name] = conv_rate + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "high_pressure_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, +) +def metrics( + volume_mae_per_pressure: dict[str, dict[str, float]], + energy_mae_per_pressure: dict[str, dict[str, float]], + convergence_per_pressure: dict[str, dict[str, float]], +) -> dict[str, dict]: + """ + Get all high-pressure relaxation metrics separated by pressure. + + Parameters + ---------- + volume_mae_per_pressure + Volume MAE for all models at each pressure. + energy_mae_per_pressure + Energy MAE for all models at each pressure. + convergence_per_pressure + Convergence rate for all models at each pressure. + + Returns + ------- + dict[str, dict] + All metrics for all models. + """ + all_metrics = {} + all_metrics.update(volume_mae_per_pressure) + all_metrics.update(energy_mae_per_pressure) + all_metrics.update(convergence_per_pressure) + return all_metrics + + +def test_high_pressure_relaxation( + metrics: dict[str, dict], + volume_predictions: None, + energy_predictions: None, +) -> None: + """ + Run high-pressure relaxation analysis test. + + Parameters + ---------- + metrics + All high-pressure relaxation metrics. + volume_predictions + Triggers volume plot generation. + energy_predictions + Triggers energy plot generation. + """ + return diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml new file mode 100644 index 000000000..fa5b810fa --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -0,0 +1,140 @@ +metrics: + # 0 GPa + Volume MAE (0 GPa): + good: 0.2 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (0 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Convergence (0 GPa): + good: 100.0 + bad: 80.0 + unit: "%" + tooltip: "Percentage of structures that converged at 0 GPa" + level_of_theory: PBE + + # 25 GPa + Volume MAE (25 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (25 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Convergence (25 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 25 GPa" + level_of_theory: PBE + + # 50 GPa + Volume MAE (50 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (50 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Convergence (50 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 50 GPa" + level_of_theory: PBE + + # 75 GPa + Volume MAE (75 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (75 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Convergence (75 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 75 GPa" + level_of_theory: PBE + + # 100 GPa + Volume MAE (100 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (100 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Convergence (100 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 100 GPa" + level_of_theory: PBE + + # 125 GPa + Volume MAE (125 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (125 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Convergence (125 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 125 GPa" + level_of_theory: PBE + + # 150 GPa + Volume MAE (150 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (150 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Convergence (150 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 150 GPa" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py new file mode 100644 index 000000000..b2746c9c7 --- /dev/null +++ b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py @@ -0,0 +1,82 @@ +"""Run high-pressure crystal relaxation benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "High-pressure relaxation" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html#high-pressure-relaxation" +DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + + +PRESSURES = [0, 25, 50, 75, 100, 125, 150] + + +class HighPressureRelaxationApp(BaseApp): + """High-pressure crystal relaxation benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter_volume = read_plot( + DATA_PATH / "figure_volume.json", id=f"{BENCHMARK_NAME}-figure" + ) + scatter_energy = read_plot( + DATA_PATH / "figure_energy.json", id=f"{BENCHMARK_NAME}-figure" + ) + + # Build column_to_plot mapping for all pressures + column_to_plot = {} + for pressure in PRESSURES: + column_to_plot[f"Volume MAE ({pressure} GPa)"] = scatter_volume + column_to_plot[f"Energy MAE ({pressure} GPa)"] = scatter_energy + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot=column_to_plot, + ) + + +def get_app() -> HighPressureRelaxationApp: + """ + Get high-pressure relaxation benchmark app layout and callback registration. + + Returns + ------- + HighPressureRelaxationApp + Benchmark layout and callback registration. + """ + return HighPressureRelaxationApp( + name=BENCHMARK_NAME, + description=( + "Performance when relaxing crystal structures under high pressure " + "(0-150 GPa). Evaluates volume, energy, and convergence percentage " + "against PBE reference calculations from the Alexandria database. " + "Please also reference Loew et al 2026 J. Phys. Mater. 9 015010" + " (https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8) " + "when using this benchmark." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "high_pressure_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + hp_app = get_app() + full_app.layout = hp_app.layout + hp_app.register_callbacks() + full_app.run(port=8055, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py new file mode 100644 index 000000000..372f3342a --- /dev/null +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -0,0 +1,252 @@ +"""Run calculations for high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import bz2 +from copy import copy +import json +from pathlib import Path +from typing import Any +import urllib.request + +from ase import Atoms +from ase.constraints import FixSymmetry +from ase.units import GPa +from janus_core.calculations.geom_opt import GeomOpt +import pandas as pd +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.ase import AseAtomsAdaptor +import pytest +from tqdm import tqdm + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +# URL for Alexandria database +ALEXANDRIA_BASE_URL = "https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure" + +# Local cache directory for downloaded data +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Pressure conditions in GPa +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + + +# Relaxation parameters +FMAX = 0.0002 # eV/A - tight convergence +MAX_STEPS = 500 + +# Number of structures to use for testing +N_STRUCTURES = 3 + + +def download_pressure_data(pressure_label: str) -> Path: + """ + Download pressure data from Alexandria database if not already cached. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025"). + + Returns + ------- + Path + Path to the downloaded/cached file. + """ + DATA_PATH.mkdir(parents=True, exist_ok=True) + local_path = DATA_PATH / f"{pressure_label}.json.bz2" + + if not local_path.exists(): + url = f"{ALEXANDRIA_BASE_URL}/{pressure_label}.json.bz2" + print(f"Downloading {url}...") + urllib.request.urlretrieve(url, local_path) + print(f"Downloaded to {local_path}") + + return local_path + + +def load_structures( + pressure_label: str, n_structures: int = N_STRUCTURES +) -> list[dict]: + """ + Load structures using P000 starting structures and pressure-specific references. + + Downloads data from Alexandria database if not already cached locally. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025") used for reference values. + n_structures + Number of structures to load. Default is N_STRUCTURES. + + Returns + ------- + list[dict] + List of structure dictionaries with atoms, mat_id, and reference values. + """ + start_json_path = download_pressure_data("P000") + ref_json_path = download_pressure_data(pressure_label) + + with bz2.open(start_json_path, "rt") as f: + start_data = json.load(f) + + with bz2.open(ref_json_path, "rt") as f: + ref_data = json.load(f) + + ref_map: dict[str, ComputedStructureEntry] = {} + for entry_dict in ref_data["entries"]: + entry = ComputedStructureEntry.from_dict(entry_dict) + ref_map[entry.data["mat_id"]] = entry + + adaptor = AseAtomsAdaptor() + structures = [] + + for entry_dict in start_data["entries"][:n_structures]: + entry = ComputedStructureEntry.from_dict(entry_dict) + mat_id = entry.data["mat_id"] + ref_entry = ref_map.get(mat_id) + if ref_entry is None: + raise ValueError( + f"Missing reference entry for {mat_id} at {pressure_label}" + ) + + atoms = adaptor.get_atoms(entry.structure) + n_atoms = len(ref_entry.structure) + + structures.append( + { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": ref_entry.energy / n_atoms, + "ref_volume_per_atom": ref_entry.structure.volume / n_atoms, + } + ) + + return structures + + +def relax_with_pressure( + atoms: Atoms, + pressure_gpa: float, + fmax: float = FMAX, + max_steps: int = MAX_STEPS, +) -> tuple[Atoms | None, bool, float | None]: + """ + Relax structure under specified pressure using janus-core GeomOpt. + + Parameters + ---------- + atoms + ASE Atoms object with calculator attached. + pressure_gpa + Pressure in GPa. + fmax + Maximum force tolerance in eV/A. + max_steps + Maximum number of optimization steps. + + Returns + ------- + tuple[Atoms | None, bool, float | None] + Relaxed atoms (or None if failed), convergence status, enthalpy per atom. + """ + try: + converged = False + counter = 0 + # repeat up to 3 times if not converged to be consistent with reference + while counter < 3 and not converged: + atoms.set_constraint( + FixSymmetry( + atoms, + ) + ) + geom_opt = GeomOpt( + struct=atoms, + fmax=fmax, + steps=max_steps, + filter_kwargs={"scalar_pressure": pressure_gpa}, + calc_kwargs={"default_dtype": "float64"}, + ) + geom_opt.run() + relaxed = geom_opt.struct + # asses forces to determine convergence + max_force = max(relaxed.get_forces().flatten(), key=abs) + if max_force < fmax: + converged = True + counter += 1 + atoms = relaxed + except Exception as e: + print(f"Relaxation failed: {e}") + return None, False, None + + # Calculate enthalpy: H = E + PV + energy = relaxed.get_potential_energy() + volume = relaxed.get_volume() + enthalpy = energy + pressure_gpa * GPa * volume + n_atoms = len(relaxed) + + return relaxed, converged, enthalpy / n_atoms + + +@pytest.mark.parametrize("mlip", MODELS.items()) +@pytest.mark.parametrize("pressure_idx", range(len(PRESSURES))) +def test_high_pressure_relaxation(mlip: tuple[str, Any], pressure_idx: int) -> None: + """ + Run high-pressure relaxation benchmark. + + Parameters + ---------- + mlip + Tuple of model name and model object. + pressure_idx + Index into PRESSURES list. + """ + model_name, model = mlip + calc = model.get_calculator() + + pressure_gpa = PRESSURES[pressure_idx] + pressure_label = PRESSURE_LABELS[pressure_idx] + + # Load structures (downloads from Alexandria if not cached) + structures = load_structures(pressure_label) + + results = [] + for struct_data in tqdm( + structures, desc=f"{model_name} @ {pressure_gpa} GPa", leave=False + ): + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) + mat_id = struct_data["mat_id"] + + relaxed_atoms, converged, enthalpy_per_atom = relax_with_pressure( + atoms, pressure_gpa + ) + + if relaxed_atoms is not None: + pred_volume = relaxed_atoms.get_volume() / len(relaxed_atoms) + else: + pred_volume = None + + results.append( + { + "mat_id": mat_id, + "pressure_gpa": pressure_gpa, + "ref_volume_per_atom": struct_data["ref_volume_per_atom"], + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_volume_per_atom": pred_volume, + "pred_energy_per_atom": enthalpy_per_atom, + "converged": converged, + } + ) + + # Save results + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + df = pd.DataFrame(results) + df.to_csv(out_dir / f"results_{pressure_label}.csv", index=False) diff --git a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv new file mode 100644 index 000000000..9cd448467 --- /dev/null +++ b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv @@ -0,0 +1,36 @@ +pressure_label,mat_id,volume,energy_per_atom +P000,agm001000110,25.033782103611063,-5.936990103881239 +P000,agm001001040,15.204527828733536,-8.27554636941658 +P000,agm001001919,21.303183572813897,-8.435935053726139 +P000,agm001003503,24.690326355959755,-4.327203031854863 +P000,agm001004359,20.86226724052588,-7.829242292338797 +P025,agm001000110,19.71321916175995,-2.5294162683198125 +P025,agm001001040,13.487048183285616,-6.052898044522763 +P025,agm001001919,17.670764016174328,-5.438176257490443 +P025,agm001003503,19.12780621300846,-1.006088317132572 +P025,agm001004359,17.457486573286673,-4.870256961601473 +P050,agm001000110,17.715912861957246,0.3763531065086264 +P050,agm001001040,12.512987673468842,-4.029860765358488 +P050,agm001001919,15.46189689156924,-2.861038673041728 +P050,agm001003503,16.694829244071812,1.7723527015771132 +P050,agm001004359,15.13105554384206,-2.338786848103142 +P075,agm001000110,16.458372063400443,3.036751076640864 +P075,agm001001040,11.833602300101647,-2.13312140913839 +P075,agm001001919,14.04859528970246,-0.5622805372532179 +P075,agm001003503,15.23016215452774,4.255252390053997 +P075,agm001004359,13.686750914395638,-0.0937456687284251 +P100,agm001000110,15.506324317224594,5.527588395992431 +P100,agm001001040,11.324664691623278,-0.328009659077703 +P100,agm001001919,13.09661261512976,1.547445860475598 +P100,agm001003503,14.113463382167296,6.544697423965405 +P100,agm001004359,12.807027240161723,1.9611872723655293 +P125,agm001000110,14.768322896353382,7.887451277337899 +P125,agm001001040,10.9082879022004,1.4057552363067838 +P125,agm001001919,12.503731488724002,3.542297291380569 +P125,agm001003503,13.125761236229373,8.668450989536197 +P125,agm001004359,12.298360564455225,3.9181192151113904 +P150,agm001000110,14.163630823547033,10.143286868066651 +P150,agm001001040,10.539232357961302,3.078605889229442 +P150,agm001001919,12.045275482032784,5.456359868213672 +P150,agm001003503,12.192106272396378,10.643260044585606 +P150,agm001004359,11.883204516891276,5.803899385939424 diff --git a/tests/test_high_pressure_relaxation_regression.py b/tests/test_high_pressure_relaxation_regression.py new file mode 100644 index 000000000..dda8eada5 --- /dev/null +++ b/tests/test_high_pressure_relaxation_regression.py @@ -0,0 +1,74 @@ +"""Regression checks for high-pressure relaxation outputs.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest + +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] +FIRST_N = 5 +VOLUME_ATOL = 0.001 +ENERGY_ATOL = 0.0001 + +REPO_ROOT = Path(__file__).resolve().parents[1] +TEST_DATA_ROOT = REPO_ROOT / "tests" / "data" / "high_pressure_relaxation" +OUTPUT_ROOT = ( + REPO_ROOT + / "ml_peg" + / "calcs" + / "bulk_crystal" + / "high_pressure_relaxation" + / "outputs" +) + +TEST_CASES = [ + { + "name": "mace-mpa-0", + "output_model": "mace-mpa-0", + "data_file": TEST_DATA_ROOT / "mace-mpa-0_first5.csv", + }, +] + + +@pytest.mark.parametrize("case", TEST_CASES, ids=[case["name"] for case in TEST_CASES]) +@pytest.mark.parametrize("pressure_label", PRESSURE_LABELS) +def test_high_pressure_relaxation_regression( + case: dict[str, Path | str], + pressure_label: str, +) -> None: + """Check entries against stored regression baselines.""" + data_path = Path(case["data_file"]) + results_path = ( + OUTPUT_ROOT / str(case["output_model"]) / f"results_{pressure_label}.csv" + ) + + assert data_path.exists(), f"Missing test data file: {data_path}" + assert results_path.exists(), f"Missing results file: {results_path}" + + expected = pd.read_csv(data_path) + expected_pressure = expected[expected["pressure_label"] == pressure_label] + + assert len(expected_pressure) == FIRST_N + + results = pd.read_csv(results_path).head(FIRST_N) + if len(results) < FIRST_N: + pytest.skip( + f"{case['output_model']} only has {len(results)} rows for {pressure_label}." + ) + + assert expected_pressure["mat_id"].tolist() == results["mat_id"].tolist() + np.testing.assert_allclose( + results["pred_volume_per_atom"].to_numpy(), + expected_pressure["volume"].to_numpy(), + rtol=0.0, + atol=VOLUME_ATOL, + ) + np.testing.assert_allclose( + results["pred_energy_per_atom"].to_numpy(), + expected_pressure["energy_per_atom"].to_numpy(), + rtol=0.0, + atol=ENERGY_ATOL, + ) From cb5c0527dc4b152d282793e5a82d372b78c33a73 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 00:36:01 +0100 Subject: [PATCH 02/22] add random subsampling --- .../calc_high_pressure_relaxation.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py index 372f3342a..1a9b4fda2 100644 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -6,6 +6,7 @@ from copy import copy import json from pathlib import Path +import random from typing import Any import urllib.request @@ -39,9 +40,10 @@ # Relaxation parameters FMAX = 0.0002 # eV/A - tight convergence MAX_STEPS = 500 +RANDOM_SEED = 42 # Number of structures to use for testing -N_STRUCTURES = 3 +N_STRUCTURES = 100 def download_pressure_data(pressure_label: str) -> Path: @@ -107,7 +109,18 @@ def load_structures( adaptor = AseAtomsAdaptor() structures = [] - for entry_dict in start_data["entries"][:n_structures]: + start_entries = start_data["entries"] + if n_structures > len(start_entries): + raise ValueError( + f"Requested {n_structures} structures but only" + f" {len(start_entries)} available" + ) + + rng = random.Random(RANDOM_SEED) + selected_indices = sorted(rng.sample(range(len(start_entries)), k=n_structures)) + + for idx in selected_indices: + entry_dict = start_entries[idx] entry = ComputedStructureEntry.from_dict(entry_dict) mat_id = entry.data["mat_id"] ref_entry = ref_map.get(mat_id) @@ -184,7 +197,8 @@ def relax_with_pressure( except Exception as e: print(f"Relaxation failed: {e}") return None, False, None - + if not converged: + return None, False, None # Calculate enthalpy: H = E + PV energy = relaxed.get_potential_energy() volume = relaxed.get_volume() From 8ef30cc5c2e18fc19bcc372068a118cd01e20498 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 11:20:58 +0100 Subject: [PATCH 03/22] remove outliers --- .../analyse_high_pressure_relaxation.py | 11 +++++++++++ .../calc_high_pressure_relaxation.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py index 742a22c97..88d2a35b5 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -99,6 +99,11 @@ def get_converged_data_for_pressure( return [], [], [], [] # Filter converged structures and remove NaN values + # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged + df.loc[ + (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), + "converged", + ] = False df_conv = df[df["converged"]].copy() df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) @@ -129,8 +134,14 @@ def get_convergence_rate_for_pressure( Convergence rate (%) or None if no data. """ df = load_results_for_pressure(model_name, pressure_label) + # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged if df.empty: return None + df.loc[ + (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), + "converged", + ] = False + # set energies of unconverged to NaN return (df["converged"].sum() / len(df)) * 100 diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py index 1a9b4fda2..bcf6c88fc 100644 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -43,7 +43,7 @@ RANDOM_SEED = 42 # Number of structures to use for testing -N_STRUCTURES = 100 +N_STRUCTURES = 3000 def download_pressure_data(pressure_label: str) -> Path: From 5bbeba1872d8c557dcb8afa5c35c793e4ab3011f Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 11:25:32 +0100 Subject: [PATCH 04/22] add orb-v3-cons-inf-mpa for comparison with original benchmark --- ml_peg/models/models.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ml_peg/models/models.yml b/ml_peg/models/models.yml index 77e54a16b..9276c9865 100644 --- a/ml_peg/models/models.yml +++ b/ml_peg/models/models.yml @@ -72,6 +72,16 @@ orb-v3-consv-inf-omat: kwargs: name: "orb_v3_conservative_inf_omat" +orb-v3-consv-inf-mpa: + module: orb_models.forcefield + class_name: OrbCalc + device: "cpu" + default_dtype: float32-high + trained_on_d3: false + level_of_theory: PBE + kwargs: + name: "orb_v3_conservative_inf_mpa" + # mattersim-5M: # module: mattersim.forcefield # class_name: MatterSimCalculator From 1d2fb2ee62d689bb831d8c386a3231d10e687f44 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 11:30:02 +0100 Subject: [PATCH 05/22] add docs for benchmark --- .../user_guide/benchmarks/bulk_crystal.rst | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index 575bbcc11..c7ef3a5b7 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -115,3 +115,62 @@ Reference data: * Same as input data * PBE + + +High-Pressure Relaxation +======================== + +Summary +------- + +Performance in relaxing bulk crystal structures under high-pressure conditions. +3000 structures from the Alexandria database are relaxed at 7 pressure conditions +(0, 25, 50, 75, 100, 125, 150 GPa) and compared to PBE reference calculations. + + +Metrics +------- + +For each pressure condition (0, 25, 50, 75, 100, 125, 150 GPa): + +(1) Volume MAE + +Mean absolute error of volume per atom compared to PBE reference. + +(2) Energy MAE + +Mean absolute error of enthalpy per atom compared to PBE reference. The enthalpy is +calculated as H = E + PV, where E is the potential energy, P is the applied pressure, +and V is the volume. + +(3) Convergence + +Percentage of structures that successfully converged during relaxation. + +Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint +applied to preserve crystallographic symmetry analogously to DFT. Starting from P000 (0 GPa) structures, each structure is relaxed at the target pressure using the FrechetCellFilter with the +specified scalar pressure. Relaxation continues until the maximum force component is +below 0.0002 eV/Šor until 500 steps are reached. If not converged, relaxation is +repeated up from the last structure of the previous relaxation up to 3 times. + + +Computational cost +------------------ + +High: tests are likely to take hours-days to run on GPU, depending on the number of +structures and pressure conditions tested. + + +Data availability +----------------- + +Input structures: + +* Alexandria database pressure benchmark dataset +* URL: https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure +* 3000 structures randomly sampled from the full datasets at each pressure + +Reference data: + +* PBE calculations from the Alexandria database +* Loew et al 2026 J. Phys. Mater. 9 015010 https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8 From 3a06816d193575729877eac7623b84cb80c966fe Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 12:27:02 +0100 Subject: [PATCH 06/22] fix good/bad metrics, use internal mae function --- .../analyse_high_pressure_relaxation.py | 44 +++++++++---------- .../high_pressure_relaxation/metrics.yml | 6 +-- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py index 88d2a35b5..78a591f6f 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -9,10 +9,9 @@ import plotly.express as px import plotly.graph_objects as go import pytest -from sklearn.metrics import mean_absolute_error from ml_peg.analysis.utils.decorators import build_table -from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.analysis.utils.utils import load_metrics_config, mae from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names @@ -34,24 +33,34 @@ METRICS_CONFIG_PATH ) +# Energy outlier thresholds (eV/atom) +ENERGY_OUTLIER_MIN = -25 +ENERGY_OUTLIER_MAX = 25 -def mae(ref: list, pred: list) -> float: + +def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: """ - Calculate Mean Absolute Error. + Mark structures with extreme predicted energies as unconverged. + + Structures with predicted energy per atom outside [-25, 25] eV/atom + are considered outliers and marked as unconverged. Parameters ---------- - ref - Reference values. - pred - Predicted values. + df + Results dataframe with 'pred_energy_per_atom' and 'converged' columns. Returns ------- - float - Mean absolute error. + pd.DataFrame + Dataframe with outliers marked as unconverged. """ - return mean_absolute_error(ref, pred) + df.loc[ + (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) + | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), + "converged", + ] = False + return df def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: @@ -99,11 +108,7 @@ def get_converged_data_for_pressure( return [], [], [], [] # Filter converged structures and remove NaN values - # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged - df.loc[ - (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), - "converged", - ] = False + df = _filter_energy_outliers(df) df_conv = df[df["converged"]].copy() df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) @@ -134,14 +139,9 @@ def get_convergence_rate_for_pressure( Convergence rate (%) or None if no data. """ df = load_results_for_pressure(model_name, pressure_label) - # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged if df.empty: return None - df.loc[ - (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), - "converged", - ] = False - # set energies of unconverged to NaN + df = _filter_energy_outliers(df) return (df["converged"].sum() / len(df)) * 100 diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml index fa5b810fa..5fe501cc1 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -1,8 +1,8 @@ metrics: # 0 GPa Volume MAE (0 GPa): - good: 0.2 - bad: 0.1 + good: 0.05 + bad: 0.15 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" level_of_theory: PBE @@ -14,7 +14,7 @@ metrics: level_of_theory: PBE Convergence (0 GPa): good: 100.0 - bad: 80.0 + bad: 99.9 unit: "%" tooltip: "Percentage of structures that converged at 0 GPa" level_of_theory: PBE From b0c5d56074bf36add7f0dea356e96b5451057c5e Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 14:50:20 +0100 Subject: [PATCH 07/22] metrics.yml update --- .../high_pressure_relaxation/metrics.yml | 14 +++++------ uv.lock | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml index 5fe501cc1..1e09408cd 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -2,7 +2,7 @@ metrics: # 0 GPa Volume MAE (0 GPa): good: 0.05 - bad: 0.15 + bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" level_of_theory: PBE @@ -21,7 +21,7 @@ metrics: # 25 GPa Volume MAE (25 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" @@ -41,7 +41,7 @@ metrics: # 50 GPa Volume MAE (50 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" @@ -61,7 +61,7 @@ metrics: # 75 GPa Volume MAE (75 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" @@ -81,7 +81,7 @@ metrics: # 100 GPa Volume MAE (100 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" @@ -101,7 +101,7 @@ metrics: # 125 GPa Volume MAE (125 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" @@ -121,7 +121,7 @@ metrics: # 150 GPa Volume MAE (150 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" diff --git a/uv.lock b/uv.lock index f828ab629..b1665744d 100644 --- a/uv.lock +++ b/uv.lock @@ -2603,6 +2603,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/a8/365059bbcd4572cbc41de17fd5b682be5868b218c3c5479071865cab9078/entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f", size = 5294, upload-time = "2022-02-02T21:30:26.024Z" }, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + [[package]] name = "eventlet" version = "0.40.4" @@ -5998,6 +6007,7 @@ dependencies = [ { name = "mdanalysis", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mdanalysis", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mlipx" }, + { name = "openpyxl" }, { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "tqdm" }, @@ -6078,6 +6088,7 @@ requires-dist = [ { name = "mattersim", marker = "extra == 'mattersim'", specifier = "==1.2.0" }, { name = "mdanalysis" }, { name = "mlipx", specifier = ">=0.1.5,<0.2" }, + { name = "openpyxl" }, { name = "orb-models", marker = "python_full_version < '3.13' and sys_platform != 'win32' and extra == 'orb'", specifier = "==0.5.5" }, { name = "pet-mad", marker = "sys_platform != 'win32' and extra == 'pet-mad'", specifier = "==1.4.4" }, { name = "scikit-learn", specifier = ">=1.7.1" }, @@ -7337,6 +7348,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "opt-einsum" version = "3.4.0" From 5e6359d0d085c2b42534ca57ebaf758f64516bb6 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 16:03:59 +0100 Subject: [PATCH 08/22] Add low-dimensional (2D) crystal relaxation benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial implementation of benchmark for 2D crystal structures from Alexandria database. Uses cell mask to constrain relaxation to in-plane directions only. Metrics: - Area MAE (Ų/atom): In-plane area per atom vs PBE reference - Energy MAE (eV/atom): Energy per atom vs PBE reference - Convergence (%): Percentage of structures that converged Data source: alexandria_2d_001.json.bz2 from Alexandria database Co-Authored-By: Claude Opus 4.5 --- .../analyse_low_dimensional_relaxation.py | 364 ++++++++++++++++++ .../low_dimensional_relaxation/metrics.yml | 22 ++ .../app_low_dimensional_relaxation.py | 81 ++++ .../calc_low_dimensional_relaxation.py | 266 +++++++++++++ 4 files changed, 733 insertions(+) create mode 100644 ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py create mode 100644 ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml create mode 100644 ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py create mode 100644 ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py new file mode 100644 index 000000000..d416c49ab --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py @@ -0,0 +1,364 @@ +"""Analyse low-dimensional (2D/1D) crystal relaxation benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "bulk_crystal" / "low_dimensional_relaxation" / "outputs" +OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "low_dimensional_relaxation" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# Energy outlier thresholds (eV/atom) +ENERGY_OUTLIER_MIN = -25 +ENERGY_OUTLIER_MAX = 25 + + +def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: + """ + Mark structures with extreme predicted energies as unconverged. + + Structures with predicted energy per atom outside [-25, 25] eV/atom + are considered outliers and marked as unconverged. + + Parameters + ---------- + df + Results dataframe with 'pred_energy_per_atom' and 'converged' columns. + + Returns + ------- + pd.DataFrame + Dataframe with outliers marked as unconverged. + """ + df.loc[ + (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) + | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), + "converged", + ] = False + return df + + +def load_results(model_name: str, dimensionality: str = "2D") -> pd.DataFrame: + """ + Load results for a specific model and dimensionality. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + pd.DataFrame + Results dataframe or empty dataframe if not found. + """ + csv_path = CALC_PATH / model_name / f"results_{dimensionality}.csv" + if csv_path.exists(): + return pd.read_csv(csv_path) + return pd.DataFrame() + + +def get_converged_data( + model_name: str, dimensionality: str = "2D" +) -> tuple[list, list, list, list]: + """ + Get converged area and energy data for a model. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + tuple[list, list, list, list] + Ref areas, pred areas, ref energies, pred energies for converged structures. + """ + df = load_results(model_name, dimensionality) + if df.empty: + return [], [], [], [] + + # Filter converged structures and remove NaN values + df = _filter_energy_outliers(df) + df_conv = df[df["converged"]].copy() + df_conv = df_conv.dropna(subset=["pred_area_per_atom", "pred_energy_per_atom"]) + + return ( + df_conv["ref_area_per_atom"].tolist(), + df_conv["pred_area_per_atom"].tolist(), + df_conv["ref_energy_per_atom"].tolist(), + df_conv["pred_energy_per_atom"].tolist(), + ) + + +def get_convergence_rate(model_name: str, dimensionality: str = "2D") -> float | None: + """ + Get convergence rate for a model. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + float | None + Convergence rate (%) or None if no data. + """ + df = load_results(model_name, dimensionality) + if df.empty: + return None + df = _filter_energy_outliers(df) + return (df["converged"].sum() / len(df)) * 100 + + +def create_parity_plot( + data_getter: str, + title: str, + x_label: str, + y_label: str, + filename: Path, + dimensionality: str = "2D", +) -> None: + """ + Create a parity plot for low-dimensional structures. + + Parameters + ---------- + data_getter + Either "area" or "energy" to select which data to plot. + title + Plot title. + x_label + X-axis label. + y_label + Y-axis label. + filename + Path to save the plot JSON. + dimensionality + Either "2D" or "1D". + """ + fig = go.Figure() + + ref_values = [] + pred_values = [] + + # Collect data from all models + for model_name in MODELS: + if data_getter == "area": + ref_area, pred_area, _, _ = get_converged_data(model_name, dimensionality) + ref_values.extend(ref_area) + pred_values.extend(pred_area) + else: # energy + _, _, ref_energy, pred_energy = get_converged_data( + model_name, dimensionality + ) + ref_values.extend(ref_energy) + pred_values.extend(pred_energy) + + if ref_values and pred_values: + fig.add_trace( + go.Scatter( + x=pred_values, + y=ref_values, + name=dimensionality, + mode="markers", + marker={"size": 6, "opacity": 0.7}, + hovertemplate=( + f"{dimensionality}
" + "Pred: %{x:.4f}
" + "Ref: %{y:.4f}
" + "" + ), + ) + ) + + # Add diagonal line (y=x) + all_values = [] + for trace in fig.data: + all_values.extend(trace.x) + all_values.extend(trace.y) + + if all_values: + min_val = min(all_values) + max_val = max(all_values) + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + name="y=x", + line={"color": "gray", "dash": "dash"}, + showlegend=True, + ) + ) + + fig.update_layout( + title=title, + xaxis_title=x_label, + yaxis_title=y_label, + hovermode="closest", + ) + + # Save to JSON + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + json.dump(fig.to_dict(), f) + + +@pytest.fixture +def area_predictions() -> None: + """Create area parity plot for 2D structures.""" + create_parity_plot( + data_getter="area", + title="Area per atom (2D)", + x_label="Predicted area / Ų/atom", + y_label="Reference area / Ų/atom", + filename=OUT_PATH / "figure_area.json", + dimensionality="2D", + ) + + +@pytest.fixture +def energy_predictions() -> None: + """Create energy parity plot for 2D structures.""" + create_parity_plot( + data_getter="energy", + title="Energy per atom (2D)", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / "figure_energy.json", + dimensionality="2D", + ) + + +@pytest.fixture +def area_mae() -> dict[str, float]: + """ + Calculate MAE for area predictions. + + Returns + ------- + dict[str, float] + {model_name: mae_value}. + """ + results = {} + for model_name in MODELS: + ref_area, pred_area, _, _ = get_converged_data(model_name, "2D") + if ref_area and pred_area: + results[model_name] = mae(ref_area, pred_area) + return results + + +@pytest.fixture +def energy_mae() -> dict[str, float]: + """ + Calculate MAE for energy predictions. + + Returns + ------- + dict[str, float] + {model_name: mae_value}. + """ + results = {} + for model_name in MODELS: + _, _, ref_energy, pred_energy = get_converged_data(model_name, "2D") + if ref_energy and pred_energy: + results[model_name] = mae(ref_energy, pred_energy) + return results + + +@pytest.fixture +def convergence() -> dict[str, float]: + """ + Calculate convergence rate. + + Returns + ------- + dict[str, float] + {model_name: convergence_rate}. + """ + results = {} + for model_name in MODELS: + conv_rate = get_convergence_rate(model_name, "2D") + if conv_rate is not None: + results[model_name] = conv_rate + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "low_dimensional_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, +) +def metrics( + area_mae: dict[str, float], + energy_mae: dict[str, float], + convergence: dict[str, float], +) -> dict[str, dict]: + """ + Get all low-dimensional relaxation metrics. + + Parameters + ---------- + area_mae + Area MAE for all models. + energy_mae + Energy MAE for all models. + convergence + Convergence rate for all models. + + Returns + ------- + dict[str, dict] + All metrics for all models. + """ + return { + "Area MAE (2D)": area_mae, + "Energy MAE (2D)": energy_mae, + "Convergence (2D)": convergence, + } + + +def test_low_dimensional_relaxation( + metrics: dict[str, dict], + area_predictions: None, + energy_predictions: None, +) -> None: + """ + Run low-dimensional relaxation analysis test. + + Parameters + ---------- + metrics + All low-dimensional relaxation metrics. + area_predictions + Triggers area plot generation. + energy_predictions + Triggers energy plot generation. + """ + return diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml new file mode 100644 index 000000000..bffdca5cc --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml @@ -0,0 +1,22 @@ +metrics: + # 2D structures + Area MAE (2D): + good: 0.05 + bad: 0.15 + unit: "Ų/atom" + tooltip: "Mean Absolute Error of area per atom for 2D structures compared to PBE" + level_of_theory: PBE + + Energy MAE (2D): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom for 2D structures compared to PBE" + level_of_theory: PBE + + Convergence (2D): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of 2D structures that converged during relaxation" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py new file mode 100644 index 000000000..4ae98c932 --- /dev/null +++ b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py @@ -0,0 +1,81 @@ +"""Low-dimensional (2D/1D) crystal relaxation benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "Low-Dimensional Relaxation" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html" + "#low-dimensional-relaxation" +) +DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "low_dimensional_relaxation" + + +class LowDimensionalRelaxationApp(BaseApp): + """Low-dimensional relaxation benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + # Define plot paths and their metric mappings + plot_configs = [ + ("figure_area.json", "Area MAE (2D)", "area"), + ("figure_energy.json", "Energy MAE (2D)", "energy"), + ] + + # Build column-to-plot mapping + column_to_plot = {} + for filename, metric_name, plot_id_suffix in plot_configs: + plot_path = DATA_PATH / filename + if plot_path.exists(): + scatter = read_plot( + plot_path, + id=f"{BENCHMARK_NAME}-figure-{plot_id_suffix}", + ) + column_to_plot[metric_name] = scatter + + if column_to_plot: + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot=column_to_plot, + ) + + +def get_app() -> LowDimensionalRelaxationApp: + """ + Get low-dimensional relaxation benchmark app. + + Returns + ------- + LowDimensionalRelaxationApp + Benchmark layout and callback registration. + """ + return LowDimensionalRelaxationApp( + name=BENCHMARK_NAME, + description=( + "Performance in relaxing low-dimensional (2D/1D) crystal structures. " + "Structures from the Alexandria database are relaxed with cell masks " + "to constrain relaxation to the appropriate dimensions and compared " + "to PBE reference calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "low_dimensional_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + ld_app = get_app() + full_app.layout = ld_app.layout + ld_app.register_callbacks() + full_app.run(port=8064, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py new file mode 100644 index 000000000..9c7d59f92 --- /dev/null +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -0,0 +1,266 @@ +"""Run calculations for low-dimensional (2D/1D) crystal relaxation benchmark.""" + +from __future__ import annotations + +import bz2 +from copy import copy +import json +from pathlib import Path +import random +from typing import Any +import urllib.request + +from ase import Atoms +from ase.constraints import FixSymmetry +from janus_core.calculations.geom_opt import GeomOpt +import numpy as np +import pandas as pd +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.ase import AseAtomsAdaptor +import pytest +from tqdm import tqdm + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +# URLs for Alexandria database +ALEXANDRIA_2D_URL = "https://alexandria.icams.rub.de/data/pbe_2d" +ALEXANDRIA_1D_URL = "https://alexandria.icams.rub.de/data/pbe_1d" + +# Local cache directory for downloaded data +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Relaxation parameters +FMAX = 0.0002 # eV/A - tight convergence +MAX_STEPS = 500 +RANDOM_SEED = 42 + +# Number of structures to use for testing +N_STRUCTURES = 3000 + +# Dimensionality configurations +# For 2D: mask allows in-plane cell relaxation only (fix z) +# For 1D: mask allows along-chain relaxation only (fix y and z) +CELL_MASKS = { + "2D": np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool), + "1D": np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=bool), +} + + +def download_2d_data(filename: str = "alexandria_2d_001.json.bz2") -> Path: + """ + Download 2D structure data from Alexandria database if not already cached. + + Parameters + ---------- + filename + Name of the file to download. + + Returns + ------- + Path + Path to the downloaded/cached file. + """ + DATA_PATH.mkdir(parents=True, exist_ok=True) + local_path = DATA_PATH / filename + + if not local_path.exists(): + url = f"{ALEXANDRIA_2D_URL}/{filename}" + print(f"Downloading {url}...") + urllib.request.urlretrieve(url, local_path) + print(f"Downloaded to {local_path}") + + return local_path + + +def get_area_per_atom(atoms: Atoms) -> float: + """ + Calculate the in-plane area per atom for 2D structures. + + For 2D materials, the area is calculated as a x b x sin(gamma), + where a and b are the in-plane lattice vectors. + + Parameters + ---------- + atoms + ASE Atoms object. + + Returns + ------- + float + In-plane area per atom in Ų. + """ + cell = atoms.get_cell() + # Calculate cross product of first two lattice vectors (in-plane) + a = cell[0] + b = cell[1] + area = np.linalg.norm(np.cross(a, b)) + return area / len(atoms) + + +def load_2d_structures( + filename: str = "alexandria_2d_001.json.bz2", + n_structures: int = N_STRUCTURES, +) -> list[dict]: + """ + Load 2D structures from Alexandria database. + + Downloads data if not already cached locally. + + Parameters + ---------- + filename + Name of the data file to load. + n_structures + Number of structures to load. Default is N_STRUCTURES. + + Returns + ------- + list[dict] + List of structure dictionaries with atoms, mat_id, and reference values. + """ + json_path = download_2d_data(filename) + + with bz2.open(json_path, "rt") as f: + data = json.load(f) + + adaptor = AseAtomsAdaptor() + structures = [] + + entries = data["entries"] + if n_structures > len(entries): + n_structures = len(entries) + print(f"Only {len(entries)} structures available, using all of them") + + rng = random.Random(RANDOM_SEED) + selected_indices = sorted(rng.sample(range(len(entries)), k=n_structures)) + + for idx in selected_indices: + entry_dict = entries[idx] + entry = ComputedStructureEntry.from_dict(entry_dict) + mat_id = entry.data.get("mat_id", f"struct_{idx}") + + atoms = adaptor.get_atoms(entry.structure) + n_atoms = len(entry.structure) + + # For 2D, use area per atom instead of volume + ref_area_per_atom = get_area_per_atom(atoms) + + structures.append( + { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": entry.energy / n_atoms, + "ref_area_per_atom": ref_area_per_atom, + } + ) + + return structures + + +def relax_2d( + atoms: Atoms, + fmax: float = FMAX, + max_steps: int = MAX_STEPS, +) -> tuple[Atoms | None, bool, float | None]: + """ + Relax 2D structure with cell mask to prevent out-of-plane relaxation. + + Parameters + ---------- + atoms + ASE Atoms object with calculator attached. + fmax + Maximum force tolerance in eV/A. + max_steps + Maximum number of optimization steps. + + Returns + ------- + tuple[Atoms | None, bool, float | None] + Relaxed atoms (or None if failed), convergence status, energy per atom. + """ + try: + converged = False + counter = 0 + # repeat up to 3 times if not converged + while counter < 3 and not converged: + atoms.set_constraint(FixSymmetry(atoms)) + geom_opt = GeomOpt( + struct=atoms, + fmax=fmax, + steps=max_steps, + # Use cell mask to only allow in-plane relaxation + filter_kwargs={"mask": CELL_MASKS["2D"]}, + calc_kwargs={"default_dtype": "float64"}, + ) + geom_opt.run() + relaxed = geom_opt.struct + # assess forces to determine convergence + max_force = max(relaxed.get_forces().flatten(), key=abs) + if max_force < fmax: + converged = True + counter += 1 + atoms = relaxed + except Exception as e: + print(f"Relaxation failed: {e}") + return None, False, None + + if not converged: + return None, False, None + + energy = relaxed.get_potential_energy() + n_atoms = len(relaxed) + + return relaxed, converged, energy / n_atoms + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_low_dimensional_relaxation(mlip: tuple[str, Any]) -> None: + """ + Run 2D relaxation benchmark. + + Parameters + ---------- + mlip + Tuple of model name and model object. + """ + model_name, model = mlip + calc = model.get_calculator() + + # Load 2D structures (downloads from Alexandria if not cached) + structures = load_2d_structures() + + results = [] + for struct_data in tqdm(structures, desc=f"{model_name} 2D", leave=False): + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) + mat_id = struct_data["mat_id"] + + relaxed_atoms, converged, energy_per_atom = relax_2d(atoms) + + if relaxed_atoms is not None: + pred_area = get_area_per_atom(relaxed_atoms) + else: + pred_area = None + + results.append( + { + "mat_id": mat_id, + "dimensionality": "2D", + "ref_area_per_atom": struct_data["ref_area_per_atom"], + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_area_per_atom": pred_area, + "pred_energy_per_atom": energy_per_atom, + "converged": converged, + } + ) + + # Save results + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + df = pd.DataFrame(results) + df.to_csv(out_dir / "results_2D.csv", index=False) From 2a6711d2f90429fdbaa8e6a0e5ae4470027e2a35 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 16:34:33 +0100 Subject: [PATCH 09/22] Add 1D structure support to low-dimensional relaxation benchmark - Add generic download_data() and load_structures() functions for both 2D/1D - Add get_length_per_atom() for 1D chain length calculation - Add relax_low_dimensional() with dimensionality parameter for cell masks - Add 1D metrics: Length MAE, Energy MAE, Convergence - Add 1D parity plots for length and energy - Parametrize test function for both 2D and 1D dimensionalities - Update app with 1D plot configurations Co-Authored-By: Claude Opus 4.5 --- .../analyse_low_dimensional_relaxation.py | 224 +++++++++++++----- .../low_dimensional_relaxation/metrics.yml | 22 ++ .../app_low_dimensional_relaxation.py | 15 +- .../calc_low_dimensional_relaxation.py | 155 ++++++++---- 4 files changed, 300 insertions(+), 116 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py index d416c49ab..4ece6c5c9 100644 --- a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py @@ -77,11 +77,9 @@ def load_results(model_name: str, dimensionality: str = "2D") -> pd.DataFrame: return pd.DataFrame() -def get_converged_data( - model_name: str, dimensionality: str = "2D" -) -> tuple[list, list, list, list]: +def get_converged_data(model_name: str, dimensionality: str = "2D") -> dict[str, list]: """ - Get converged area and energy data for a model. + Get converged geometric and energy data for a model. Parameters ---------- @@ -92,24 +90,31 @@ def get_converged_data( Returns ------- - tuple[list, list, list, list] - Ref areas, pred areas, ref energies, pred energies for converged structures. + dict[str, list] + Dictionary with ref/pred values for geometric metric and energy. """ df = load_results(model_name, dimensionality) if df.empty: - return [], [], [], [] + return {"ref_geom": [], "pred_geom": [], "ref_energy": [], "pred_energy": []} # Filter converged structures and remove NaN values df = _filter_energy_outliers(df) df_conv = df[df["converged"]].copy() - df_conv = df_conv.dropna(subset=["pred_area_per_atom", "pred_energy_per_atom"]) - return ( - df_conv["ref_area_per_atom"].tolist(), - df_conv["pred_area_per_atom"].tolist(), - df_conv["ref_energy_per_atom"].tolist(), - df_conv["pred_energy_per_atom"].tolist(), - ) + # Determine geometric column based on dimensionality + if dimensionality == "2D": + geom_col = "area_per_atom" + else: # 1D + geom_col = "length_per_atom" + + df_conv = df_conv.dropna(subset=[f"pred_{geom_col}", "pred_energy_per_atom"]) + + return { + "ref_geom": df_conv[f"ref_{geom_col}"].tolist(), + "pred_geom": df_conv[f"pred_{geom_col}"].tolist(), + "ref_energy": df_conv["ref_energy_per_atom"].tolist(), + "pred_energy": df_conv["pred_energy_per_atom"].tolist(), + } def get_convergence_rate(model_name: str, dimensionality: str = "2D") -> float | None: @@ -136,7 +141,7 @@ def get_convergence_rate(model_name: str, dimensionality: str = "2D") -> float | def create_parity_plot( - data_getter: str, + data_type: str, title: str, x_label: str, y_label: str, @@ -148,8 +153,8 @@ def create_parity_plot( Parameters ---------- - data_getter - Either "area" or "energy" to select which data to plot. + data_type + Either "geom" (area for 2D, length for 1D) or "energy". title Plot title. x_label @@ -168,16 +173,13 @@ def create_parity_plot( # Collect data from all models for model_name in MODELS: - if data_getter == "area": - ref_area, pred_area, _, _ = get_converged_data(model_name, dimensionality) - ref_values.extend(ref_area) - pred_values.extend(pred_area) + data = get_converged_data(model_name, dimensionality) + if data_type == "geom": + ref_values.extend(data["ref_geom"]) + pred_values.extend(data["pred_geom"]) else: # energy - _, _, ref_energy, pred_energy = get_converged_data( - model_name, dimensionality - ) - ref_values.extend(ref_energy) - pred_values.extend(pred_energy) + ref_values.extend(data["ref_energy"]) + pred_values.extend(data["pred_energy"]) if ref_values and pred_values: fig.add_trace( @@ -230,35 +232,61 @@ def create_parity_plot( @pytest.fixture -def area_predictions() -> None: +def area_predictions_2d() -> None: """Create area parity plot for 2D structures.""" create_parity_plot( - data_getter="area", + data_type="geom", title="Area per atom (2D)", x_label="Predicted area / Ų/atom", y_label="Reference area / Ų/atom", - filename=OUT_PATH / "figure_area.json", + filename=OUT_PATH / "figure_area_2d.json", dimensionality="2D", ) @pytest.fixture -def energy_predictions() -> None: +def energy_predictions_2d() -> None: """Create energy parity plot for 2D structures.""" create_parity_plot( - data_getter="energy", + data_type="energy", title="Energy per atom (2D)", x_label="Predicted energy / eV/atom", y_label="Reference energy / eV/atom", - filename=OUT_PATH / "figure_energy.json", + filename=OUT_PATH / "figure_energy_2d.json", dimensionality="2D", ) @pytest.fixture -def area_mae() -> dict[str, float]: +def length_predictions_1d() -> None: + """Create length parity plot for 1D structures.""" + create_parity_plot( + data_type="geom", + title="Length per atom (1D)", + x_label="Predicted length / Å/atom", + y_label="Reference length / Å/atom", + filename=OUT_PATH / "figure_length_1d.json", + dimensionality="1D", + ) + + +@pytest.fixture +def energy_predictions_1d() -> None: + """Create energy parity plot for 1D structures.""" + create_parity_plot( + data_type="energy", + title="Energy per atom (1D)", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / "figure_energy_1d.json", + dimensionality="1D", + ) + + +@pytest.fixture +def area_mae_2d() -> dict[str, float]: """ - Calculate MAE for area predictions. + Calculate MAE for area predictions (2D). Returns ------- @@ -267,16 +295,16 @@ def area_mae() -> dict[str, float]: """ results = {} for model_name in MODELS: - ref_area, pred_area, _, _ = get_converged_data(model_name, "2D") - if ref_area and pred_area: - results[model_name] = mae(ref_area, pred_area) + data = get_converged_data(model_name, "2D") + if data["ref_geom"] and data["pred_geom"]: + results[model_name] = mae(data["ref_geom"], data["pred_geom"]) return results @pytest.fixture -def energy_mae() -> dict[str, float]: +def energy_mae_2d() -> dict[str, float]: """ - Calculate MAE for energy predictions. + Calculate MAE for energy predictions (2D). Returns ------- @@ -285,16 +313,16 @@ def energy_mae() -> dict[str, float]: """ results = {} for model_name in MODELS: - _, _, ref_energy, pred_energy = get_converged_data(model_name, "2D") - if ref_energy and pred_energy: - results[model_name] = mae(ref_energy, pred_energy) + data = get_converged_data(model_name, "2D") + if data["ref_energy"] and data["pred_energy"]: + results[model_name] = mae(data["ref_energy"], data["pred_energy"]) return results @pytest.fixture -def convergence() -> dict[str, float]: +def convergence_2d() -> dict[str, float]: """ - Calculate convergence rate. + Calculate convergence rate for 2D structures. Returns ------- @@ -309,6 +337,60 @@ def convergence() -> dict[str, float]: return results +@pytest.fixture +def length_mae_1d() -> dict[str, float]: + """ + Calculate MAE for length predictions (1D). + + Returns + ------- + dict[str, float] + {model_name: mae_value}. + """ + results = {} + for model_name in MODELS: + data = get_converged_data(model_name, "1D") + if data["ref_geom"] and data["pred_geom"]: + results[model_name] = mae(data["ref_geom"], data["pred_geom"]) + return results + + +@pytest.fixture +def energy_mae_1d() -> dict[str, float]: + """ + Calculate MAE for energy predictions (1D). + + Returns + ------- + dict[str, float] + {model_name: mae_value}. + """ + results = {} + for model_name in MODELS: + data = get_converged_data(model_name, "1D") + if data["ref_energy"] and data["pred_energy"]: + results[model_name] = mae(data["ref_energy"], data["pred_energy"]) + return results + + +@pytest.fixture +def convergence_1d() -> dict[str, float]: + """ + Calculate convergence rate for 1D structures. + + Returns + ------- + dict[str, float] + {model_name: convergence_rate}. + """ + results = {} + for model_name in MODELS: + conv_rate = get_convergence_rate(model_name, "1D") + if conv_rate is not None: + results[model_name] = conv_rate + return results + + @pytest.fixture @build_table( filename=OUT_PATH / "low_dimensional_metrics_table.json", @@ -316,21 +398,30 @@ def convergence() -> dict[str, float]: thresholds=DEFAULT_THRESHOLDS, ) def metrics( - area_mae: dict[str, float], - energy_mae: dict[str, float], - convergence: dict[str, float], + area_mae_2d: dict[str, float], + energy_mae_2d: dict[str, float], + convergence_2d: dict[str, float], + length_mae_1d: dict[str, float], + energy_mae_1d: dict[str, float], + convergence_1d: dict[str, float], ) -> dict[str, dict]: """ Get all low-dimensional relaxation metrics. Parameters ---------- - area_mae - Area MAE for all models. - energy_mae - Energy MAE for all models. - convergence - Convergence rate for all models. + area_mae_2d + Area MAE for 2D structures. + energy_mae_2d + Energy MAE for 2D structures. + convergence_2d + Convergence rate for 2D structures. + length_mae_1d + Length MAE for 1D structures. + energy_mae_1d + Energy MAE for 1D structures. + convergence_1d + Convergence rate for 1D structures. Returns ------- @@ -338,16 +429,21 @@ def metrics( All metrics for all models. """ return { - "Area MAE (2D)": area_mae, - "Energy MAE (2D)": energy_mae, - "Convergence (2D)": convergence, + "Area MAE (2D)": area_mae_2d, + "Energy MAE (2D)": energy_mae_2d, + "Convergence (2D)": convergence_2d, + "Length MAE (1D)": length_mae_1d, + "Energy MAE (1D)": energy_mae_1d, + "Convergence (1D)": convergence_1d, } def test_low_dimensional_relaxation( metrics: dict[str, dict], - area_predictions: None, - energy_predictions: None, + area_predictions_2d: None, + energy_predictions_2d: None, + length_predictions_1d: None, + energy_predictions_1d: None, ) -> None: """ Run low-dimensional relaxation analysis test. @@ -356,9 +452,13 @@ def test_low_dimensional_relaxation( ---------- metrics All low-dimensional relaxation metrics. - area_predictions - Triggers area plot generation. - energy_predictions - Triggers energy plot generation. + area_predictions_2d + Triggers 2D area plot generation. + energy_predictions_2d + Triggers 2D energy plot generation. + length_predictions_1d + Triggers 1D length plot generation. + energy_predictions_1d + Triggers 1D energy plot generation. """ return diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml index bffdca5cc..59b851b24 100644 --- a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml @@ -20,3 +20,25 @@ metrics: unit: "%" tooltip: "Percentage of 2D structures that converged during relaxation" level_of_theory: PBE + + # 1D structures + Length MAE (1D): + good: 0.01 + bad: 0.05 + unit: "Å/atom" + tooltip: "Mean Absolute Error of chain length per atom for 1D structures compared to PBE" + level_of_theory: PBE + + Energy MAE (1D): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom for 1D structures compared to PBE" + level_of_theory: PBE + + Convergence (1D): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of 1D structures that converged during relaxation" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py index 4ae98c932..15d246291 100644 --- a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py +++ b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py @@ -25,8 +25,10 @@ def register_callbacks(self) -> None: """Register callbacks to app.""" # Define plot paths and their metric mappings plot_configs = [ - ("figure_area.json", "Area MAE (2D)", "area"), - ("figure_energy.json", "Energy MAE (2D)", "energy"), + ("figure_area_2d.json", "Area MAE (2D)", "area-2d"), + ("figure_energy_2d.json", "Energy MAE (2D)", "energy-2d"), + ("figure_length_1d.json", "Length MAE (1D)", "length-1d"), + ("figure_energy_1d.json", "Energy MAE (1D)", "energy-1d"), ] # Build column-to-plot mapping @@ -60,10 +62,11 @@ def get_app() -> LowDimensionalRelaxationApp: return LowDimensionalRelaxationApp( name=BENCHMARK_NAME, description=( - "Performance in relaxing low-dimensional (2D/1D) crystal structures. " - "Structures from the Alexandria database are relaxed with cell masks " - "to constrain relaxation to the appropriate dimensions and compared " - "to PBE reference calculations." + "Performance in relaxing low-dimensional crystal structures. " + "2D structures are evaluated on area per atom, and 1D structures " + "on length per atom. Structures from the Alexandria database are " + "relaxed with cell masks to constrain relaxation to the appropriate " + "dimensions and compared to PBE reference calculations." ), docs_url=DOCS_URL, table_path=DATA_PATH / "low_dimensional_metrics_table.json", diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index 9c7d59f92..ce76cf3fe 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -26,8 +26,16 @@ MODELS = load_models(current_models) # URLs for Alexandria database -ALEXANDRIA_2D_URL = "https://alexandria.icams.rub.de/data/pbe_2d" -ALEXANDRIA_1D_URL = "https://alexandria.icams.rub.de/data/pbe_1d" +ALEXANDRIA_URLS = { + "2D": "https://alexandria.icams.rub.de/data/pbe_2d", + "1D": "https://alexandria.icams.rub.de/data/pbe_1d", +} + +# Default data files for each dimensionality +DEFAULT_DATA_FILES = { + "2D": "alexandria_2d_001.json.bz2", + "1D": "alexandria_1d_001.json.bz2", +} # Local cache directory for downloaded data DATA_PATH = Path(__file__).parent / "data" @@ -50,25 +58,30 @@ } -def download_2d_data(filename: str = "alexandria_2d_001.json.bz2") -> Path: +def download_data(dimensionality: str, filename: str | None = None) -> Path: """ - Download 2D structure data from Alexandria database if not already cached. + Download structure data from Alexandria database if not already cached. Parameters ---------- + dimensionality + Either "2D" or "1D". filename - Name of the file to download. + Name of the file to download. If None, uses default for dimensionality. Returns ------- Path Path to the downloaded/cached file. """ + if filename is None: + filename = DEFAULT_DATA_FILES[dimensionality] + DATA_PATH.mkdir(parents=True, exist_ok=True) local_path = DATA_PATH / filename if not local_path.exists(): - url = f"{ALEXANDRIA_2D_URL}/{filename}" + url = f"{ALEXANDRIA_URLS[dimensionality]}/{filename}" print(f"Downloading {url}...") urllib.request.urlretrieve(url, local_path) print(f"Downloaded to {local_path}") @@ -101,19 +114,43 @@ def get_area_per_atom(atoms: Atoms) -> float: return area / len(atoms) -def load_2d_structures( - filename: str = "alexandria_2d_001.json.bz2", +def get_length_per_atom(atoms: Atoms) -> float: + """ + Calculate the chain length per atom for 1D structures. + + For 1D materials, the length is the magnitude of the first lattice vector. + + Parameters + ---------- + atoms + ASE Atoms object. + + Returns + ------- + float + Chain length per atom in Å. + """ + cell = atoms.get_cell() + length = np.linalg.norm(cell[0]) + return length / len(atoms) + + +def load_structures( + dimensionality: str = "2D", + filename: str | None = None, n_structures: int = N_STRUCTURES, ) -> list[dict]: """ - Load 2D structures from Alexandria database. + Load structures from Alexandria database. Downloads data if not already cached locally. Parameters ---------- + dimensionality + Either "2D" or "1D". filename - Name of the data file to load. + Name of the data file to load. If None, uses default for dimensionality. n_structures Number of structures to load. Default is N_STRUCTURES. @@ -122,7 +159,7 @@ def load_2d_structures( list[dict] List of structure dictionaries with atoms, mat_id, and reference values. """ - json_path = download_2d_data(filename) + json_path = download_data(dimensionality, filename) with bz2.open(json_path, "rt") as f: data = json.load(f) @@ -146,33 +183,38 @@ def load_2d_structures( atoms = adaptor.get_atoms(entry.structure) n_atoms = len(entry.structure) - # For 2D, use area per atom instead of volume - ref_area_per_atom = get_area_per_atom(atoms) + struct_data = { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": entry.energy / n_atoms, + } - structures.append( - { - "mat_id": mat_id, - "atoms": atoms, - "ref_energy_per_atom": entry.energy / n_atoms, - "ref_area_per_atom": ref_area_per_atom, - } - ) + # Use appropriate geometric metric based on dimensionality + if dimensionality == "2D": + struct_data["ref_area_per_atom"] = get_area_per_atom(atoms) + else: # 1D + struct_data["ref_length_per_atom"] = get_length_per_atom(atoms) + + structures.append(struct_data) return structures -def relax_2d( +def relax_low_dimensional( atoms: Atoms, + dimensionality: str = "2D", fmax: float = FMAX, max_steps: int = MAX_STEPS, ) -> tuple[Atoms | None, bool, float | None]: """ - Relax 2D structure with cell mask to prevent out-of-plane relaxation. + Relax low-dimensional structure with cell mask to constrain relaxation. Parameters ---------- atoms ASE Atoms object with calculator attached. + dimensionality + Either "2D" or "1D". fmax Maximum force tolerance in eV/A. max_steps @@ -193,8 +235,8 @@ def relax_2d( struct=atoms, fmax=fmax, steps=max_steps, - # Use cell mask to only allow in-plane relaxation - filter_kwargs={"mask": CELL_MASKS["2D"]}, + # Use cell mask to constrain relaxation to appropriate dimensions + filter_kwargs={"mask": CELL_MASKS[dimensionality]}, calc_kwargs={"default_dtype": "float64"}, ) geom_opt.run() @@ -218,49 +260,66 @@ def relax_2d( return relaxed, converged, energy / n_atoms +DIMENSIONALITIES = ["2D", "1D"] + + @pytest.mark.parametrize("mlip", MODELS.items()) -def test_low_dimensional_relaxation(mlip: tuple[str, Any]) -> None: +@pytest.mark.parametrize("dimensionality", DIMENSIONALITIES) +def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) -> None: """ - Run 2D relaxation benchmark. + Run low-dimensional relaxation benchmark. Parameters ---------- mlip Tuple of model name and model object. + dimensionality + Either "2D" or "1D". """ model_name, model = mlip calc = model.get_calculator() - # Load 2D structures (downloads from Alexandria if not cached) - structures = load_2d_structures() + # Load structures (downloads from Alexandria if not cached) + structures = load_structures(dimensionality) results = [] - for struct_data in tqdm(structures, desc=f"{model_name} 2D", leave=False): + for struct_data in tqdm( + structures, desc=f"{model_name} {dimensionality}", leave=False + ): atoms = struct_data["atoms"].copy() atoms.calc = copy(calc) mat_id = struct_data["mat_id"] - relaxed_atoms, converged, energy_per_atom = relax_2d(atoms) - - if relaxed_atoms is not None: - pred_area = get_area_per_atom(relaxed_atoms) - else: - pred_area = None - - results.append( - { - "mat_id": mat_id, - "dimensionality": "2D", - "ref_area_per_atom": struct_data["ref_area_per_atom"], - "ref_energy_per_atom": struct_data["ref_energy_per_atom"], - "pred_area_per_atom": pred_area, - "pred_energy_per_atom": energy_per_atom, - "converged": converged, - } + relaxed_atoms, converged, energy_per_atom = relax_low_dimensional( + atoms, dimensionality ) + result = { + "mat_id": mat_id, + "dimensionality": dimensionality, + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_energy_per_atom": energy_per_atom, + "converged": converged, + } + + # Add dimension-specific geometric metrics + if dimensionality == "2D": + result["ref_area_per_atom"] = struct_data["ref_area_per_atom"] + if relaxed_atoms is not None: + result["pred_area_per_atom"] = get_area_per_atom(relaxed_atoms) + else: + result["pred_area_per_atom"] = None + else: # 1D + result["ref_length_per_atom"] = struct_data["ref_length_per_atom"] + if relaxed_atoms is not None: + result["pred_length_per_atom"] = get_length_per_atom(relaxed_atoms) + else: + result["pred_length_per_atom"] = None + + results.append(result) + # Save results out_dir = OUT_PATH / model_name out_dir.mkdir(parents=True, exist_ok=True) df = pd.DataFrame(results) - df.to_csv(out_dir / "results_2D.csv", index=False) + df.to_csv(out_dir / f"results_{dimensionality}.csv", index=False) From 014cb814c72975ef53df4c275f3f1c8936e033f2 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 16:37:38 +0100 Subject: [PATCH 10/22] Add Low-Dimensional Relaxation documentation Document the new low-dimensional relaxation benchmark including: - Summary of 2D and 1D structure relaxation - Metrics: Area/Length MAE, Energy MAE, and Convergence for both 2D and 1D - Description of cell masks for constrained relaxation - Data availability from Alexandria database Co-Authored-By: Claude Opus 4.5 --- .../user_guide/benchmarks/bulk_crystal.rst | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index c7ef3a5b7..dd513fe10 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -174,3 +174,79 @@ Reference data: * PBE calculations from the Alexandria database * Loew et al 2026 J. Phys. Mater. 9 015010 https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8 + + +Low-Dimensional Relaxation +========================== + +Summary +------- + +Performance in relaxing low-dimensional (2D and 1D) crystal structures. +Structures from the Alexandria database are relaxed with cell masks to constrain +relaxation to the appropriate dimensions and compared to PBE reference calculations. + + +Metrics +------- + +**2D Structures:** + +(1) Area MAE (2D) + +Mean absolute error of area per atom compared to PBE reference. The area is +calculated as the magnitude of the cross product of the two in-plane lattice vectors. + +(2) Energy MAE (2D) + +Mean absolute error of energy per atom compared to PBE reference. + +(3) Convergence (2D) + +Percentage of 2D structures that successfully converged during relaxation. + +**1D Structures:** + +(4) Length MAE (1D) + +Mean absolute error of chain length per atom compared to PBE reference. The length +is the magnitude of the first lattice vector (the chain direction). + +(5) Energy MAE (1D) + +Mean absolute error of energy per atom compared to PBE reference. + +(6) Convergence (1D) + +Percentage of 1D structures that successfully converged during relaxation. + +Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint +applied to preserve crystallographic symmetry. Cell relaxation is constrained using +cell masks: + +* 2D: Only in-plane cell components (a, b, and γ) are allowed to relax +* 1D: Only the chain direction (a) is allowed to relax + +Relaxation continues until the maximum force component is below 0.0002 eV/Å or until +500 steps are reached. If not converged, relaxation is repeated up to 3 times. + + +Computational cost +------------------ + +High: tests are likely to take hours-days to run on GPU, depending on the number of +structures tested. + + +Data availability +----------------- + +Input structures: + +* Alexandria database 2D structures: https://alexandria.icams.rub.de/data/pbe_2d +* Alexandria database 1D structures: https://alexandria.icams.rub.de/data/pbe_1d +* 3000 structures randomly sampled from each dataset + +Reference data: + +* PBE calculations from the Alexandria database From 936033a23f2b350eace7656ee577c133cc92b3de Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 18:48:33 +0100 Subject: [PATCH 11/22] Add vacuum padding function for non-periodic directions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add set_vacuum_padding() function that sets lattice constants to 100 Å in non-periodic directions while preserving vector directions: - 2D: sets c vector to 100 Å - 1D: sets b and c vectors to 100 Å Apply vacuum padding before relaxation in test function. Co-Authored-By: Claude Opus 4.5 --- .../calc_low_dimensional_relaxation.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index ce76cf3fe..2fe27252d 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -49,6 +49,9 @@ # Number of structures to use for testing N_STRUCTURES = 3000 +# Vacuum padding for non-periodic directions (Angstrom) +VACUUM_PADDING = 100.0 + # Dimensionality configurations # For 2D: mask allows in-plane cell relaxation only (fix z) # For 1D: mask allows along-chain relaxation only (fix y and z) @@ -135,6 +138,62 @@ def get_length_per_atom(atoms: Atoms) -> float: return length / len(atoms) +def set_vacuum_padding( + atoms: Atoms, dimensionality: str, vacuum: float = VACUUM_PADDING +) -> Atoms: + """ + Set lattice constants in non-periodic directions to specified vacuum padding. + + For 2D materials, sets the c lattice vector magnitude to the vacuum value. + For 1D materials, sets both b and c lattice vector magnitudes to the vacuum value. + The direction of each lattice vector is preserved. + + Parameters + ---------- + atoms + ASE Atoms object. + dimensionality + Either "2D" or "1D". + vacuum + Target length for non-periodic lattice vectors in Angstrom. + + Returns + ------- + Atoms + Modified atoms object with updated cell. + """ + cell = np.array(atoms.get_cell()) + + if dimensionality == "2D": + # Set c vector magnitude to vacuum while preserving direction + c_vec = cell[2] + c_norm = np.linalg.norm(c_vec) + if c_norm > 0: + cell[2] = c_vec / c_norm * vacuum + else: + # If c vector is zero, set it along z direction + cell[2] = np.array([0.0, 0.0, vacuum]) + else: # 1D + # Set b vector magnitude to vacuum while preserving direction + b_vec = cell[1] + b_norm = np.linalg.norm(b_vec) + if b_norm > 0: + cell[1] = b_vec / b_norm * vacuum + else: + cell[1] = np.array([0.0, vacuum, 0.0]) + + # Set c vector magnitude to vacuum while preserving direction + c_vec = cell[2] + c_norm = np.linalg.norm(c_vec) + if c_norm > 0: + cell[2] = c_vec / c_norm * vacuum + else: + cell[2] = np.array([0.0, 0.0, vacuum]) + + atoms.set_cell(cell, scale_atoms=False) + return atoms + + def load_structures( dimensionality: str = "2D", filename: str | None = None, @@ -287,6 +346,8 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) structures, desc=f"{model_name} {dimensionality}", leave=False ): atoms = struct_data["atoms"].copy() + # Set vacuum padding in non-periodic directions + atoms = set_vacuum_padding(atoms, dimensionality) atoms.calc = copy(calc) mat_id = struct_data["mat_id"] From b4686352a54cf1b3a3ebaa5f15208caa4868e46c Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 18:52:20 +0100 Subject: [PATCH 12/22] Add structure symmetrization, remove FixSymmetry constraint - Add symmetrize_structure() using spglib.standardize_cell() - Remove FixSymmetry constraint (doesn't work for low-d relaxations) - Apply symmetrization before vacuum padding and relaxation Co-Authored-By: Claude Opus 4.5 --- .../calc_low_dimensional_relaxation.py | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index 2fe27252d..b7d5731c0 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -11,13 +11,13 @@ import urllib.request from ase import Atoms -from ase.constraints import FixSymmetry from janus_core.calculations.geom_opt import GeomOpt import numpy as np import pandas as pd from pymatgen.entries.computed_entries import ComputedStructureEntry from pymatgen.io.ase import AseAtomsAdaptor import pytest +import spglib from tqdm import tqdm from ml_peg.models.get_models import load_models @@ -138,6 +138,47 @@ def get_length_per_atom(atoms: Atoms) -> float: return length / len(atoms) +def symmetrize_structure(atoms: Atoms, symprec: float = 1e-5) -> Atoms: + """ + Symmetrize atomic positions and cell using spglib. + + Parameters + ---------- + atoms + ASE Atoms object. + symprec + Symmetry precision for spglib. + + Returns + ------- + Atoms + Symmetrized atoms object. + """ + # Convert to spglib format + cell = ( + atoms.get_cell().array, + atoms.get_scaled_positions(), + atoms.get_atomic_numbers(), + ) + + # Get symmetrized cell + symmetrized = spglib.standardize_cell(cell, to_primitive=False, symprec=symprec) + + if symmetrized is None: + # If symmetrization fails, return original structure + return atoms + + lattice, scaled_positions, numbers = symmetrized + + # Create new atoms object with symmetrized structure + return Atoms( + numbers=numbers, + scaled_positions=scaled_positions, + cell=lattice, + pbc=atoms.pbc, + ) + + def set_vacuum_padding( atoms: Atoms, dimensionality: str, vacuum: float = VACUUM_PADDING ) -> Atoms: @@ -289,7 +330,6 @@ def relax_low_dimensional( counter = 0 # repeat up to 3 times if not converged while counter < 3 and not converged: - atoms.set_constraint(FixSymmetry(atoms)) geom_opt = GeomOpt( struct=atoms, fmax=fmax, @@ -346,6 +386,8 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) structures, desc=f"{model_name} {dimensionality}", leave=False ): atoms = struct_data["atoms"].copy() + # Symmetrize structure before relaxation + atoms = symmetrize_structure(atoms) # Set vacuum padding in non-periodic directions atoms = set_vacuum_padding(atoms, dimensionality) atoms.calc = copy(calc) From 5f06e8734103225d3e80ac68c4590e550f38af48 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 19:13:45 +0100 Subject: [PATCH 13/22] Support multiple data files per dimensionality - Update DEFAULT_DATA_FILES to use lists (2 files for 2D, 1 for 1D) - Add download_all_data() to download multiple files - Update load_structures() to pool entries from all files before sampling Co-Authored-By: Claude Opus 4.5 --- .../calc_low_dimensional_relaxation.py | 77 ++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index b7d5731c0..a466d5e61 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -31,10 +31,10 @@ "1D": "https://alexandria.icams.rub.de/data/pbe_1d", } -# Default data files for each dimensionality -DEFAULT_DATA_FILES = { - "2D": "alexandria_2d_001.json.bz2", - "1D": "alexandria_1d_001.json.bz2", +# Default data files for each dimensionality (list of files) +DEFAULT_DATA_FILES: dict[str, list[str]] = { + "2D": ["alexandria_2d_001.json.bz2", "alexandria_2d_002.json.bz2"], + "1D": ["alexandria_1d_001.json.bz2"], } # Local cache directory for downloaded data @@ -61,25 +61,22 @@ } -def download_data(dimensionality: str, filename: str | None = None) -> Path: +def download_data(dimensionality: str, filename: str) -> Path: """ - Download structure data from Alexandria database if not already cached. + Download a single structure data file from Alexandria database if not cached. Parameters ---------- dimensionality Either "2D" or "1D". filename - Name of the file to download. If None, uses default for dimensionality. + Name of the file to download. Returns ------- Path Path to the downloaded/cached file. """ - if filename is None: - filename = DEFAULT_DATA_FILES[dimensionality] - DATA_PATH.mkdir(parents=True, exist_ok=True) local_path = DATA_PATH / filename @@ -92,6 +89,30 @@ def download_data(dimensionality: str, filename: str | None = None) -> Path: return local_path +def download_all_data( + dimensionality: str, filenames: list[str] | None = None +) -> list[Path]: + """ + Download all structure data files for a dimensionality. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + filenames + List of filenames to download. If None, uses defaults for dimensionality. + + Returns + ------- + list[Path] + Paths to the downloaded/cached files. + """ + if filenames is None: + filenames = DEFAULT_DATA_FILES[dimensionality] + + return [download_data(dimensionality, f) for f in filenames] + + def get_area_per_atom(atoms: Atoms) -> float: """ Calculate the in-plane area per atom for 2D structures. @@ -237,20 +258,21 @@ def set_vacuum_padding( def load_structures( dimensionality: str = "2D", - filename: str | None = None, + filenames: list[str] | None = None, n_structures: int = N_STRUCTURES, ) -> list[dict]: """ Load structures from Alexandria database. - Downloads data if not already cached locally. + Downloads data if not already cached locally. When multiple files are provided, + structures are pooled together and randomly sampled from the combined set. Parameters ---------- dimensionality Either "2D" or "1D". - filename - Name of the data file to load. If None, uses default for dimensionality. + filenames + List of data filenames to load. If None, uses defaults for dimensionality. n_structures Number of structures to load. Default is N_STRUCTURES. @@ -259,24 +281,27 @@ def load_structures( list[dict] List of structure dictionaries with atoms, mat_id, and reference values. """ - json_path = download_data(dimensionality, filename) - - with bz2.open(json_path, "rt") as f: - data = json.load(f) + json_paths = download_all_data(dimensionality, filenames) - adaptor = AseAtomsAdaptor() - structures = [] + # Load all entries from all files + all_entries = [] + for json_path in json_paths: + with bz2.open(json_path, "rt") as f: + data = json.load(f) + all_entries.extend(data["entries"]) - entries = data["entries"] - if n_structures > len(entries): - n_structures = len(entries) - print(f"Only {len(entries)} structures available, using all of them") + if n_structures > len(all_entries): + n_structures = len(all_entries) + print(f"Only {len(all_entries)} structures available, using all of them") rng = random.Random(RANDOM_SEED) - selected_indices = sorted(rng.sample(range(len(entries)), k=n_structures)) + selected_indices = sorted(rng.sample(range(len(all_entries)), k=n_structures)) + + adaptor = AseAtomsAdaptor() + structures = [] for idx in selected_indices: - entry_dict = entries[idx] + entry_dict = all_entries[idx] entry = ComputedStructureEntry.from_dict(entry_dict) mat_id = entry.data.get("mat_id", f"struct_{idx}") From bb5ec267915f7617e12e27308e06fa1097e6922c Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Sun, 8 Feb 2026 15:18:36 +0100 Subject: [PATCH 14/22] Consolidate low-dimensional analysis with config-driven loops Replace 10 near-identical per-dimensionality fixtures with: - DIM_CONFIGS dict mapping dimensionality to column/label/unit - _compute_mae() and _compute_convergence() helpers - _generate_all_plots() looping over DIM_CONFIGS - Single parity_plots fixture and loop-based metrics fixture Reduces 465 lines to 353 lines with no duplication. Co-Authored-By: Claude Opus 4.6 --- .../analyse_low_dimensional_relaxation.py | 260 +++++------------- 1 file changed, 74 insertions(+), 186 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py index 4ece6c5c9..153c23ff3 100644 --- a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py @@ -26,15 +26,25 @@ ) # Energy outlier thresholds (eV/atom) -ENERGY_OUTLIER_MIN = -25 -ENERGY_OUTLIER_MAX = 25 +ENERGY_OUTLIER_MIN = -40 +ENERGY_OUTLIER_MAX = 40 + +# Dimensionality-specific configuration +DIM_CONFIGS = { + "2D": {"geom_col": "area_per_atom", "geom_label": "Area", "geom_unit": "Ų/atom"}, + "1D": { + "geom_col": "length_per_atom", + "geom_label": "Length", + "geom_unit": "Å/atom", + }, +} def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: """ Mark structures with extreme predicted energies as unconverged. - Structures with predicted energy per atom outside [-25, 25] eV/atom + Structures with predicted energy per atom outside [-40, 40] eV/atom are considered outliers and marked as unconverged. Parameters @@ -97,16 +107,10 @@ def get_converged_data(model_name: str, dimensionality: str = "2D") -> dict[str, if df.empty: return {"ref_geom": [], "pred_geom": [], "ref_energy": [], "pred_energy": []} - # Filter converged structures and remove NaN values df = _filter_energy_outliers(df) df_conv = df[df["converged"]].copy() - # Determine geometric column based on dimensionality - if dimensionality == "2D": - geom_col = "area_per_atom" - else: # 1D - geom_col = "length_per_atom" - + geom_col = DIM_CONFIGS[dimensionality]["geom_col"] df_conv = df_conv.dropna(subset=[f"pred_{geom_col}", "pred_energy_per_atom"]) return { @@ -171,13 +175,12 @@ def create_parity_plot( ref_values = [] pred_values = [] - # Collect data from all models for model_name in MODELS: data = get_converged_data(model_name, dimensionality) if data_type == "geom": ref_values.extend(data["ref_geom"]) pred_values.extend(data["pred_geom"]) - else: # energy + else: ref_values.extend(data["ref_energy"]) pred_values.extend(data["pred_energy"]) @@ -198,7 +201,6 @@ def create_parity_plot( ) ) - # Add diagonal line (y=x) all_values = [] for trace in fig.data: all_values.extend(trace.x) @@ -225,86 +227,21 @@ def create_parity_plot( hovermode="closest", ) - # Save to JSON filename.parent.mkdir(parents=True, exist_ok=True) with open(filename, "w") as f: json.dump(fig.to_dict(), f) -@pytest.fixture -def area_predictions_2d() -> None: - """Create area parity plot for 2D structures.""" - create_parity_plot( - data_type="geom", - title="Area per atom (2D)", - x_label="Predicted area / Ų/atom", - y_label="Reference area / Ų/atom", - filename=OUT_PATH / "figure_area_2d.json", - dimensionality="2D", - ) - - -@pytest.fixture -def energy_predictions_2d() -> None: - """Create energy parity plot for 2D structures.""" - create_parity_plot( - data_type="energy", - title="Energy per atom (2D)", - x_label="Predicted energy / eV/atom", - y_label="Reference energy / eV/atom", - filename=OUT_PATH / "figure_energy_2d.json", - dimensionality="2D", - ) - - -@pytest.fixture -def length_predictions_1d() -> None: - """Create length parity plot for 1D structures.""" - create_parity_plot( - data_type="geom", - title="Length per atom (1D)", - x_label="Predicted length / Å/atom", - y_label="Reference length / Å/atom", - filename=OUT_PATH / "figure_length_1d.json", - dimensionality="1D", - ) - - -@pytest.fixture -def energy_predictions_1d() -> None: - """Create energy parity plot for 1D structures.""" - create_parity_plot( - data_type="energy", - title="Energy per atom (1D)", - x_label="Predicted energy / eV/atom", - y_label="Reference energy / eV/atom", - filename=OUT_PATH / "figure_energy_1d.json", - dimensionality="1D", - ) - - -@pytest.fixture -def area_mae_2d() -> dict[str, float]: +def _compute_mae(dimensionality: str, data_key: str) -> dict[str, float]: """ - Calculate MAE for area predictions (2D). + Compute MAE across all models for a given dimensionality and data key. - Returns - ------- - dict[str, float] - {model_name: mae_value}. - """ - results = {} - for model_name in MODELS: - data = get_converged_data(model_name, "2D") - if data["ref_geom"] and data["pred_geom"]: - results[model_name] = mae(data["ref_geom"], data["pred_geom"]) - return results - - -@pytest.fixture -def energy_mae_2d() -> dict[str, float]: - """ - Calculate MAE for energy predictions (2D). + Parameters + ---------- + dimensionality + Either "2D" or "1D". + data_key + Either "geom" or "energy". Returns ------- @@ -312,17 +249,23 @@ def energy_mae_2d() -> dict[str, float]: {model_name: mae_value}. """ results = {} + ref_key = f"ref_{data_key}" + pred_key = f"pred_{data_key}" for model_name in MODELS: - data = get_converged_data(model_name, "2D") - if data["ref_energy"] and data["pred_energy"]: - results[model_name] = mae(data["ref_energy"], data["pred_energy"]) + data = get_converged_data(model_name, dimensionality) + if data[ref_key] and data[pred_key]: + results[model_name] = mae(data[ref_key], data[pred_key]) return results -@pytest.fixture -def convergence_2d() -> dict[str, float]: +def _compute_convergence(dimensionality: str) -> dict[str, float]: """ - Calculate convergence rate for 2D structures. + Compute convergence rates across all models for a given dimensionality. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". Returns ------- @@ -331,64 +274,41 @@ def convergence_2d() -> dict[str, float]: """ results = {} for model_name in MODELS: - conv_rate = get_convergence_rate(model_name, "2D") + conv_rate = get_convergence_rate(model_name, dimensionality) if conv_rate is not None: results[model_name] = conv_rate return results -@pytest.fixture -def length_mae_1d() -> dict[str, float]: - """ - Calculate MAE for length predictions (1D). - - Returns - ------- - dict[str, float] - {model_name: mae_value}. - """ - results = {} - for model_name in MODELS: - data = get_converged_data(model_name, "1D") - if data["ref_geom"] and data["pred_geom"]: - results[model_name] = mae(data["ref_geom"], data["pred_geom"]) - return results - - -@pytest.fixture -def energy_mae_1d() -> dict[str, float]: - """ - Calculate MAE for energy predictions (1D). - - Returns - ------- - dict[str, float] - {model_name: mae_value}. - """ - results = {} - for model_name in MODELS: - data = get_converged_data(model_name, "1D") - if data["ref_energy"] and data["pred_energy"]: - results[model_name] = mae(data["ref_energy"], data["pred_energy"]) - return results +def _generate_all_plots() -> None: + """Generate all parity plots for each dimensionality.""" + for dim, cfg in DIM_CONFIGS.items(): + geom_label = cfg["geom_label"] + geom_unit = cfg["geom_unit"] + dim_lower = dim.lower() + + create_parity_plot( + data_type="geom", + title=f"{geom_label} per atom ({dim})", + x_label=f"Predicted {geom_label.lower()} / {geom_unit}", + y_label=f"Reference {geom_label.lower()} / {geom_unit}", + filename=OUT_PATH / f"figure_{geom_label.lower()}_{dim_lower}.json", + dimensionality=dim, + ) + create_parity_plot( + data_type="energy", + title=f"Energy per atom ({dim})", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / f"figure_energy_{dim_lower}.json", + dimensionality=dim, + ) @pytest.fixture -def convergence_1d() -> dict[str, float]: - """ - Calculate convergence rate for 1D structures. - - Returns - ------- - dict[str, float] - {model_name: convergence_rate}. - """ - results = {} - for model_name in MODELS: - conv_rate = get_convergence_rate(model_name, "1D") - if conv_rate is not None: - results[model_name] = conv_rate - return results +def parity_plots() -> None: + """Generate all parity plots for 2D and 1D structures.""" + _generate_all_plots() @pytest.fixture @@ -397,53 +317,27 @@ def convergence_1d() -> dict[str, float]: metric_tooltips=DEFAULT_TOOLTIPS, thresholds=DEFAULT_THRESHOLDS, ) -def metrics( - area_mae_2d: dict[str, float], - energy_mae_2d: dict[str, float], - convergence_2d: dict[str, float], - length_mae_1d: dict[str, float], - energy_mae_1d: dict[str, float], - convergence_1d: dict[str, float], -) -> dict[str, dict]: +def metrics() -> dict[str, dict]: """ - Get all low-dimensional relaxation metrics. - - Parameters - ---------- - area_mae_2d - Area MAE for 2D structures. - energy_mae_2d - Energy MAE for 2D structures. - convergence_2d - Convergence rate for 2D structures. - length_mae_1d - Length MAE for 1D structures. - energy_mae_1d - Energy MAE for 1D structures. - convergence_1d - Convergence rate for 1D structures. + Compute all low-dimensional relaxation metrics. Returns ------- dict[str, dict] All metrics for all models. """ - return { - "Area MAE (2D)": area_mae_2d, - "Energy MAE (2D)": energy_mae_2d, - "Convergence (2D)": convergence_2d, - "Length MAE (1D)": length_mae_1d, - "Energy MAE (1D)": energy_mae_1d, - "Convergence (1D)": convergence_1d, - } + result = {} + for dim, cfg in DIM_CONFIGS.items(): + geom_label = cfg["geom_label"] + result[f"{geom_label} MAE ({dim})"] = _compute_mae(dim, "geom") + result[f"Energy MAE ({dim})"] = _compute_mae(dim, "energy") + result[f"Convergence ({dim})"] = _compute_convergence(dim) + return result def test_low_dimensional_relaxation( metrics: dict[str, dict], - area_predictions_2d: None, - energy_predictions_2d: None, - length_predictions_1d: None, - energy_predictions_1d: None, + parity_plots: None, ) -> None: """ Run low-dimensional relaxation analysis test. @@ -452,13 +346,7 @@ def test_low_dimensional_relaxation( ---------- metrics All low-dimensional relaxation metrics. - area_predictions_2d - Triggers 2D area plot generation. - energy_predictions_2d - Triggers 2D energy plot generation. - length_predictions_1d - Triggers 1D length plot generation. - energy_predictions_1d - Triggers 1D energy plot generation. + parity_plots + Triggers parity plot generation for all dimensionalities. """ return From 4a9d2d1a1743d93e04941cc037035b3f46c92459 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Sun, 8 Feb 2026 19:00:32 +0100 Subject: [PATCH 15/22] add two path --- .../calc_low_dimensional_relaxation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index a466d5e61..822cc3e31 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -33,8 +33,8 @@ # Default data files for each dimensionality (list of files) DEFAULT_DATA_FILES: dict[str, list[str]] = { - "2D": ["alexandria_2d_001.json.bz2", "alexandria_2d_002.json.bz2"], - "1D": ["alexandria_1d_001.json.bz2"], + "2D": ["alexandria_2d_001.json.bz2", "alexandria_2d_000.json.bz2"], + "1D": ["alexandria_1d_000.json.bz2"], } # Local cache directory for downloaded data From be940bbf69bbbee5b12018281411b77fb94eb400 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 08:18:14 +0100 Subject: [PATCH 16/22] Use ASE refine_symmetry instead of custom spglib function Replace custom symmetrize_structure implementation with ase.spacegroup.symmetrize.refine_symmetry. Drop spglib import. Co-Authored-By: Claude Opus 4.6 --- .../calc_low_dimensional_relaxation.py | 30 ++++--------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index 822cc3e31..64806616c 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -11,13 +11,13 @@ import urllib.request from ase import Atoms +from ase.spacegroup.symmetrize import refine_symmetry from janus_core.calculations.geom_opt import GeomOpt import numpy as np import pandas as pd from pymatgen.entries.computed_entries import ComputedStructureEntry from pymatgen.io.ase import AseAtomsAdaptor import pytest -import spglib from tqdm import tqdm from ml_peg.models.get_models import load_models @@ -161,7 +161,7 @@ def get_length_per_atom(atoms: Atoms) -> float: def symmetrize_structure(atoms: Atoms, symprec: float = 1e-5) -> Atoms: """ - Symmetrize atomic positions and cell using spglib. + Symmetrize atomic positions and cell using ASE's refine_symmetry. Parameters ---------- @@ -175,29 +175,9 @@ def symmetrize_structure(atoms: Atoms, symprec: float = 1e-5) -> Atoms: Atoms Symmetrized atoms object. """ - # Convert to spglib format - cell = ( - atoms.get_cell().array, - atoms.get_scaled_positions(), - atoms.get_atomic_numbers(), - ) - - # Get symmetrized cell - symmetrized = spglib.standardize_cell(cell, to_primitive=False, symprec=symprec) - - if symmetrized is None: - # If symmetrization fails, return original structure - return atoms - - lattice, scaled_positions, numbers = symmetrized - - # Create new atoms object with symmetrized structure - return Atoms( - numbers=numbers, - scaled_positions=scaled_positions, - cell=lattice, - pbc=atoms.pbc, - ) + atoms = atoms.copy() + refine_symmetry(atoms, symprec=symprec) + return atoms def set_vacuum_padding( From 9b02013e756c1beb8f7de27dd44ffc3431e32cef Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 08:23:40 +0100 Subject: [PATCH 17/22] Remove symmetrize_structure wrapper, add geometry unit tests - Remove symmetrize_structure(), call refine_symmetry() directly - Add test_get_area_per_atom and test_get_length_per_atom unit tests Co-Authored-By: Claude Opus 4.6 --- .../calc_low_dimensional_relaxation.py | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index 64806616c..fe3fd22a8 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -159,27 +159,6 @@ def get_length_per_atom(atoms: Atoms) -> float: return length / len(atoms) -def symmetrize_structure(atoms: Atoms, symprec: float = 1e-5) -> Atoms: - """ - Symmetrize atomic positions and cell using ASE's refine_symmetry. - - Parameters - ---------- - atoms - ASE Atoms object. - symprec - Symmetry precision for spglib. - - Returns - ------- - Atoms - Symmetrized atoms object. - """ - atoms = atoms.copy() - refine_symmetry(atoms, symprec=symprec) - return atoms - - def set_vacuum_padding( atoms: Atoms, dimensionality: str, vacuum: float = VACUUM_PADDING ) -> Atoms: @@ -392,7 +371,7 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) ): atoms = struct_data["atoms"].copy() # Symmetrize structure before relaxation - atoms = symmetrize_structure(atoms) + refine_symmetry(atoms) # Set vacuum padding in non-periodic directions atoms = set_vacuum_padding(atoms, dimensionality) atoms.calc = copy(calc) @@ -431,3 +410,17 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) out_dir.mkdir(parents=True, exist_ok=True) df = pd.DataFrame(results) df.to_csv(out_dir / f"results_{dimensionality}.csv", index=False) + + +def test_get_area_per_atom() -> None: + """Test get_area_per_atom for a simple 2D structure.""" + # 2-atom structure with 3x4 in-plane cell → area = 12, area/atom = 6 + atoms = Atoms("H2", positions=[[0, 0, 0], [1.5, 0, 0]], cell=[3, 4, 20], pbc=True) + assert get_area_per_atom(atoms) == pytest.approx(6.0) + + +def test_get_length_per_atom() -> None: + """Test get_length_per_atom for a simple 1D structure.""" + # 2-atom chain with a=5 Å → length/atom = 2.5 + atoms = Atoms("H2", positions=[[0, 0, 0], [2.5, 0, 0]], cell=[5, 20, 20], pbc=True) + assert get_length_per_atom(atoms) == pytest.approx(2.5) From 7e9ef0e423e9f88e9b0916afe257d96e96954947 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 08:26:46 +0100 Subject: [PATCH 18/22] Move geometry unit tests to tests/ directory Co-Authored-By: Claude Opus 4.6 --- .../calc_low_dimensional_relaxation.py | 14 ----------- tests/test_low_dimensional_relaxation.py | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 tests/test_low_dimensional_relaxation.py diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index fe3fd22a8..b1f253c1f 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -410,17 +410,3 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) out_dir.mkdir(parents=True, exist_ok=True) df = pd.DataFrame(results) df.to_csv(out_dir / f"results_{dimensionality}.csv", index=False) - - -def test_get_area_per_atom() -> None: - """Test get_area_per_atom for a simple 2D structure.""" - # 2-atom structure with 3x4 in-plane cell → area = 12, area/atom = 6 - atoms = Atoms("H2", positions=[[0, 0, 0], [1.5, 0, 0]], cell=[3, 4, 20], pbc=True) - assert get_area_per_atom(atoms) == pytest.approx(6.0) - - -def test_get_length_per_atom() -> None: - """Test get_length_per_atom for a simple 1D structure.""" - # 2-atom chain with a=5 Å → length/atom = 2.5 - atoms = Atoms("H2", positions=[[0, 0, 0], [2.5, 0, 0]], cell=[5, 20, 20], pbc=True) - assert get_length_per_atom(atoms) == pytest.approx(2.5) diff --git a/tests/test_low_dimensional_relaxation.py b/tests/test_low_dimensional_relaxation.py new file mode 100644 index 000000000..17ec100cd --- /dev/null +++ b/tests/test_low_dimensional_relaxation.py @@ -0,0 +1,25 @@ +"""Unit tests for low-dimensional relaxation utilities.""" + +from __future__ import annotations + +from ase import Atoms +import pytest + +from ml_peg.calcs.bulk_crystal.low_dimensional_relaxation.calc_low_dimensional_relaxation import ( # noqa: E501 + get_area_per_atom, + get_length_per_atom, +) + + +def test_get_area_per_atom() -> None: + """Test get_area_per_atom for a simple 2D structure.""" + # 2-atom structure with 3x4 in-plane cell → area = 12, area/atom = 6 + atoms = Atoms("H2", positions=[[0, 0, 0], [1.5, 0, 0]], cell=[3, 4, 20], pbc=True) + assert get_area_per_atom(atoms) == pytest.approx(6.0) + + +def test_get_length_per_atom() -> None: + """Test get_length_per_atom for a simple 1D structure.""" + # 2-atom chain with a=5 Å → length/atom = 2.5 + atoms = Atoms("H2", positions=[[0, 0, 0], [2.5, 0, 0]], cell=[5, 20, 20], pbc=True) + assert get_length_per_atom(atoms) == pytest.approx(2.5) From 17b4926fa0d6286209a0a47e6dc669f55f6835bd Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 08:29:44 +0100 Subject: [PATCH 19/22] remove high-pressure part --- .../analyse_high_pressure_relaxation.py | 389 ------------------ .../high_pressure_relaxation/metrics.yml | 140 ------- .../app_high_pressure_relaxation.py | 82 ---- .../calc_high_pressure_relaxation.py | 266 ------------ ...est_high_pressure_relaxation_regression.py | 74 ---- 5 files changed, 951 deletions(-) delete mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py delete mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml delete mode 100644 ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py delete mode 100644 ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py delete mode 100644 tests/test_high_pressure_relaxation_regression.py diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py deleted file mode 100644 index 78a591f6f..000000000 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py +++ /dev/null @@ -1,389 +0,0 @@ -"""Analyse high-pressure crystal relaxation benchmark.""" - -from __future__ import annotations - -import json -from pathlib import Path - -import pandas as pd -import plotly.express as px -import plotly.graph_objects as go -import pytest - -from ml_peg.analysis.utils.decorators import build_table -from ml_peg.analysis.utils.utils import load_metrics_config, mae -from ml_peg.app import APP_ROOT -from ml_peg.calcs import CALCS_ROOT -from ml_peg.models.get_models import get_model_names -from ml_peg.models.models import current_models - -MODELS = get_model_names(current_models) -CALC_PATH = CALCS_ROOT / "bulk_crystal" / "high_pressure_relaxation" / "outputs" -OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" - -# Pressure conditions -PRESSURES = [0, 25, 50, 75, 100, 125, 150] -PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] - -# Generate colors using viridis colorscale -PRESSURE_COLORS = px.colors.sample_colorscale("viridis", len(PRESSURES)) - -METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") -DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( - METRICS_CONFIG_PATH -) - -# Energy outlier thresholds (eV/atom) -ENERGY_OUTLIER_MIN = -25 -ENERGY_OUTLIER_MAX = 25 - - -def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: - """ - Mark structures with extreme predicted energies as unconverged. - - Structures with predicted energy per atom outside [-25, 25] eV/atom - are considered outliers and marked as unconverged. - - Parameters - ---------- - df - Results dataframe with 'pred_energy_per_atom' and 'converged' columns. - - Returns - ------- - pd.DataFrame - Dataframe with outliers marked as unconverged. - """ - df.loc[ - (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) - | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), - "converged", - ] = False - return df - - -def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: - """ - Load results for a specific model and pressure. - - Parameters - ---------- - model_name - Name of the model. - pressure_label - Pressure label (e.g., "P000"). - - Returns - ------- - pd.DataFrame - Results dataframe or empty dataframe if not found. - """ - csv_path = CALC_PATH / model_name / f"results_{pressure_label}.csv" - if csv_path.exists(): - return pd.read_csv(csv_path) - return pd.DataFrame() - - -def get_converged_data_for_pressure( - model_name: str, pressure_label: str -) -> tuple[list, list, list, list]: - """ - Get converged volume and energy data for a model at a specific pressure. - - Parameters - ---------- - model_name - Name of the model. - pressure_label - Pressure label (e.g., "P000"). - - Returns - ------- - tuple[list, list, list, list] - Ref volumes, pred volumes, ref energies, pred energies for converged structures. - """ - df = load_results_for_pressure(model_name, pressure_label) - if df.empty: - return [], [], [], [] - - # Filter converged structures and remove NaN values - df = _filter_energy_outliers(df) - df_conv = df[df["converged"]].copy() - df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) - - return ( - df_conv["ref_volume_per_atom"].tolist(), - df_conv["pred_volume_per_atom"].tolist(), - df_conv["ref_energy_per_atom"].tolist(), - df_conv["pred_energy_per_atom"].tolist(), - ) - - -def get_convergence_rate_for_pressure( - model_name: str, pressure_label: str -) -> float | None: - """ - Get convergence rate for a model at a specific pressure. - - Parameters - ---------- - model_name - Name of the model. - pressure_label - Pressure label (e.g., "P000"). - - Returns - ------- - float | None - Convergence rate (%) or None if no data. - """ - df = load_results_for_pressure(model_name, pressure_label) - if df.empty: - return None - df = _filter_energy_outliers(df) - return (df["converged"].sum() / len(df)) * 100 - - -def create_pressure_colored_parity_plot( - data_getter: str, - title: str, - x_label: str, - y_label: str, - filename: Path, -) -> None: - """ - Create a parity plot with different colors for each pressure. - - Parameters - ---------- - data_getter - Either "volume" or "energy" to select which data to plot. - title - Plot title. - x_label - X-axis label. - y_label - Y-axis label. - filename - Path to save the plot JSON. - """ - fig = go.Figure() - - # Add a trace for each pressure - for pressure, pressure_label, color in zip( - PRESSURES, PRESSURE_LABELS, PRESSURE_COLORS, strict=False - ): - ref_values = [] - pred_values = [] - - # Collect data from all models for this pressure - for model_name in MODELS: - if data_getter == "volume": - ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( - model_name, pressure_label - ) - ref_values.extend(ref_vol) - pred_values.extend(pred_vol) - else: # energy - _, _, ref_energy, pred_energy = get_converged_data_for_pressure( - model_name, pressure_label - ) - ref_values.extend(ref_energy) - pred_values.extend(pred_energy) - - if ref_values and pred_values: - fig.add_trace( - go.Scatter( - x=pred_values, - y=ref_values, - name=f"{pressure} GPa", - mode="markers", - marker={"color": color, "size": 6, "opacity": 0.7}, - hovertemplate=( - f"{pressure} GPa
" - "Pred: %{x:.4f}
" - "Ref: %{y:.4f}
" - "" - ), - ) - ) - - # Add diagonal line (y=x) - all_values = [] - for trace in fig.data: - all_values.extend(trace.x) - all_values.extend(trace.y) - - if all_values: - min_val = min(all_values) - max_val = max(all_values) - fig.add_trace( - go.Scatter( - x=[min_val, max_val], - y=[min_val, max_val], - mode="lines", - name="y=x", - line={"color": "gray", "dash": "dash"}, - showlegend=True, - ) - ) - - fig.update_layout( - title=title, - xaxis_title=x_label, - yaxis_title=y_label, - legend_title="Pressure", - hovermode="closest", - ) - - # Save to JSON - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w") as f: - json.dump(fig.to_dict(), f) - - -@pytest.fixture -def volume_predictions() -> None: - """Create volume parity plot with different colors for each pressure.""" - create_pressure_colored_parity_plot( - data_getter="volume", - title="Volume per atom", - x_label="Predicted volume / ų/atom", - y_label="Reference volume / ų/atom", - filename=OUT_PATH / "figure_volume.json", - ) - - -@pytest.fixture -def energy_predictions() -> None: - """Create energy parity plot with different colors for each pressure.""" - create_pressure_colored_parity_plot( - data_getter="energy", - title="Energy per atom", - x_label="Predicted energy / eV/atom", - y_label="Reference energy / eV/atom", - filename=OUT_PATH / "figure_energy.json", - ) - - -@pytest.fixture -def volume_mae_per_pressure() -> dict[str, dict[str, float]]: - """ - Calculate MAE for volume predictions at each pressure. - - Returns - ------- - dict[str, dict[str, float]] - Nested dict: {pressure_gpa: {model_name: mae_value}}. - """ - results = {} - for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): - pressure_key = f"Volume MAE ({pressure} GPa)" - results[pressure_key] = {} - for model_name in MODELS: - ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( - model_name, pressure_label - ) - if ref_vol and pred_vol: - results[pressure_key][model_name] = mae(ref_vol, pred_vol) - return results - - -@pytest.fixture -def energy_mae_per_pressure() -> dict[str, dict[str, float]]: - """ - Calculate MAE for energy predictions at each pressure. - - Returns - ------- - dict[str, dict[str, float]] - Nested dict: {pressure_gpa: {model_name: mae_value}}. - """ - results = {} - for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): - pressure_key = f"Energy MAE ({pressure} GPa)" - results[pressure_key] = {} - for model_name in MODELS: - _, _, ref_energy, pred_energy = get_converged_data_for_pressure( - model_name, pressure_label - ) - if ref_energy and pred_energy: - results[pressure_key][model_name] = mae(ref_energy, pred_energy) - return results - - -@pytest.fixture -def convergence_per_pressure() -> dict[str, dict[str, float]]: - """ - Calculate convergence rate at each pressure. - - Returns - ------- - dict[str, dict[str, float]] - Nested dict: {pressure_gpa: {model_name: convergence_rate}}. - """ - results = {} - for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): - pressure_key = f"Convergence ({pressure} GPa)" - results[pressure_key] = {} - for model_name in MODELS: - conv_rate = get_convergence_rate_for_pressure(model_name, pressure_label) - if conv_rate is not None: - results[pressure_key][model_name] = conv_rate - return results - - -@pytest.fixture -@build_table( - filename=OUT_PATH / "high_pressure_metrics_table.json", - metric_tooltips=DEFAULT_TOOLTIPS, - thresholds=DEFAULT_THRESHOLDS, -) -def metrics( - volume_mae_per_pressure: dict[str, dict[str, float]], - energy_mae_per_pressure: dict[str, dict[str, float]], - convergence_per_pressure: dict[str, dict[str, float]], -) -> dict[str, dict]: - """ - Get all high-pressure relaxation metrics separated by pressure. - - Parameters - ---------- - volume_mae_per_pressure - Volume MAE for all models at each pressure. - energy_mae_per_pressure - Energy MAE for all models at each pressure. - convergence_per_pressure - Convergence rate for all models at each pressure. - - Returns - ------- - dict[str, dict] - All metrics for all models. - """ - all_metrics = {} - all_metrics.update(volume_mae_per_pressure) - all_metrics.update(energy_mae_per_pressure) - all_metrics.update(convergence_per_pressure) - return all_metrics - - -def test_high_pressure_relaxation( - metrics: dict[str, dict], - volume_predictions: None, - energy_predictions: None, -) -> None: - """ - Run high-pressure relaxation analysis test. - - Parameters - ---------- - metrics - All high-pressure relaxation metrics. - volume_predictions - Triggers volume plot generation. - energy_predictions - Triggers energy plot generation. - """ - return diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml deleted file mode 100644 index 1e09408cd..000000000 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml +++ /dev/null @@ -1,140 +0,0 @@ -metrics: - # 0 GPa - Volume MAE (0 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (0 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 0 GPa compared to PBE" - level_of_theory: PBE - Convergence (0 GPa): - good: 100.0 - bad: 99.9 - unit: "%" - tooltip: "Percentage of structures that converged at 0 GPa" - level_of_theory: PBE - - # 25 GPa - Volume MAE (25 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (25 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 25 GPa compared to PBE" - level_of_theory: PBE - Convergence (25 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 25 GPa" - level_of_theory: PBE - - # 50 GPa - Volume MAE (50 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (50 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 50 GPa compared to PBE" - level_of_theory: PBE - Convergence (50 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 50 GPa" - level_of_theory: PBE - - # 75 GPa - Volume MAE (75 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (75 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 75 GPa compared to PBE" - level_of_theory: PBE - Convergence (75 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 75 GPa" - level_of_theory: PBE - - # 100 GPa - Volume MAE (100 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (100 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 100 GPa compared to PBE" - level_of_theory: PBE - Convergence (100 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 100 GPa" - level_of_theory: PBE - - # 125 GPa - Volume MAE (125 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (125 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 125 GPa compared to PBE" - level_of_theory: PBE - Convergence (125 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 125 GPa" - level_of_theory: PBE - - # 150 GPa - Volume MAE (150 GPa): - good: 0.05 - bad: 0.1 - unit: "ų/atom" - tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" - level_of_theory: PBE - Energy MAE (150 GPa): - good: 0.02 - bad: 0.03 - unit: "eV/atom" - tooltip: "Mean Absolute Error of energy per atom at 150 GPa compared to PBE" - level_of_theory: PBE - Convergence (150 GPa): - good: 100.0 - bad: 99.0 - unit: "%" - tooltip: "Percentage of structures that converged at 150 GPa" - level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py deleted file mode 100644 index b2746c9c7..000000000 --- a/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Run high-pressure crystal relaxation benchmark app.""" - -from __future__ import annotations - -from dash import Dash -from dash.html import Div - -from ml_peg.app import APP_ROOT -from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column -from ml_peg.app.utils.load import read_plot -from ml_peg.models.get_models import get_model_names -from ml_peg.models.models import current_models - -# Get all models -MODELS = get_model_names(current_models) -BENCHMARK_NAME = "High-pressure relaxation" -DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html#high-pressure-relaxation" -DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" - - -PRESSURES = [0, 25, 50, 75, 100, 125, 150] - - -class HighPressureRelaxationApp(BaseApp): - """High-pressure crystal relaxation benchmark app layout and callbacks.""" - - def register_callbacks(self) -> None: - """Register callbacks to app.""" - scatter_volume = read_plot( - DATA_PATH / "figure_volume.json", id=f"{BENCHMARK_NAME}-figure" - ) - scatter_energy = read_plot( - DATA_PATH / "figure_energy.json", id=f"{BENCHMARK_NAME}-figure" - ) - - # Build column_to_plot mapping for all pressures - column_to_plot = {} - for pressure in PRESSURES: - column_to_plot[f"Volume MAE ({pressure} GPa)"] = scatter_volume - column_to_plot[f"Energy MAE ({pressure} GPa)"] = scatter_energy - - plot_from_table_column( - table_id=self.table_id, - plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot=column_to_plot, - ) - - -def get_app() -> HighPressureRelaxationApp: - """ - Get high-pressure relaxation benchmark app layout and callback registration. - - Returns - ------- - HighPressureRelaxationApp - Benchmark layout and callback registration. - """ - return HighPressureRelaxationApp( - name=BENCHMARK_NAME, - description=( - "Performance when relaxing crystal structures under high pressure " - "(0-150 GPa). Evaluates volume, energy, and convergence percentage " - "against PBE reference calculations from the Alexandria database. " - "Please also reference Loew et al 2026 J. Phys. Mater. 9 015010" - " (https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8) " - "when using this benchmark." - ), - docs_url=DOCS_URL, - table_path=DATA_PATH / "high_pressure_metrics_table.json", - extra_components=[ - Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), - ], - ) - - -if __name__ == "__main__": - full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) - hp_app = get_app() - full_app.layout = hp_app.layout - hp_app.register_callbacks() - full_app.run(port=8055, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py deleted file mode 100644 index bcf6c88fc..000000000 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ /dev/null @@ -1,266 +0,0 @@ -"""Run calculations for high-pressure crystal relaxation benchmark.""" - -from __future__ import annotations - -import bz2 -from copy import copy -import json -from pathlib import Path -import random -from typing import Any -import urllib.request - -from ase import Atoms -from ase.constraints import FixSymmetry -from ase.units import GPa -from janus_core.calculations.geom_opt import GeomOpt -import pandas as pd -from pymatgen.entries.computed_entries import ComputedStructureEntry -from pymatgen.io.ase import AseAtomsAdaptor -import pytest -from tqdm import tqdm - -from ml_peg.models.get_models import load_models -from ml_peg.models.models import current_models - -MODELS = load_models(current_models) - -# URL for Alexandria database -ALEXANDRIA_BASE_URL = "https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure" - -# Local cache directory for downloaded data -DATA_PATH = Path(__file__).parent / "data" -OUT_PATH = Path(__file__).parent / "outputs" - -# Pressure conditions in GPa -PRESSURES = [0, 25, 50, 75, 100, 125, 150] -PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] - - -# Relaxation parameters -FMAX = 0.0002 # eV/A - tight convergence -MAX_STEPS = 500 -RANDOM_SEED = 42 - -# Number of structures to use for testing -N_STRUCTURES = 3000 - - -def download_pressure_data(pressure_label: str) -> Path: - """ - Download pressure data from Alexandria database if not already cached. - - Parameters - ---------- - pressure_label - Pressure label (e.g., "P000", "P025"). - - Returns - ------- - Path - Path to the downloaded/cached file. - """ - DATA_PATH.mkdir(parents=True, exist_ok=True) - local_path = DATA_PATH / f"{pressure_label}.json.bz2" - - if not local_path.exists(): - url = f"{ALEXANDRIA_BASE_URL}/{pressure_label}.json.bz2" - print(f"Downloading {url}...") - urllib.request.urlretrieve(url, local_path) - print(f"Downloaded to {local_path}") - - return local_path - - -def load_structures( - pressure_label: str, n_structures: int = N_STRUCTURES -) -> list[dict]: - """ - Load structures using P000 starting structures and pressure-specific references. - - Downloads data from Alexandria database if not already cached locally. - - Parameters - ---------- - pressure_label - Pressure label (e.g., "P000", "P025") used for reference values. - n_structures - Number of structures to load. Default is N_STRUCTURES. - - Returns - ------- - list[dict] - List of structure dictionaries with atoms, mat_id, and reference values. - """ - start_json_path = download_pressure_data("P000") - ref_json_path = download_pressure_data(pressure_label) - - with bz2.open(start_json_path, "rt") as f: - start_data = json.load(f) - - with bz2.open(ref_json_path, "rt") as f: - ref_data = json.load(f) - - ref_map: dict[str, ComputedStructureEntry] = {} - for entry_dict in ref_data["entries"]: - entry = ComputedStructureEntry.from_dict(entry_dict) - ref_map[entry.data["mat_id"]] = entry - - adaptor = AseAtomsAdaptor() - structures = [] - - start_entries = start_data["entries"] - if n_structures > len(start_entries): - raise ValueError( - f"Requested {n_structures} structures but only" - f" {len(start_entries)} available" - ) - - rng = random.Random(RANDOM_SEED) - selected_indices = sorted(rng.sample(range(len(start_entries)), k=n_structures)) - - for idx in selected_indices: - entry_dict = start_entries[idx] - entry = ComputedStructureEntry.from_dict(entry_dict) - mat_id = entry.data["mat_id"] - ref_entry = ref_map.get(mat_id) - if ref_entry is None: - raise ValueError( - f"Missing reference entry for {mat_id} at {pressure_label}" - ) - - atoms = adaptor.get_atoms(entry.structure) - n_atoms = len(ref_entry.structure) - - structures.append( - { - "mat_id": mat_id, - "atoms": atoms, - "ref_energy_per_atom": ref_entry.energy / n_atoms, - "ref_volume_per_atom": ref_entry.structure.volume / n_atoms, - } - ) - - return structures - - -def relax_with_pressure( - atoms: Atoms, - pressure_gpa: float, - fmax: float = FMAX, - max_steps: int = MAX_STEPS, -) -> tuple[Atoms | None, bool, float | None]: - """ - Relax structure under specified pressure using janus-core GeomOpt. - - Parameters - ---------- - atoms - ASE Atoms object with calculator attached. - pressure_gpa - Pressure in GPa. - fmax - Maximum force tolerance in eV/A. - max_steps - Maximum number of optimization steps. - - Returns - ------- - tuple[Atoms | None, bool, float | None] - Relaxed atoms (or None if failed), convergence status, enthalpy per atom. - """ - try: - converged = False - counter = 0 - # repeat up to 3 times if not converged to be consistent with reference - while counter < 3 and not converged: - atoms.set_constraint( - FixSymmetry( - atoms, - ) - ) - geom_opt = GeomOpt( - struct=atoms, - fmax=fmax, - steps=max_steps, - filter_kwargs={"scalar_pressure": pressure_gpa}, - calc_kwargs={"default_dtype": "float64"}, - ) - geom_opt.run() - relaxed = geom_opt.struct - # asses forces to determine convergence - max_force = max(relaxed.get_forces().flatten(), key=abs) - if max_force < fmax: - converged = True - counter += 1 - atoms = relaxed - except Exception as e: - print(f"Relaxation failed: {e}") - return None, False, None - if not converged: - return None, False, None - # Calculate enthalpy: H = E + PV - energy = relaxed.get_potential_energy() - volume = relaxed.get_volume() - enthalpy = energy + pressure_gpa * GPa * volume - n_atoms = len(relaxed) - - return relaxed, converged, enthalpy / n_atoms - - -@pytest.mark.parametrize("mlip", MODELS.items()) -@pytest.mark.parametrize("pressure_idx", range(len(PRESSURES))) -def test_high_pressure_relaxation(mlip: tuple[str, Any], pressure_idx: int) -> None: - """ - Run high-pressure relaxation benchmark. - - Parameters - ---------- - mlip - Tuple of model name and model object. - pressure_idx - Index into PRESSURES list. - """ - model_name, model = mlip - calc = model.get_calculator() - - pressure_gpa = PRESSURES[pressure_idx] - pressure_label = PRESSURE_LABELS[pressure_idx] - - # Load structures (downloads from Alexandria if not cached) - structures = load_structures(pressure_label) - - results = [] - for struct_data in tqdm( - structures, desc=f"{model_name} @ {pressure_gpa} GPa", leave=False - ): - atoms = struct_data["atoms"].copy() - atoms.calc = copy(calc) - mat_id = struct_data["mat_id"] - - relaxed_atoms, converged, enthalpy_per_atom = relax_with_pressure( - atoms, pressure_gpa - ) - - if relaxed_atoms is not None: - pred_volume = relaxed_atoms.get_volume() / len(relaxed_atoms) - else: - pred_volume = None - - results.append( - { - "mat_id": mat_id, - "pressure_gpa": pressure_gpa, - "ref_volume_per_atom": struct_data["ref_volume_per_atom"], - "ref_energy_per_atom": struct_data["ref_energy_per_atom"], - "pred_volume_per_atom": pred_volume, - "pred_energy_per_atom": enthalpy_per_atom, - "converged": converged, - } - ) - - # Save results - out_dir = OUT_PATH / model_name - out_dir.mkdir(parents=True, exist_ok=True) - df = pd.DataFrame(results) - df.to_csv(out_dir / f"results_{pressure_label}.csv", index=False) diff --git a/tests/test_high_pressure_relaxation_regression.py b/tests/test_high_pressure_relaxation_regression.py deleted file mode 100644 index dda8eada5..000000000 --- a/tests/test_high_pressure_relaxation_regression.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Regression checks for high-pressure relaxation outputs.""" - -from __future__ import annotations - -from pathlib import Path - -import numpy as np -import pandas as pd -import pytest - -PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] -FIRST_N = 5 -VOLUME_ATOL = 0.001 -ENERGY_ATOL = 0.0001 - -REPO_ROOT = Path(__file__).resolve().parents[1] -TEST_DATA_ROOT = REPO_ROOT / "tests" / "data" / "high_pressure_relaxation" -OUTPUT_ROOT = ( - REPO_ROOT - / "ml_peg" - / "calcs" - / "bulk_crystal" - / "high_pressure_relaxation" - / "outputs" -) - -TEST_CASES = [ - { - "name": "mace-mpa-0", - "output_model": "mace-mpa-0", - "data_file": TEST_DATA_ROOT / "mace-mpa-0_first5.csv", - }, -] - - -@pytest.mark.parametrize("case", TEST_CASES, ids=[case["name"] for case in TEST_CASES]) -@pytest.mark.parametrize("pressure_label", PRESSURE_LABELS) -def test_high_pressure_relaxation_regression( - case: dict[str, Path | str], - pressure_label: str, -) -> None: - """Check entries against stored regression baselines.""" - data_path = Path(case["data_file"]) - results_path = ( - OUTPUT_ROOT / str(case["output_model"]) / f"results_{pressure_label}.csv" - ) - - assert data_path.exists(), f"Missing test data file: {data_path}" - assert results_path.exists(), f"Missing results file: {results_path}" - - expected = pd.read_csv(data_path) - expected_pressure = expected[expected["pressure_label"] == pressure_label] - - assert len(expected_pressure) == FIRST_N - - results = pd.read_csv(results_path).head(FIRST_N) - if len(results) < FIRST_N: - pytest.skip( - f"{case['output_model']} only has {len(results)} rows for {pressure_label}." - ) - - assert expected_pressure["mat_id"].tolist() == results["mat_id"].tolist() - np.testing.assert_allclose( - results["pred_volume_per_atom"].to_numpy(), - expected_pressure["volume"].to_numpy(), - rtol=0.0, - atol=VOLUME_ATOL, - ) - np.testing.assert_allclose( - results["pred_energy_per_atom"].to_numpy(), - expected_pressure["energy_per_atom"].to_numpy(), - rtol=0.0, - atol=ENERGY_ATOL, - ) From 68b131f3629ff4282c25067eba8a0b89dc8721b1 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 08:35:40 +0100 Subject: [PATCH 20/22] add references remove high-pressure mention --- .../user_guide/benchmarks/bulk_crystal.rst | 76 ++----------------- 1 file changed, 7 insertions(+), 69 deletions(-) diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index dd513fe10..a4757700b 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -117,64 +117,6 @@ Reference data: * PBE -High-Pressure Relaxation -======================== - -Summary -------- - -Performance in relaxing bulk crystal structures under high-pressure conditions. -3000 structures from the Alexandria database are relaxed at 7 pressure conditions -(0, 25, 50, 75, 100, 125, 150 GPa) and compared to PBE reference calculations. - - -Metrics -------- - -For each pressure condition (0, 25, 50, 75, 100, 125, 150 GPa): - -(1) Volume MAE - -Mean absolute error of volume per atom compared to PBE reference. - -(2) Energy MAE - -Mean absolute error of enthalpy per atom compared to PBE reference. The enthalpy is -calculated as H = E + PV, where E is the potential energy, P is the applied pressure, -and V is the volume. - -(3) Convergence - -Percentage of structures that successfully converged during relaxation. - -Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint -applied to preserve crystallographic symmetry analogously to DFT. Starting from P000 (0 GPa) structures, each structure is relaxed at the target pressure using the FrechetCellFilter with the -specified scalar pressure. Relaxation continues until the maximum force component is -below 0.0002 eV/Å or until 500 steps are reached. If not converged, relaxation is -repeated up from the last structure of the previous relaxation up to 3 times. - - -Computational cost ------------------- - -High: tests are likely to take hours-days to run on GPU, depending on the number of -structures and pressure conditions tested. - - -Data availability ------------------ - -Input structures: - -* Alexandria database pressure benchmark dataset -* URL: https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure -* 3000 structures randomly sampled from the full datasets at each pressure - -Reference data: - -* PBE calculations from the Alexandria database -* Loew et al 2026 J. Phys. Mater. 9 015010 https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8 - Low-Dimensional Relaxation ========================== @@ -194,8 +136,7 @@ Metrics (1) Area MAE (2D) -Mean absolute error of area per atom compared to PBE reference. The area is -calculated as the magnitude of the cross product of the two in-plane lattice vectors. +Mean absolute error of area per atom compared to PBE reference. (2) Energy MAE (2D) @@ -209,8 +150,7 @@ Percentage of 2D structures that successfully converged during relaxation. (4) Length MAE (1D) -Mean absolute error of chain length per atom compared to PBE reference. The length -is the magnitude of the first lattice vector (the chain direction). +Mean absolute error of chain length per atom compared to PBE reference. (5) Energy MAE (1D) @@ -220,22 +160,19 @@ Mean absolute error of energy per atom compared to PBE reference. Percentage of 1D structures that successfully converged during relaxation. -Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint -applied to preserve crystallographic symmetry. Cell relaxation is constrained using +Structures are relaxed using janus-core's GeomOpt after calling from `ase.spacegroup.symmetrize.refine_symmetry`. Cell relaxation is constrained using cell masks: * 2D: Only in-plane cell components (a, b, and γ) are allowed to relax * 1D: Only the chain direction (a) is allowed to relax -Relaxation continues until the maximum force component is below 0.0002 eV/Å or until -500 steps are reached. If not converged, relaxation is repeated up to 3 times. +Relaxation continues until the maximum force component is below 0.0002 eV/Å or until 500 steps are reached. If not converged, relaxation is repeated up to 3 times. Computational cost ------------------ -High: tests are likely to take hours-days to run on GPU, depending on the number of -structures tested. +High: tests are likely to take hours-days to run on GPU, depending on the number of structures tested. Data availability @@ -249,4 +186,5 @@ Input structures: Reference data: -* PBE calculations from the Alexandria database +* Hai-Chen Wang et al 2023 2D Mater. 10 035007 +* Jonathan Schmidt et al 2024, Mater. Tod. Phys, 48, 101560 From a17cf3e9dfc448245e33db62e4f0f20e1411b7c3 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 9 Feb 2026 09:03:45 +0100 Subject: [PATCH 21/22] remove high pressure test --- ml_peg/models/models.yml | 10 ------ .../mace-mpa-0_first5.csv | 36 ------------------- 2 files changed, 46 deletions(-) delete mode 100644 tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv diff --git a/ml_peg/models/models.yml b/ml_peg/models/models.yml index 9276c9865..77e54a16b 100644 --- a/ml_peg/models/models.yml +++ b/ml_peg/models/models.yml @@ -72,16 +72,6 @@ orb-v3-consv-inf-omat: kwargs: name: "orb_v3_conservative_inf_omat" -orb-v3-consv-inf-mpa: - module: orb_models.forcefield - class_name: OrbCalc - device: "cpu" - default_dtype: float32-high - trained_on_d3: false - level_of_theory: PBE - kwargs: - name: "orb_v3_conservative_inf_mpa" - # mattersim-5M: # module: mattersim.forcefield # class_name: MatterSimCalculator diff --git a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv deleted file mode 100644 index 9cd448467..000000000 --- a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv +++ /dev/null @@ -1,36 +0,0 @@ -pressure_label,mat_id,volume,energy_per_atom -P000,agm001000110,25.033782103611063,-5.936990103881239 -P000,agm001001040,15.204527828733536,-8.27554636941658 -P000,agm001001919,21.303183572813897,-8.435935053726139 -P000,agm001003503,24.690326355959755,-4.327203031854863 -P000,agm001004359,20.86226724052588,-7.829242292338797 -P025,agm001000110,19.71321916175995,-2.5294162683198125 -P025,agm001001040,13.487048183285616,-6.052898044522763 -P025,agm001001919,17.670764016174328,-5.438176257490443 -P025,agm001003503,19.12780621300846,-1.006088317132572 -P025,agm001004359,17.457486573286673,-4.870256961601473 -P050,agm001000110,17.715912861957246,0.3763531065086264 -P050,agm001001040,12.512987673468842,-4.029860765358488 -P050,agm001001919,15.46189689156924,-2.861038673041728 -P050,agm001003503,16.694829244071812,1.7723527015771132 -P050,agm001004359,15.13105554384206,-2.338786848103142 -P075,agm001000110,16.458372063400443,3.036751076640864 -P075,agm001001040,11.833602300101647,-2.13312140913839 -P075,agm001001919,14.04859528970246,-0.5622805372532179 -P075,agm001003503,15.23016215452774,4.255252390053997 -P075,agm001004359,13.686750914395638,-0.0937456687284251 -P100,agm001000110,15.506324317224594,5.527588395992431 -P100,agm001001040,11.324664691623278,-0.328009659077703 -P100,agm001001919,13.09661261512976,1.547445860475598 -P100,agm001003503,14.113463382167296,6.544697423965405 -P100,agm001004359,12.807027240161723,1.9611872723655293 -P125,agm001000110,14.768322896353382,7.887451277337899 -P125,agm001001040,10.9082879022004,1.4057552363067838 -P125,agm001001919,12.503731488724002,3.542297291380569 -P125,agm001003503,13.125761236229373,8.668450989536197 -P125,agm001004359,12.298360564455225,3.9181192151113904 -P150,agm001000110,14.163630823547033,10.143286868066651 -P150,agm001001040,10.539232357961302,3.078605889229442 -P150,agm001001919,12.045275482032784,5.456359868213672 -P150,agm001003503,12.192106272396378,10.643260044585606 -P150,agm001004359,11.883204516891276,5.803899385939424 From 7af3ea5115cf77c92c0f6844fd298c94d2130e77 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Mar 2026 18:22:32 +0100 Subject: [PATCH 22/22] add xyz storage of relaxed structures and density plots --- .../analyse_low_dimensional_relaxation.py | 208 +++++++++--------- .../app_low_dimensional_relaxation.py | 44 ++-- .../calc_low_dimensional_relaxation.py | 35 ++- 3 files changed, 149 insertions(+), 138 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py index 153c23ff3..5096272df 100644 --- a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py @@ -2,15 +2,13 @@ from __future__ import annotations -import json from pathlib import Path import pandas as pd -import plotly.graph_objects as go import pytest -from ml_peg.analysis.utils.decorators import build_table -from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.analysis.utils.decorators import build_table, plot_density_scatter +from ml_peg.analysis.utils.utils import build_density_inputs, load_metrics_config, mae from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names @@ -144,92 +142,28 @@ def get_convergence_rate(model_name: str, dimensionality: str = "2D") -> float | return (df["converged"].sum() / len(df)) * 100 -def create_parity_plot( - data_type: str, - title: str, - x_label: str, - y_label: str, - filename: Path, - dimensionality: str = "2D", -) -> None: +def _build_stats(dimensionality: str) -> dict[str, dict]: """ - Create a parity plot for low-dimensional structures. + Aggregate converged ref/pred data per model for a given dimensionality. Parameters ---------- - data_type - Either "geom" (area for 2D, length for 1D) or "energy". - title - Plot title. - x_label - X-axis label. - y_label - Y-axis label. - filename - Path to save the plot JSON. dimensionality Either "2D" or "1D". - """ - fig = go.Figure() - - ref_values = [] - pred_values = [] + Returns + ------- + dict[str, dict] + Per-model dicts with "geom" and "energy" ref/pred lists. + """ + stats = {} for model_name in MODELS: data = get_converged_data(model_name, dimensionality) - if data_type == "geom": - ref_values.extend(data["ref_geom"]) - pred_values.extend(data["pred_geom"]) - else: - ref_values.extend(data["ref_energy"]) - pred_values.extend(data["pred_energy"]) - - if ref_values and pred_values: - fig.add_trace( - go.Scatter( - x=pred_values, - y=ref_values, - name=dimensionality, - mode="markers", - marker={"size": 6, "opacity": 0.7}, - hovertemplate=( - f"{dimensionality}
" - "Pred: %{x:.4f}
" - "Ref: %{y:.4f}
" - "" - ), - ) - ) - - all_values = [] - for trace in fig.data: - all_values.extend(trace.x) - all_values.extend(trace.y) - - if all_values: - min_val = min(all_values) - max_val = max(all_values) - fig.add_trace( - go.Scatter( - x=[min_val, max_val], - y=[min_val, max_val], - mode="lines", - name="y=x", - line={"color": "gray", "dash": "dash"}, - showlegend=True, - ) - ) - - fig.update_layout( - title=title, - xaxis_title=x_label, - yaxis_title=y_label, - hovermode="closest", - ) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w") as f: - json.dump(fig.to_dict(), f) + stats[model_name] = { + "geom": {"ref": data["ref_geom"], "pred": data["pred_geom"]}, + "energy": {"ref": data["ref_energy"], "pred": data["pred_energy"]}, + } + return stats def _compute_mae(dimensionality: str, data_key: str) -> dict[str, float]: @@ -280,35 +214,80 @@ def _compute_convergence(dimensionality: str) -> dict[str, float]: return results -def _generate_all_plots() -> None: - """Generate all parity plots for each dimensionality.""" - for dim, cfg in DIM_CONFIGS.items(): - geom_label = cfg["geom_label"] - geom_unit = cfg["geom_unit"] - dim_lower = dim.lower() - - create_parity_plot( - data_type="geom", - title=f"{geom_label} per atom ({dim})", - x_label=f"Predicted {geom_label.lower()} / {geom_unit}", - y_label=f"Reference {geom_label.lower()} / {geom_unit}", - filename=OUT_PATH / f"figure_{geom_label.lower()}_{dim_lower}.json", - dimensionality=dim, - ) - create_parity_plot( - data_type="energy", - title=f"Energy per atom ({dim})", - x_label="Predicted energy / eV/atom", - y_label="Reference energy / eV/atom", - filename=OUT_PATH / f"figure_energy_{dim_lower}.json", - dimensionality=dim, - ) +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_area_2d.json", + title="Area per atom (2D)", + x_label="Reference area / Ų/atom", + y_label="Predicted area / Ų/atom", +) +def area_density_2d() -> dict[str, dict]: + """ + Density scatter inputs for 2D area per atom. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + return build_density_inputs(MODELS, _build_stats("2D"), "geom", metric_fn=mae) + + +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_energy_2d.json", + title="Energy per atom (2D)", + x_label="Reference energy / eV/atom", + y_label="Predicted energy / eV/atom", +) +def energy_density_2d() -> dict[str, dict]: + """ + Density scatter inputs for 2D energy per atom. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + return build_density_inputs(MODELS, _build_stats("2D"), "energy", metric_fn=mae) + + +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_length_1d.json", + title="Length per atom (1D)", + x_label="Reference length / Å/atom", + y_label="Predicted length / Å/atom", +) +def length_density_1d() -> dict[str, dict]: + """ + Density scatter inputs for 1D length per atom. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + return build_density_inputs(MODELS, _build_stats("1D"), "geom", metric_fn=mae) @pytest.fixture -def parity_plots() -> None: - """Generate all parity plots for 2D and 1D structures.""" - _generate_all_plots() +@plot_density_scatter( + filename=OUT_PATH / "figure_energy_1d.json", + title="Energy per atom (1D)", + x_label="Reference energy / eV/atom", + y_label="Predicted energy / eV/atom", +) +def energy_density_1d() -> dict[str, dict]: + """ + Density scatter inputs for 1D energy per atom. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + return build_density_inputs(MODELS, _build_stats("1D"), "energy", metric_fn=mae) @pytest.fixture @@ -337,7 +316,10 @@ def metrics() -> dict[str, dict]: def test_low_dimensional_relaxation( metrics: dict[str, dict], - parity_plots: None, + area_density_2d: dict[str, dict], + energy_density_2d: dict[str, dict], + length_density_1d: dict[str, dict], + energy_density_1d: dict[str, dict], ) -> None: """ Run low-dimensional relaxation analysis test. @@ -346,7 +328,13 @@ def test_low_dimensional_relaxation( ---------- metrics All low-dimensional relaxation metrics. - parity_plots - Triggers parity plot generation for all dimensionalities. + area_density_2d + Triggers 2D area density plot generation. + energy_density_2d + Triggers 2D energy density plot generation. + length_density_1d + Triggers 1D length density plot generation. + energy_density_1d + Triggers 1D energy density plot generation. """ return diff --git a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py index 15d246291..081fa4496 100644 --- a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py +++ b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py @@ -7,9 +7,12 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column -from ml_peg.app.utils.load import read_plot +from ml_peg.app.utils.build_callbacks import plot_from_table_cell +from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models +MODELS = get_model_names(current_models) BENCHMARK_NAME = "Low-Dimensional Relaxation" DOCS_URL = ( "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html" @@ -23,7 +26,6 @@ class LowDimensionalRelaxationApp(BaseApp): def register_callbacks(self) -> None: """Register callbacks to app.""" - # Define plot paths and their metric mappings plot_configs = [ ("figure_area_2d.json", "Area MAE (2D)", "area-2d"), ("figure_energy_2d.json", "Energy MAE (2D)", "energy-2d"), @@ -31,23 +33,27 @@ def register_callbacks(self) -> None: ("figure_energy_1d.json", "Energy MAE (1D)", "energy-1d"), ] - # Build column-to-plot mapping - column_to_plot = {} - for filename, metric_name, plot_id_suffix in plot_configs: - plot_path = DATA_PATH / filename - if plot_path.exists(): - scatter = read_plot( - plot_path, - id=f"{BENCHMARK_NAME}-figure-{plot_id_suffix}", - ) - column_to_plot[metric_name] = scatter + density_plots: dict = {} + for model in MODELS: + plots = {} + for filename, metric_name, plot_id_suffix in plot_configs: + plot_path = DATA_PATH / filename + if plot_path.exists(): + graph = read_density_plot_for_model( + filename=plot_path, + model=model, + id=f"{BENCHMARK_NAME}-{model}-{plot_id_suffix}-figure", + ) + if graph is not None: + plots[metric_name] = graph + if plots: + density_plots[model] = plots - if column_to_plot: - plot_from_table_column( - table_id=self.table_id, - plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot=column_to_plot, - ) + plot_from_table_cell( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + cell_to_plot=density_plots, + ) def get_app() -> LowDimensionalRelaxationApp: diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py index b1f253c1f..12f1bf4dd 100644 --- a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -11,6 +11,7 @@ import urllib.request from ase import Atoms +from ase.io import write as ase_write from ase.spacegroup.symmetrize import refine_symmetry from janus_core.calculations.geom_opt import GeomOpt import numpy as np @@ -381,32 +382,48 @@ def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) atoms, dimensionality ) + if relaxed_atoms is not None: + relaxed_atoms.info["mat_id"] = mat_id + relaxed_atoms.info["dimensionality"] = dimensionality + result = { "mat_id": mat_id, "dimensionality": dimensionality, "ref_energy_per_atom": struct_data["ref_energy_per_atom"], "pred_energy_per_atom": energy_per_atom, "converged": converged, + "relaxed_atoms": relaxed_atoms, } # Add dimension-specific geometric metrics if dimensionality == "2D": result["ref_area_per_atom"] = struct_data["ref_area_per_atom"] - if relaxed_atoms is not None: - result["pred_area_per_atom"] = get_area_per_atom(relaxed_atoms) - else: - result["pred_area_per_atom"] = None + result["pred_area_per_atom"] = ( + get_area_per_atom(relaxed_atoms) if relaxed_atoms is not None else None + ) else: # 1D result["ref_length_per_atom"] = struct_data["ref_length_per_atom"] - if relaxed_atoms is not None: - result["pred_length_per_atom"] = get_length_per_atom(relaxed_atoms) - else: - result["pred_length_per_atom"] = None + result["pred_length_per_atom"] = ( + get_length_per_atom(relaxed_atoms) + if relaxed_atoms is not None + else None + ) results.append(result) # Save results out_dir = OUT_PATH / model_name out_dir.mkdir(parents=True, exist_ok=True) - df = pd.DataFrame(results) + df = pd.DataFrame( + [{k: v for k, v in r.items() if k != "relaxed_atoms"} for r in results] + ) df.to_csv(out_dir / f"results_{dimensionality}.csv", index=False) + + # Write converged relaxed structures to xyz + relaxed_frames = [ + r["relaxed_atoms"] for r in results if r["relaxed_atoms"] is not None + ] + if relaxed_frames: + for frame in relaxed_frames: + frame.calc = None + ase_write(out_dir / f"relaxed_{dimensionality}.xyz", relaxed_frames)