Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions ml_peg/analysis/superacids/HF_SbF5_density/analyse_HF_SbF5_density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""Analyse HF/SbF5 density benchmark."""

from __future__ import annotations

from pathlib import Path

from ase.io import read
import numpy as np
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)
Comment on lines +12 to +19
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models
MODELS = get_model_names(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)
from ml_peg.analysis.utils.utils import build_dispersion_name_map, load_metrics_config
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models
MODELS = get_model_names(current_models)
DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS)

We recently renamed this sightly. You may need to rebase for this to work pre-merging.

CALC_PATH = CALCS_ROOT / "superacids" / "HF_SbF5_density" / "outputs"
OUT_PATH = APP_ROOT / "data" / "superacids" / "HF_SbF5_density"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)

# Experimental reference densities
REF_DENSITIES = {
"X_0": 0.989,
"X_10": 1.677,
"X_100": 3.141,
}


# amu to g conversion factor
AMU_TO_G = 1.66053906660e-24 # g per amu
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can get this from ase.units e.g. 1000 / units.kg to reduce hard-coding

A3_TO_CM3 = 1e-24


def compute_density_from_volume_dat(volume_path: Path, atoms_path: Path) -> float:
"""
Compute average density from volume.dat and atomic masses.

Parameters
----------
volume_path
Path to volume.dat file (columns: step, volume_A3).
atoms_path
Path to any xyz file for this system (to get atomic masses).

Returns
-------
float
Average density in g/cm³.
"""
# Read total mass from atoms
atoms = read(atoms_path)
total_mass_amu = np.sum(atoms.get_masses())

# Read volume time series, skip header
data = np.loadtxt(volume_path, comments="#")
# Take second half as production (discard equilibration)
n_points = len(data)
production = data[n_points // 2 :, 1] # column 1 = volume in ų
avg_volume = np.mean(production)

return (total_mass_amu * AMU_TO_G) / (avg_volume * A3_TO_CM3)


def get_system_names() -> list[str]:
"""
Get list of system names.

Returns
-------
list[str]
List of system names from output directories.
"""
for model_name in MODELS:
model_dir = CALC_PATH / model_name
if model_dir.exists():
systems = sorted([d.name for d in model_dir.iterdir() if d.is_dir()])
if systems:
return systems
return []


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_density.json",
title="HF/SbF5 Mixture Densities",
x_label="Predicted density / g/cm³",
y_label="Experimental density / g/cm³",
hoverdata={
"System": get_system_names(),
},
)
def densities() -> dict[str, list]:
"""
Get predicted and reference densities for all systems.

Returns
-------
dict[str, list]
Dictionary of reference and predicted densities.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
model_dir = CALC_PATH / model_name

if not model_dir.exists():
continue

systems = sorted([d.name for d in model_dir.iterdir() if d.is_dir()])
if not systems:
continue

for system in systems:
system_dir = model_dir / system
volume_path = system_dir / "volume.dat"
atoms_path = system_dir / f"{system}.xyz"

if not volume_path.exists() or not atoms_path.exists():
continue

density = compute_density_from_volume_dat(volume_path, atoms_path)
results[model_name].append(density)

if not ref_stored:
results["ref"].append(REF_DENSITIES[system])

ref_stored = True

return results


@pytest.fixture
def density_errors(densities) -> dict[str, float]:
"""
Get mean absolute percentage error for densities.

Parameters
----------
densities
Dictionary of reference and predicted densities.

Returns
-------
dict[str, float]
Dictionary of density MAPE for all models.
"""
results = {}
for model_name in MODELS:
if densities[model_name]:
preds = np.array(densities[model_name])
refs = np.array(densities["ref"])
mape = np.mean(np.abs(preds - refs) / refs) * 100 # in %
results[model_name] = mape
else:
results[model_name] = None
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "hf_sbf5_density_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=D3_MODEL_NAMES,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mlip_name_map=D3_MODEL_NAMES,
mlip_name_map=DISPERSION_NAME_MAP,

Following above

)
def metrics(density_errors: dict[str, float]) -> dict[str, dict]:
"""
Get all HF/SbF5 density metrics.

Parameters
----------
density_errors
Mean absolute errors for all systems.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {
"MAPE": density_errors,
}


def test_hf_sbf5_density(metrics: dict[str, dict]) -> None:
"""
Run HF/SbF5 density test.

Parameters
----------
metrics
All HF/SbF5 density metrics.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/superacids/HF_SbF5_density/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
MAPE:
good: 0.0
bad: 10.0
unit: "%"
tooltip: "Mean Absolute Percentage Error in liquid density vs experiment"
level_of_theory: Experiment
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
level_of_theory: Experiment
level_of_theory: Experimental

This is not documented yet, sorry. We need specific key words for this to be interpreted correctly

74 changes: 74 additions & 0 deletions ml_peg/app/superacids/HF_SbF5_density/app_HF_SbF5_density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Run HF/SbF5 density app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_column,
)
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

# Get all models
MODELS = get_model_names(current_models)
BENCHMARK_NAME = "HF/SbF5 Mixture Densities"
DOCS_URL = (
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/"
"superacids.html#hf-sbf5-mixture-densities"
)
DATA_PATH = APP_ROOT / "data" / "superacids" / "HF_SbF5_density"


class HFSbF5DensityApp(BaseApp):
"""HF/SbF5 density benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
scatter = read_plot(
DATA_PATH / "figure_density.json",
id=f"{BENCHMARK_NAME}-figure",
)

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"MAPE": scatter},
)


def get_app() -> HFSbF5DensityApp:
"""
Get HF/SbF5 density benchmark app layout and callback registration.

Returns
-------
HFSbF5DensityApp
Benchmark layout and callback registration.
"""
return HFSbF5DensityApp(
name=BENCHMARK_NAME,
description=("Liquid densities of HF/SbF5 mixtures at varying compositions."),
docs_url=DOCS_URL,
table_path=DATA_PATH / "hf_sbf5_density_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
],
)


if __name__ == "__main__":
# Create Dash app
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)

# Construct layout and register callbacks
app = get_app()
full_app.layout = app.layout
app.register_callbacks()

# Run app
full_app.run(port=8056, debug=True)
2 changes: 2 additions & 0 deletions ml_peg/app/superacids/superacids.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
title: Superacids
description: Properties of HF/SbF5 superacid systems
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably too specific as an entire category description. Do you think superacids makes sense as an entire category, and if so, could this be made slightly more general?

5 changes: 5 additions & 0 deletions ml_peg/calcs/superacids/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outputs/
*.xyz
*.dat
*.log
__pycache__/
Comment on lines +1 to +5
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these not already ignored by the top-level .gitignore? I haven't seen any of these sorts of files in my git status.

If any of these are necessary, I think they're general enough that we can add them to general one anyway.

Loading
Loading