Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 210 additions & 0 deletions ml_peg/analysis/molecular_reactions/bh2o_36/analyse_bh2o_36.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
"""
Analyse the BH2O-36 benchmark for hydrolysis reaction barriers.

Journal of Chemical Theory and Computation 2023 19 (11), 3159-3171
DOI: 10.1021/acs.jctc.3c00176
"""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)

KCAL_TO_EV = units.kcal / units.mol
EV_TO_KCAL = 1 / KCAL_TO_EV
CALC_PATH = CALCS_ROOT / "molecular_reactions" / "bh2o_36" / "outputs"
OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "bh2o_36"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def get_system_names() -> list[str]:
"""
Get list of reaction system names from the first available model.

Returns
-------
list[str]
List of base system names (without suffixes).
"""
for model_name in MODELS:
model_dir = CALC_PATH / model_name
if model_dir.exists():
system_names = []
for system_path in sorted(model_dir.glob("*_ts.xyz")):
system_names.append(system_path.stem.replace("_ts", ""))
if system_names:
return system_names
return []


def get_barrier_labels() -> list[str]:
"""
Get list of barrier labels for plotting (two per reaction system).

Returns
-------
list[str]
List of barrier labels with reaction context.
"""
return [
label
for system in get_system_names()
for label in [f"{system} (TS-Reactants)", f"{system} (TS-Products)"]
]


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_bh2o_36_barriers.json",
title="Reaction barriers",
x_label="Predicted barrier / eV",
y_label="Reference barrier / eV",
hoverdata={
"System": [
system_name
for system_name in get_system_names()
for _ in range(2) # Duplicate each system name for both barriers
],
"Barrier Type": [
barrier_type
for _ in get_system_names()
for barrier_type in ["TS-Reactants", "TS-Products"]
],
},
)
def barrier_heights() -> dict[str, list]:
"""
Get barrier heights for all systems.

Returns
-------
dict[str, list]
Dictionary of all reference and predicted barrier heights.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

system_names = get_system_names()
for model_name in MODELS:
for system_name in system_names:
atoms_rct = read(CALC_PATH / model_name / f"{system_name}_rct.xyz")
atoms_pro = read(CALC_PATH / model_name / f"{system_name}_pro.xyz")
atoms_ts = read(CALC_PATH / model_name / f"{system_name}_ts.xyz")

# TS - Reactants barrier
results[model_name].append(
atoms_ts.info["pred_energy"] - atoms_rct.info["pred_energy"]
)
# TS - Products barrier
results[model_name].append(
atoms_ts.info["pred_energy"] - atoms_pro.info["pred_energy"]
)

if not ref_stored:
results["ref"].append(
atoms_ts.info["ref_energy"] - atoms_rct.info["ref_energy"]
)
results["ref"].append(
atoms_ts.info["ref_energy"] - atoms_pro.info["ref_energy"]
)

# Write structures for app
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)

# Write individual structures
write(structs_dir / f"{system_name}_rct.xyz", atoms_rct)
write(structs_dir / f"{system_name}_pro.xyz", atoms_pro)
write(structs_dir / f"{system_name}_ts.xyz", atoms_ts)

# Write trajectory files for each barrier type
# TS-Reactants barrier: reactants -> TS
write(
structs_dir / f"{system_name}_rct_to_ts.xyz",
[atoms_rct, atoms_ts],
append=False,
)
# TS-Products barrier: products -> TS
write(
structs_dir / f"{system_name}_pro_to_ts.xyz",
[atoms_pro, atoms_ts],
append=False,
)
ref_stored = True
return results


@pytest.fixture
def get_mae(barrier_heights) -> dict[str, float]:
"""
Get mean absolute error for barrier heights.

Parameters
----------
barrier_heights
Dictionary of reference and predicted barrier heights.

Returns
-------
dict[str, float]
Dictionary of predicted barrier height errors for all models.
"""
results = {}
for model_name in MODELS:
results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name])
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "bh2o_36_barriers_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=D3_MODEL_NAMES,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
"""
Get all metrics.

Parameters
----------
get_mae
Mean absolute errors for all models.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {
"MAE": get_mae,
}


def test_bh2o_36_barriers(metrics: dict[str, dict]) -> None:
"""
Run bh2o_36_barriers test.

Parameters
----------
metrics
All new benchmark metric names and dictionary of values for each model.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/molecular_reactions/bh2o_36/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
MAE:
good: 0.0
bad: 2.0
unit: eV
tooltip: Mean Absolute Error for all systems
level_of_theory: CCSD(T)
101 changes: 101 additions & 0 deletions ml_peg/app/molecular_reactions/bh2o_36/app_bh2o_36.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Run BH2O-36 barriers app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_column,
struct_from_scatter,
)
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
BENCHMARK_NAME = "BH2O-36"
DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular.html#bh2o-36"
DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "bh2o_36"


class BH2O36App(BaseApp):
"""BH2O-36 benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
scatter = read_plot(
DATA_PATH / "figure_bh2o_36_barriers.json",
id=f"{BENCHMARK_NAME}-figure",
)

model_dir = DATA_PATH / MODELS[0]
if model_dir.exists():
# Get system names from trajectory files
labels = sorted(
{
f.stem.replace("_rct_to_ts", "")
for f in model_dir.glob("*_rct_to_ts.xyz")
}
)
asset_prefix = f"assets/molecular_reactions/bh2o_36/{MODELS[0]}/"
# Each system has 2 data points:
# TS-Reactants (rct->TS), TS-Products (pro->TS)
structs = [
path
for label in labels
for path in [
f"{asset_prefix}{label}_rct_to_ts.xyz",
f"{asset_prefix}{label}_pro_to_ts.xyz",
]
]
else:
structs = []

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"MAE": scatter},
)

struct_from_scatter(
scatter_id=f"{BENCHMARK_NAME}-figure",
struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
structs=structs,
mode="traj", # Use trajectory mode to cycle between relevant structures
)


def get_app() -> BH2O36App:
"""
Get BH2O-36 benchmark app layout and callback registration.

Returns
-------
BH2O36App
Benchmark layout and callback registration.
"""
return BH2O36App(
name=BENCHMARK_NAME,
description=(
"Performance in predicting hydrolysis reaction barriers for the "
"BH2O-36 dataset (36 aqueous bimolecular reactions). Reference data "
"from CCSD(T) calculations."
),
docs_url=DOCS_URL,
table_path=DATA_PATH / "bh2o_36_barriers_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
],
)


if __name__ == "__main__":
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
benchmark_app = get_app()
full_app.layout = benchmark_app.layout
benchmark_app.register_callbacks()
full_app.run(port=8070, debug=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
title: Molecular Reactions
description:
3 changes: 3 additions & 0 deletions ml_peg/calcs/molecular_reactions/bh2o_36/.dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
Empty file.
3 changes: 3 additions & 0 deletions ml_peg/calcs/molecular_reactions/bh2o_36/.dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
Loading