Skip to content

Commit 0d06cef

Browse files
kurylaDomantas Kurylajoehart2001ElliottKasoar
authored
Add 37Conf8 benchmark (#233)
Co-authored-by: Domantas Kuryla <dk584@icepc16> Co-authored-by: joehart2001 <[email protected]> Co-authored-by: ElliottKasoar <[email protected]> Co-authored-by: ElliottKasoar <[email protected]>
1 parent 1990376 commit 0d06cef

File tree

5 files changed

+325
-0
lines changed

5 files changed

+325
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
metrics:
2+
MAE:
3+
good: 0.0
4+
bad: 20.0
5+
unit: kcal/mol
6+
tooltip: Mean Absolute Error for all systems
7+
level_of_theory: DLPNO-CCSD(T)/cc-pVTZ
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""
2+
Analyse 37Conf8 conformer energy benchmark.
3+
4+
Reference DOI: 10.1002/cphc.201801063.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from pathlib import Path
10+
11+
from ase import units
12+
from ase.io import read, write
13+
import pytest
14+
15+
from ml_peg.analysis.utils.decorators import build_table, plot_parity
16+
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
17+
from ml_peg.app import APP_ROOT
18+
from ml_peg.calcs import CALCS_ROOT
19+
from ml_peg.models.get_models import load_models
20+
from ml_peg.models.models import current_models
21+
22+
# Models selected for analysis; iterated by model name throughout this module
MODELS = load_models(current_models)
# Name map passed to the metrics table as ``mlip_name_map``
D3_MODEL_NAMES = build_d3_name_map(MODELS)

# Conversion factor from eV to kcal/mol, built from ASE unit constants
EV_TO_KCAL = units.mol / units.kcal
# Per-model calculation outputs are read from here
CALC_PATH = CALCS_ROOT / "conformers" / "37Conf8" / "outputs"
# Figures, tables and structures for the app are written here
OUT_PATH = APP_ROOT / "data" / "conformers" / "37Conf8"

# Metric configuration lives in ``metrics.yml`` next to this file
METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
    METRICS_CONFIG_PATH
)
33+
34+
35+
def labels() -> list:
    """
    Get list of system names.

    Names are taken from the ``.xyz`` output files of the first model in
    ``MODELS`` only; the same names are then used to read every other model's
    outputs.

    Returns
    -------
    list
        List of all system names, ordered by sorted file path. Empty if no models
        are defined or no output files are found.
    """
    # Only the first model's output directory is inspected
    first_model = next(iter(MODELS), None)
    if first_model is None:
        return []
    # Restrict to .xyz files, since labels are later read back as f"{label}.xyz"
    return [path.stem for path in sorted((CALC_PATH / first_model).glob("*.xyz"))]
48+
49+
50+
@pytest.fixture
@plot_parity(
    filename=OUT_PATH / "figure_37conf8.json",
    title="Energies",
    x_label="Predicted energy / kcal/mol",
    y_label="Reference energy / kcal/mol",
    hoverdata={
        "Labels": labels(),
    },
)
def conformer_energies() -> dict[str, list]:
    """
    Get relative conformer energies for all systems.

    Each structure read from the calculation outputs is also copied to the app
    data directory so that it can be displayed alongside the parity plot.

    Returns
    -------
    dict[str, list]
        Dictionary of all reference and predicted energies, in kcal/mol. Reference
        energies are stored under ``"ref"``, predictions under each model name.
    """
    results = {"ref": []} | {mlip: [] for mlip in MODELS}
    # List the systems once rather than re-globbing the outputs for every model
    system_labels = labels()

    for model_index, model_name in enumerate(MODELS):
        # Directory for structures written for the app
        structs_dir = OUT_PATH / model_name
        structs_dir.mkdir(parents=True, exist_ok=True)

        for label in system_labels:
            atoms = read(CALC_PATH / model_name / f"{label}.xyz")
            results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL)

            # Reference energies are only collected from the first model's files
            if model_index == 0:
                results["ref"].append(atoms.info["ref_energy"] * EV_TO_KCAL)

            # Write structures for app
            write(structs_dir / f"{label}.xyz", atoms)
    return results
86+
87+
88+
@pytest.fixture
def get_mae(conformer_energies) -> dict[str, float]:
    """
    Compute the mean absolute error of conformer energies for each model.

    Parameters
    ----------
    conformer_energies
        Dictionary of reference and predicted conformer energies.

    Returns
    -------
    dict[str, float]
        Mean absolute error with respect to the reference, keyed by model name.
    """
    return {
        name: mae(conformer_energies["ref"], conformer_energies[name])
        for name in MODELS
    }
109+
110+
111+
@pytest.fixture
@build_table(
    filename=OUT_PATH / "37conf8_metrics_table.json",
    metric_tooltips=DEFAULT_TOOLTIPS,
    thresholds=DEFAULT_THRESHOLDS,
    mlip_name_map=D3_MODEL_NAMES,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
    """
    Collect all metrics for the benchmark table.

    Parameters
    ----------
    get_mae
        Mean absolute errors for all models.

    Returns
    -------
    dict[str, dict]
        Metric names and values for all models.
    """
    all_metrics = dict(MAE=get_mae)
    return all_metrics
135+
136+
137+
def test_37conf8(metrics: dict[str, dict]) -> None:
    """
    Run 37Conf8 conformer energies analysis.

    Requesting the ``metrics`` fixture is what triggers the analysis; the test
    body itself does nothing.

    Parameters
    ----------
    metrics
        All new benchmark metric names and dictionary of values for each model.
    """
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""Run 37Conf8 app."""
2+
3+
from __future__ import annotations
4+
5+
from dash import Dash
6+
from dash.html import Div
7+
8+
from ml_peg.app import APP_ROOT
9+
from ml_peg.app.base_app import BaseApp
10+
from ml_peg.app.utils.build_callbacks import (
11+
plot_from_table_column,
12+
struct_from_scatter,
13+
)
14+
from ml_peg.app.utils.load import read_plot
15+
from ml_peg.models.get_models import get_model_names
16+
from ml_peg.models.models import current_models
17+
18+
# Names of the currently selected models; the first is used to locate structures
MODELS = get_model_names(current_models)
# Used as the app name and as the prefix of all component IDs
BENCHMARK_NAME = "37Conf8"
DOCS_URL = (
    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/conformers.html#37conf8"
)
# Directory holding the figure, metrics table and structures read by the app
DATA_PATH = APP_ROOT / "data" / "conformers" / "37Conf8"
24+
25+
26+
class ThirtySevenConf8App(BaseApp):
    """37Conf8 benchmark app layout and callbacks."""

    def register_callbacks(self) -> None:
        """Register the table-to-plot and plot-to-structure callbacks."""
        figure_id = f"{BENCHMARK_NAME}-figure"
        scatter = read_plot(DATA_PATH / "figure_37conf8.json", id=figure_id)

        # Structures are taken from the first model's directory only
        first_model = MODELS[0]
        model_dir = DATA_PATH / first_model
        structs = []
        if model_dir.exists():
            stems = sorted(xyz_file.stem for xyz_file in model_dir.glob("*.xyz"))
            structs = [
                f"assets/conformers/37Conf8/{first_model}/{stem}.xyz"
                for stem in stems
            ]

        # Show the scatter plot when the MAE column of the table is selected
        plot_from_table_column(
            table_id=self.table_id,
            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
            column_to_plot={"MAE": scatter},
        )

        # Show the matching structure when a scatter point is selected
        struct_from_scatter(
            scatter_id=figure_id,
            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
            structs=structs,
            mode="struct",
        )
57+
58+
59+
def get_app() -> ThirtySevenConf8App:
    """
    Build the 37Conf8 benchmark app.

    Returns
    -------
    ThirtySevenConf8App
        Benchmark layout and callback registration.
    """
    description = (
        "Performance in predicting relative conformer energies "
        "of 37 organic molecules representing pharmaceuticals, drugs, catalysts, "
        "synthetic precursors, and industry-related chemicals (37 neutral "
        "molecules, 8 conformers each). "
        "Reference data from DLPNO-CCSD(T) calculations."
    )
    # Placeholders filled in by the callbacks registered on the app
    placeholders = [
        Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
        Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
    ]
    return ThirtySevenConf8App(
        name=BENCHMARK_NAME,
        description=description,
        docs_url=DOCS_URL,
        table_path=DATA_PATH / "37conf8_metrics_table.json",
        extra_components=placeholders,
    )
84+
85+
86+
# Run this benchmark's app on its own when the module is executed as a script
if __name__ == "__main__":
    # Assets are served from two levels above DATA_PATH (APP_ROOT / "data"), so
    # the "assets/conformers/37Conf8/..." structure paths resolve
    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
    benchmark_app = get_app()
    full_app.layout = benchmark_app.layout
    benchmark_app.register_callbacks()
    full_app.run(port=8062, debug=True)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
title: Conformers
2+
description:
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""
2+
Compute the 37Conf8 dataset for molecular conformer relative energies.
3+
4+
Reference DOI: 10.1002/cphc.201801063.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from pathlib import Path
10+
from typing import Any
11+
12+
from ase import units
13+
from ase.io import read, write
14+
import pandas as pd
15+
import pytest
16+
from tqdm import tqdm
17+
18+
from ml_peg.calcs.utils.utils import download_s3_data
19+
from ml_peg.models.get_models import load_models
20+
from ml_peg.models.models import current_models
21+
22+
# Models to benchmark; the test below is parametrized over ``MODELS.items()``
MODELS = load_models(current_models)

# Conversion factor from kcal/mol to eV, built from ASE unit constants
KCAL_TO_EV = units.kcal / units.mol

# Per-model output structures are written under this directory
OUT_PATH = Path(__file__).parent / "outputs"
27+
28+
29+
@pytest.mark.parametrize("mlip", MODELS.items())
def test_37conf8_conformer_energies(mlip: tuple[str, Any]) -> None:
    """
    Benchmark the 37Conf8 dataset.

    For every row of the reference spreadsheet, the matching structure is
    evaluated with the model. Conformer 1 of each molecule sets the zero of
    energy; every other conformer is written to the model's output directory with
    its predicted relative energy (``model_rel_energy``) and the reference energy
    converted to eV (``ref_energy``) stored in ``atoms.info``.

    Parameters
    ----------
    mlip
        Name of model use and model to get calculator.
    """
    model_name, model = mlip

    data_path = (
        download_s3_data(
            filename="37CONF8.zip",
            key="inputs/conformers/37Conf8/37Conf8.zip",
        )
        / "37CONF8"
    )

    df = pd.read_excel(
        data_path / "37Conf8_data.xlsx", sheet_name="Rel_Energy_SP", header=2
    )

    # Create the calculator once only
    calc = model.get_calculator()
    # Add D3 calculator for this test
    calc = model.add_d3_calculator(calc)

    write_dir = OUT_PATH / model_name
    write_dir.mkdir(parents=True, exist_ok=True)

    # The last three rows of the sheet are skipped
    # NOTE(review): presumably these are non-data footer rows - confirm
    for i in tqdm(range(len(df) - 3)):
        # Use purely positional (row, column) indexing into the sheet
        molecule_name = df.iloc[i, 0].strip()
        conf_id = int(df.iloc[i, 1])
        label = f"{molecule_name}_{conf_id}"

        atoms = read(data_path / "PBEPBE-D3" / f"{label}_PBEPBE-D3.xyz")
        atoms.info["charge"] = 0
        atoms.info["spin"] = 1
        atoms.calc = calc
        energy = atoms.get_potential_energy()

        if conf_id == 1:
            # Conformer 1 is the zero of energy for the rows that follow; it is
            # not written to the outputs
            e_model_zero_conf = energy
            continue

        atoms.info["model_rel_energy"] = energy - e_model_zero_conf
        atoms.info["ref_energy"] = float(df.iloc[i, 2]) * KCAL_TO_EV
        write(write_dir / f"{label}.xyz", atoms)

0 commit comments

Comments
 (0)