151 changes: 151 additions & 0 deletions ml_peg/analysis/conformers/UpU46/analyse_UpU46.py
@@ -0,0 +1,151 @@
"""
Analyse the UpU46 benchmark dataset for RNA backbone conformations.

Journal of Chemical Theory and Computation,
2015, 11 (10), 4972-4991.
DOI: 10.1021/acs.jctc.5b00515.
"""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)


EV_TO_KCAL = units.mol / units.kcal  # multiply an energy in eV to get kcal/mol
CALC_PATH = CALCS_ROOT / "conformers" / "UpU46" / "outputs"
OUT_PATH = APP_ROOT / "data" / "conformers" / "UpU46"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def labels() -> list[str]:
    """
    Get list of system names.

    Returns
    -------
    list[str]
        List of all system names.
"""
labels_list = []
for model_name in MODELS:
        for system_path in sorted((CALC_PATH / model_name).glob("*.xyz")):
labels_list.append(system_path.stem)
break # only need the first model to list available systems
return labels_list


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_upu46.json",
title="Energies",
x_label="Predicted energy / kcal/mol",
y_label="Reference energy / kcal/mol",
hoverdata={
"Labels": labels(),
},
)
def conformer_energies() -> dict[str, list]:
"""
Get conformer energies for all systems.

Returns
-------
dict[str, list]
        Dictionary of all reference and predicted conformer energies.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
for label in labels():
atoms = read(CALC_PATH / model_name / f"{label}.xyz")

results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL)
if not ref_stored:
results["ref"].append(atoms.info["ref_energy"] * EV_TO_KCAL)

# Write structures for app
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)
write(structs_dir / f"{label}.xyz", atoms)
ref_stored = True
return results


@pytest.fixture
def get_mae(conformer_energies: dict[str, list]) -> dict[str, float]:
"""
Get mean absolute error for conformer energies.

Parameters
----------
conformer_energies
Dictionary of reference and predicted conformer energies.

Returns
-------
dict[str, float]
        Dictionary of mean absolute errors in predicted conformer energies for all models.
"""
results = {}
for model_name in MODELS:
results[model_name] = mae(
conformer_energies["ref"], conformer_energies[model_name]
)
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "upu46_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=D3_MODEL_NAMES,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
"""
Get all metrics.

Parameters
----------
get_mae
Mean absolute errors for all models.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {
"MAE": get_mae,
}


def test_upu46(metrics: dict[str, dict]) -> None:
"""
Run UpU46 test.

Parameters
----------
metrics
        Benchmark metric names and dictionaries of values for each model.
"""
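
For reference, the mae helper imported from ml_peg.analysis.utils.utils is used here as a standard mean absolute error. A minimal, hypothetical equivalent (the real helper's signature may differ, e.g. accepting NumPy arrays):

import numpy as np


def mae(ref: list[float], pred: list[float]) -> float:
    """Mean absolute error between reference and predicted values."""
    return float(np.mean(np.abs(np.asarray(ref) - np.asarray(pred))))
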
7 changes: 7 additions & 0 deletions ml_peg/analysis/conformers/UpU46/metrics.yml
@@ -0,0 +1,7 @@
metrics:
MAE:
good: 0.0
bad: 20.0
unit: kcal/mol
tooltip: Mean Absolute Error for all systems
level_of_theory: CCSD(T)
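
The analysis script unpacks this file through load_metrics_config into thresholds, tooltips, and weights for the metrics table. The exact return structure is internal to ml_peg, but a plausible reading of the config, assuming PyYAML, is:

from pathlib import Path

import yaml

config = yaml.safe_load(Path("metrics.yml").read_text())
mae_cfg = config["metrics"]["MAE"]
# Colour-scale endpoints and hover text for the MAE column, per the YAML above
thresholds = {"MAE": (mae_cfg["good"], mae_cfg["bad"])}  # (0.0, 20.0) kcal/mol
tooltips = {"MAE": mae_cfg["tooltip"]}
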
88 changes: 88 additions & 0 deletions ml_peg/app/conformers/UpU46/app_UpU46.py
@@ -0,0 +1,88 @@
"""Run UpU46 app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_column,
struct_from_scatter,
)
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
BENCHMARK_NAME = "UpU46"
DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular.html#upu46"
DATA_PATH = APP_ROOT / "data" / "conformers" / "UpU46"


class UpU46App(BaseApp):
"""UpU46 benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
scatter = read_plot(
DATA_PATH / "figure_upu46.json",
id=f"{BENCHMARK_NAME}-figure",
)

model_dir = DATA_PATH / MODELS[0]
if model_dir.exists():
labels = sorted([f.stem for f in model_dir.glob("*.xyz")])
structs = [
f"assets/conformers/UpU46/{MODELS[0]}/{label}.xyz" for label in labels
]
else:
structs = []

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"MAE": scatter},
)

struct_from_scatter(
scatter_id=f"{BENCHMARK_NAME}-figure",
struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
structs=structs,
mode="struct",
)


def get_app() -> UpU46App:
"""
Get UpU46 benchmark app layout and callback registration.

Returns
-------
UpU46App
Benchmark layout and callback registration.
"""
return UpU46App(
name=BENCHMARK_NAME,
description=(
"Performance in predicting RNA backbone conformer energies for the "
"UpU46 dataset comprising 46 uracil dinucleotides (UpU), representing "
"all known 46 RNA backbone conformational families. Reference data "
"from CCSD(T) calculations."
),
docs_url=DOCS_URL,
table_path=DATA_PATH / "upu46_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
],
)


if __name__ == "__main__":
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
benchmark_app = get_app()
full_app.layout = benchmark_app.layout
benchmark_app.register_callbacks()
full_app.run(port=8067, debug=True)
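
Note that the hard-coded "assets/conformers/UpU46/..." structure paths are resolved against the Dash assets folder (APP_ROOT / "data" when run standalone, as above), so they must mirror the directory layout written by the analysis step. A small, hypothetical sanity check:

from ml_peg.app import APP_ROOT


def assets_exist(model: str) -> bool:
    """Check that at least one structure file exists for the given model."""
    # "assets/conformers/UpU46/<model>/<label>.xyz" maps onto this directory
    struct_dir = APP_ROOT / "data" / "conformers" / "UpU46" / model
    return struct_dir.exists() and any(struct_dir.glob("*.xyz"))
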
147 changes: 147 additions & 0 deletions ml_peg/calcs/conformers/UpU46/calc_UpU46.py
@@ -0,0 +1,147 @@
"""
Calculate the UpU46 benchmark dataset for RNA backbone conformations.

Journal of Chemical Theory and Computation,
2015, 11 (10), 4972-4991.
DOI: 10.1021/acs.jctc.5b00515.
"""

from __future__ import annotations

from pathlib import Path

from ase import Atoms, units
from ase.io import read, write
import mlipx
from mlipx.abc import NodeWithCalculator
from tqdm import tqdm
import zntrack

from ml_peg.calcs.utils.utils import chdir, download_s3_data
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)

KCAL_TO_EV = units.kcal / units.mol  # multiply an energy in kcal/mol to get eV

OUT_PATH = Path(__file__).parent / "outputs"


class UpU46Benchmark(zntrack.Node):
"""Compute the benchmark."""

model: NodeWithCalculator = zntrack.deps()
model_name: str = zntrack.params()

@staticmethod
def get_atoms(atoms_path: Path) -> Atoms:
"""
Read atoms object and add charge and spin.

Parameters
----------
atoms_path
Path to atoms object.

Returns
-------
        Atoms
            ASE Atoms object with charge and spin set.
"""
atoms = read(atoms_path)
atoms.info["charge"] = -1
atoms.info["spin"] = 1
        # Drop any stale precomputed energy so the model recomputes it
        if atoms.calc is not None and "energy" in atoms.calc.results:
            del atoms.calc.results["energy"]
return atoms

def get_ref_energies(self, data_path: Path) -> None:
"""
Get reference conformer energies.

Parameters
----------
data_path
            Path to the extracted dataset directory.
"""
self.ref_energies = {}
        with open(data_path / "references") as lines:
            for i, line in enumerate(lines):
                # Skip the five header comment lines
                if i < 5:
                    continue
                items = line.strip().split()
                # Column 3 holds the conformer label; column 8 the reference
                # relative energy in kcal/mol
                label = items[2]
                self.ref_energies[label] = float(items[7]) * KCAL_TO_EV

    def run(self) -> None:
        """Run the UpU46 benchmark."""
data_path = (
download_s3_data(
filename="UPU46.zip",
key="inputs/conformers/UpU46/UpU46.zip",
)
/ "UPU46"
)
        zero_conf_label = "2p"  # conformer taken as the zero of relative energy
self.get_ref_energies(data_path)
        # Attach the model calculator, with a D3 dispersion correction
        # added on top for this test
        calc = self.model.get_calculator()
        calc = self.model.add_d3_calculator(calc)

conf_lowest = self.get_atoms(data_path / f"{zero_conf_label}.xyz")
conf_lowest.calc = calc
e_conf_lowest_model = conf_lowest.get_potential_energy()

for label, e_ref in tqdm(self.ref_energies.items()):
# Skip the reference conformer for
# which the error is automatically zero
if label == zero_conf_label:
continue

atoms = self.get_atoms(data_path / f"{label}.xyz")
atoms.calc = calc
atoms.info["model_rel_energy"] = (
atoms.get_potential_energy() - e_conf_lowest_model
)
atoms.info["ref_energy"] = e_ref
atoms.calc = None

write_dir = OUT_PATH / self.model_name
write_dir.mkdir(parents=True, exist_ok=True)
write(write_dir / f"{label}.xyz", atoms)


def build_project(repro: bool = False) -> None:
"""
Build mlipx project.

Parameters
----------
repro
Whether to call dvc repro -f after building.
"""
project = mlipx.Project()
benchmark_node_dict = {}

for model_name, model in MODELS.items():
with project.group(model_name):
benchmark = UpU46Benchmark(
model=model,
model_name=model_name,
)
benchmark_node_dict[model_name] = benchmark

if repro:
with chdir(Path(__file__).parent):
project.repro(build=True, force=True)
else:
project.build()


def test_upu46():
"""Run UpU46 benchmark via pytest."""
build_project(repro=True)
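
As the test function above suggests, the calculation step is driven through pytest, which builds the mlipx project and forces a DVC reproduction. A hypothetical end-to-end driver, assuming the repository root as the working directory:

import pytest

# Run the calculations (writes outputs/), then the analysis (writes app data)
pytest.main(["ml_peg/calcs/conformers/UpU46/calc_UpU46.py"])
pytest.main(["ml_peg/analysis/conformers/UpU46/analyse_UpU46.py"])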