From f6cfb864bc1efb5d20eddf8f3a7d0349ee400ec7 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Sat, 20 Dec 2025 20:30:52 +0000 Subject: [PATCH 1/4] Add NCIA IHB100x10 calculation script --- .../ncia_ihb100x10/.dvc/.gitignore | 3 + .../ncia_ihb100x10/.dvc/config | 0 .../ncia_ihb100x10/.dvcignore | 3 + .../ncia_ihb100x10/calc_ncia_ihb100x10.py | 162 ++++++++++++++++++ 4 files changed, 168 insertions(+) create mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore create mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config create mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvcignore create mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore new file mode 100644 index 00000000..528f30c7 --- /dev/null +++ b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config new file mode 100644 index 00000000..e69de29b diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvcignore b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvcignore new file mode 100644 index 00000000..51973055 --- /dev/null +++ b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. 
Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py new file mode 100644 index 00000000..3b85eacb --- /dev/null +++ b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py @@ -0,0 +1,162 @@ +""" +Compute the NCIA IHB100x10 dataset for ionic hydrogen bonds. + +Journal of Chemical Theory and Computation 2020 16 (4), 2355-2368. +""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import mlipx +from mlipx.abc import NodeWithCalculator +from tqdm import tqdm +import zntrack + +from ml_peg.calcs.utils.utils import chdir, download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +KCAL_TO_EV = units.kcal / units.mol +EV_TO_KCAL = 1 / KCAL_TO_EV + +OUT_PATH = Path(__file__).parent / "outputs" + + +class NCIAIHB100x10Benchmark(zntrack.Node): + """Benchmarking NCIA_IHB100x10 ionic hydrogen bonds benchmark dataset.""" + + model: NodeWithCalculator = zntrack.deps() + model_name: str = zntrack.params() + + def get_ref_energies(self, data_path): + """ + Get reference energies. + + Parameters + ---------- + data_path + Path to data. + """ + self.ref_energies = {} + with open(data_path / "NCIA_IHB100x10_benchmark.txt") as lines: + for i, line in enumerate(lines): + if i == 0: + continue + items = line.strip().split() + system_label = items[0][7:].replace(".", "") + for xyz_path in (data_path / "geometries").glob("*.xyz"): + if system_label in xyz_path.stem: + label = xyz_path.stem + ref_energy = float(items[1]) * KCAL_TO_EV + self.ref_energies[label] = ref_energy + + @staticmethod + def get_monomers(atoms): + """ + Get ASE atoms objects of the monomers. 
+ + Parameters + ---------- + atoms + ASE atoms object of the structure. + + Returns + ------- + tuple[ASE.Atoms, ASE.Atoms] + Tuple containing the two monomers. + """ + if isinstance(atoms.info["selection_a"], str): + a_ids = [int(id) for id in atoms.info["selection_a"].split("-")] + a_ids[0] -= 1 + else: + a_ids = [int(atoms.info["selection_a"]) - 1, int(atoms.info["selection_a"])] + + if isinstance(atoms.info["selection_b"], str): + b_ids = [int(id) for id in atoms.info["selection_b"].split("-")] + b_ids[0] -= 1 + else: + b_ids = [int(atoms.info["selection_b"]) - 1, int(atoms.info["selection_b"])] + + atoms_a = atoms[a_ids[0] : a_ids[1]] + atoms_b = atoms[b_ids[0] : b_ids[1]] + assert len(atoms_a) + len(atoms_b) == len(atoms) + + atoms_a.info["charge"] = int(atoms.info["charge_a"]) + atoms_a.info["spin"] = 1 + + atoms_b.info["charge"] = int(atoms.info["charge_b"]) + atoms_b.info["spin"] = 1 + return (atoms_a, atoms_b) + + def run(self): + """Run new benchmark.""" + # Read in data and attach calculator + data_path = ( + download_s3_data( + filename="NCIA_IHB100x10.zip", + key="inputs/non_covalent_interactions/NCIA_IHB100x10/NCIA_IHB100x10.zip", + ) + / "NCIA_IHB100x10" + ) + self.get_ref_energies(data_path) + + calc = self.model.get_calculator() + + for label, ref_energy in tqdm(self.ref_energies.items()): + xyz_fname = f"{label}.xyz" + atoms = read(data_path / "geometries" / xyz_fname) + atoms_a, atoms_b = self.get_monomers(atoms) + atoms.info["spin"] = 1 + atoms.info["charge"] = int(atoms_a.info["charge"] + atoms_b.info["charge"]) + atoms.calc = calc + atoms_a.calc = calc + atoms_b.calc = calc + + atoms.info["model_int_energy"] = ( + atoms.get_potential_energy() + - atoms_a.get_potential_energy() + - atoms_b.get_potential_energy() + ) + atoms.info["ref_int_energy"] = ref_energy + atoms.calc = None + + write_dir = OUT_PATH / self.model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}.xyz", atoms) + + +def build_project(repro: 
bool = False) -> None: + """ + Build mlipx project. + + Parameters + ---------- + repro + Whether to call dvc repro -f after building. + """ + project = mlipx.Project() + benchmark_node_dict = {} + + for model_name, model in MODELS.items(): + with project.group(model_name): + benchmark = NCIAIHB100x10Benchmark( + model=model, + model_name=model_name, + ) + benchmark_node_dict[model_name] = benchmark + + if repro: + with chdir(Path(__file__).parent): + project.repro(build=True, force=True) + else: + project.build() + + +def test_ncia_ihb100x10(): + """Run NCIA_IHB100x10 benchmark via pytest.""" + build_project(repro=True) From 7d39fcc1cc92302bda7afaed921659324b46b718 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Sun, 21 Dec 2025 21:09:37 +0000 Subject: [PATCH 2/4] Add NCIA_IHB100x10 analysis files --- .../ncia_ihb100x10/analyse_ncia_ihb100x10.py | 177 ++++++++++++++++++ .../ncia_ihb100x10/metrics.yml | 13 ++ 2 files changed, 190 insertions(+) create mode 100644 ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py create mode 100644 ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml diff --git a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py new file mode 100644 index 00000000..b2eec5cb --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py @@ -0,0 +1,177 @@ +"""Analyse ncia_ihb100x10 benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import ( + build_d3_name_map, + load_metrics_config, + mae, + rmse, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from 
ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_d3_name_map(MODELS) + +KCAL_TO_EV = units.kcal / units.mol +EV_TO_KCAL = 1 / KCAL_TO_EV +CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "ncia_ihb100x10" / "outputs" +OUT_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "ncia_ihb100x10" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model in MODELS: + labels_list = [path.stem for path in (CALC_PATH / model).glob("*.xyz")] + break + return labels_list + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_ncia_ihb100x10.json", + title="Interaction energies", + x_label="Predicted energy / eV", + y_label="Reference energy / eV", + hoverdata={ + "Labels": labels(), + }, +) +def interaction_energies() -> dict[str, list]: + """ + Get interaction energies for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted interaction energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = read(CALC_PATH / model_name / f"{label}.xyz") + if not ref_stored: + results["ref"].append(atoms.info["ref_int_energy"]) + + results[model_name].append(atoms.info["model_int_energy"]) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + + ref_stored = True + return results + + +@pytest.fixture +def get_mae(interaction_energies) -> dict[str, float]: + """ + Get mean absolute error for energies. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted energies. 
+ + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae( + interaction_energies["ref"], interaction_energies[model_name] + ) + return results + + +@pytest.fixture +def get_rmse(interaction_energies) -> dict[str, float]: + """ + Get root mean square error for energies. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = rmse( + interaction_energies["ref"], interaction_energies[model_name] + ) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "ncia_ihb100x10_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + get_rmse + Root Mean Square Error for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAE": get_mae, + "RMSE": get_rmse, + } + + +def test_ncia_ihb100x10(metrics: dict[str, dict]) -> None: + """ + Run ncia_ihb100x10 test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. 
+ """ + return diff --git a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml new file mode 100644 index 00000000..cbfb7e7a --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml @@ -0,0 +1,13 @@ +metrics: + MAE: + good: 0.0 + bad: 0.5 + unit: eV + tooltip: Mean Absolute Error for all systems + level_of_theory: CCSD(T) + RMSE: + good: 0.0 + bad: 0.5 + unit: eV + tooltip: Root Mean Square Error for all systems + level_of_theory: CCSD(T) From 3305ac3a1a5c2a0f0750142b4ba038f0d1bffa2a Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Mon, 29 Dec 2025 20:18:25 +0000 Subject: [PATCH 3/4] Add D3 calc --- .../ncia_ihb100x10/calc_ncia_ihb100x10.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py index 3b85eacb..bfd73f73 100644 --- a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py +++ b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/calc_ncia_ihb100x10.py @@ -106,6 +106,8 @@ def run(self): self.get_ref_energies(data_path) calc = self.model.get_calculator() + # Add D3 calculator for this test + calc = self.model.add_d3_calculator(calc) for label, ref_energy in tqdm(self.ref_energies.items()): xyz_fname = f"{label}.xyz" From a0acc9117ba56aa17baa24909c3b1e11b800c35e Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Wed, 7 Jan 2026 13:52:32 +0000 Subject: [PATCH 4/4] ncia_ihb100x10 app --- .../ncia_ihb100x10/analyse_ncia_ihb100x10.py | 38 +------- .../ncia_ihb100x10/metrics.yml | 6 -- .../ncia_ihb100x10/app_ncia_ihb100x10.py | 91 +++++++++++++++++++ .../ncia_ihb100x10/.dvc/.gitignore | 3 - .../ncia_ihb100x10/.dvc/config | 0 5 files changed, 94 insertions(+), 44 deletions(-) create mode 100644 
ml_peg/app/non_covalent_interactions/ncia_ihb100x10/app_ncia_ihb100x10.py delete mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore delete mode 100644 ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config diff --git a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py index b2eec5cb..e7d6ec57 100644 --- a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py +++ b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/analyse_ncia_ihb100x10.py @@ -9,12 +9,7 @@ import pytest from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import ( - build_d3_name_map, - load_metrics_config, - mae, - rmse, -) +from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models @@ -44,7 +39,7 @@ def labels() -> list: List of all system names. """ for model in MODELS: - labels_list = [path.stem for path in (CALC_PATH / model).glob("*.xyz")] + labels_list = sorted([path.stem for path in (CALC_PATH / model).glob("*.xyz")]) break return labels_list @@ -112,29 +107,6 @@ def get_mae(interaction_energies) -> dict[str, float]: return results -@pytest.fixture -def get_rmse(interaction_energies) -> dict[str, float]: - """ - Get root mean square error for energies. - - Parameters - ---------- - interaction_energies - Dictionary of reference and predicted energies. - - Returns - ------- - dict[str, float] - Dictionary of predicted energy errors for all models. 
- """ - results = {} - for model_name in MODELS: - results[model_name] = rmse( - interaction_energies["ref"], interaction_energies[model_name] - ) - return results - - @pytest.fixture @build_table( filename=OUT_PATH / "ncia_ihb100x10_metrics_table.json", @@ -142,7 +114,7 @@ def get_rmse(interaction_energies) -> dict[str, float]: thresholds=DEFAULT_THRESHOLDS, mlip_name_map=D3_MODEL_NAMES, ) -def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, dict]: +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: """ Get all metrics. @@ -151,9 +123,6 @@ def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, get_mae Mean absolute errors for all models. - get_rmse - Root Mean Square Error for all models. - Returns ------- dict[str, dict] @@ -161,7 +130,6 @@ def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, """ return { "MAE": get_mae, - "RMSE": get_rmse, } diff --git a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml index cbfb7e7a..8118e823 100644 --- a/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml +++ b/ml_peg/analysis/non_covalent_interactions/ncia_ihb100x10/metrics.yml @@ -5,9 +5,3 @@ metrics: unit: eV tooltip: Mean Absolute Error for all systems level_of_theory: CCSD(T) - RMSE: - good: 0.0 - bad: 0.5 - unit: eV - tooltip: Root Mean Square Error for all systems - level_of_theory: CCSD(T) diff --git a/ml_peg/app/non_covalent_interactions/ncia_ihb100x10/app_ncia_ihb100x10.py b/ml_peg/app/non_covalent_interactions/ncia_ihb100x10/app_ncia_ihb100x10.py new file mode 100644 index 00000000..387d4b75 --- /dev/null +++ b/ml_peg/app/non_covalent_interactions/ncia_ihb100x10/app_ncia_ihb100x10.py @@ -0,0 +1,91 @@ +"""Run NCIA_IHB100x10 app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from 
ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "NCIA_IHB100x10" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" + "non_covalent_interactions.html#ncia-ihb100x10" +) +DATA_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "ncia_ihb100x10" + + +class NCIANIHB100x10App(BaseApp): + """NCIA_IHB100x10 benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_ncia_ihb100x10.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + labels = sorted([f.stem for f in model_dir.glob("*.xyz")]) + structs = [ + f"assets/non_covalent_interactions/ncia_ihb100x10/{MODELS[0]}/{label}.xyz" + for label in labels + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> NCIANIHB100x10App: + """ + Get NCIA_IHB100x10 benchmark app layout and callback registration. + + Returns + ------- + NCIANIHB100x10App + Benchmark layout and callback registration. + """ + return NCIANIHB100x10App( + name=BENCHMARK_NAME, + description=( + "Performance in predicting hydrogen bond interaction energies " + "for the NCIA IHB100x10 dataset of ionic hydrogen-bonded dimers. " + "Reference data from CCSD(T) calculations." 
+ ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "ncia_ihb100x10_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8059, debug=True) diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore deleted file mode 100644 index 528f30c7..00000000 --- a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/config.local -/tmp -/cache diff --git a/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config b/ml_peg/calcs/non_covalent_interactions/ncia_ihb100x10/.dvc/config deleted file mode 100644 index e69de29b..00000000