From 608b6ba0da6ddd200175a92ad8e35f789958fc91 Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Mon, 5 Jan 2026 19:37:23 +0000
Subject: [PATCH 1/3] Add BH9 calculation files

---
 .../molecular_reactions/bh9/.dvc/.gitignore   |   3 +
 .../calcs/molecular_reactions/bh9/.dvc/config |   0
 .../calcs/molecular_reactions/bh9/.dvcignore  |   3 +
 .../calcs/molecular_reactions/bh9/calc_bh9.py | 178 ++++++++++++++++++
 ml_peg/calcs/molecular_reactions/bh9/dvc.lock | 107 +++++++++++
 ml_peg/calcs/molecular_reactions/bh9/dvc.yaml |  22 +++
 6 files changed, 313 insertions(+)
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/.dvc/.gitignore
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/.dvc/config
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/.dvcignore
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/calc_bh9.py
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/dvc.lock
 create mode 100644 ml_peg/calcs/molecular_reactions/bh9/dvc.yaml

diff --git a/ml_peg/calcs/molecular_reactions/bh9/.dvc/.gitignore b/ml_peg/calcs/molecular_reactions/bh9/.dvc/.gitignore
new file mode 100644
index 00000000..528f30c7
--- /dev/null
+++ b/ml_peg/calcs/molecular_reactions/bh9/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/ml_peg/calcs/molecular_reactions/bh9/.dvc/config b/ml_peg/calcs/molecular_reactions/bh9/.dvc/config
new file mode 100644
index 00000000..e69de29b
diff --git a/ml_peg/calcs/molecular_reactions/bh9/.dvcignore b/ml_peg/calcs/molecular_reactions/bh9/.dvcignore
new file mode 100644
index 00000000..51973055
--- /dev/null
+++ b/ml_peg/calcs/molecular_reactions/bh9/.dvcignore
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
diff --git a/ml_peg/calcs/molecular_reactions/bh9/calc_bh9.py b/ml_peg/calcs/molecular_reactions/bh9/calc_bh9.py
new file mode 100644
index 00000000..e46f1752
--- /dev/null
+++ b/ml_peg/calcs/molecular_reactions/bh9/calc_bh9.py
@@ -0,0 +1,178 @@
+"""
+Calculate the BH9 reaction barriers dataset.
+
+Journal of Chemical Theory and Computation 2022 18 (1), 151-166
+DOI: 10.1021/acs.jctc.1c00694
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import mlipx
+from mlipx.abc import NodeWithCalculator
+from tqdm import tqdm
+import zntrack
+
+from ml_peg.calcs.utils.utils import chdir, download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+KCAL_TO_EV = units.kcal / units.mol
+EV_TO_KCAL = 1 / KCAL_TO_EV
+
+OUT_PATH = Path(__file__).parent / "outputs"
+
+
+def process_atoms(path):
+    """
+    Get the ASE Atoms object with prepared charge and spin states.
+
+    Parameters
+    ----------
+    path
+        Path to the system xyz.
+
+    Returns
+    -------
+    ase.Atoms
+        ASE Atoms object of the system.
+    """
+    with open(path) as lines:
+        lines.readline()  # Skip the first line; charge and spin are on the second.
+        items = lines.readline().split()
+    if len(items) < 2:
+        raise ValueError(f"No charge and spin found on line 2 of {path}")
+    charge, spin = int(items[0]), int(items[1])
+
+    atoms = read(path)
+    atoms.info["charge"] = charge
+    atoms.info["spin"] = spin
+    return atoms
+
+
+def parse_cc_energy(fname):
+    """
+    Get the reference barrier from the first "ref" line of the data file.
+
+    Parameters
+    ----------
+    fname
+        Path to the reference data file.
+
+    Returns
+    -------
+    float
+        Reaction barrier in eV (second field of the line, read as kcal/mol).
+    """
+    with open(fname) as lines:
+        for line in lines:
+            if "ref" in line:
+                # Second whitespace-separated field, converted via KCAL_TO_EV.
+                return float(line.split()[1]) * KCAL_TO_EV
+    raise ValueError(f"No 'ref' entry found in {fname}")
+
+
+class BH9Benchmark(zntrack.Node):
+    """
+    Benchmark model energies on the BH9 reaction structures.
+
+    Each structure is written to ``outputs/<model_name>/`` with the model
+    energy and, for transition states, the reference barriers in its info.
+    """
+
+    model: NodeWithCalculator = zntrack.deps()
+    model_name: str = zntrack.params()
+
+    def get_ref_energies(self, data_path):
+        """
+        Get the reference barriers.
+
+        Parameters
+        ----------
+        data_path
+            Path to the dataset directory.
+        """
+        labels = [
+            path.stem.replace("TS", "")
+            for path in sorted((data_path / "BH9_SI" / "XYZ_files").glob("*TS.xyz"))
+        ]
+        # NOTE(review): the n-th label in sorted order is paired with
+        # BH9-BH_<n>_*.db; confirm the .db numbering follows this ordering.
+        db_path = data_path / "BH9_SI" / "DB_files" / "BH"
+        self.ref_energies = {}
+        for index, label in enumerate(labels, start=1):
+            self.ref_energies[label] = {
+                direction: parse_cc_energy(db_path / f"BH9-BH_{index}_{direction}.db")
+                for direction in ("forward", "reverse")
+            }
+
+    def run(self):
+        """Run new benchmark."""
+        data_path = (
+            download_s3_data(
+                filename="BH9.zip",
+                key="inputs/molecular_reactions/BH9/BH9.zip",
+            )
+            / "BH9"
+        )
+        # Read in data and attach calculator
+        self.get_ref_energies(data_path)
+        calc = self.model.get_calculator()
+        # Add D3 calculator for this test
+        calc = self.model.add_d3_calculator(calc)
+
+        write_dir = OUT_PATH / self.model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+
+        # Evaluate every structure, not only transition states: the analysis
+        # subtracts reactant ("R") energies read from this output directory.
+        for fname in tqdm(sorted((data_path / "BH9_SI" / "XYZ_files").glob("*.xyz"))):
+            atoms = process_atoms(fname)
+            atoms.calc = calc
+            atoms.info["model_energy"] = atoms.get_potential_energy()
+
+            # Both reference barriers are stored on the transition state;
+            # only the forward one is used in the analysis.
+            if fname.stem.endswith("TS"):
+                label = fname.stem.replace("TS", "")
+                atoms.info["ref_forward_barrier"] = self.ref_energies[label]["forward"]
+                atoms.info["ref_reverse_barrier"] = self.ref_energies[label]["reverse"]
+
+            write(write_dir / f"{fname.stem}.xyz", atoms)
+
+
+def build_project(repro: bool = False) -> None:
+    """
+    Build the mlipx project with one BH9 benchmark node per model.
+
+    Parameters
+    ----------
+    repro
+        Whether to call dvc repro -f after building.
+    """
+    project = mlipx.Project()
+    benchmark_node_dict = {}
+
+    for model_name, model in MODELS.items():
+        with project.group(model_name):
+            benchmark_node_dict[model_name] = BH9Benchmark(
+                model=model,
+                model_name=model_name,
+            )
+
+    if not repro:
+        project.build()
+        return
+    # Reproduce from this file's directory.
+    with chdir(Path(__file__).parent):
+        project.repro(build=True, force=True)
+
+
+def test_bh9_barrier_heights():
+    """Run BH9 barriers benchmark via pytest (builds and reproduces the project)."""
+    build_project(repro=True)
diff --git a/ml_peg/calcs/molecular_reactions/bh9/dvc.lock b/ml_peg/calcs/molecular_reactions/bh9/dvc.lock
new file mode 100644
index 00000000..5fc7c75c
--- /dev/null
+++ b/ml_peg/calcs/molecular_reactions/bh9/dvc.lock
@@ -0,0 +1,107 @@
+schema: '2.0'
+stages:
+  uma-s-1p1-omol_BH9_Benchmark:
+    cmd: zntrack run calc_bh9.BH9_Benchmark --name uma-s-1p1-omol_BH9_Benchmark
+    params:
+      params.yaml:
+        uma-s-1p1-omol_BH9_Benchmark:
+          model:
+            _cls: ml_peg.models.models.FairChemCalc
+            d3_kwargs: {}
+            default_dtype: float32
+            device: cuda
+            model_name: uma-s-1p1
+            overrides: {}
+            task_name: omol
+            trained_on_d3: true
+          model_name: uma-s-1p1-omol
+    outs:
+    - path: nodes/uma-s-1p1-omol/BH9_Benchmark/node-meta.json
+      hash: md5
+      md5: 97ba35213ce1ef2a8f04aa4b475b17dd
+      size: 701
+  uma-s-1p1-omol_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name uma-s-1p1-omol_BH9Benchmark
+    params:
+      params.yaml:
+        uma-s-1p1-omol_BH9Benchmark:
+          model:
+            _cls: ml_peg.models.models.FairChemCalc
+            d3_kwargs: {}
+            default_dtype: float32
+            device: cuda
+            model_name: uma-s-1p1
+            overrides: {}
+            task_name: omol
+            trained_on_d3: true
+          model_name: uma-s-1p1-omol
+    outs:
+    - path: nodes/uma-s-1p1-omol/BH9Benchmark/node-meta.json
+      hash: md5
+      md5: 0d675724a6a08846e146bda983efb3da
+      size: 698
+  mace-fukui-spin-2L_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name
+      mace-fukui-spin-2L_BH9Benchmark
+    params:
+      params.yaml:
+        mace-fukui-spin-2L_BH9Benchmark:
+          model:
+            _cls: ml_peg.models.models.GenericASECalc
+            class_name: MACECalculator
+            d3_kwargs: {}
+            default_dtype:
+            device: auto
+            kwargs:
+              model_paths:
+                /home/dk584/work/mace-fukui-spin-models/mace-fukui-spin-2L.model
+            module: mace.calculators.mace
+            spec:
+            trained_on_d3: true
+          model_name: mace-fukui-spin-2L
+    outs:
+    - path: nodes/mace-fukui-spin-2L/BH9Benchmark/node-meta.json
+      hash: md5
+      md5: fa42eea6da09a1fb68998bfceace6d1b
+      size: 859
+  mace-omol_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-omol_BH9Benchmark
+    params:
+      params.yaml:
+        mace-omol_BH9Benchmark:
+          model:
+            _cls: ml_peg.models.models.GenericASECalc
+            class_name: mace_omol
+            d3_kwargs: {}
+            default_dtype:
+            device: auto
+            kwargs: {}
+            module: mace.calculators
+            spec:
+            trained_on_d3: true
+          model_name: mace-omol
+    outs:
+    - path: nodes/mace-omol/BH9Benchmark/node-meta.json
+      hash: md5
+      md5: 63f0ef91724649e74038688d88e37a1f
+      size: 713
+  orb-v3-conservative-omol_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name
+      orb-v3-conservative-omol_BH9Benchmark
+    params:
+      params.yaml:
+        orb-v3-conservative-omol_BH9Benchmark:
+          model:
+            _cls: ml_peg.models.models.OrbCalc
+            d3_kwargs: {}
+            default_dtype: float32-high
+            device: auto
+            kwargs: {}
+            name: orb_v3_conservative_omol
+            trained_on_d3: true
+          model_name: orb-v3-conservative-omol
+    outs:
+    - path: nodes/orb-v3-conservative-omol/BH9Benchmark/node-meta.json
+      hash: md5
+      md5: 62ec82f15423d4f69e761b804f6c3bcd
+      size: 701
diff --git a/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml b/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml
new file mode 100644
index 00000000..314cfde2
--- /dev/null
+++ b/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml
@@ -0,0 +1,22 @@
+stages:
+  mace-fukui-spin-2L_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-fukui-spin-2L_BH9Benchmark
+    metrics:
+    - nodes/mace-fukui-spin-2L/BH9Benchmark/node-meta.json:
+        cache: true
+    params:
+    - mace-fukui-spin-2L_BH9Benchmark
+  mace-omol_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-omol_BH9Benchmark
+    metrics:
+    - nodes/mace-omol/BH9Benchmark/node-meta.json:
+        cache: true
+    params:
+    - mace-omol_BH9Benchmark
+  orb-v3-conservative-omol_BH9Benchmark:
+    cmd: zntrack run calc_bh9.BH9Benchmark --name orb-v3-conservative-omol_BH9Benchmark
+    metrics:
+    - nodes/orb-v3-conservative-omol/BH9Benchmark/node-meta.json:
+        cache: true
+    params:
+    - orb-v3-conservative-omol_BH9Benchmark

From 55b44583e2bacefd4788086adc3fa255b2011c22 Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Mon, 5 Jan 2026 19:44:16 +0000
Subject: [PATCH 2/3] Add BH9 analysis files

---
 .../molecular_reactions/bh9/analyse_bh9.py    | 195 ++++++++++++++++++
 .../molecular_reactions/bh9/metrics.yml       |  13 ++
 2 files changed, 208 insertions(+)
 create mode 100644 ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
 create mode 100644 ml_peg/analysis/molecular_reactions/bh9/metrics.yml

diff --git a/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py b/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
new file mode 100644
index 00000000..93bab519
--- /dev/null
+++ b/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
@@ -0,0 +1,195 @@
+"""
+Analyse the BH9 reaction barriers dataset.
+
+Journal of Chemical Theory and Computation 2022 18 (1), 151-166
+DOI: 10.1021/acs.jctc.1c00694
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import (
+    build_d3_name_map,
+    load_metrics_config,
+    mae,
+    rmse,
+)
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+D3_MODEL_NAMES = build_d3_name_map(MODELS)
+
+KCAL_TO_EV = units.kcal / units.mol
+EV_TO_KCAL = 1 / KCAL_TO_EV
+CALC_PATH = CALCS_ROOT / "molecular_reactions" / "bh9" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "bh9"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def labels() -> list:
+    """
+    Get list of system names.
+
+    Returns
+    -------
+    list
+        List of all system names.
+    """
+    for model_name in sorted(CALC_PATH.glob("*")):
+        xyz_paths = sorted((CALC_PATH / model_name).glob("*TS.xyz"))
+        labels_list = [path.stem.replace("TS", "") for path in xyz_paths]
+        break
+    return labels_list
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_bh9_barriers.json",
+    title="Reaction barriers",
+    x_label="Predicted barrier / eV",
+    y_label="Reference barrier / eV",
+    hoverdata={
+        "Labels": labels(),
+    },
+)
+def barrier_heights() -> dict[str, list]:
+    """
+    Get barrier heights for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of all reference and predicted barrier heights.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_barriers = []
+        ref_barriers = []
+        for label in labels()[model_name]:
+            ref_stored = False
+
+            model_forward_barrier = 0
+            ref_forward_barrier = 0
+
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+
+            for fname in (CALC_PATH / model_name).glob(f"{label}*"):
+                if "TS" in fname.stem:
+                    atoms = read(fname)
+                    model_forward_barrier += atoms.info["model_energy"]
+                    ref_forward_barrier = atoms.info["ref_forward_barrier"]
+                    write(structs_dir / f"{fname.stem}.xyz", atoms)
+
+                if "R" in fname.stem:
+                    atoms = read(fname)
+                    model_forward_barrier -= atoms.info["model_energy"]
+                    write(structs_dir / f"{fname.stem}.xyz", atoms)
+            model_barriers.append(model_forward_barrier)
+            ref_barriers.append(ref_forward_barrier)
+
+        results[model_name] = model_barriers
+        if not ref_stored:
+            results["ref"] = ref_barriers
+            ref_stored = True
+    return results
+
+
+@pytest.fixture
+def get_mae(barrier_heights) -> dict[str, float]:
+    """
+    Get mean absolute error for barrier heights.
+
+    Parameters
+    ----------
+    barrier_heights
+        Dictionary of reference and predicted barrier heights.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted barrier height errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name])
+    return results
+
+
+@pytest.fixture
+def get_rmse(barrier_heights) -> dict[str, float]:
+    """
+    Get root mean square error for barrier heights.
+
+    Parameters
+    ----------
+    barrier_heights
+        Dictionary of reference and predicted barrier heights.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted barrier height errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        results[model_name] = rmse(barrier_heights["ref"], barrier_heights[model_name])
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "bh9_barriers_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    get_mae
+        Mean absolute errors for all models.
+
+    get_rmse
+        Root Mean Square Error for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": get_mae,
+        "RMSE": get_rmse,
+    }
+
+
+def test_bh9_barriers(metrics: dict[str, dict]) -> None:
+    """
+    Run bh9_barriers test.
+
+    Parameters
+    ----------
+    metrics
+        All new benchmark metric names and dictionary of values for each model.
+    """
+    return
diff --git a/ml_peg/analysis/molecular_reactions/bh9/metrics.yml b/ml_peg/analysis/molecular_reactions/bh9/metrics.yml
new file mode 100644
index 00000000..468b7f55
--- /dev/null
+++ b/ml_peg/analysis/molecular_reactions/bh9/metrics.yml
@@ -0,0 +1,13 @@
+metrics:
+  MAE:
+    good: 0.0
+    bad: 2.0
+    unit: eV
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: CCSD(T)
+  RMSE:
+    good: 0.0
+    bad: 2.0
+    unit: eV
+    tooltip: Root Mean Square Error for all systems
+    level_of_theory: CCSD(T)

From 52690133acb35a48d8b599e495b98aa889536ef9 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Wed, 7 Jan 2026 18:59:53 +0000
Subject: [PATCH 3/3] bh9 app and improved labels

---
 .../molecular_reactions/bh9/analyse_bh9.py    | 104 +++++++++--------
 .../molecular_reactions/bh9/metrics.yml       |   6 -
 ml_peg/app/molecular_reactions/bh9/app_bh9.py |  91 +++++++++++++++
 ml_peg/calcs/molecular_reactions/bh9/dvc.lock | 107 ------------------
 ml_peg/calcs/molecular_reactions/bh9/dvc.yaml |  22 ----
 5 files changed, 150 insertions(+), 180 deletions(-)
 create mode 100644 ml_peg/app/molecular_reactions/bh9/app_bh9.py
 delete mode 100644 ml_peg/calcs/molecular_reactions/bh9/dvc.lock
 delete mode 100644 ml_peg/calcs/molecular_reactions/bh9/dvc.yaml

diff --git a/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py b/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
index 93bab519..d89890b4 100644
--- a/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
+++ b/ml_peg/analysis/molecular_reactions/bh9/analyse_bh9.py
@@ -14,12 +14,7 @@
 import pytest
 
 from ml_peg.analysis.utils.decorators import build_table, plot_parity
-from ml_peg.analysis.utils.utils import (
-    build_d3_name_map,
-    load_metrics_config,
-    mae,
-    rmse,
-)
+from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
 from ml_peg.app import APP_ROOT
 from ml_peg.calcs import CALCS_ROOT
 from ml_peg.models.get_models import load_models
@@ -39,20 +34,59 @@
 )
 
 
-def labels() -> list:
+def get_system_names() -> list[str]:
     """
-    Get list of system names.
+    Get list of reaction system names from the first available model.
 
     Returns
     -------
-    list
-        List of all system names.
+    list[str]
+        List of system names (reaction identifiers).
     """
     for model_name in sorted(CALC_PATH.glob("*")):
-        xyz_paths = sorted((CALC_PATH / model_name).glob("*TS.xyz"))
-        labels_list = [path.stem.replace("TS", "") for path in xyz_paths]
-        break
-    return labels_list
+        if model_name.is_dir():
+            xyz_paths = sorted((CALC_PATH / model_name).glob("*TS.xyz"))
+            if xyz_paths:
+                return [path.stem.replace("TS", "") for path in xyz_paths]
+    return []
+
+
+def get_reaction_numbers() -> list[int]:
+    """
+    Get reaction numbers extracted from system names.
+
+    Names that do not split into exactly two "_"-separated parts are
+    skipped.
+
+    Returns
+    -------
+    list[int]
+        Integer value of the part before "_" for each kept name
+        (e.g. "01_1" gives 1).
+    """
+    system_names = get_system_names()
+    # Split each name once and keep only the two-part ones.
+    split_names = (name.split("_") for name in system_names)
+    return [int(parts[0]) for parts in split_names if len(parts) == 2]
+
+
+def get_structure_numbers() -> list[int]:
+    """
+    Get structure numbers (different geometries for same reaction).
+
+    Names that do not split into exactly two "_"-separated parts are
+    skipped.
+
+    Returns
+    -------
+    list[int]
+        Integer value of the part after "_" for each kept name
+        (e.g. "01_1" gives 1).
+    """
+    system_names = get_system_names()
+    # Split each name once and keep only the two-part ones.
+    split_names = (name.split("_") for name in system_names)
+    return [int(parts[1]) for parts in split_names if len(parts) == 2]
 
 
 @pytest.fixture
@@ -62,7 +96,9 @@ def labels() -> list:
     x_label="Predicted barrier / eV",
     y_label="Reference barrier / eV",
     hoverdata={
-        "Labels": labels(),
+        "Reaction": get_reaction_numbers(),
+        "Structure": get_structure_numbers(),
+        "System ID": get_system_names(),
     },
 )
 def barrier_heights() -> dict[str, list]:
@@ -77,12 +113,15 @@ def barrier_heights() -> dict[str, list]:
     results = {"ref": []} | {mlip: [] for mlip in MODELS}
     ref_stored = False
 
+    system_names = get_system_names()
     for model_name in MODELS:
         model_barriers = []
         ref_barriers = []
-        for label in labels()[model_name]:
-            ref_stored = False
-
+        model_dir = CALC_PATH / model_name
+        if not model_dir.exists():
+            results[model_name] = []
+            continue
+        for system_name in system_names:
             model_forward_barrier = 0
             ref_forward_barrier = 0
 
@@ -90,7 +129,7 @@ def barrier_heights() -> dict[str, list]:
             structs_dir = OUT_PATH / model_name
             structs_dir.mkdir(parents=True, exist_ok=True)
 
-            for fname in (CALC_PATH / model_name).glob(f"{label}*"):
+            for fname in model_dir.glob(f"{system_name}[!0-9]*"):  # skip 01_10 for 01_1
                 if "TS" in fname.stem:
                     atoms = read(fname)
                     model_forward_barrier += atoms.info["model_energy"]
@@ -132,27 +171,6 @@ def get_mae(barrier_heights) -> dict[str, float]:
     return results
 
 
-@pytest.fixture
-def get_rmse(barrier_heights) -> dict[str, float]:
-    """
-    Get root mean square error for barrier heights.
-
-    Parameters
-    ----------
-    barrier_heights
-        Dictionary of reference and predicted barrier heights.
-
-    Returns
-    -------
-    dict[str, float]
-        Dictionary of predicted barrier height errors for all models.
-    """
-    results = {}
-    for model_name in MODELS:
-        results[model_name] = rmse(barrier_heights["ref"], barrier_heights[model_name])
-    return results
-
-
 @pytest.fixture
 @build_table(
     filename=OUT_PATH / "bh9_barriers_metrics_table.json",
@@ -160,7 +178,7 @@ def get_rmse(barrier_heights) -> dict[str, float]:
     thresholds=DEFAULT_THRESHOLDS,
     mlip_name_map=D3_MODEL_NAMES,
 )
-def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str, dict]:
+def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
     """
     Get all metrics.
 
@@ -169,9 +187,6 @@ def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str,
     get_mae
         Mean absolute errors for all models.
 
-    get_rmse
-        Root Mean Square Error for all models.
-
     Returns
     -------
     dict[str, dict]
@@ -179,7 +194,6 @@ def metrics(get_mae: dict[str, float], get_rmse: dict[str, float]) -> dict[str,
     """
     return {
         "MAE": get_mae,
-        "RMSE": get_rmse,
     }
 
 
diff --git a/ml_peg/analysis/molecular_reactions/bh9/metrics.yml b/ml_peg/analysis/molecular_reactions/bh9/metrics.yml
index 468b7f55..d77caf42 100644
--- a/ml_peg/analysis/molecular_reactions/bh9/metrics.yml
+++ b/ml_peg/analysis/molecular_reactions/bh9/metrics.yml
@@ -5,9 +5,3 @@ metrics:
     unit: eV
     tooltip: Mean Absolute Error for all systems
     level_of_theory: CCSD(T)
-  RMSE:
-    good: 0.0
-    bad: 2.0
-    unit: eV
-    tooltip: Root Mean Square Error for all systems
-    level_of_theory: CCSD(T)
diff --git a/ml_peg/app/molecular_reactions/bh9/app_bh9.py b/ml_peg/app/molecular_reactions/bh9/app_bh9.py
new file mode 100644
index 00000000..ca956a2c
--- /dev/null
+++ b/ml_peg/app/molecular_reactions/bh9/app_bh9.py
@@ -0,0 +1,91 @@
+"""Run BH9 barriers app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "BH9"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/"
+    "molecular.html#bh9-reaction-barriers"
+)
+DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "bh9"
+
+
+class BH9App(BaseApp):
+    """Layout and callbacks of the BH9 benchmark app."""
+
+    def register_callbacks(self) -> None:
+        """Register the plot and structure callbacks."""
+        # The scatter id is shared by the plot and the structure callback.
+        figure_id = f"{BENCHMARK_NAME}-figure"
+        scatter = read_plot(DATA_PATH / "figure_bh9_barriers.json", id=figure_id)
+
+        # Structure paths come from the first model's transition-state files,
+        # in sorted order; a missing directory gives an empty list.
+        first_model = MODELS[0]
+        model_dir = DATA_PATH / first_model
+        structs = []
+        if model_dir.exists():
+            structs = [
+                f"assets/molecular_reactions/bh9/{first_model}/{ts_file.name}"
+                for ts_file in sorted(model_dir.glob("*TS.xyz"))
+            ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=figure_id,
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> BH9App:
+    """
+    Get BH9 benchmark app layout and callback registration.
+
+    Returns
+    -------
+    BH9App
+        Benchmark layout and callback registration.
+    """
+    return BH9App(
+        name=BENCHMARK_NAME,
+        description=(
+            "Performance in predicting reaction barrier heights for the "
+            "BH9 dataset of organic and biochemical reactions spanning nine "
+            "reaction types. Reference data from CCSD(T) calculations."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "bh9_barriers_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+    benchmark_app = get_app()
+    full_app.layout = benchmark_app.layout
+    benchmark_app.register_callbacks()
+    full_app.run(port=8071, debug=True)
diff --git a/ml_peg/calcs/molecular_reactions/bh9/dvc.lock b/ml_peg/calcs/molecular_reactions/bh9/dvc.lock
deleted file mode 100644
index 5fc7c75c..00000000
--- a/ml_peg/calcs/molecular_reactions/bh9/dvc.lock
+++ /dev/null
@@ -1,107 +0,0 @@
-schema: '2.0'
-stages:
-  uma-s-1p1-omol_BH9_Benchmark:
-    cmd: zntrack run calc_bh9.BH9_Benchmark --name uma-s-1p1-omol_BH9_Benchmark
-    params:
-      params.yaml:
-        uma-s-1p1-omol_BH9_Benchmark:
-          model:
-            _cls: ml_peg.models.models.FairChemCalc
-            d3_kwargs: {}
-            default_dtype: float32
-            device: cuda
-            model_name: uma-s-1p1
-            overrides: {}
-            task_name: omol
-            trained_on_d3: true
-          model_name: uma-s-1p1-omol
-    outs:
-    - path: nodes/uma-s-1p1-omol/BH9_Benchmark/node-meta.json
-      hash: md5
-      md5: 97ba35213ce1ef2a8f04aa4b475b17dd
-      size: 701
-  uma-s-1p1-omol_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name uma-s-1p1-omol_BH9Benchmark
-    params:
-      params.yaml:
-        uma-s-1p1-omol_BH9Benchmark:
-          model:
-            _cls: ml_peg.models.models.FairChemCalc
-            d3_kwargs: {}
-            default_dtype: float32
-            device: cuda
-            model_name: uma-s-1p1
-            overrides: {}
-            task_name: omol
-            trained_on_d3: true
-          model_name: uma-s-1p1-omol
-    outs:
-    - path: nodes/uma-s-1p1-omol/BH9Benchmark/node-meta.json
-      hash: md5
-      md5: 0d675724a6a08846e146bda983efb3da
-      size: 698
-  mace-fukui-spin-2L_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name
-      mace-fukui-spin-2L_BH9Benchmark
-    params:
-      params.yaml:
-        mace-fukui-spin-2L_BH9Benchmark:
-          model:
-            _cls: ml_peg.models.models.GenericASECalc
-            class_name: MACECalculator
-            d3_kwargs: {}
-            default_dtype:
-            device: auto
-            kwargs:
-              model_paths:
-                /home/dk584/work/mace-fukui-spin-models/mace-fukui-spin-2L.model
-            module: mace.calculators.mace
-            spec:
-            trained_on_d3: true
-          model_name: mace-fukui-spin-2L
-    outs:
-    - path: nodes/mace-fukui-spin-2L/BH9Benchmark/node-meta.json
-      hash: md5
-      md5: fa42eea6da09a1fb68998bfceace6d1b
-      size: 859
-  mace-omol_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-omol_BH9Benchmark
-    params:
-      params.yaml:
-        mace-omol_BH9Benchmark:
-          model:
-            _cls: ml_peg.models.models.GenericASECalc
-            class_name: mace_omol
-            d3_kwargs: {}
-            default_dtype:
-            device: auto
-            kwargs: {}
-            module: mace.calculators
-            spec:
-            trained_on_d3: true
-          model_name: mace-omol
-    outs:
-    - path: nodes/mace-omol/BH9Benchmark/node-meta.json
-      hash: md5
-      md5: 63f0ef91724649e74038688d88e37a1f
-      size: 713
-  orb-v3-conservative-omol_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name
-      orb-v3-conservative-omol_BH9Benchmark
-    params:
-      params.yaml:
-        orb-v3-conservative-omol_BH9Benchmark:
-          model:
-            _cls: ml_peg.models.models.OrbCalc
-            d3_kwargs: {}
-            default_dtype: float32-high
-            device: auto
-            kwargs: {}
-            name: orb_v3_conservative_omol
-            trained_on_d3: true
-          model_name: orb-v3-conservative-omol
-    outs:
-    - path: nodes/orb-v3-conservative-omol/BH9Benchmark/node-meta.json
-      hash: md5
-      md5: 62ec82f15423d4f69e761b804f6c3bcd
-      size: 701
diff --git a/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml b/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml
deleted file mode 100644
index 314cfde2..00000000
--- a/ml_peg/calcs/molecular_reactions/bh9/dvc.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-stages:
-  mace-fukui-spin-2L_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-fukui-spin-2L_BH9Benchmark
-    metrics:
-    - nodes/mace-fukui-spin-2L/BH9Benchmark/node-meta.json:
-        cache: true
-    params:
-    - mace-fukui-spin-2L_BH9Benchmark
-  mace-omol_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name mace-omol_BH9Benchmark
-    metrics:
-    - nodes/mace-omol/BH9Benchmark/node-meta.json:
-        cache: true
-    params:
-    - mace-omol_BH9Benchmark
-  orb-v3-conservative-omol_BH9Benchmark:
-    cmd: zntrack run calc_bh9.BH9Benchmark --name orb-v3-conservative-omol_BH9Benchmark
-    metrics:
-    - nodes/orb-v3-conservative-omol/BH9Benchmark/node-meta.json:
-        cache: true
-    params:
-    - orb-v3-conservative-omol_BH9Benchmark