-
Notifications
You must be signed in to change notification settings - Fork 45
Feat/add folmsbee conformer benchmark #429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b542fd3
e82bc26
cd4e9da
e03bf30
e2ab058
398fd9b
d682fb0
894c1ef
efb1ea1
3fad5b7
c484172
caf5786
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| """Analyse Folmsbee benchmark.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| from ase import Atoms | ||
| from ase.calculators.calculator import Calculator | ||
| from ase.io import write | ||
| from mlipaudit.benchmarks.conformer_selection.conformer_selection import ( | ||
| ConformerSelectionModelOutput, | ||
| ) | ||
| import pytest | ||
|
|
||
| from ml_peg.analysis.utils.decorators import build_table, plot_parity | ||
| from ml_peg.analysis.utils.utils import ( | ||
| build_dispersion_name_map, | ||
| load_metrics_config, | ||
| mae, | ||
| ) | ||
| from ml_peg.app import APP_ROOT | ||
| from ml_peg.calcs import CALCS_ROOT | ||
| from ml_peg.calcs.utils.mlipaudit import MlPegConformerSelectionBenchmark | ||
| from ml_peg.calcs.utils.utils import download_s3_data | ||
| from ml_peg.models import current_models | ||
| from ml_peg.models.get_models import load_models | ||
|
|
||
# Models under test; iterating MODELS yields the model names used below as
# per-model sub-directory names.
MODELS = load_models(current_models)
# Passed to ``build_table`` as ``mlip_name_map``.
DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS)

# Per-model ``model_output.json`` files are read from here.
CALC_PATH = CALCS_ROOT / "conformers" / "Folmsbee" / "outputs"
# Destination for the figure, metrics table and structure files written by
# this module.
OUT_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"

# Metric thresholds, tooltips and weights are loaded from the ``metrics.yml``
# that sits next to this module.
METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
    METRICS_CONFIG_PATH
)
|
|
||
|
|
||
def labels() -> list:
    """
    Get list of system names.

    Labels are built from the saved output of the first model in ``MODELS``,
    one per conformer of each molecule, in the form
    ``<molecule_name>_conf<index>``.

    Returns
    -------
    list
        Sorted list of all system names, or an empty list if no models are
        configured.
    """
    first_model = next(iter(MODELS), None)
    if first_model is None:
        # With no models there is no output to read; return an empty list
        # rather than failing on an unbound result variable.
        return []

    raw = (CALC_PATH / first_model / "model_output.json").read_text()
    output = ConformerSelectionModelOutput.model_validate_json(raw)
    return sorted(
        f"{m.molecule_name}_conf{i}"
        for m in output.molecules
        for i in range(len(m.predicted_energy_profile))
    )
|
|
||
|
|
||
@pytest.fixture
@plot_parity(
    filename=OUT_PATH / "figure_folmsbee.json",
    title="Energies",
    x_label="Predicted energy / kcal/mol",
    y_label="Reference energy / kcal/mol",
    hoverdata={
        "Labels": labels(),
    },
)
def conformer_energies() -> dict[str, list]:
    """
    Get conformer energies for all systems.

    For every model the saved model output is loaded and analysed, and the
    predicted energy of each labelled conformer is collected. Reference
    energies are collected once, from the first model processed. An ``.xyz``
    structure file per conformer is also written under ``OUT_PATH`` for the
    app.

    Returns
    -------
    dict[str, list]
        Dictionary of all reference and predicted conformer energies, keyed
        by ``"ref"`` and by model name.
    """
    results = {"ref": []} | {mlip: [] for mlip in MODELS}
    ref_stored = False

    data_input_dir = download_s3_data(
        key="inputs/conformers/Folmsbee/conformer_selection.zip",
        filename="conformer_selection.zip",
    )

    # Labels do not depend on the model being processed, so read and parse
    # the label source once instead of once per model.
    system_labels = labels()

    for model_name in MODELS:
        # A bare Calculator is passed as a placeholder: the model output is
        # loaded from disk below rather than recomputed.
        benchmark = MlPegConformerSelectionBenchmark(
            force_field=Calculator(),
            data_input_dir=data_input_dir,
            run_mode="standard",
        )
        raw = (CALC_PATH / model_name / "model_output.json").read_text()
        benchmark.model_output = ConformerSelectionModelOutput.model_validate_json(raw)
        result = benchmark.analyze()

        result_by_name = {m.molecule_name: m for m in result.molecules}
        data_by_name = {m.molecule_name: m for m in benchmark._folmsbee_data}

        structs_dir = OUT_PATH / model_name
        structs_dir.mkdir(parents=True, exist_ok=True)

        for label in system_labels:
            mol_name, conf_str = label.rsplit("_conf", 1)
            i = int(conf_str)
            molecule = result_by_name[mol_name]

            results[model_name].append(float(molecule.predicted_energy_profile[i]))
            if not ref_stored:
                results["ref"].append(float(molecule.reference_energy_profile[i]))

            # Write structures for app
            data_mol = data_by_name[mol_name]
            atoms = Atoms(
                symbols=data_mol.atom_symbols,
                positions=data_mol.conformer_coordinates[i],
            )
            write(structs_dir / f"{label}.xyz", atoms)
        # Reference values are identical for every model; only store them once.
        ref_stored = True
    return results
|
|
||
|
|
||
@pytest.fixture
def get_mae(conformer_energies) -> dict[str, float]:
    """
    Compute the mean absolute error of conformer energies for each model.

    Parameters
    ----------
    conformer_energies
        Dictionary of reference and predicted conformer energies.

    Returns
    -------
    dict[str, float]
        Mean absolute error against the reference energies, keyed by model
        name.

    Notes
    -----
    The error is taken over the full list of conformers stored under each
    key, not per molecule.
    """
    return {
        name: mae(conformer_energies["ref"], conformer_energies[name])
        for name in MODELS
    }
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC, here we are putting all conformers into a single flat list then taking the mae, whereas in the upstream benchmark we compute per-molecule MAEs which we then average to give |
||
|
|
||
|
|
||
@pytest.fixture
@build_table(
    filename=OUT_PATH / "folmsbee_metrics_table.json",
    metric_tooltips=DEFAULT_TOOLTIPS,
    thresholds=DEFAULT_THRESHOLDS,
    mlip_name_map=DISPERSION_NAME_MAP,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
    """
    Collect every metric for the Folmsbee benchmark.

    Parameters
    ----------
    get_mae
        Mean absolute errors for all models.

    Returns
    -------
    dict[str, dict]
        Mapping of metric name to per-model values.
    """
    all_metrics = {"MAE": get_mae}
    return all_metrics
|
|
||
|
|
||
def test_folmsbee(metrics: dict[str, dict]) -> None:
    """
    Trigger the Folmsbee analysis by requesting the ``metrics`` fixture.

    The body is intentionally empty: resolving the fixture is what runs the
    analysis.

    Parameters
    ----------
    metrics : dict[str, dict]
        Folmsbee metric results provided by fixtures.
    """
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| metrics: | ||
| MAE: | ||
| good: 0.0 | ||
| bad: 20.0 | ||
| unit: kcal/mol | ||
| tooltip: Mean Absolute Error for all systems of the relative energy to the lowest energy conformer. | ||
| level_of_theory: DLPNO-CCSD(T) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| """Run Folmsbee conformer benchmark app.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from dash import Dash | ||
| from dash.html import Div | ||
|
|
||
| from ml_peg.app import APP_ROOT | ||
| from ml_peg.app.base_app import BaseApp | ||
| from ml_peg.app.utils.build_callbacks import ( | ||
| plot_from_table_column, | ||
| struct_from_scatter, | ||
| ) | ||
| from ml_peg.app.utils.load import read_plot | ||
| from ml_peg.models import current_models | ||
| from ml_peg.models.get_models import get_model_names | ||
|
|
||
# Model names; the first entry's directory supplies the structure files shown
# in the app.
MODELS = get_model_names(current_models)
# Used as the app name and as the prefix of the Dash component ids.
BENCHMARK_NAME = "Folmsbee"
DOCS_URL = (
    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/conformers.html#folmsbee"
)
# Directory holding the figure and table JSON plus per-model structure folders.
DATA_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"
|
|
||
|
|
||
class FolmsbeeApp(BaseApp):
    """Folmsbee conformer benchmark app layout and callbacks."""

    def register_callbacks(self) -> None:
        """
        Register callbacks to app.

        Links the "MAE" table column to the parity scatter plot, and links
        points in that plot to structure files taken from the first model's
        data directory. If no models are configured, or that directory does
        not exist, no structures are offered.
        """
        scatter = read_plot(
            DATA_PATH / "figure_folmsbee.json",
            id=f"{BENCHMARK_NAME}-figure",
        )

        structs = []
        # Guard against an empty model list before indexing the first model.
        if MODELS:
            first_model = MODELS[0]
            model_dir = DATA_PATH / first_model
            if model_dir.exists():
                labels = sorted(f.stem for f in model_dir.glob("*.xyz"))
                structs = [
                    f"/assets/conformers/Folmsbee/{first_model}/{label}.xyz"
                    for label in labels
                ]

        plot_from_table_column(
            table_id=self.table_id,
            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
            column_to_plot={"MAE": scatter},
        )

        struct_from_scatter(
            scatter_id=f"{BENCHMARK_NAME}-figure",
            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
            structs=structs,
            mode="struct",
        )
|
|
||
|
|
||
def get_app() -> FolmsbeeApp:
    """
    Build the Folmsbee benchmark app.

    Returns
    -------
    FolmsbeeApp
        Benchmark layout and callback registration.
    """
    description = (
        "Performance in predicting relative conformer energies for "
        "drug-like molecules. "
        "Reference data from DLPNO-CCSD(T) calculations."
    )
    # Empty containers filled in later by the registered callbacks.
    placeholders = [
        Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
        Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
    ]
    app = FolmsbeeApp(
        name=BENCHMARK_NAME,
        framework_id="mlip_audit",
        description=description,
        docs_url=DOCS_URL,
        table_path=DATA_PATH / "folmsbee_metrics_table.json",
        extra_components=placeholders,
    )
    return app
|
|
||
|
|
||
# Run this benchmark's app on its own (debug server on port 8066).
if __name__ == "__main__":
    # Assets are served from two levels above DATA_PATH, matching the
    # "/assets/conformers/Folmsbee/..." URLs built in register_callbacks.
    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
    benchmark_app = get_app()
    full_app.layout = benchmark_app.layout
    benchmark_app.register_callbacks()
    full_app.run(port=8066, debug=True)
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,61 @@ | ||||||||
| """ | ||||||||
| Compute the Folmsbee dataset of molecular conformers. | ||||||||
|
|
||||||||
| Assessing conformer energies using electronic structure and | ||||||||
| machine learning methods | ||||||||
|
|
||||||||
| Dakota Folmsbee, Geoffrey Hutchinson | ||||||||
| International Journal of Quantum Chemistry 2020 121 (1) e26381 | ||||||||
| DOI: 10.1002/qua.26381 | ||||||||
| """ | ||||||||
|
|
||||||||
| from __future__ import annotations | ||||||||
|
|
||||||||
| from pathlib import Path | ||||||||
| from typing import Any | ||||||||
|
|
||||||||
| import pytest | ||||||||
|
|
||||||||
| from ml_peg.calcs.utils.mlipaudit import MlPegConformerSelectionBenchmark | ||||||||
| from ml_peg.calcs.utils.utils import download_s3_data | ||||||||
| from ml_peg.models import current_models | ||||||||
| from ml_peg.models.get_models import load_models | ||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
|
|
||||||||
# Mapping of model name to model object; the test below is parametrized over
# its items.
MODELS = load_models(current_models)

# Per-model results are written to ``outputs/<model_name>`` beside this file.
OUT_PATH = Path(__file__).parent / "outputs"
|
|
||||||||
|
|
||||||||
@pytest.mark.parametrize("mlip", MODELS.items())
def test_folmsbee(mlip: tuple[str, Any]) -> None:
    """
    Run the Folmsbee conformer benchmark for one model and save its output.

    The model output is serialised to ``model_output.json`` in a per-model
    directory under ``OUT_PATH``.

    Parameters
    ----------
    mlip
        Name of model and model object to get calculator.
    """
    model_name, model = mlip
    # NOTE(review): a reviewer asked why float64 is forced here; the reason is
    # not recorded in this file -- TODO document it.
    model.default_dtype = "float64"

    # Base calculator wrapped by the model's D3 helper.
    calculator = model.add_d3_calculator(model.get_calculator())

    input_dir = download_s3_data(
        key="inputs/conformers/Folmsbee/conformer_selection.zip",
        filename="conformer_selection.zip",
    )

    model_out_dir = OUT_PATH / model_name
    model_out_dir.mkdir(parents=True, exist_ok=True)

    benchmark = MlPegConformerSelectionBenchmark(
        force_field=calculator,
        data_input_dir=input_dir,
        run_mode="standard",
    )
    benchmark.run_model()

    serialised = benchmark.model_output.model_dump_json()
    (model_out_dir / "model_output.json").write_text(serialised)
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: Why not |
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| """Adapters for using mlipaudit benchmarks with ml-peg's ASE calculators.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from mlipaudit.benchmarks.conformer_selection.conformer_selection import ( | ||
| ConformerSelectionBenchmark, | ||
| ) | ||
|
|
||
|
|
||
class MlPegConformerSelectionBenchmark(ConformerSelectionBenchmark):
    """
    Adapter of ``ConformerSelectionBenchmark`` for ml-peg's ASE calculators.

    The only change from the parent class is that ``skip_if_elements_missing``
    is switched off. ASE ``Calculator`` objects do not expose
    ``allowed_atomic_numbers``, and ml-peg handles model/element compatibility
    separately through its model registry.
    """

    # Disabled for the reason given in the class docstring.
    skip_if_elements_missing = False
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,9 @@ mace = [ | |
| mattersim = [ | ||
| "mattersim==1.2.2", | ||
| ] | ||
| mlipaudit = [ | ||
| "mlipaudit; python_version >= '3.11'", | ||
| ] | ||
| orb = [ | ||
| "orb-models == 0.6.2; sys_platform != 'win32' and python_version >= '3.12'", | ||
| ] | ||
|
|
@@ -205,6 +208,10 @@ conflicts = [ | |
| { extra = "mattersim" }, | ||
| { extra = "grace" }, | ||
| ], | ||
| [ | ||
| { extra = "mlipaudit" }, | ||
| { extra = "grace" }, | ||
| ], | ||
| ] | ||
|
|
||
| constraint-dependencies = [ | ||
|
|
@@ -216,3 +223,4 @@ module-root = "" | |
|
|
||
| [tool.uv.sources] | ||
| asemolec = { git = "https://github.com/imagdau/aseMolec.git" } | ||
| mlipaudit = { git = "https://github.com/instadeepai/MLIPAudit.git", branch = "mlpeg-migration" } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. remark: Note that this will have to be updated to main at some point, either once everything is migrated or per-benchmark. Probably the former. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be None if a molecule had an unsupported element.