Skip to content

Commit

Permalink
Add warnings for multiple molecules and tests (#1939)
Browse files Browse the repository at this point in the history
* add warnings for multiple molecules and tests

* improve warnings and fix formatting

* update releasehistory

* use custom warning type

* lint
  • Loading branch information
j-wags authored Sep 30, 2024
1 parent 747c2f1 commit 31e4c4b
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 2 deletions.
7 changes: 7 additions & 0 deletions docs/releasehistory.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w
* `minor` increments add features but do not break API compatibility
* `micro` increments represent bugfix releases or improvements in documentation

## Current development

### Improved warnings and documentation

- [PR #1939](https://github.com/openforcefield/openff-toolkit/pull/1939): Resolves [#1587](https://github.com/openforcefield/openff-toolkit/pull/1587) by emitting a warning when from_openeye or from_rdkit try to load a molecule with multiple disconnected components.


## 0.16.4

### Bugfixes
Expand Down
16 changes: 16 additions & 0 deletions openff/toolkit/_tests/test_toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
IncorrectNumConformersWarning,
InvalidIUPACNameError,
InvalidToolkitError,
MultipleComponentsInMoleculeWarning,
NotAttachedToMoleculeError,
RadicalsNotSupportedError,
ToolkitUnavailableException,
Expand Down Expand Up @@ -725,6 +726,14 @@ def test_from_openeye_transition_metal_radical(self):

OpenEyeToolkitWrapper().from_openeye(oemol)

def test_from_openeye_multiple_molecule(self):
"""Test that parsing a OEMol that is actually multiple disconnected molecules raises a warning"""
from openeye import oechem
oemol = oechem.OEMol()
oechem.OESmilesToMol(oemol, "C.N")
with pytest.warns(MultipleComponentsInMoleculeWarning, match="more than one molecule", ):
OpenEyeToolkitWrapper().from_openeye(oemol)

def test_from_openeye_implicit_hydrogen(self):
"""
Test OpenEyeToolkitWrapper for loading a molecule with implicit
Expand Down Expand Up @@ -2654,6 +2663,13 @@ def test_from_rdkit_transition_metal_radical(self):

RDKitToolkitWrapper().from_rdkit(rdmol)

def test_from_rdkit_multiple_molecule(self):
"""Test that parsing a rdmol that is actually multiple disconnected molecules raises a warning"""
from rdkit import Chem
rdmol = Chem.MolFromSmiles("C.N")
with pytest.warns(MultipleComponentsInMoleculeWarning, match="more than one molecule"):
RDKitToolkitWrapper().from_rdkit(rdmol)

@pytest.mark.parametrize(
"smiles, expected_map", [("[Cl:1][Cl]", {0: 1}), ("[Cl:1][Cl:2]", {0: 1, 1: 2})]
)
Expand Down
2 changes: 2 additions & 0 deletions openff/toolkit/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,8 @@ class OpenEyeImportError(OpenFFToolkitException):
class MultipleMoleculesInPDBError(OpenFFToolkitException):
"""Error raised when a multiple molecules are found when one was expected"""

class MultipleComponentsInMoleculeWarning(UserWarning):
"""Warning emitted when user attempts to make an OpenFF Molecule with multiple disconnected components"""

class WrongShapeError(OpenFFToolkitException):
"""Error raised when an array of the wrong shape is found"""
Expand Down
18 changes: 16 additions & 2 deletions openff/toolkit/utils/openeye_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pathlib
import re
import tempfile
import warnings
from collections import defaultdict
from functools import wraps
from typing import TYPE_CHECKING, Any, Optional, Union
Expand Down Expand Up @@ -46,6 +47,7 @@
InvalidAromaticityModelError,
InvalidIUPACNameError,
LicenseError,
MultipleComponentsInMoleculeWarning,
NotAttachedToMoleculeError,
OpenEyeError,
OpenEyeImportError,
Expand Down Expand Up @@ -1061,8 +1063,6 @@ def _check_mol2_gaff_atom_type(molecule, file_path=None):
except KeyError:
pass
else:
import warnings

warn_msg = (
f'OpenEye interpreted the type "{atom_type}" in {file_path}{molecule.name}'
f" as {element_name}. Does your mol2 file uses Tripos SYBYL atom types?"
Expand Down Expand Up @@ -1203,6 +1203,20 @@ def from_openeye(

oemol = oechem.OEMol(oemol)

components = oechem.OEDetermineComponents(oemol)
if components[0] > 1:
warnings.warn("OpenEye OEMol passed to from_openeye consists of more than one molecule, consider running "
"something like https://docs.eyesopen.com/toolkits/python/oechemtk/oechem_examples/"
"oechem_example_parts2mols.html or splitting input SMILES at '.'s to get separate molecules "
"and pass them to from_openeye one at a time. While this is supported for "
"legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical "
"graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built "
"to handle multiple molecules, but they should be in a Topology object, ex: "
"top = Topology.from_molecules([mol1, mol2])",
MultipleComponentsInMoleculeWarning,
stacklevel=2
)

# Add explicit hydrogens if they're implicit
if oechem.OEHasImplicitHydrogens(oemol):
oechem.OEAddExplicitHydrogens(oemol)
Expand Down
15 changes: 15 additions & 0 deletions openff/toolkit/utils/rdkit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
InChIParseError,
InvalidAromaticityModelError,
MoleculeParseError,
MultipleComponentsInMoleculeWarning,
NonUniqueSubstructureName,
NotAttachedToMoleculeError,
RadicalsNotSupportedError,
Expand Down Expand Up @@ -2232,6 +2233,7 @@ def from_rdkit(
"""
from rdkit import Chem
from rdkit.Chem import AllChem

if _cls is None:
from openff.toolkit.topology.molecule import Molecule
Expand All @@ -2241,6 +2243,19 @@ def from_rdkit(
# Make a copy of the RDKit Mol as we'll need to change it (e.g. assign stereo).
rdmol = Chem.Mol(rdmol)

frags = AllChem.GetMolFrags(rdmol)
if len(frags) > 1:
warnings.warn("RDKit Molecule passed to from_rdkit consists of more than one molecule, consider running "
"rdkit.Chem.AllChem.GetMolfrags(rdmol, asMols=True) or splitting input SMILES at '.' to get "
"separate molecules and pass them to from_rdkit one at a time. While this is supported for "
"legacy reasons, OpenFF Molecule objects are not supposed to contain disconnected chemical "
"graphs and this may result in undefined behavior later on. The OpenFF ecosystem is built "
"to handle multiple molecules, but they should be in a Topology object, ex: "
"top = Topology.from_molecules([mol1, mol2])",
MultipleComponentsInMoleculeWarning,
stacklevel=2
)

if not hydrogens_are_explicit:
rdmol = Chem.AddHs(rdmol, addCoords=True)

Expand Down

0 comments on commit 31e4c4b

Please sign in to comment.