Skip to content

Commit 272a66b

Browse files
authored
Catch errors in sanitize (#59)
* Add test for pred_mol input that does not sanitize and raises error * Catch errors in sanitize
1 parent 31d9864 commit 272a66b

9 files changed

Lines changed: 2603 additions & 27 deletions

File tree

posebusters/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@
3232
"check_volume_overlap",
3333
]
3434

35-
__version__ = "0.3.5"
35+
__version__ = "0.3.6"

posebusters/modules/energy_ratio.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,11 @@
1111
from rdkit.Chem.inchi import InchiReadWriteError, MolFromInchi
1212
from rdkit.Chem.rdchem import Mol
1313
from rdkit.Chem.rdDistGeom import EmbedMultipleConfs, ETKDGv3
14-
from rdkit.Chem.rdForceFieldHelpers import (
15-
UFFGetMoleculeForceField,
16-
UFFOptimizeMoleculeConfs,
17-
)
18-
from rdkit.Chem.rdmolops import AddHs, AssignStereochemistryFrom3D
14+
from rdkit.Chem.rdForceFieldHelpers import UFFGetMoleculeForceField, UFFOptimizeMoleculeConfs
15+
from rdkit.Chem.rdmolops import AddHs, AssignStereochemistryFrom3D, SanitizeMol
1916

2017
from ..tools.inchi import get_inchi
2118
from ..tools.logging import CaptureLogger
22-
from ..tools.molecules import assert_sanity
2319

2420
logger = logging.getLogger(__name__)
2521

@@ -58,7 +54,7 @@ def check_energy_ratio(
5854

5955
try:
6056
assert mol_pred.GetNumConformers() > 0, "Molecule does not have a conformer."
61-
mol_pred = assert_sanity(mol_pred)
57+
assert not SanitizeMol(mol_pred, catchErrors=True), "Molecule does not sanitize."
6258
AddHs(mol_pred, addCoords=True)
6359
except Exception as e:
6460
logger.warning(_warning_prefix + "failed because RDKit sanitization failed for molecule: %s", e)

posebusters/modules/rmsd.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ def robust_rmsd( # noqa: PLR0913
100100
RemoveStereochemistry(mol_ref)
101101

102102
if heavy_only:
103-
mol_probe = RemoveHs(mol_probe)
104-
mol_ref = RemoveHs(mol_ref)
103+
mol_probe = RemoveHs(mol_probe, sanitize=False)
104+
mol_ref = RemoveHs(mol_ref, sanitize=False)
105105

106106
# combine parameters
107107
params = dict(symmetrizeConjugatedTerminalGroups=symmetrizeConjugatedTerminalGroups, kabsch=kabsch, **params)
@@ -186,11 +186,15 @@ def intercentroid(
186186
mol_probe: Mol, mol_ref: Mol, conf_id_probe: int = -1, conf_id_ref: int = -1, heavy_only: bool = True
187187
) -> float:
188188
"""Distance between centroids of two molecules."""
189-
if heavy_only:
190-
mol_probe = RemoveHs(mol_probe)
191-
mol_ref = RemoveHs(mol_ref)
192-
193-
centroid_probe = mol_probe.GetConformer(conf_id_probe).GetPositions().mean(axis=0)
194-
centroid_ref = mol_ref.GetConformer(conf_id_ref).GetPositions().mean(axis=0)
195189

190+
centroid_probe = get_centroid(mol_probe, heavy_only, conf_id_probe)
191+
centroid_ref = get_centroid(mol_ref, heavy_only, conf_id_ref)
196192
return float(np.linalg.norm(centroid_probe - centroid_ref))
193+
194+
195+
def get_centroid(mol: Mol, heavy_only: bool = True, conf_id: int = -1) -> np.ndarray:
196+
"""Get centroid of molecule."""
197+
pos = mol.GetConformer(conf_id).GetPositions()
198+
if heavy_only:
199+
pos = pos[[atom.GetAtomicNum() != 1 for atom in mol.GetAtoms()], :]
200+
return pos.mean(axis=0)

posebusters/tools/inchi.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from rdkit.Chem.rdmolops import AssignStereochemistryFrom3D, RemoveHs, RemoveStereochemistry, SanitizeMol
1212

1313
from .logging import CaptureLogger
14-
from .molecules import add_stereo_hydrogens, assert_sanity, neutralize_atoms, remove_isotopic_info
14+
from .molecules import add_stereo_hydrogens, neutralize_atoms, remove_isotopic_info
1515

1616
logger = logging.getLogger(__name__)
1717

@@ -33,8 +33,12 @@ def get_inchi(mol: Mol, inchi_strict: bool = False) -> str:
3333

3434
def standardize_and_get_inchi(mol: Mol, options: str = "", log_level=None, warnings_as_errors=False) -> str:
3535
"""Return InChI after standardising molecule and inferring stereo from coordinates."""
36+
3637
mol = deepcopy(mol)
37-
mol = assert_sanity(mol)
38+
39+
if flags := SanitizeMol(mol, catchErrors=True):
40+
logger.debug("Cannot get InChI because molecule doesn't sanitize due to %s.", flags)
41+
return ""
3842

3943
# standardise molecule
4044
mol = remove_isotopic_info(mol)
@@ -65,8 +69,8 @@ def is_valid_inchi(inchi: str) -> bool:
6569
"""Check that InChI can be parsed and sanitization does not fail."""
6670
try:
6771
mol = MolFromInchi(inchi)
68-
assert_sanity(mol)
69-
assert mol is not None
72+
assert mol is not None, "Molecule is None."
73+
assert not SanitizeMol(mol, catchErrors=True), "Molecule does not sanitize."
7074
return True
7175
except Exception:
7276
return False

posebusters/tools/molecules.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,6 @@ def add_stereo_hydrogens(mol: Mol) -> Mol:
9494
return mol
9595

9696

97-
def assert_sanity(mol: Mol) -> Mol:
98-
"""Check that RDKit sanitization does not fail."""
99-
flags = SanitizeMol(mol)
100-
assert flags == 0, f"Sanitization failed with flags {flags}"
101-
return mol
102-
103-
10497
def remove_isotopic_info(mol: Mol) -> Mol:
10598
"""Remove isotopic information from molecule."""
10699
for atom in mol.GetAtoms():
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/pred.pdb
2+
OpenBabel03212512233D
3+
4+
13 12 0 0 1 0 0 0 0 0999 V2000
5+
20.0370 46.5970 12.1140 N 0 0 0 0 0 0 0 0 0 0 0 0
6+
18.9380 46.2330 16.2850 P 0 0 2 0 0 0 0 0 0 0 0 0
7+
20.0790 47.1960 18.3360 C 0 0 0 0 0 0 0 0 0 0 0 0
8+
19.6880 48.4400 19.0620 C 0 0 1 0 0 0 0 0 0 0 0 0
9+
19.7350 48.1890 20.5450 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
18.8130 45.4360 13.8410 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
19.7710 47.3330 16.9860 O 0 0 0 0 0 0 0 0 0 0 0 0
12+
19.6730 45.2860 12.6310 C 0 0 0 0 0 0 0 0 0 0 0 0
13+
19.5130 46.1760 14.8290 O 0 0 0 0 0 0 0 0 0 0 0 0
14+
17.4840 46.6470 16.2100 O 0 5 0 0 0 0 0 0 0 0 0 0
15+
19.1520 44.8900 16.9230 O 0 0 0 0 0 0 0 0 0 0 0 0
16+
20.5340 49.4930 18.6810 O 0 0 0 0 0 0 0 0 0 0 0 0
17+
19.2020 49.3120 21.2340 O 0 0 0 0 0 0 0 0 0 0 0 0
18+
1 8 1 0 0 0 0
19+
2 7 1 0 0 0 0
20+
2 9 1 0 0 0 0
21+
2 10 2 6 0 0 0
22+
2 11 1 0 0 0 0
23+
3 4 1 0 0 0 0
24+
3 7 1 0 0 0 0
25+
4 5 1 0 0 0 0
26+
4 12 1 1 0 0 0
27+
5 13 1 0 0 0 0
28+
6 8 1 0 0 0 0
29+
6 9 1 0 0 0 0
30+
M CHG 1 10 -1
31+
M END
32+
$$$$

0 commit comments

Comments
 (0)