From d232eab9c8b915a928f230b30b1d47373dfb14e3 Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:05:21 +0000 Subject: [PATCH 1/3] fix: enable returning molecules without conformers (with nan coords). Also set coordinates of non-3d conformers to nan even when explicitly requested, as they are meaningless. --- src/biotite/interface/rdkit/mol.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index eb31a0854..8d632d3ae 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -290,14 +290,20 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): raise BadStructureError("Could not obtains atoms from Mol") if conformer_id is None: - conformers = [conf for conf in mol.GetConformers() if conf.Is3D()] + conformers = [conf for conf in mol.GetConformers()] atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) for i, conformer in enumerate(conformers): - atoms.coord[i] = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord[i] = np.array(conformer.GetPositions()) + else: + atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan) else: conformer = mol.GetConformer(conformer_id) atoms = AtomArray(len(rdkit_atoms)) - atoms.coord = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord = np.array(conformer.GetPositions()) + else: + atoms.coord = np.full((len(rdkit_atoms), 3), np.nan) extra_annotations = defaultdict( # Use 'object' dtype first, as the maximum string length is unknown From e36303c42f270f3cb755def4f0526899c14c9825 Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:06:07 +0000 Subject: [PATCH 2/3] feat: attempt to type-cast extra annotations instead of always leaving them as 'str' objects. Also type-cast the rdkit internal 'isImplicit' flag. --- src/biotite/interface/rdkit/mol.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 8d632d3ae..09bc66ab6 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -369,7 +369,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name) for annot_name, array in extra_annotations.items(): - atoms.set_annotation(annot_name, array.astype(str)) + # ... handle special case of implicit hydrogen atom flags + if annot_name == "isImplicit": + atoms.set_annotation(annot_name, array.astype(bool)) + continue + + # ... for all custom annotations, try numeric conversions in order of preference + for dtype in (bool, int, float): + try: + converted = array.astype(dtype) + # Verify the conversion was lossless by converting back + if np.array_equal( + converted.astype(str), array.astype(str), equal_nan=True + ): + atoms.set_annotation(annot_name, converted) + break + except (ValueError, TypeError): + continue + else: # ... if no numeric conversion succeeded + atoms.set_annotation(annot_name, array.astype(str)) rdkit_bonds = list(mol.GetBonds()) is_aromatic = np.array( From c1b64c807091953cfc4fb0d479c90e356ee145dc Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Wed, 12 Feb 2025 12:15:25 +0000 Subject: [PATCH 3/3] fix: suggestion for default case & warnings for explicit hydrogens --- src/biotite/interface/rdkit/mol.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 09bc66ab6..21ccf9561 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -68,7 +68,7 @@ def to_mol( kekulize=False, use_dative_bonds=False, include_extra_annotations=(), - explicit_hydrogen=True, + explicit_hydrogen=False, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -103,6 +103,9 @@ def to_mol( explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`. If set to false, the conversion process treats all hydrogen atoms as implicit and all hydrogen atoms in the :class:`AtomArray` are removed. + By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to + avoid accidentally introducing radicals into the molecule upon calling + :func:`Chem.SanitizeMol()`. Returns ------- @@ -140,12 +143,24 @@ def to_mol( HXT """ hydrogen_mask = atoms.element == "H" + _has_hydrogen = hydrogen_mask.any() if explicit_hydrogen: - if not hydrogen_mask.any(): + if not _has_hydrogen: warnings.warn( - "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'" + "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. " + "This may lead to unexpected results, as hydrogen atoms are not " + "implicitly added, which often results in radicals after sanitization " + "in RDKit. Make sure this is what you want.", + UserWarning, ) else: + if _has_hydrogen: + warnings.warn( + "Hydrogen atoms are present in the input, although 'explicit_hydrogen' " + "is 'False'. All hydrogen atoms will be removed and then re-inferred " + "by RDKit. Make sure this is what you want.", + UserWarning, + ) atoms = atoms[..., ~hydrogen_mask] mol = Chem.EditableMol(Chem.Mol())