diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index eb31a0854..21ccf9561 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -68,7 +68,7 @@ def to_mol( kekulize=False, use_dative_bonds=False, include_extra_annotations=(), - explicit_hydrogen=True, + explicit_hydrogen=False, ): """ Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a @@ -103,6 +103,9 @@ def to_mol( explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`. If set to false, the conversion process treats all hydrogen atoms as implicit and all hydrogen atoms in the :class:`AtomArray` are removed. + By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to + avoid accidentally introducing radicals into the molecule upon calling + :func:`Chem.SanitizeMol()`. Returns ------- @@ -140,12 +143,24 @@ def to_mol( HXT """ hydrogen_mask = atoms.element == "H" + _has_hydrogen = hydrogen_mask.any() if explicit_hydrogen: - if not hydrogen_mask.any(): + if not _has_hydrogen: warnings.warn( - "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'" + "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. " + "This may lead to unexpected results, as hydrogen atoms are not " + "implicitly added, which often results in radicals after sanitization " + "in RDKit. Make sure this is what you want.", + UserWarning, ) else: + if _has_hydrogen: + warnings.warn( + "Hydrogen atoms are present in the input, although 'explicit_hydrogen' " + "is 'False'. All hydrogen atoms will be removed and then re-inferred " + "by RDKit. Make sure this is what you want.", + UserWarning, + ) atoms = atoms[..., ~hydrogen_mask] mol = Chem.EditableMol(Chem.Mol()) @@ -290,14 +305,20 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): raise BadStructureError("Could not obtains atoms from Mol") if conformer_id is None: - conformers = [conf for conf in mol.GetConformers() if conf.Is3D()] + conformers = [conf for conf in mol.GetConformers()] atoms = AtomArrayStack(len(conformers), len(rdkit_atoms)) for i, conformer in enumerate(conformers): - atoms.coord[i] = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord[i] = np.array(conformer.GetPositions()) + else: + atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan) else: conformer = mol.GetConformer(conformer_id) atoms = AtomArray(len(rdkit_atoms)) - atoms.coord = np.array(conformer.GetPositions()) + if conformer.Is3D(): + atoms.coord = np.array(conformer.GetPositions()) + else: + atoms.coord = np.full((len(rdkit_atoms), 3), np.nan) extra_annotations = defaultdict( # Use 'object' dtype first, as the maximum string length is unknown @@ -363,7 +384,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name) for annot_name, array in extra_annotations.items(): - atoms.set_annotation(annot_name, array.astype(str)) + # ... handle special case of implicit hydrogen atom flags + if annot_name == "isImplicit": + atoms.set_annotation(annot_name, array.astype(bool)) + continue + + # ... for all custom annotations, try numeric conversions in order of preference + for dtype in (bool, int, float): + try: + converted = array.astype(dtype) + # Verify the conversion was lossless by converting back + if np.array_equal( + converted.astype(str), array.astype(str), equal_nan=True + ): + atoms.set_annotation(annot_name, converted) + break + except (ValueError, TypeError): + continue + else: # ... if no numeric conversion succeeded + atoms.set_annotation(annot_name, array.astype(str)) rdkit_bonds = list(mol.GetBonds()) is_aromatic = np.array(