Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions src/biotite/interface/rdkit/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def to_mol(
kekulize=False,
use_dative_bonds=False,
include_extra_annotations=(),
explicit_hydrogen=True,
explicit_hydrogen=False,
):
"""
Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
Expand Down Expand Up @@ -103,6 +103,9 @@ def to_mol(
explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
If set to false, the conversion process treats all hydrogen atoms as implicit
and all hydrogen atoms in the :class:`AtomArray` are removed.
By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to
avoid accidentally introducing radicals into the molecule upon calling
:func:`Chem.SanitizeMol()`.

Returns
-------
Expand Down Expand Up @@ -140,12 +143,24 @@ def to_mol(
HXT
"""
hydrogen_mask = atoms.element == "H"
_has_hydrogen = hydrogen_mask.any()
if explicit_hydrogen:
if not hydrogen_mask.any():
if not _has_hydrogen:
warnings.warn(
"No hydrogen found in the input, although 'explicit_hydrogen' is 'True'"
"No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. "
"This may lead to unexpected results, as hydrogen atoms are not "
"implicitly added, which often results in radicals after sanitization "
"in RDKit. Make sure this is what you want.",
UserWarning,
)
else:
if _has_hydrogen:
warnings.warn(
"Hydrogen atoms are present in the input, although 'explicit_hydrogen' "
"is 'False'. All hydrogen atoms will be removed and then re-inferred "
"by RDKit. Make sure this is what you want.",
UserWarning,
)
atoms = atoms[..., ~hydrogen_mask]

mol = Chem.EditableMol(Chem.Mol())
Expand Down Expand Up @@ -290,14 +305,20 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
raise BadStructureError("Could not obtains atoms from Mol")

if conformer_id is None:
conformers = [conf for conf in mol.GetConformers() if conf.Is3D()]
conformers = [conf for conf in mol.GetConformers()]
atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
for i, conformer in enumerate(conformers):
atoms.coord[i] = np.array(conformer.GetPositions())
if conformer.Is3D():
atoms.coord[i] = np.array(conformer.GetPositions())
else:
atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan)
else:
conformer = mol.GetConformer(conformer_id)
atoms = AtomArray(len(rdkit_atoms))
atoms.coord = np.array(conformer.GetPositions())
if conformer.Is3D():
atoms.coord = np.array(conformer.GetPositions())
else:
atoms.coord = np.full((len(rdkit_atoms), 3), np.nan)

extra_annotations = defaultdict(
# Use 'object' dtype first, as the maximum string length is unknown
Expand Down Expand Up @@ -363,7 +384,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name)

for annot_name, array in extra_annotations.items():
atoms.set_annotation(annot_name, array.astype(str))
# ... handle special case of implicit hydrogen atom flags
if annot_name == "isImplicit":
atoms.set_annotation(annot_name, array.astype(bool))
continue

# ... for all custom annotations, try numeric conversions in order of preference
for dtype in (bool, int, float):
try:
converted = array.astype(dtype)
# Verify the conversion was lossless by converting back
if np.array_equal(
converted.astype(str), array.astype(str), equal_nan=True
):
atoms.set_annotation(annot_name, converted)
break
except (ValueError, TypeError):
continue
else: # ... if no numeric conversion succeeded
atoms.set_annotation(annot_name, array.astype(str))

rdkit_bonds = list(mol.GetBonds())
is_aromatic = np.array(
Expand Down
Loading