From dd00e3288e1b9651db53a93f3664cfd4bbb1fc64 Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Fri, 24 Jan 2025 15:12:06 +0100
Subject: [PATCH 01/12] Add `explicit_hydrogen` parameter

---
 src/biotite/interface/rdkit/mol.py | 27 +++++++++++++++++++++++----
 tests/interface/test_rdkit.py      | 10 ++++------
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index eef433179..500807cb0 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -76,6 +76,7 @@ def to_mol(
     kekulize=False,
     use_dative_bonds=False,
     include_extra_annotations=(),
+    explicit_hydrogen=True,
 ):
     """
     Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
@@ -105,6 +106,11 @@ def to_mol(
         are always included per default. These standard annotations can be accessed
         with :meth:`rdkit.Chem.rdchem.Atom.GetPDBResidueInfo()` for each atom in the
         returned :class:`rdkit.Chem.rdchem.Mol`.
+    explicit_hydrogen : bool, optional
+        If set to true, the conversion process expects that all hydrogen atoms are
+        explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
+        If set to false, the conversion process treats all hydrogen atoms as implicit
+        and all hydrogen atoms in the :class:`AtomArray` are removed.
 
     Returns
     -------
@@ -141,12 +147,24 @@ def to_mol(
     HB3
     HXT
     """
+    hydrogen_mask = atoms.element == "H"
+    if explicit_hydrogen:
+        if not hydrogen_mask.any():
+            warnings.warn(
+                "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'"
+            )
+    else:
+        atoms = atoms[..., ~hydrogen_mask]
+
     mol = EditableMol(Mol())
 
     has_annot = frozenset(atoms.get_annotation_categories())
     extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS
+
     for i in range(atoms.array_length()):
         rdkit_atom = Atom(atoms.element[i].capitalize())
+        if explicit_hydrogen:
+            rdkit_atom.SetNoImplicit(True)
         if "charge" in has_annot:
             rdkit_atom.SetFormalCharge(atoms.charge[i].item())
 
@@ -176,11 +194,12 @@ def to_mol(
 
     if atoms.bonds is None:
         raise BadStructureError("An AtomArray with associated BondList is required")
-    bonds = atoms.bonds.as_array()
     if kekulize:
-        bonds = bonds.copy()
+        bonds = atoms.bonds.copy()
         bonds.remove_aromaticity()
-    for atom_i, atom_j, bond_type in atoms.bonds.as_array():
+    else:
+        bonds = atoms.bonds
+    for atom_i, atom_j, bond_type in bonds.as_array():
         if not use_dative_bonds and bond_type == BondType.COORDINATION:
             bond_type = BondType.SINGLE
         mol.AddBond(
@@ -367,7 +386,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         except KekulizeException:
             warnings.warn(
                 "Kekulization failed, "
-                "using 'BondType.ANY' instead for aromatic bonds instead",
+                "using 'BondType.AROMATIC' instead for aromatic bonds instead",
                 LossyConversionWarning,
             )
         rdkit_bonds = list(mol.GetBonds())
diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py
index 2e342ae92..f9fc6245d 100644
--- a/tests/interface/test_rdkit.py
+++ b/tests/interface/test_rdkit.py
@@ -20,11 +20,9 @@ def _load_smiles():
         return file.read().splitlines()
 
 
-@pytest.mark.filterwarnings(
-    "ignore:"
-    "The coordinates are missing for some atoms. "
-    "The fallback coordinates will be used instead"
-)
+@pytest.mark.filterwarnings("ignore:Missing coordinates.*")
+@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*")
+@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning")
 @pytest.mark.parametrize(
     "res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist()
 )
@@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name):
     # Some compounds in the CCD have missing coordinates
     assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True)
 
-    # There should be now undefined bonds
+    # There should be no undefined bonds
     assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all()
     # Kekulization returns one of multiple resonance structures, so the returned one
     # might not be the same as the input

From 30b8eb3be23483e147021de90014c478448cf53c Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sun, 2 Feb 2025 11:15:24 +0100
Subject: [PATCH 02/12] Use canonical rdkit import

---
 doc/tutorial/interface/rdkit.rst   | 20 ++++-----
 src/biotite/interface/rdkit/mol.py | 66 +++++++++++++-----------------
 2 files changed, 36 insertions(+), 50 deletions(-)

diff --git a/doc/tutorial/interface/rdkit.rst b/doc/tutorial/interface/rdkit.rst
index ff9954a52..e91ee55d2 100644
--- a/doc/tutorial/interface/rdkit.rst
+++ b/doc/tutorial/interface/rdkit.rst
@@ -27,15 +27,14 @@ For a proper structural formula, we need to compute proper 2D coordinates first.
 
     import biotite.interface.rdkit as rdkit_interface
     import biotite.structure.info as struc
+    import rdkit.Chem.AllChem as Chem
     from rdkit.Chem.Draw import MolToImage
-    from rdkit.Chem.rdDepictor import Compute2DCoords
-    from rdkit.Chem.rdmolops import RemoveHs
 
     penicillin = struc.residue("PNN")
     mol = rdkit_interface.to_mol(penicillin)
     # We do not want to include explicit hydrogen atoms in the structural formula
-    mol = RemoveHs(mol)
-    Compute2DCoords(mol)
+    mol = Chem.RemoveHs(mol)
+    Chem.Compute2DCoords(mol)
     image = MolToImage(mol, size=(600, 400))
     display(image)
 
@@ -49,18 +48,13 @@ One way to to obtain them as :class:`.AtomArray` is passing a *SMILES* string to
 
 .. jupyter-execute::
 
-    from rdkit.Chem import MolFromSmiles
-    from rdkit.Chem.rdDistGeom import EmbedMolecule
-    from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
-    from rdkit.Chem.rdmolops import AddHs
-
     ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O"
 
-    mol = MolFromSmiles(ERTAPENEM_SMILES)
+    mol = Chem.MolFromSmiles(ERTAPENEM_SMILES)
     # RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones
-    mol = AddHs(mol)
+    mol = Chem.AddHs(mol)
     # Create a 3D conformer
-    conformer_id = EmbedMolecule(mol)
-    UFFOptimizeMolecule(mol)
+    conformer_id = Chem.EmbedMolecule(mol)
+    Chem.UFFOptimizeMolecule(mol)
     ertapenem = rdkit_interface.from_mol(mol, conformer_id)
     print(ertapenem)
\ No newline at end of file
diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 500807cb0..eb31a0854 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -9,16 +9,8 @@
 import warnings
 from collections import defaultdict
 import numpy as np
-from rdkit.Chem.rdchem import (
-    Atom,
-    AtomPDBResidueInfo,
-    Conformer,
-    EditableMol,
-    KekulizeException,
-    Mol,
-)
-from rdkit.Chem.rdchem import BondType as RDKitBondType
-from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol
+import rdkit.Chem.AllChem as Chem
+from rdkit.Chem import SanitizeFlags
 from biotite.interface.version import requires_version
 from biotite.interface.warning import LossyConversionWarning
 from biotite.structure.atoms import AtomArray, AtomArrayStack
@@ -31,26 +23,26 @@
     BondType.TRIPLE: BondType.AROMATIC_TRIPLE,
 }
 _BIOTITE_TO_RDKIT_BOND_TYPE = {
-    BondType.ANY: RDKitBondType.UNSPECIFIED,
-    BondType.SINGLE: RDKitBondType.SINGLE,
-    BondType.DOUBLE: RDKitBondType.DOUBLE,
-    BondType.TRIPLE: RDKitBondType.TRIPLE,
-    BondType.QUADRUPLE: RDKitBondType.QUADRUPLE,
-    BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC,
-    BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC,
-    BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC,
-    BondType.AROMATIC: RDKitBondType.AROMATIC,
+    BondType.ANY: Chem.BondType.UNSPECIFIED,
+    BondType.SINGLE: Chem.BondType.SINGLE,
+    BondType.DOUBLE: Chem.BondType.DOUBLE,
+    BondType.TRIPLE: Chem.BondType.TRIPLE,
+    BondType.QUADRUPLE: Chem.BondType.QUADRUPLE,
+    BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC,
+    BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC,
+    BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC,
+    BondType.AROMATIC: Chem.BondType.AROMATIC,
     # Dative bonds may lead to a KekulizeException and may potentially be deprecated
     # in the future (https://github.com/rdkit/rdkit/discussions/6995)
-    BondType.COORDINATION: RDKitBondType.SINGLE,
+    BondType.COORDINATION: Chem.BondType.SINGLE,
 }
 _RDKIT_TO_BIOTITE_BOND_TYPE = {
-    RDKitBondType.UNSPECIFIED: BondType.ANY,
-    RDKitBondType.SINGLE: BondType.SINGLE,
-    RDKitBondType.DOUBLE: BondType.DOUBLE,
-    RDKitBondType.TRIPLE: BondType.TRIPLE,
-    RDKitBondType.QUADRUPLE: BondType.QUADRUPLE,
-    RDKitBondType.DATIVE: BondType.COORDINATION,
+    Chem.BondType.UNSPECIFIED: BondType.ANY,
+    Chem.BondType.SINGLE: BondType.SINGLE,
+    Chem.BondType.DOUBLE: BondType.DOUBLE,
+    Chem.BondType.TRIPLE: BondType.TRIPLE,
+    Chem.BondType.QUADRUPLE: BondType.QUADRUPLE,
+    Chem.BondType.DATIVE: BondType.COORDINATION,
 }
 _STANDARD_ANNOTATIONS = frozenset(
     {
@@ -156,20 +148,20 @@ def to_mol(
     else:
         atoms = atoms[..., ~hydrogen_mask]
 
-    mol = EditableMol(Mol())
+    mol = Chem.EditableMol(Chem.Mol())
 
     has_annot = frozenset(atoms.get_annotation_categories())
     extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS
 
     for i in range(atoms.array_length()):
-        rdkit_atom = Atom(atoms.element[i].capitalize())
+        rdkit_atom = Chem.Atom(atoms.element[i].capitalize())
         if explicit_hydrogen:
             rdkit_atom.SetNoImplicit(True)
         if "charge" in has_annot:
             rdkit_atom.SetFormalCharge(atoms.charge[i].item())
 
         # add standard pdb annotations
-        rdkit_atom_res_info = AtomPDBResidueInfo(
+        rdkit_atom_res_info = Chem.AtomPDBResidueInfo(
             atomName=atoms.atom_name[i].item(),
             residueName=atoms.res_name[i].item(),
             chainId=atoms.chain_id[i].item(),
@@ -213,7 +205,7 @@ def to_mol(
         # Handle AtomArray and AtomArrayStack consistently
         coord = coord[None, :, :]
     for model_coord in coord:
-        conformer = Conformer(mol.GetNumAtoms())
+        conformer = Chem.Conformer(mol.GetNumAtoms())
         # RDKit silently expects the data to be in C-contiguous order
         # Otherwise the coordinates would be completely misassigned
         # (https://github.com/rdkit/rdkit/issues/8221)
@@ -290,8 +282,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
     if add_hydrogen is None:
         add_hydrogen = not _has_explicit_hydrogen(mol)
     if add_hydrogen:
-        SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
-        mol = AddHs(mol, addCoords=False, addResidueInfo=False)
+        Chem.SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
+        mol = Chem.AddHs(mol, addCoords=False, addResidueInfo=False)
 
     rdkit_atoms = mol.GetAtoms()
     if rdkit_atoms is None:
@@ -328,7 +320,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         residue_info = rdkit_atom.GetPDBResidueInfo()
         if residue_info is None:
             # ... default values for atoms with missing residue information
-            residue_info = AtomPDBResidueInfo(
+            residue_info = Chem.AtomPDBResidueInfo(
                 atomName="",
                 occupancy=0.0,
                 tempFactor=float("nan"),
@@ -375,15 +367,15 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
 
     rdkit_bonds = list(mol.GetBonds())
     is_aromatic = np.array(
-        [bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds]
+        [bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds]
     )
     if np.any(is_aromatic):
         # Determine the kekulized order of aromatic bonds
         # Copy as 'Kekulize()' modifies the molecule in-place
-        mol = Mol(mol)
+        mol = Chem.Mol(mol)
         try:
-            Kekulize(mol)
-        except KekulizeException:
+            Chem.Kekulize(mol)
+        except Chem.KekulizeException:
             warnings.warn(
                 "Kekulization failed, "
                 "using 'BondType.AROMATIC' instead for aromatic bonds instead",

From 6ced9ea37092089053e97eb72882a839f324e1bf Mon Sep 17 00:00:00 2001
From: Simon Mathis <simon.mathis@gmail.com>
Date: Wed, 12 Feb 2025 12:05:21 +0000
Subject: [PATCH 03/12] fix: enable returning molecules without conformers
 (with nan coords). Also set coordinates of non-3d conformers to nan even when
 explicitly requested, as they are meaningless.

---
 src/biotite/interface/rdkit/mol.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index eb31a0854..8d632d3ae 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -290,14 +290,20 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         raise BadStructureError("Could not obtains atoms from Mol")
 
     if conformer_id is None:
-        conformers = [conf for conf in mol.GetConformers() if conf.Is3D()]
+        conformers = [conf for conf in mol.GetConformers()]
         atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
         for i, conformer in enumerate(conformers):
-            atoms.coord[i] = np.array(conformer.GetPositions())
+            if conformer.Is3D():
+                atoms.coord[i] = np.array(conformer.GetPositions())
+            else:
+                atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan)
     else:
         conformer = mol.GetConformer(conformer_id)
         atoms = AtomArray(len(rdkit_atoms))
-        atoms.coord = np.array(conformer.GetPositions())
+        if conformer.Is3D():
+            atoms.coord = np.array(conformer.GetPositions())
+        else:
+            atoms.coord = np.full((len(rdkit_atoms), 3), np.nan)
 
     extra_annotations = defaultdict(
         # Use 'object' dtype first, as the maximum string length is unknown

From c111e7911c52fdec91739503380307af286b91c4 Mon Sep 17 00:00:00 2001
From: Simon Mathis <simon.mathis@gmail.com>
Date: Wed, 12 Feb 2025 12:06:07 +0000
Subject: [PATCH 04/12] feat: attempt to type-cast extra annotations instead of
 always leaving them as 'str' objects. Also type-cast the rdkit internal
 'isImplicit' flag.

---
 src/biotite/interface/rdkit/mol.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 8d632d3ae..09bc66ab6 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -369,7 +369,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
             extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name)
 
     for annot_name, array in extra_annotations.items():
-        atoms.set_annotation(annot_name, array.astype(str))
+        # ... handle special case of implicit hydrogen atom flags
+        if annot_name == "isImplicit":
+            atoms.set_annotation(annot_name, array.astype(bool))
+            continue
+
+        # ... for all custom annotations, try numeric conversions in order of preference
+        for dtype in (bool, int, float):
+            try:
+                converted = array.astype(dtype)
+                # Verify the conversion was lossless by converting back
+                if np.array_equal(
+                    converted.astype(str), array.astype(str), equal_nan=True
+                ):
+                    atoms.set_annotation(annot_name, converted)
+                    break
+            except (ValueError, TypeError):
+                continue
+        else:  # ... if no numeric conversion succeeded
+            atoms.set_annotation(annot_name, array.astype(str))
 
     rdkit_bonds = list(mol.GetBonds())
     is_aromatic = np.array(

From 88cdfee7331e78cc1100deabbbf7a373d5593b39 Mon Sep 17 00:00:00 2001
From: Simon Mathis <simon.mathis@gmail.com>
Date: Wed, 12 Feb 2025 12:15:25 +0000
Subject: [PATCH 05/12] fix: suggestion for default case & warnings for
 explicit hydrogens

---
 src/biotite/interface/rdkit/mol.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 09bc66ab6..21ccf9561 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -68,7 +68,7 @@ def to_mol(
     kekulize=False,
     use_dative_bonds=False,
     include_extra_annotations=(),
-    explicit_hydrogen=True,
+    explicit_hydrogen=False,
 ):
     """
     Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
@@ -103,6 +103,9 @@ def to_mol(
         explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
         If set to false, the conversion process treats all hydrogen atoms as implicit
         and all hydrogen atoms in the :class:`AtomArray` are removed.
+        By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to
+        avoid accidentally introducing radicals into the molecule upon calling
+        :func:`Chem.SanitizeMol()`.
 
     Returns
     -------
@@ -140,12 +143,24 @@ def to_mol(
     HXT
     """
     hydrogen_mask = atoms.element == "H"
+    _has_hydrogen = hydrogen_mask.any()
     if explicit_hydrogen:
-        if not hydrogen_mask.any():
+        if not _has_hydrogen:
             warnings.warn(
-                "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'"
+                "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. "
+                "This may lead to unexpected results, as hydrogen atoms are not "
+                "implicitly added, which often results in radicals after sanitization "
+                "in RDKit. Make sure this is what you want.",
+                UserWarning,
             )
     else:
+        if _has_hydrogen:
+            warnings.warn(
+                "Hydrogen atoms are present in the input, although 'explicit_hydrogen' "
+                "is 'False'. All hydrogen atoms will be removed and then re-inferred "
+                "by RDKit. Make sure this is what you want.",
+                UserWarning,
+            )
         atoms = atoms[..., ~hydrogen_mask]
 
     mol = Chem.EditableMol(Chem.Mol())

From c67831723d61fb100a9939ff34109392be26c64c Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sat, 15 Feb 2025 22:23:33 +0100
Subject: [PATCH 06/12] Update src/biotite/interface/rdkit/mol.py

Co-authored-by: Simon Mathis <simon.mathis@gmail.com>
---
 src/biotite/interface/rdkit/mol.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 21ccf9561..40a2d5b26 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -171,6 +171,7 @@ def to_mol(
     for i in range(atoms.array_length()):
         rdkit_atom = Chem.Atom(atoms.element[i].capitalize())
         if explicit_hydrogen:
+            # ... tell RDKit to not assume any implicit hydrogens
             rdkit_atom.SetNoImplicit(True)
         if "charge" in has_annot:
             rdkit_atom.SetFormalCharge(atoms.charge[i].item())

From 8235f0152e4fd773b160543af99c62570c2b2614 Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sat, 15 Feb 2025 22:44:20 +0100
Subject: [PATCH 07/12] Infer `explicit_hydrogen` from presence of hydrogen
 atoms

---
 src/biotite/interface/rdkit/mol.py | 34 +++++++++++++++++-------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 40a2d5b26..a2e855230 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -68,7 +68,7 @@ def to_mol(
     kekulize=False,
     use_dative_bonds=False,
     include_extra_annotations=(),
-    explicit_hydrogen=False,
+    explicit_hydrogen=None,
 ):
     """
     Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
@@ -78,6 +78,7 @@ def to_mol(
     ----------
     atoms : AtomArray or AtomArrayStack
         The molecule to be converted.
+        Must have an associated :class:`BondList`.
     kekulize : bool, optional
         If set to true, aromatic bonds are represented by single, double and triple
         bonds.
@@ -101,11 +102,9 @@ def to_mol(
     explicit_hydrogen : bool, optional
         If set to true, the conversion process expects that all hydrogen atoms are
         explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
-        If set to false, the conversion process treats all hydrogen atoms as implicit
-        and all hydrogen atoms in the :class:`AtomArray` are removed.
-        By default, explicit_hydrogen=False, i.e. hydrogen atoms are never assumed, to
-        avoid accidentally introducing radicals into the molecule upon calling
-        :func:`Chem.SanitizeMol()`.
+        If set to false, the conversion process treats all hydrogen atoms as implicit.
+        By default, explicit hydrogen atoms are only assumed of any hydrogen atoms are
+        present in `atoms`.
 
     Returns
     -------
@@ -114,6 +113,13 @@ def to_mol(
         If the input `atoms` is an :class:`AtomArrayStack`, all models are included
         as conformers with conformer IDs starting from ``0``.
 
+    Raised
+    ------
+    BadStructureError
+        If the input `atoms` does not have an associated :class:`BondList`.
+        Also raises a :class:`BadStructureError`, if `explicit_hydrogen` is set to
+        ``False`` despite hydrogen atoms being present in `atoms`.
+
     Examples
     --------
 
@@ -144,22 +150,20 @@ def to_mol(
     """
     hydrogen_mask = atoms.element == "H"
     _has_hydrogen = hydrogen_mask.any()
-    if explicit_hydrogen:
+    if explicit_hydrogen is None:
+        explicit_hydrogen = _has_hydrogen
+    elif explicit_hydrogen:
         if not _has_hydrogen:
             warnings.warn(
-                "No hydrogen found in the input, although 'explicit_hydrogen' is 'True'. "
-                "This may lead to unexpected results, as hydrogen atoms are not "
-                "implicitly added, which often results in radicals after sanitization "
-                "in RDKit. Make sure this is what you want.",
+                "No hydrogen found, although 'explicit_hydrogen' is 'True'. "
+                "This may lead to radicals after sanitization in RDKit.",
                 UserWarning,
             )
     else:
         if _has_hydrogen:
-            warnings.warn(
+            raise BadStructureError(
                 "Hydrogen atoms are present in the input, although 'explicit_hydrogen' "
-                "is 'False'. All hydrogen atoms will be removed and then re-inferred "
-                "by RDKit. Make sure this is what you want.",
-                UserWarning,
+                "is set to 'False'"
             )
         atoms = atoms[..., ~hydrogen_mask]
 

From 81fa25741849310a591bfbaab672af532488a9cc Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sat, 15 Feb 2025 22:45:32 +0100
Subject: [PATCH 08/12] Add author

---
 src/biotite/interface/rdkit/mol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index a2e855230..7484842b7 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -3,7 +3,7 @@
 # information.
 
 __name__ = "biotite.interface.rdkit"
-__author__ = "Patrick Kunzmann"
+__author__ = "Patrick Kunzmann, Simon Mathis"
 __all__ = ["to_mol", "from_mol"]
 
 import warnings

From 2964417879fedd61238939e5a2558a1ef1414916 Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sat, 15 Feb 2025 23:38:09 +0100
Subject: [PATCH 09/12] Make 2D and 3D conformers selectable

---
 src/biotite/interface/rdkit/mol.py | 41 +++++++++++++++----------
 tests/interface/test_rdkit.py      | 49 +++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 31 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 7484842b7..9540a4210 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -246,9 +246,12 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
     ----------
     mol : rdkit.Chem.rdchem.Mol
         The molecule to be converted.
-    conformer_id : int, optional
-        The conformer to be converted.
-        By default, an :class:`AtomArrayStack` with all conformers is returned.
+    conformer_id : int or {"2D", "3D"}, optional
+        The ID of the conformer to be converted.
+        If set to "2D" or "3D", an :class:`AtomArrayStack` with only the 2D or 3D
+        conformer is returned, respectively.
+        By default, an :class:`AtomArrayStack` with all conformers (2D and 3D) is
+        returned.
     add_hydrogen : bool, optional
         If set to true, explicit hydrogen atoms are always added.
         If set to false, explicit hydrogen atoms are never added.
@@ -259,8 +262,10 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
     -------
     atoms : AtomArray or AtomArrayStack
         The converted atoms.
-        An :class:`AtomArrayStack` is only returned, if the `conformer_id` parameter
-        is not set.
+        An :class:`AtomArray` is returned if an integer `conformer_id` is given.
+        Otherwise, an :class:`AtomArrayStack` is returned.
+        If the input `mol` does not have a conformer, an `AtomArrayStack` with a
+        single model, where all coordinates are *NaN*, is returned.
 
     Notes
     -----
@@ -309,21 +314,25 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
     if rdkit_atoms is None:
         raise BadStructureError("Could not obtains atoms from Mol")
 
-    if conformer_id is None:
+    if conformer_id in (None, "2D", "3D"):
         conformers = [conf for conf in mol.GetConformers()]
-        atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
-        for i, conformer in enumerate(conformers):
-            if conformer.Is3D():
-                atoms.coord[i] = np.array(conformer.GetPositions())
-            else:
-                atoms.coord[i] = np.full((len(rdkit_atoms), 3), np.nan)
+        if conformer_id == "2D":
+            conformers = [conf for conf in conformers if not conf.Is3D()]
+        elif conformer_id == "3D":
+            conformers = [conf for conf in conformers if conf.Is3D()]
+        if len(conformers) == 0:
+            # No conformer in 'Mol' that fulfills the criteria
+            # -> create a single model with all coordinates set to NaN
+            atoms = AtomArrayStack(1, len(rdkit_atoms))
+            atoms.coord = np.full((1, len(rdkit_atoms), 3), np.nan)
+        else:
+            atoms = AtomArrayStack(len(conformers), len(rdkit_atoms))
+            for i, conformer in enumerate(conformers):
+                atoms.coord[i] = np.array(conformer.GetPositions(), dtype=np.float32)
     else:
         conformer = mol.GetConformer(conformer_id)
         atoms = AtomArray(len(rdkit_atoms))
-        if conformer.Is3D():
-            atoms.coord = np.array(conformer.GetPositions())
-        else:
-            atoms.coord = np.full((len(rdkit_atoms), 3), np.nan)
+        atoms.coord = np.array(conformer.GetPositions(), dtype=np.float32)
 
     extra_annotations = defaultdict(
         # Use 'object' dtype first, as the maximum string length is unknown
diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py
index f9fc6245d..15d907842 100644
--- a/tests/interface/test_rdkit.py
+++ b/tests/interface/test_rdkit.py
@@ -1,13 +1,7 @@
 from pathlib import Path
 import numpy as np
 import pytest
-from rdkit.Chem import MolFromSmiles, MolToSmiles
-from rdkit.Chem.rdchem import Atom, EditableMol, Mol
-from rdkit.Chem.rdchem import BondType as RDKitBondType
-from rdkit.Chem.rdmolops import (
-    AddHs,
-    RemoveStereochemistry,
-)
+import rdkit.Chem.AllChem as Chem
 import biotite.interface.rdkit as rdkit_interface
 import biotite.structure as struc
 import biotite.structure.info as info
@@ -102,19 +96,44 @@ def test_conversion_from_rdkit(smiles):
     Start from SMILES string to ensure that built-in functionality of RDKit is used
     to create the initial molecule.
     """
-    ref_mol = MolFromSmiles(smiles)
+    ref_mol = Chem.MolFromSmiles(smiles)
     atoms = rdkit_interface.from_mol(ref_mol)
     test_mol = rdkit_interface.to_mol(atoms)
 
     # The intermediate AtomArray has explicit hydrogen atoms so add them explicitly
     # to the reference as well for fair comparison
-    ref_mol = AddHs(ref_mol)
+    ref_mol = Chem.AddHs(ref_mol)
     # The intermediate AtomArray does not have stereochemistry information,
     # so this info cannot be preserved in the comparison
-    RemoveStereochemistry(ref_mol)
+    Chem.RemoveStereochemistry(ref_mol)
 
     # RDKit does not support equality checking -> Use SMILES string as proxy
-    assert MolToSmiles(test_mol) == MolToSmiles(ref_mol)
+    assert Chem.MolToSmiles(test_mol) == Chem.MolToSmiles(ref_mol)
+
+
+@pytest.mark.parametrize("selected_conformer_type", ["2D", "3D"])
+@pytest.mark.parametrize("present_conformer_type", ["2D", "3D"])
+def test_conformer_selection(present_conformer_type, selected_conformer_type):
+    """
+    Check if 2D and 3D conformers can be selected and that a model with *NaN*
+    coordinates is returned if an absent conformer type is selected.
+    """
+    mol = Chem.MolFromSmiles("C1=CC=CC=C1")
+    conformer = Chem.Conformer(mol.GetNumAtoms())
+    conformer.Set3D(present_conformer_type=="3D")
+    mol.AddConformer(conformer)
+
+    atoms = rdkit_interface.from_mol(mol, conformer_id=selected_conformer_type)
+
+    # In any case there should be a single model
+    assert atoms.stack_depth() == 1
+    if selected_conformer_type != present_conformer_type:
+        # In case the selected conformer type is not present, expect a model
+        # with all coordinates set to NaN
+        assert np.all(np.isnan(atoms.coord))
+    else:
+        # The default conformer set above contains zeros
+        assert np.all(atoms.coord == 0)
 
 
 def test_kekulization():
@@ -147,11 +166,11 @@ def test_unmappable_bond_type():
     """
     Test that a warning is raised when a bond type cannot be mapped to Biotite.
     """
-    mol = EditableMol(Mol())
-    mol.AddAtom(Atom("F"))
-    mol.AddAtom(Atom("F"))
+    mol = Chem.EditableMol(Chem.Mol())
+    mol.AddAtom(Chem.Atom("F"))
+    mol.AddAtom(Chem.Atom("F"))
     # 'HEXTUPLE' has no corresponding Biotite bond type
-    mol.AddBond(0, 1, RDKitBondType.HEXTUPLE)
+    mol.AddBond(0, 1, Chem.BondType.HEXTUPLE)
     mol = mol.GetMol()
 
     with pytest.warns(LossyConversionWarning):

From f30ca157b3197ece67a074e9343a08bc4b81a1fc Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Tue, 18 Feb 2025 09:32:01 +0100
Subject: [PATCH 10/12] Allow multiple types for extra annotations

---
 src/biotite/interface/rdkit/mol.py | 63 ++++++++++++++++--------------
 tests/interface/test_rdkit.py      | 28 ++++++++++++-
 2 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 9540a4210..a357bb3d8 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -6,6 +6,7 @@
 __author__ = "Patrick Kunzmann, Simon Mathis"
 __all__ = ["to_mol", "from_mol"]
 
+import numbers
 import warnings
 from collections import defaultdict
 import numpy as np
@@ -199,7 +200,9 @@ def to_mol(
 
         # add extra annotations
         for annot_name in extra_annot:
-            rdkit_atom.SetProp(annot_name, atoms.get_annotation(annot_name)[i].item())
+            _set_property(
+                rdkit_atom, annot_name, atoms.get_annotation(annot_name)[i].item()
+            )
 
         # add atom to molecule
         mol.AddAtom(rdkit_atom)
@@ -270,8 +273,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
     Notes
     -----
     All atom-level properties of `mol`
-    (obtainable with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`) are added as string-type
-    annotation array with the same name.
+    (obtainable with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`) are added as annotation
+    array with the same name.
     ``element`` and ``charge`` are not inferred from properties but from the
     dedicated attributes in the :class:`rdkit.Chem.rdchem.Mol` object.
 
@@ -335,8 +338,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         atoms.coord = np.array(conformer.GetPositions(), dtype=np.float32)
 
     extra_annotations = defaultdict(
-        # Use 'object' dtype first, as the maximum string length is unknown
-        lambda: np.full(atoms.array_length(), "", dtype=object)
+        # The dtype of each annotation array is inferred later
+        lambda: [None] * atoms.array_length()
     )
     atoms.add_annotation("charge", int)
     atoms.add_annotation("b_factor", float)
@@ -393,30 +396,17 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
         atoms.atom_name[_atom_idx] = residue_info.GetName().strip()
 
         # ... add extra annotations
-        annot_names = rdkit_atom.GetPropNames()
-        for annot_name in annot_names:
-            extra_annotations[annot_name][_atom_idx] = rdkit_atom.GetProp(annot_name)
-
-    for annot_name, array in extra_annotations.items():
-        # ... handle special case of implicit hydrogen atom flags
-        if annot_name == "isImplicit":
-            atoms.set_annotation(annot_name, array.astype(bool))
-            continue
-
-        # ... for all custom annotations, try numeric conversions in order of preference
-        for dtype in (bool, int, float):
-            try:
-                converted = array.astype(dtype)
-                # Verify the conversion was lossless by converting back
-                if np.array_equal(
-                    converted.astype(str), array.astype(str), equal_nan=True
-                ):
-                    atoms.set_annotation(annot_name, converted)
-                    break
-            except (ValueError, TypeError):
-                continue
-        else:  # ... if no numeric conversion succeeded
-            atoms.set_annotation(annot_name, array.astype(str))
+        for annot, value in rdkit_atom.GetPropsAsDict(includePrivate=False).items():
+            extra_annotations[annot][_atom_idx] = value
+
+    for annot, array in extra_annotations.items():
+        # Handle special case of implicit hydrogen atom flags,
+        # that is set by 'AddHs()' to hydrogen atoms
+        if annot == "isImplicit":
+            annotation_array = np.array(array, dtype=bool)
+        else:
+            annotation_array = np.array(array)
+        atoms.set_annotation(annot, annotation_array)
 
     rdkit_bonds = list(mol.GetBonds())
     is_aromatic = np.array(
@@ -465,3 +455,18 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
 
 def _has_explicit_hydrogen(mol):
     return mol.GetNumAtoms() > mol.GetNumHeavyAtoms()
+
+
+def _set_property(atom, annot_name, value):
+    if isinstance(value, bool):
+        atom.SetBoolProp(annot_name, value)
+    elif isinstance(value, numbers.Integral):
+        atom.SetIntProp(annot_name, value)
+    elif isinstance(value, numbers.Real):
+        atom.SetDoubleProp(annot_name, value)
+    elif isinstance(value, str):
+        atom.SetProp(annot_name, value)
+    else:
+        raise TypeError(
+            f"Unsupported dtype '{type(value).__name__}' for annotation '{annot_name}'"
+        )
diff --git a/tests/interface/test_rdkit.py b/tests/interface/test_rdkit.py
index 15d907842..58bd35272 100644
--- a/tests/interface/test_rdkit.py
+++ b/tests/interface/test_rdkit.py
@@ -1,3 +1,4 @@
+import itertools
 from pathlib import Path
 import numpy as np
 import pytest
@@ -120,7 +121,7 @@ def test_conformer_selection(present_conformer_type, selected_conformer_type):
     """
     mol = Chem.MolFromSmiles("C1=CC=CC=C1")
     conformer = Chem.Conformer(mol.GetNumAtoms())
-    conformer.Set3D(present_conformer_type=="3D")
+    conformer.Set3D(present_conformer_type == "3D")
     mol.AddConformer(conformer)
 
     atoms = rdkit_interface.from_mol(mol, conformer_id=selected_conformer_type)
@@ -162,6 +163,31 @@ def test_kekulization():
     )
 
 
+@pytest.mark.parametrize(
+    "values", [(False, True), (1, 2, 3), (1.0, 1.5, 2.0), ("foo", "bar", "baz")]
+)
+def test_extra_annotations(values):
+    """
+    Check if extra annotations can be added to a :class:`Mol` and recovered.
+    """
+    EXTRA_ANNOT_NAME = "extra"
+
+    ref_atoms = info.residue("ALA")
+    ref_annot = np.array(
+        list(itertools.islice(itertools.cycle(values), ref_atoms.array_length()))
+    )
+    ref_atoms.set_annotation(EXTRA_ANNOT_NAME, ref_annot)
+
+    mol = rdkit_interface.to_mol(
+        ref_atoms, include_extra_annotations=[EXTRA_ANNOT_NAME]
+    )
+    test_atoms = rdkit_interface.from_mol(mol)
+    test_annot = test_atoms.get_annotation(EXTRA_ANNOT_NAME)
+
+    assert test_annot.dtype == ref_annot.dtype
+    assert test_annot.tolist() == ref_annot.tolist()
+
+
 def test_unmappable_bond_type():
     """
     Test that a warning is raised when a bond type cannot be mapped to Biotite.

From aed49b533a0e574491743a19f0d8fd5503da6254 Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Sat, 22 Feb 2025 08:10:41 +0100
Subject: [PATCH 11/12] Note that the atoms are in the same order

---
 src/biotite/interface/rdkit/mol.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index a357bb3d8..75e1f2fd5 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -114,13 +114,19 @@ def to_mol(
         If the input `atoms` is an :class:`AtomArrayStack`, all models are included
         as conformers with conformer IDs starting from ``0``.
 
-    Raised
+    Raises
     ------
     BadStructureError
         If the input `atoms` does not have an associated :class:`BondList`.
         Also raises a :class:`BadStructureError`, if `explicit_hydrogen` is set to
         ``False`` despite hydrogen atoms being present in `atoms`.
 
+    Notes
+    -----
+    The atoms in the return value are in the same order as the input `atoms`,
+    i.e. indices pointing to the :class:`rdkit.Chem.rdchem.Mol` can be used to point to
+    the same atoms in the :class:`.AtomArray`.
+
     Examples
     --------
 
@@ -272,6 +278,10 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
 
     Notes
     -----
+    The atoms in the return value are in the same order as the input `mol`,
+    i.e. indices pointing to the :class:`rdkit.Chem.rdchem.Mol` can be used to point to
+    the same atoms in the :class:`.AtomArray`.
+
     All atom-level properties of `mol`
     (obtainable with :meth:`rdkit.Chem.rdchem.Mol.GetProp()`) are added as annotation
     array with the same name.

From e42473968e2f72b3c98bfce3ab4286c85151709f Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann <padix.key@gmail.com>
Date: Mon, 24 Feb 2025 14:55:36 +0100
Subject: [PATCH 12/12] Fix typo

Co-authored-by: Simon Mathis <simon.mathis@gmail.com>
---
 src/biotite/interface/rdkit/mol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py
index 75e1f2fd5..06d4d38a2 100644
--- a/src/biotite/interface/rdkit/mol.py
+++ b/src/biotite/interface/rdkit/mol.py
@@ -104,7 +104,7 @@ def to_mol(
         If set to true, the conversion process expects that all hydrogen atoms are
         explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
         If set to false, the conversion process treats all hydrogen atoms as implicit.
-        By default, explicit hydrogen atoms are only assumed of any hydrogen atoms are
+        By default, explicit hydrogen atoms are only assumed if any hydrogen atoms are
         present in `atoms`.
 
     Returns