aivant · padix-key · Jan 13, 2026 · Nov 7, 2025 · Nov 7, 2025
diff --git a/README.rst b/README.rst
@@ -25,7 +25,7 @@ for assessing their quality.
 It supports
 
 - all *CASP*/*CAPRI* metrics and more
-- small molecules to huge protein complexes
+- small molecules to huge protein or nucleic acid complexes
 - easy extension with custom metrics
 - a command line interface and a Python API
 

diff --git a/docs/api.rst b/docs/api.rst
@@ -103,14 +103,13 @@ Atom Matching
 
 Miscellaneous
 -------------
-
 .. autosummary::
     :toctree: apidoc
     :caption: Miscellaneous
 
+    MoleculeType
     sanitize
     standardize
-    is_small_molecule
     get_contact_residues
     find_atoms_by_pattern
     estimate_formal_charges

diff --git a/docs/index.rst b/docs/index.rst
@@ -42,7 +42,7 @@ pepp'r documentation
         It supports
 
         - all *CASP*/*CAPRI* metrics and more
-        - small molecules to huge protein complexes
+        - small molecules to huge protein or nucleic acid complexes
         - easy extension with custom metrics
         - a command line interface and a Python API
 

diff --git a/docs/tutorial/cli.rst b/docs/tutorial/cli.rst
@@ -132,6 +132,11 @@ In this case we select the best value of the three most confident poses.
     ! peppr tabulate $TMPDIR/peppr.pkl $TMPDIR/table.csv top3
     ! cat $TMPDIR/table.csv
 
+.. note::
+
+    The term '*polymer*' is used throughout ``peppr`` to refer to protein or nucleic
+    acid chains.
+
 Aggregating results over systems
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 The other type of report is an aggregated value for each metric (and each selector)
@@ -147,8 +152,8 @@ system.
 For each metric the mean and median value over all systems are reported.
 For some metrics, such as ``rmsd`` and ``dockq``, there is also bins the values are
 sorted into.
-For example, ``CA-RMSD <2.0`` gives the percentage of systems with a *CA-RMSD* below
-2.0 Å.
+For example, ``backbone RMSD <2.0`` gives the percentage of systems with a
+:math:`C_{\alpha}`-RMSD below 2.0 Å.
 
 Increasing the accuracy
 -----------------------

diff --git a/src/peppr/common.py b/src/peppr/common.py
@@ -1,7 +1,10 @@
-__all__ = ["is_small_molecule", "standardize"]
+__all__ = ["MoleculeType", "standardize"]
 
 
+import functools
+from enum import Enum, auto
 import biotite.structure as struc
+import biotite.structure.info as info
 
 DONOR_PATTERN = (
     "["
@@ -37,6 +40,48 @@
 HALOGEN_DISTANCE_SCALING = (0.8, 1.05)
 
 
+class MoleculeType(Enum):
+    """
+    The type of a molecule.
+    """
+
+    SMALL_MOLECULE = auto()
+    PROTEIN = auto()
+    NUCLEIC_ACID = auto()
+
+    @staticmethod
+    def of(chain: struc.AtomArray) -> "MoleculeType":
+        """
+        Determine the :class:`MoleculeType` of the given chain.
+
+        Parameters
+        ----------
+        chain : struc.AtomArray, shape=(n,)
+            The chain to determine the :class:`MoleculeType` of.
+
+        Returns
+        -------
+        MoleculeType
+            The :class:`MoleculeType` of the given chain.
+
+        Warnings
+        --------
+        This function does not check if `chain` is truly a single chain, this is the
+        responsibility of the caller.
+        """
+        if chain.hetero[0].item():
+            return MoleculeType.SMALL_MOLECULE
+        res_name = chain.res_name[0].item()
+        if res_name in _amino_acid_names():
+            return MoleculeType.PROTEIN
+        if res_name in _nucleotide_names():
+            return MoleculeType.NUCLEIC_ACID
+        raise ValueError(
+            f"Chain contains residue '{res_name}' which is not polymer component, "
+            "but it is also not marked as a small molecule"
+        )
+
+
 def is_small_molecule(chain: struc.AtomArray) -> bool:
     """
     Check whether the given chain is a small molecule.
@@ -86,3 +131,19 @@ def standardize(
     if remove_monoatomic_ions:
         mask &= ~struc.filter_monoatomic_ions(system)
     return system[..., mask]
+
+
+@functools.cache
+def _amino_acid_names() -> set[str]:
+    """
+    Get the names of all residues considered to be amino acids.
+    """
+    return set(info.amino_acid_names())
+
+
+@functools.cache
+def _nucleotide_names() -> set[str]:
+    """
+    Get the names of all residues considered to be nucleotides.
+    """
+    return set(info.nucleotide_names())
diff --git a/src/peppr/contacts.py b/src/peppr/contacts.py
@@ -375,7 +375,7 @@ def find_stacking_interactions(
         return [
             self._map_stacking_indices(combined_atoms, indices_1, indices_2, kind)
             for indices_1, indices_2, kind in all_interactions
-            # filter out intra protein and intra ligand interactions
+            # filter out intra polymer and intra ligand interactions
             if combined_atoms.hetero[indices_1[0]]
             != combined_atoms.hetero[indices_2[0]]
         ]
@@ -388,13 +388,13 @@ def _map_stacking_indices(
         kind: struc.PiStacking,
     ) -> tuple[NDArray[np.int_], NDArray[np.int_], struc.PiStacking]:
         """Map combined structure indices back to original receptor and ligand."""
-        # Sort to ensure protein indices comes first then ligand
-        protein_idx, ligand_idx = sorted(
+        # Sort to ensure polymer indices comes first then ligand
+        polymer_idx, ligand_idx = sorted(
             [indices_1, indices_2], key=lambda idx: combined_atoms.hetero[idx[0]]
         )
 
         return (
-            self._binding_site_indices[protein_idx],  # Map to full receptor
+            self._binding_site_indices[polymer_idx],  # Map to full receptor
             ligand_idx - len(self._binding_site),  # Map to ligand
             kind,
         )
@@ -438,7 +438,7 @@ def find_pi_cation_interactions(
         return [
             self._map_pi_cation_indices(ring_indices, cation_index)
             for ring_indices, cation_index in all_interactions
-            #  filter out intra protein and intra ligand interactions
+            #  filter out intra polymer and intra ligand interactions
             if (ring_indices[0] >= binding_site_size)
             != (cation_index >= binding_site_size)
         ]

diff --git a/src/peppr/match.py b/src/peppr/match.py
@@ -26,16 +26,22 @@
     SanitizeFlags,
     SanitizeMol,
 )
-from peppr.common import is_small_molecule
+from peppr.common import MoleculeType
 
 # To match atoms between pose and reference chain,
 # the residue and atom name are sufficient to unambiguously identify an atom
 _ANNOTATIONS_FOR_ATOM_MATCHING = ["res_name", "atom_name"]
-_IDENTITY_MATRIX = align.SubstitutionMatrix(
-    seq.ProteinSequence.alphabet,
-    seq.ProteinSequence.alphabet,
-    np.eye(len(seq.ProteinSequence.alphabet), dtype=np.int32),
-)
+_IDENTITY_MATRIX = {
+    molecule_type: align.SubstitutionMatrix(
+        alphabet,
+        alphabet,
+        np.eye(len(alphabet), dtype=np.int32),
+    )
+    for molecule_type, alphabet in (
+        (MoleculeType.PROTEIN, seq.ProteinSequence.alphabet),
+        (MoleculeType.NUCLEIC_ACID, seq.NucleotideSequence.alphabet_amb),
+    )
+}
 _PADDING = -1
 _UNMAPPABLE_ENTITY_ID = -1
 
@@ -610,7 +616,10 @@ def _all_global_mappings(
             list[tuple[struc.AtomArray, struc.AtomArray]]
         ] = []
         for ref_chain_i, pose_chain_i in zip(ref_chain_indices, pose_chain_indices):
-            if is_small_molecule(reference_chains[ref_chain_i]):
+            if (
+                MoleculeType.of(reference_chains[ref_chain_i])
+                == MoleculeType.SMALL_MOLECULE
+            ):
                 pose_mappings = _molecule_mappings(
                     reference_chains[ref_chain_i], pose_chains[pose_chain_i]
                 )
@@ -761,7 +770,7 @@ def _match_using_chain_order(
     matched_reference_chains = []
     matched_pose_chains = []
     for ref_i, pose_i in zip(reference_chain_order, pose_chain_order):
-        if is_small_molecule(reference_chains[ref_i]):
+        if MoleculeType.of(reference_chains[ref_i]) == MoleculeType.SMALL_MOLECULE:
             ref_atom_order, pose_atom_order = _find_optimal_molecule_permutation(
                 reference_chains[ref_i],
                 pose_chains[pose_i],
@@ -800,7 +809,7 @@ def _match_common_residues(
     reference: struc.AtomArray, pose: struc.AtomArray
 ) -> tuple[struc.AtomArray, struc.AtomArray]:
     """
-    Find common residues (and the common atoms) in two protein chains.
+    Find common residues (and the common atoms) in two polymer chains.
 
     Parameters
     ----------
@@ -825,7 +834,7 @@ def _match_common_residues(
     alignment = align.align_optimal(
         reference_sequence,
         pose_sequence,
-        _IDENTITY_MATRIX,
+        _IDENTITY_MATRIX[MoleculeType.of(reference)],
         # We get mismatches due to cropping, not due to evolution
         # -> linear gap penalty makes most sense
         gap_penalty=-1,
@@ -1182,24 +1191,28 @@ def _assign_entity_ids_to_chains(
             entity_ids.append(chain.entity_id[0].item())
         return np.array(entity_ids, dtype=int)
 
+    molecule_types = [MoleculeType.of(chain) for chain in chains]
     sequences = [
-        struc.to_sequence(chain)[0][0] if not is_small_molecule(chain) else None
-        for chain in chains
+        struc.to_sequence(chain)[0][0]
+        if molecule_type != MoleculeType.SMALL_MOLECULE
+        else None
+        for chain, molecule_type in zip(chains, molecule_types)
     ]
     if use_structure_match:
         molecules = [
-            _to_mol(chain) if is_small_molecule(chain) else None for chain in chains
+            _to_mol(chain) if molecule_type == MoleculeType.SMALL_MOLECULE else None
+            for chain, molecule_type in zip(chains, molecule_types)
         ]
 
     current_entity_id = 0
-    for i, (chain, sequence) in enumerate(zip(chains, sequences)):
+    for i, (chain, sequence, molecule_type) in enumerate(
+        zip(chains, sequences, molecule_types)
+    ):
         for j in range(i):
-            if (sequence is None and sequences[j] is not None) or (
-                sequence is not None and sequences[j] is None
-            ):
-                # Cannot match small molecules to polymer chains
+            if molecule_type != molecule_types[j]:
+                # Cannot match different molecule types to each other
                 continue
-            elif sequence is None:
+            elif molecule_type == MoleculeType.SMALL_MOLECULE:
                 # Small molecule case
                 if use_structure_match:
                     # It is only a complete structure match,
@@ -1222,7 +1235,7 @@ def _assign_entity_ids_to_chains(
                 alignment = align.align_optimal(
                     sequence,
                     sequences[j],
-                    _IDENTITY_MATRIX,
+                    _IDENTITY_MATRIX[molecule_type],
                     # We get mismatches due to experimental artifacts, not evolution
                     # -> linear gap penalty makes most sense
                     gap_penalty=-1,
@@ -1273,19 +1286,22 @@ def _all_anchor_combinations(
         raise UnmappableEntityError(
             "No chain in the pose can be mapped to a reference chain"
         )
-    protein_chain_mask = np.array(
-        [not is_small_molecule(chain) for chain in pose_chains]
+    polymer_chain_mask = np.array(
+        [
+            not MoleculeType.of(chain) != MoleculeType.SMALL_MOLECULE
+            for chain in pose_chains
+        ]
     )
-    valid_anchor_mask = protein_chain_mask & mappable_mask
+    valid_anchor_mask = polymer_chain_mask & mappable_mask
     if not valid_anchor_mask.any():
-        # No mappable protein chains
+        # No mappable polymer chains
         # -> Simply use the first mappable small molecule as anchor
         anchor_entity_id = pose_entity_ids[np.where(mappable_mask)[0][0]]
     else:
         valid_anchor_indices = np.where(valid_anchor_mask)[0]
-        protein_entity_ids = pose_entity_ids[valid_anchor_indices]
-        multiplicities_of_entity_ids = np.bincount(protein_entity_ids)
-        multiplicities = multiplicities_of_entity_ids[protein_entity_ids]
+        polymer_entity_ids = pose_entity_ids[valid_anchor_indices]
+        multiplicities_of_entity_ids = np.bincount(polymer_entity_ids)
+        multiplicities = multiplicities_of_entity_ids[polymer_entity_ids]
         least_multiplicity_indices = np.where(multiplicities == np.min(multiplicities))[
             0
         ]
@@ -1335,7 +1351,7 @@ def _get_superimposition_transform(
     transform : AffineTransformation
         The transformation that superimposes the pose chain onto the reference chain.
     """
-    if is_small_molecule(reference_chain):
+    if MoleculeType.of(reference_chain) == MoleculeType.SMALL_MOLECULE:
         if reference_chain.array_length() == pose_chain.array_length():
             _, transform = struc.superimpose(reference_chain, pose_chain)
         else:
@@ -1351,7 +1367,7 @@ def _get_superimposition_transform(
         _, transform, _, _ = struc.superimpose_homologs(
             reference_chain,
             pose_chain,
-            _IDENTITY_MATRIX,
+            _IDENTITY_MATRIX[MoleculeType.of(reference_chain)],
             gap_penalty=-1,
             min_anchors=1,
             # No outlier removal