-import copy
-import itertools
-import stat
-from typing import Optional, List, Union, Tuple
-import os
-import glob
-import tempfile
-import subprocess
-import re
-from pathlib import Path
-from urllib.request import urlretrieve
-from collections import OrderedDict
-
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.colors import to_hex
-from prody.proteins.functions import showProtein, view3D
-import py3Dmol
-import rdkit
-from rdkit import Chem
-from rdkit.Chem import AllChem, Draw
-from rdkit.Chem.rdMolAlign import AlignMol
-from rdkit.Chem import PandasTools
-import mols2grid
-import pandas
-
-
-from .conformers import generate_conformers
-from .toxicity import tox_props
-
-
-def replace_atom(mol: Chem.Mol, target_idx: int, new_atom: int) -> Chem.Mol:
- edit_mol = Chem.RWMol(mol)
- for atom in edit_mol.GetAtoms():
- if atom.GetIdx() == target_idx:
- atom.SetAtomicNum(new_atom)
- return Chem.Mol(edit_mol)
-
-
-def rep2D(mol, idx=-1, rdkit_mol=False, **kwargs):
- numbered = copy.deepcopy(mol)
- numbered.RemoveAllConformers()
- if idx:
- for atom in numbered.GetAtoms():
- atom.SetAtomMapNum(atom.GetIdx())
- AllChem.Compute2DCoords(numbered)
-
- if rdkit_mol:
- return numbered
- else:
- return Draw.MolToImage(numbered, **kwargs)
-
-
-def rep3D(mol):
- viewer = py3Dmol.view(width=300, height=300, viewergrid=(1, 1))
- for i in range(mol.GetNumConformers()):
- mb = Chem.MolToMolBlock(mol, confId=i)
- viewer.addModel(mb, "mol")
- viewer.setStyle({"stick": {}})
- viewer.zoomTo()
- return viewer
-
-
-def is_linker(rmol):
- """
- Check if the molecule is a linker by checking if it has 2 R-group points
- """
- if len([atom for atom in rmol.GetAtoms() if atom.GetAtomMapNum() in (1, 2)]) == 2:
- return True
-
- return False
-
-
-def __getAttachmentVector(R_group):
- """In the R-group or a linker, search for the position of the attachment point (R atom)
- and extract the atom (currently only single bond supported). In case of the linker,
- the R1 atom is selected.
- rgroup: fragment passed as rdkit molecule
- return: tuple (ratom, ratom_neighbour)
- """
-
- # find the R groups in the molecule
- ratoms = [atom for atom in R_group.GetAtoms() if atom.GetAtomicNum() == 0]
- if not len(ratoms):
- raise Exception(
- "The R-group does not have R-atoms (Atoms with index == 0, visualised with a '*' character)"
- )
-
- # if it is a linker, it will have more than 1 R group, pick the one with index 1
- if len(ratoms) == 1:
- atom = ratoms[0]
- elif is_linker(R_group):
- # find the attachable point
- ratoms = [atom for atom in ratoms if atom.GetAtomMapNum() == 1]
- atom = ratoms[0]
- else:
- raise Exception(
- "Either missing R-atoms, or more than two R-atoms. "
- '"Atom.GetAtomicNum" should be 0 for the R-atoms, and in the case of the linker, '
- '"Atom.GetAtomMapNum" has to specify the order (1,2) '
- )
-
- neighbours = atom.GetNeighbors()
- if len(neighbours) > 1:
- raise NotImplementedError(
- "The linking R atom in the R group has two or more attachment points. "
- )
-
- return atom, neighbours[0]
-
-
-def merge_R_group(mol, R_group, replaceIndex):
- """function originally copied from
- https://github.com/molecularsets/moses/blob/master/moses/baselines/combinatorial.py
- """
-
- # the linking R atom on the R group
- rgroup_R_atom, R_atom_neighbour = __getAttachmentVector(R_group)
- print(f"Rgroup atom index {rgroup_R_atom} neighbouring {R_atom_neighbour}")
-
- # atom to be replaced in the molecule
- replace_atom = mol.GetAtomWithIdx(replaceIndex)
- assert (
- len(replace_atom.GetNeighbors()) == 1
- ), "The atom being replaced on the molecule has more neighbour atoms than 1. Not supported."
- replace_atom_neighbour = replace_atom.GetNeighbors()[0]
-
- # align the Rgroup
- AlignMol(
- R_group,
- mol,
- atomMap=(
- (R_atom_neighbour.GetIdx(), replace_atom.GetIdx()),
- (rgroup_R_atom.GetIdx(), replace_atom_neighbour.GetIdx()),
- ),
- )
-
- # merge the two molecules
- combined = Chem.CombineMols(mol, R_group)
- emol = Chem.EditableMol(combined)
-
- # connect
- bond_order = rgroup_R_atom.GetBonds()[0].GetBondType()
- emol.AddBond(
- replace_atom_neighbour.GetIdx(),
- R_atom_neighbour.GetIdx() + mol.GetNumAtoms(),
- order=bond_order,
- )
- # -1 accounts for the removed linking atom on the template
- emol.RemoveAtom(rgroup_R_atom.GetIdx() + mol.GetNumAtoms())
- # remove the linking atom on the template
- emol.RemoveAtom(replace_atom.GetIdx())
-
- merged = emol.GetMol()
- Chem.SanitizeMol(merged)
-
- # if the molecule was previously merged
- if hasattr(mol, "template") and mol.template is not None:
- # mol already had a connected e.g. a linker, therefore we use the area without the linker
- template = mol.template
- else:
- # prepare the template
- etemp = Chem.EditableMol(mol)
- etemp.RemoveAtom(replace_atom.GetIdx())
- template = etemp.GetMol()
-
- with_template = RMol(merged)
- with_template._save_template(template)
- # save the group
- with_template._save_rgroup(R_group)
-
- if is_linker(R_group):
- # the linker's label = 1 was used for the merging,
- # rename label = 2 to 0 to turn it into a simple R-group
- for atom in with_template.GetAtoms():
- if atom.GetAtomMapNum() == 2:
- atom.SetAtomMapNum(0)
-
- return with_template
-
-
-def ic50(x):
- return 10 ** (-x - -9)
-
-
-class RInterface:
- """
- This is a shared interface for a molecule and a list of molecules.
-
- The main purpose is to allow using the same functions on a single molecule and on a group of them.
- """
-
- def rep2D(self, **kwargs):
- pass
-
- def toxicity(self):
- pass
-
- def generate_conformers(
- self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs
- ):
- pass
-
- def remove_clashing_confs(self, prot, min_dst_allowed=1):
- pass
-
-
-[docs]class RMol(rdkit.Chem.rdchem.Mol, RInterface):
-
"""
-
RMol is essentially a wrapper around RDKit Mol with
-
tailored functionalities for attaching R groups, etc.
-
-
:param rmol: when provided, energies and additional metadata is preserved.
-
:type rmol: RMol
-
:param template: Provide the original molecule template
-
used for this RMol.
-
"""
-
-
gnina_dir = None
-
-
def __init__(self, *args, id=None, template=None, **kwargs):
-
super().__init__(*args, **kwargs)
-
-
if isinstance(args[0], RMol) or isinstance(args[0], rdkit.Chem.Mol):
-
self.template = args[0].template if hasattr(args[0], "template") else None
-
self.rgroup = args[0].rgroup if hasattr(args[0], "rgroup") else None
-
self.opt_energies = (
-
args[0].opt_energies if hasattr(args[0], "opt_energies") else None
-
)
-
self.id = args[0].id if hasattr(args[0], "id") else None
-
else:
-
self.template = template
-
self.rgroup = None
-
self.opt_energies = None
-
self.id = id
-
-
def _save_template(self, mol):
-
self.template = RMol(copy.deepcopy(mol))
-
-
def _save_rgroup(self, rgroup):
-
self.rgroup = rgroup
-
-
def _save_opt_energies(self, energies):
-
self.opt_energies = energies
-
-
[docs] def toxicity(self):
-
"""
-
Assessed various ADMET properties, including
-
- Lipinksi rule of 5 properties,
-
- the presence of unwanted substructures
-
- problematic functional groups
-
- synthetic accessibility
-
-
:return: a row of a dataframe with the descriptors
-
:rtype: dataframe
-
"""
-
df = tox_props(self)
-
# add an index column to the front
-
df.insert(0, "ID", self.id)
-
df.set_index("ID", inplace=True)
-
-
# add a column with smiles
-
df = df.assign(Smiles=[Chem.MolToSmiles(self)])
-
-
return df
-
-
-
-
[docs] def optimise_in_receptor(self, *args, **kwargs):
-
"""
-
Enumerate the conformers inside of the receptor by employing
-
ANI2x, a hybrid machine learning / molecular mechanics (ML/MM) approach.
-
ANI2x is neural nework potential for the ligand energetics
-
but works only for the following atoms: H, C, N, O, F, S, Cl.
-
-
Open Force Field Parsley force field is used for intermolecular interactions with the receptor.
-
-
:param sigma_scale_factor: is used to scale the Lennard-Jones radii of the atoms.
-
:param relative_permittivity: is used to scale the electrostatic interactions with the protein.
-
:param water_model: can be used to set the force field for any water molecules present in the binding site.
-
"""
-
if self.GetNumConformers() == 0:
-
print("Warning: no conformers so cannot optimise_in_receptor. Ignoring.")
-
return
-
-
from .receptor import ForceField, optimise_in_receptor
-
-
opt_mol, energies = optimise_in_receptor(self, *args, **kwargs)
-
# replace the conformers with the optimised ones
-
self.RemoveAllConformers()
-
[
-
self.AddConformer(conformer, assignId=True)
-
for conformer in opt_mol.GetConformers()
-
]
-
# save the energies
-
self._save_opt_energies(energies)
-
-
# build a dataframe with the molecules
-
conformer_ids = [c.GetId() for c in self.GetConformers()]
-
df = pandas.DataFrame(
-
{
-
"ID": [self.id] * len(energies),
-
"Conformer": conformer_ids,
-
"Energy": energies,
-
}
-
)
-
-
return df
-
-
-
-
[docs] def rep2D(self, **kwargs):
-
"""
-
Use RDKit and get a 2D diagram.
-
Uses Compute2DCoords and Draw.MolToImage function
-
-
Works with IPython Notebook.
-
-
:param **kwargs: are passed further to Draw.MolToImage function.
-
"""
-
return rep2D(self, **kwargs)
-
-
[docs] def rep3D(
-
self, view=None, prody=None, template=False, confIds: Optional[List[int]] = None
-
):
-
"""
-
Use py3Dmol to obtain the 3D view of the molecule.
-
-
Works with IPython Notebook.
-
-
:param view: a view to which add the visualisation. Useful if one wants to 3D view
-
multiple conformers in one view.
-
:type view: py3Dmol view instance (None)
-
:param prody: A prody protein around which a view 3D can be created
-
:type prody: Prody instance (Default: None)
-
:param template: Whether to visualise the original 3D template as well from which the molecule was made.
-
:type template: bool (False)
-
:param confIds: Select the conformations for display.
-
:type confIds: List[int]
-
"""
-
if prody is not None:
-
view = view3D(prody)
-
-
if view is None:
-
view = py3Dmol.view(width=400, height=400, viewergrid=(1, 1))
-
-
for conf in self.GetConformers():
-
# ignore the confIds we've not asked for
-
if confIds is not None and conf.GetId() not in confIds:
-
continue
-
-
mb = Chem.MolToMolBlock(self, confId=conf.GetId())
-
view.addModel(mb, "lig")
-
-
# use reverse indexing to reference the just added conformer
-
# http://3dmol.csb.pitt.edu/doc/types.html#AtomSelectionSpec
-
# cmap = plt.get_cmap("tab20c")
-
# hex = to_hex(cmap.colors[i]).split('#')[-1]
-
view.setStyle({"model": -1}, {"stick": {}})
-
-
if template:
-
mb = Chem.MolToMolBlock(self.template)
-
view.addModel(mb, "template")
-
# show as sticks
-
view.setStyle({"model": -1}, {"stick": {"color": "0xAF10AB"}})
-
-
# zoom to the last added model
-
view.zoomTo({"model": -1})
-
return view
-
-
[docs] def remove_clashing_confs(self, prot, min_dst_allowed=1.0):
-
"""
-
Removing conformations that class with the protein.
-
Note that the original conformer should be well docked into the protein,
-
ideally with some space between the area of growth and the protein,
-
so that any growth on the template doesn't automatically cause
-
clashes.
-
-
:param prot: The protein against which the conformers should be tested.
-
:type prot: Prody instance
-
:param min_dst_allowed: If any atom is within this distance in a conformer, the
-
conformer will be deleted.
-
:type min_dst_allowed: float in Angstroms
-
"""
-
prot_coords = prot.getCoords()
-
-
counter = 0
-
for conf in list(self.GetConformers())[::-1]:
-
confid = conf.GetId()
-
-
# for each atom check how far it is from the protein atoms
-
eacheach_shortest = []
-
for coord in conf.GetPositions():
-
shortest = np.min(np.sqrt(np.sum((coord - prot_coords) ** 2, axis=1)))
-
eacheach_shortest.append(shortest)
-
-
min_dst = np.min(eacheach_shortest)
-
-
if min_dst < min_dst_allowed:
-
self.RemoveConformer(confid)
-
print(f"Clash with the protein. Removing conformer id: {confid}")
-
-
[docs] @staticmethod
-
def set_gnina(loc):
-
"""
-
Set the location of the binary file gnina. This could be your own compiled directory,
-
or a directory where you'd like it to be downloaded.
-
-
By default, gnina path is to the working directory (~500MB).
-
-
:param loc: path to gnina binary file. E.g. /dir/path/gnina. Note that right now gnina should
-
be a binary file with that specific filename "gnina".
-
:type loc: str
-
"""
-
# set gnina location
-
path = Path(loc)
-
if path.is_file():
-
assert path.name == "gnina", 'Please ensure gnina binary is named "gnina"'
-
RMol.gnina_dir = path.parent
-
else:
-
raise Exception("The path is not the binary file gnina")
-
# extend this with running a binary check
-
-
@staticmethod
-
def _check_download_gnina():
-
"""
-
Check if gnina works. Otherwise, download it.
-
"""
-
if RMol.gnina_dir is None:
-
# assume it is in the current directory
-
RMol.gnina_dir = os.getcwd()
-
-
# check if gnina works
-
try:
-
subprocess.run(
-
["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir
-
)
-
return
-
except FileNotFoundError as E:
-
pass
-
-
# gnina is not found, try downloading it
-
print(f"Gnina not found or set. Download gnina (~500MB) into {RMol.gnina_dir}")
-
gnina = os.path.join(RMol.gnina_dir, "gnina")
-
# fixme - currently download to the working directory (Home could be more applicable).
-
urlretrieve(
-
"https://github.com/gnina/gnina/releases/download/v1.0.1/gnina",
-
filename=gnina,
-
)
-
# make executable (chmod +x)
-
mode = os.stat(gnina).st_mode
-
os.chmod(gnina, mode | stat.S_IEXEC)
-
-
# check if it works
-
subprocess.run(["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir)
-
-
[docs] def gnina(self, receptor_file):
-
"""
-
Use gnina to extract CNNaffinity, and convert it into IC50.
-
-
LIMITATION: currenly the gnina binaries do not support Mac.
-
-
:param receptor_file: Path to the receptor file.
-
:type receptor_file: str
-
"""
-
self._check_download_gnina()
-
-
# obtain the absolute file to the receptor
-
receptor = Path(receptor_file)
-
assert receptor.exists()
-
-
# make a temporary sdf file for gnina
-
tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".sdf")
-
with Chem.SDWriter(tmp.name) as w:
-
for conformer in self.GetConformers():
-
w.write(self, confId=conformer.GetId())
-
-
# run the code on the sdf
-
process = subprocess.run(
-
[
-
"./gnina",
-
"--score_only",
-
"-l",
-
tmp.name,
-
"-r",
-
receptor.absolute(),
-
"--seed",
-
"0",
-
"--stripH",
-
"False",
-
],
-
capture_output=True,
-
cwd=RMol.gnina_dir,
-
)
-
output = process.stdout.decode("utf-8")
-
CNNaffinities_str = re.findall(r"CNNaffinity: (-?\d+.\d+)", output)
-
-
# convert to floats
-
CNNaffinities = list(map(float, CNNaffinities_str))
-
-
# generate IC50 from the CNNaffinities
-
ic50s = list(map(ic50, CNNaffinities))
-
-
# create a dataframe
-
conformer_ids = [c.GetId() for c in self.GetConformers()]
-
df = pandas.DataFrame(
-
{
-
"ID": [self.id] * len(CNNaffinities),
-
"Conformer": conformer_ids,
-
"CNNaffinity": CNNaffinities,
-
"CNNaffinity->IC50s": ic50s,
-
}
-
)
-
-
return df
-
-
[docs] def to_file(self, file_name: str):
-
"""
-
Write the molecule and all conformers to file.
-
-
Note:
-
The file type is worked out from the name extension by splitting on `.`.
-
"""
-
file_type = file_name.split(".")[-1]
-
write_functions = {
-
"mol": Chem.MolToMolBlock,
-
"sdf": Chem.MolToMolBlock,
-
"pdb": Chem.MolToPDBBlock,
-
"xyz": Chem.MolToXYZBlock,
-
}
-
-
func = write_functions.get(file_type, None)
-
if func is None:
-
raise RuntimeError(
-
f"The file type {file_type} is not support please chose from {write_functions.keys()}"
-
)
-
-
with open(file_name, "w") as output:
-
for conformer in self.GetConformers():
-
output.write(func(self, confId=conformer.GetId()))
-
-
[docs] def df(self):
-
"""
-
Generate a pandas dataframe row for this molecule with SMILES.
-
-
:returns: pandas dataframe row.
-
"""
-
df = pandas.DataFrame(
-
{
-
"ID": [self.id],
-
"Smiles": [Chem.MolToSmiles(self)],
-
}
-
)
-
# attach energies if they're present
-
if self.opt_energies:
-
df = df.assign(
-
Energies=", ".join([str(e) for e in sorted(self.opt_energies)])
-
)
-
-
df.set_index(["ID"], inplace=True)
-
return df
-
-
def _repr_html_(self):
-
# return a dataframe with the rdkit visualisation
-
-
df = self.df()
-
-
# add a visualisation column
-
PandasTools.AddMoleculeColumnToFrame(
-
df, "Smiles", "Molecule", includeFingerprints=True
-
)
-
return df._repr_html_()
-
-
-class RGroupGrid(mols2grid.MolGrid):
- """
- A wrapper around the mols to grid class to load and process the r group folders locally.
- """
-
- def __init__(self):
- dataframe = self._load_molecules()
-
- super(RGroupGrid, self).__init__(
- dataframe, removeHs=True, mol_col="Mol", use_coords=False, name="m2"
- )
-
- def _load_molecules(self) -> pandas.DataFrame:
- """
- Load the local r groups into rdkit molecules
- """
- molecules = []
- names = []
-
- builtin_rgroups = Path(__file__).parent / "data" / "rgroups" / "library"
- for molfile in glob.glob(str(builtin_rgroups / "*.mol")):
- r_mol = Chem.MolFromMolFile(molfile, removeHs=False)
- molecules.append(r_mol)
- names.append(Path(molfile).stem)
-
- # highlight the attachment atom
- for atom in r_mol.GetAtoms():
- if atom.GetAtomicNum() == 0:
- setattr(r_mol, "__sssAtoms", [atom.GetIdx()])
-
- return pandas.DataFrame({"Mol": molecules, "Name": names})
-
- def _ipython_display_(self):
- from IPython.display import display, update_display, display_html
-
- subset = ["img", "Name", "mols2grid-id"]
- display_html(self.display(subset=subset, substruct_highlight=True))
-
- def get_selected(self):
- # use the new API
- df = self.get_selection()
- # now get a list of the molecules
- return list(df["Mol"])
-
-
-class RLinkerGrid(mols2grid.MolGrid):
- """
- A wrapper around the mols to grid class to load and process the linker folders locally.
- """
-
- def __init__(self):
- dataframe = self._load_molecules()
-
- super(RLinkerGrid, self).__init__(
- dataframe,
- removeHs=True,
- mol_col="Mol",
- use_coords=False,
- name="m1",
- prerender=False,
- )
-
- def _load_molecules(self) -> pandas.DataFrame:
- """
- Load the local linkers into rdkit molecules
- """
- builtin_rlinkers = Path(__file__).parent / "data" / "linkers" / "library"
- linker_files = glob.glob(str(builtin_rlinkers / "*.sdf"))
- # sort the linkers so that [R1]C[R2] is next to [R2]C[R1] in the grid
- linker_files = sorted(
- linker_files,
- key=lambda smiles: smiles.replace("[*:1]", "R").replace("[*:2]", "R"),
- )
-
- linkers = []
- for molfile in linker_files:
- r_mol = list(Chem.SDMolSupplier(molfile, removeHs=False)).pop()
-
- # generate a searchable name in the form of a simple SMILE without hydrogens
- name = (
- Chem.MolToSmiles(Chem.RemoveHs(r_mol), canonical=False)
- .replace("[*:1]", "R1")
- .replace("[*:2]", "R2")
- )
-
- # simplify the names for the user
- linkers.append(
- {
- "Mol": r_mol,
- "Name": name,
- "Common": r_mol.GetIntProp(
- "SmileIndex"
- ), # extract the index property from the original publication
- }
- )
-
- # presort using the original publication index
- linkers = sorted(linkers, key=lambda i: i["Common"])
-
- return pandas.DataFrame(linkers)
-
- def _ipython_display_(self):
- from IPython.display import display
-
- subset = ["img", "Name", "mols2grid-id"]
- return display(self.display(subset=subset, substruct_highlight=True))
-
- def get_selected(self):
- # use the new API
- df = self.get_selection()
- # now get a list of the molecules
- return list(df["Mol"])
-
-
-def link(Rgroups, Rlinkers, One2One=False):
- """
-
- :param Rgroups:
- :param Rlinkers:
- :param One2One: If True, for each selected RGroup, a selected linker with the same index is added.
- Alternatively, each linker is added to each RGroup resulting in n x m number of combinsions, with n being
- the number of RGroups and m the number of linkers.
- :return:
- """
-
- # convert rgroups/linkers to smiles
- rgroup_smiles = [Chem.MolToSmiles(mol) for mol in Rgroups]
- linker_smiles = [Chem.MolToSmiles(mol) for mol in Rlinkers]
-
- # loop over and combine smiles
- combined = []
- if One2One:
- for linker, rgroup in zip(linker_smiles, rgroup_smiles):
- combined.append(rgroup.replace("*", "*" + linker))
- else:
- for linker in linker_smiles:
- for rgroup in rgroup_smiles:
- combined.append(rgroup.replace("*", "*" + linker))
-
- # generate 3D conformers for each molecule
- mols = []
- for smile in combined:
- mol = Chem.MolFromSmiles(smile)
- AllChem.EmbedMultipleConfs(mol, numConfs=1, params=AllChem.ETKDGv3())
- mols.append(mol)
- return mols
-
-
-[docs]class RList(RInterface, list):
-
"""
-
Streamline working with RMol by presenting the same interface on the list,
-
and allowing to dispatch the functions to any single member.
-
"""
-
-
def rep2D(self, subImgSize=(400, 400), **kwargs):
-
return Draw.MolsToGridImage(
-
[mol.rep2D(rdkit_mol=True, **kwargs) for mol in self], subImgSize=subImgSize
-
)
-
-
def toxicity(self):
-
df = pandas.concat([m.toxicity() for m in self])
-
# add a column with the visualisation
-
PandasTools.AddMoleculeColumnToFrame(
-
df, "Smiles", "Molecule", includeFingerprints=True
-
)
-
-
return df
-
-
def generate_conformers(
-
self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs
-
):
-
for i, rmol in enumerate(self):
-
print(f"RMol index {i}")
-
rmol.generate_conformers(num_conf, minimum_conf_rms, **kwargs)
-
-
def GetNumConformers(self):
-
return [rmol.GetNumConformers() for rmol in self]
-
-
def remove_clashing_confs(self, prot, min_dst_allowed=1):
-
for i, rmol in enumerate(self):
-
print(f"RMol index {i}")
-
rmol.remove_clashing_confs(prot, min_dst_allowed=min_dst_allowed)
-
-
[docs] def optimise_in_receptor(self, *args, **kwargs):
-
"""
-
Replace the current molecule with the optimised one. Return lists of energies.
-
"""
-
# return pandas.concat([m.toxicity() for m in self])
-
-
dfs = []
-
for i, rmol in enumerate(self):
-
print(f"RMol index {i}")
-
dfs.append(rmol.optimise_in_receptor(*args, **kwargs))
-
-
df = pandas.concat(dfs)
-
df.set_index(["ID", "Conformer"], inplace=True)
-
return df
-
-
def sort_conformers(self, energy_range=5):
-
dfs = []
-
for i, rmol in enumerate(self):
-
print(f"RMol index {i}")
-
dfs.append(rmol.sort_conformers(energy_range))
-
-
df = pandas.concat(dfs)
-
df.set_index(["ID", "Conformer"], inplace=True)
-
return df
-
-
def gnina(self, receptor_file):
-
dfs = []
-
for i, rmol in enumerate(self):
-
print(f"RMol index {i}")
-
dfs.append(rmol.gnina(receptor_file))
-
-
df = pandas.concat(dfs)
-
df.set_index(["ID", "Conformer"], inplace=True)
-
return df
-
-
[docs] def discard_missing(self):
-
"""
-
Remove from this list the molecules that have no conformers
-
"""
-
removed = []
-
for rmol in self[::-1]:
-
if rmol.GetNumConformers() == 0:
-
rmindex = self.index(rmol)
-
print(
-
f"Discarding a molecule (id {rmindex}) due to the lack of conformers. "
-
)
-
self.remove(rmol)
-
removed.append(rmindex)
-
return removed
-
-
def _repr_html_(self):
-
# return the dataframe with the visualisation column of the dataframe
-
-
df = pandas.concat([rmol.df() for rmol in self])
-
-
# add a column with the visualisation
-
PandasTools.AddMoleculeColumnToFrame(
-
df, "Smiles", "Molecule", includeFingerprints=True
-
)
-
return df._repr_html_()
-
-
-[docs]def build_molecules(
-
templates: Union[Chem.Mol, List[Chem.Mol]],
-
r_groups: Union[Chem.Mol, List[Chem.Mol], int],
-
attachment_points: Optional[List[int]] = None,
-
):
-
"""
-
For the given core molecule and list of attachment points
-
and r groups enumerate the possible molecules and
-
return a list of them.
-
-
:param template: The core scaffold molecule to attach the r groups to, or a list of them.
-
:param r_group: The list of rdkit molecules which should be considered
-
r groups or the RGroup Grid with highlighted molecules.
-
:param attachment_points: The list of atom index in the core ligand
-
that the r groups should be attached to. If it is empty, connecting points are sought out and matched.
-
"""
-
-
# fixme - special case after changing the API: remove in the future
-
# This is a temporary warning about the change in the interface.
-
# This change is because there are situations where the attachment_points do not need to be passed to the function.
-
if (
-
isinstance(r_groups, list)
-
and len(r_groups) > 0
-
and isinstance(r_groups[0], int)
-
):
-
print(
-
'Warning: second argument is detected to be an integer. It is now "r_groups" '
-
"whereas attachement_points are provided as the 3rd argument. "
-
)
-
raise Exception(
-
"Please note that after adding the linker to FEgrow (version 1.1), "
-
'the "build_molecules" function interface has changed to'
-
' "build_molecules(core_ligand, r_groups, attachment_points)". '
-
)
-
-
# ensure template and r_group are lists
-
if not issubclass(templates.__class__, List):
-
templates = [templates]
-
if not issubclass(r_groups.__class__, List):
-
r_groups = [r_groups]
-
-
# make a deep copy of r_groups/linkers to ensure we don't modify the library
-
templates = [copy.deepcopy(mol) for mol in templates]
-
r_groups = [copy.deepcopy(mol) for mol in r_groups]
-
-
# get attachment points for each template
-
if not attachment_points:
-
# attempt to generate the attachment points by picking the joining molecule
-
# case: a list of templates previously joined with linkers requires iterating over them
-
attachment_points = [__getAttachmentVector(lig)[0].GetIdx() for lig in templates]
-
-
if not attachment_points:
-
raise Exception("Could not find attachement points. ")
-
-
if len(attachment_points) != len(templates):
-
raise Exception(
-
f"There must be one attachment point for each template. "
-
f"Provided attachement points = {len(attachment_points)} "
-
f"with templates number: {len(templates)}"
-
)
-
-
combined_mols = RList()
-
id_counter = 0
-
for atom_idx, core_ligand in zip(attachment_points, templates):
-
for r_mol in r_groups:
-
core_mol = RMol(copy.deepcopy(core_ligand))
-
merged_mol = merge_R_group(
-
mol=core_mol, R_group=r_mol, replaceIndex=atom_idx
-
)
-
# assign the identifying index to the molecule
-
merged_mol.id = id_counter
-
combined_mols.append(merged_mol)
-
id_counter += 1
-
-
return combined_mols
-