From b4aeb1147f09cb2482212894f5c2cc8684076ff3 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 05:33:37 -0700 Subject: [PATCH 01/81] new PR --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index a9ae5036..4a7277fe 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -52,7 +52,7 @@ logger = logging.getLogger('mdpow.workflows.dihedrals') - +# init new Mol plot PR SOLVENTS_DEFAULT = ('water', 'octanol') """Default solvents are water and octanol: From cc17a6b6248702e54ad6c3ec2ee2ff2278a962bd Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 06:26:18 -0700 Subject: [PATCH 02/81] update mol object methods, change dihedral_indices naming convention to atom_indices/get_atom_indices --- mdpow/workflows/dihedrals.py | 68 +++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 4a7277fe..73dc04d2 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -22,7 +22,7 @@ .. autofunction:: automated_dihedral_analysis .. autofunction:: build_universe .. autofunction:: rdkit_conversion -.. autofunction:: dihedral_indices +.. autofunction:: get_atom_indices .. autofunction:: dihedral_groups .. autofunction:: dihedral_groups_ensemble .. autofunction:: save_df @@ -38,6 +38,7 @@ import pandas as pd from rdkit import Chem +from rdkit.Chem import rdCoordGen import seaborn as sns import matplotlib.pyplot as plt @@ -52,8 +53,6 @@ logger = logging.getLogger('mdpow.workflows.dihedrals') -# init new Mol plot PR - SOLVENTS_DEFAULT = ('water', 'octanol') """Default solvents are water and octanol: @@ -94,7 +93,7 @@ def build_universe(dirname): ``water/Coulomb/0000`` topology and trajectory for the specified project. Used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` - and :func:`~mdpow.workflows.dihedrals.dihedral_indices` to obtain atom indices + and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices for each dihedral atom group. :keywords: @@ -157,18 +156,31 @@ def rdkit_conversion(u, resname): """ - try: - solute = u.select_atoms(f'resname {resname}') - mol = solute.convert_to('RDKIT') - except AttributeError: - elements = [guess_atom_element(name) for name in u.atoms.names] - u.add_TopologyAttr("elements", elements) - solute = u.select_atoms(f'resname {resname}') - mol = solute.convert_to('RDKIT') + # i think somewhere we decided to ditch try/except method, will check notes + # notes pertaining to testing topology attributes for explicit Hs + + #try: + # solute = u.select_atoms(f'resname {resname}') + # mol = solute.convert_to('RDKIT') + #except AttributeError: + # elements = [guess_atom_element(name) for name in u.atoms.names] + # u.add_TopologyAttr("elements", elements) + # solute = u.select_atoms(f'resname {resname}') + # mol = solute.convert_to('RDKIT') + + elements = [guess_atom_element(name) for name in u.atoms.names] + u.add_TopologyAttr("elements", elements) + + # com or cog? + solute = u.select_atoms(f'resname {resname}') + solute.unwrap(compound='residues', reference='com') + + mol = solute.convert_to('RDKIT') + rdCoordGen.AddCoords(mol) return mol, solute -def dihedral_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): +def get_atom_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): '''Uses a SMARTS selection string to identify indices for relevant dihedral atom groups. @@ -200,7 +212,7 @@ def dihedral_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): :returns: - *atom_group_indices* + *atom_indices* tuple of tuples of indices for each dihedral atom group ''' @@ -208,18 +220,18 @@ def dihedral_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): u = build_universe(dirname=dirname) mol = rdkit_conversion(u=u, resname=resname)[0] pattern = Chem.MolFromSmarts(SMARTS) - atom_group_indices = mol.GetSubstructMatches(pattern) + atom_indices = mol.GetSubstructMatches(pattern) - return atom_group_indices + return atom_indices def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): '''Uses the indices of the relevant dihedral atom groups determined - by :func:`~mdpow.workflows.dihedral.dihedral_indices` + by :func:`~mdpow.workflows.dihedral.get_atom_indices` and returns the names for each atom in each group. Requires an MDPOW project directory and `resname` as input. Expands upon usage of - :func:`~mdpow.workflows.dihedral.dihedral_indices` + :func:`~mdpow.workflows.dihedral.get_atom_indices` to return an array of the names of each atom within its respective dihedral atom group as identified by the SMARTS selection string. Not necessary @@ -253,14 +265,14 @@ def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): ''' u = build_universe(dirname=dirname) - solute = rdkit_conversion(u=u, resname=resname)[1] - atom_group_indices = dihedral_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) - dihedral_groups = [solute.atoms[list(dihedral_indices)].names for dihedral_indices - in atom_group_indices] + _, solute = rdkit_conversion(u=u, resname=resname) + atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) + dihedral_groups = [solute.atoms[list(a_ind)].names for a_ind + in atom_indices] return dihedral_groups -def dihedral_groups_ensemble(dirname, atom_group_indices, +def dihedral_groups_ensemble(dirname, atom_indices, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, start=None, stop=None, step=None): @@ -286,10 +298,10 @@ def dihedral_groups_ensemble(dirname, atom_group_indices, and .xtc files for trajectory. It will default to using the tpr file available. - *atom_group_indices* + *atom_indices* tuples of atom indices for dihedral atom groups - .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_indices` + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. @@ -314,7 +326,7 @@ def dihedral_groups_ensemble(dirname, atom_group_indices, dih_ens = mdpow.analysis.ensemble.Ensemble(dirname=dirname, solvents=solvents, interactions=interactions) - indices = atom_group_indices + indices = atom_indices all_dihedrals = [dih_ens.select_atoms(f'index {i[0]}', f'index {i[1]}', f'index {i[2]}', @@ -673,8 +685,8 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, else: - atom_group_indices = dihedral_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) - df = dihedral_groups_ensemble(atom_group_indices=atom_group_indices, dirname=dirname, + atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) + df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step) From 30ad175ec6d1c0aa545c68427e891303db10dd0f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 06:37:24 -0700 Subject: [PATCH 03/81] add get_bond_indices --- mdpow/workflows/dihedrals.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 73dc04d2..b3e8f0dc 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -224,6 +224,23 @@ def get_atom_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): return atom_indices +def get_bond_indices(mol, atom_indices): + + bonds = [] + + for aix in atom_indices: + + x = mol.GetBondBetweenAtoms(aix[0], aix[1]).GetIdx() + y = mol.GetBondBetweenAtoms(aix[1], aix[2]).GetIdx() + z = mol.GetBondBetweenAtoms(aix[2], aix[3]).GetIdx() + bix = (x, y, z) + + bonds.append(bix) + + bond_indices = tuple(bonds) + + return bond_indices + def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): '''Uses the indices of the relevant dihedral atom groups determined by :func:`~mdpow.workflows.dihedral.get_atom_indices` From 8d67d81eb86d1093481ba28466ff981a13fd5bea Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 06:41:23 -0700 Subject: [PATCH 04/81] reflect name change in tests from dihedrals automated workflow --- mdpow/tests/test_automated_dihedral_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 05d43de4..fd7802d6 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -44,11 +44,11 @@ def SM25_tmp_dir(self, molname_workflows_directory): @pytest.fixture(scope="function") def atom_indices(self, SM25_tmp_dir): - atom_group_indices = dihedrals.dihedral_indices(dirname=SM25_tmp_dir, resname=self.resname) + atom_group_indices = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, resname=self.resname) # testing optional user input of alternate SMARTS string # for automated dihedral atom group selection - atom_group_indices_alt = dihedrals.dihedral_indices(dirname=SM25_tmp_dir, + atom_group_indices_alt = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, resname=self.resname, SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') return atom_group_indices, atom_group_indices_alt From 59f32a52585035b57bb5ce952faa6e050abf4c49 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 06:48:41 -0700 Subject: [PATCH 05/81] missed name change in test --- mdpow/tests/test_automated_dihedral_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index fd7802d6..97ebc6a8 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -59,7 +59,7 @@ def atom_indices(self, SM25_tmp_dir): @pytest.fixture(scope="function") def dihedral_data(self, SM25_tmp_dir, atom_indices): atom_group_indices, _ = atom_indices - df = dihedrals.dihedral_groups_ensemble(atom_group_indices=atom_group_indices, + df = dihedrals.dihedral_groups_ensemble(atom_indices=atom_group_indices, dirname=SM25_tmp_dir, solvents=('water',)) df_aug = dihedrals.periodic_angle(df) From 9a7c25af41cce9ed8300ebd20912729f4d370287 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 09:27:13 -0700 Subject: [PATCH 06/81] fix bond highlighting and add low resolution plotting function for dihedrals --- mdpow/workflows/dihedrals.py | 70 ++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index b3e8f0dc..2f6f4808 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -180,7 +180,7 @@ def rdkit_conversion(u, resname): return mol, solute -def get_atom_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): +def get_atom_indices(dirname, mol, SMARTS=SMARTS_DEFAULT): '''Uses a SMARTS selection string to identify indices for relevant dihedral atom groups. @@ -217,8 +217,8 @@ def get_atom_indices(dirname, resname, SMARTS=SMARTS_DEFAULT): ''' - u = build_universe(dirname=dirname) - mol = rdkit_conversion(u=u, resname=resname)[0] + #u = build_universe(dirname=dirname) + #mol = rdkit_conversion(u=u, resname=resname)[0] pattern = Chem.MolFromSmarts(SMARTS) atom_indices = mol.GetSubstructMatches(pattern) @@ -241,7 +241,7 @@ def get_bond_indices(mol, atom_indices): return bond_indices -def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): +def get_dihedral_groups(solute, atom_indices): '''Uses the indices of the relevant dihedral atom groups determined by :func:`~mdpow.workflows.dihedral.get_atom_indices` and returns the names for each atom in each group. @@ -281,14 +281,31 @@ def dihedral_groups(dirname, resname, SMARTS=SMARTS_DEFAULT): ''' - u = build_universe(dirname=dirname) - _, solute = rdkit_conversion(u=u, resname=resname) - atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) + #u = build_universe(dirname=dirname) + #_, solute = rdkit_conversion(u=u, resname=resname) + #atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) dihedral_groups = [solute.atoms[list(a_ind)].names for a_ind in atom_indices] return dihedral_groups +def get_paired_indices(atom_indices, bond_indices, dihedral_groups): + + dg_list = [] + for dg in dihedral_groups: + group = (f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}') + dg_list.append(group) + all_dgs = tuple(dg_list) + + if (len(atom_indices) == len(bond_indices) == len(all_dgs)): + ab_pairs = {} + i = 0 + while i < len(all_dgs): + ab_pairs[f'{all_dgs[i]}'] = (atom_indices[i], bond_indices[i]) + i += 1 + + return ab_pairs + def dihedral_groups_ensemble(dirname, atom_indices, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, @@ -455,7 +472,7 @@ def periodic_angle(df, padding=45): return df_aug -def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): +def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): '''Plots distributions of dihedral angles for one dihedral atom group as violin plots, using as input the augmented :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle`. @@ -491,7 +508,8 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): # number of Coul windows + 1 / number of VDW windows # (+1 for additional space with axes) width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, - len(df[df['interaction'] == "VDW"]["lambda"].unique())] + len(df[df['interaction'] == "VDW"]["lambda"].unique()), + len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] solvs = np.asarray(solvents) solv2 = 'octanol' @@ -501,9 +519,9 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=True, width=width, inner=None, cut=0, linewidth=0.5, - hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW"], + hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, - height=2, aspect=2.5, + height=3.0, aspect=2.0, facet_kws={'ylim': (-180, 180), 'gridspec_kws': {'width_ratios': width_ratios, }}) @@ -520,9 +538,13 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): axV.yaxis.set_visible(False) axV.spines["left"].set_visible(False) + axIM = g.axes_dict['Structure'] + axIM.imshow(im, aspect='equal', origin='upper', extent=(-50.0, 349.5, -180.0, 180.0)) + axIM.axis('off') + return g -def plot_violins(df, resname, figdir=None, molname=None, width=0.9, solvents=SOLVENTS_DEFAULT): +def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0.9, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and optionally saving figures for all dihedral atom groups. @@ -581,7 +603,10 @@ def plot_violins(df, resname, figdir=None, molname=None, width=0.9, solvents=SOL section = df.groupby(by='selection') for name in section: - plot = dihedral_violins(name[1], width=width, solvents=solvents) + a = list(ab_pairs[name[0]][0]) + b = list(ab_pairs[name[0]][1]) + im = Chem.Draw.MolToImage(mol=mol, highlightAtoms=a, highlightBonds=b) + plot = dihedral_violins(name[1], im=im, width=width, solvents=solvents) plot.set_titles(f'{molname},{name[0]}, ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, @@ -695,7 +720,7 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, start=0, stop=100, step=10) ''' - if dataframe is not None: + '''if dataframe is not None: df = dataframe logger.info(f'Proceeding with results DataFrame provided.') @@ -706,10 +731,25 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step) + ''' + # ^reincorporate this option once plots are done + # maybe add ab_pairs dict info in saved DF? + + u = build_universe(dirname=dirname) + mol, solute = rdkit_conversion(u=u, resname=resname) + atom_indices = get_atom_indices(dirname=dirname, mol=mol, SMARTS=SMARTS) + bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) + dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) + ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, dihedral_groups=dihedral_groups) + + df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, + solvents=solvents, interactions=interactions, + start=start, stop=stop, step=step) + if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) df_aug = periodic_angle(df, padding=padding) - return plot_violins(df_aug, resname=resname, figdir=figdir, molname=molname, width=width, solvents=solvents) + return plot_violins(df_aug, resname=resname, molname=molname, mol=mol, ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) From b6f66c294f6494ba86dbc06385a0d0a0ec231935 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 11:08:47 -0700 Subject: [PATCH 07/81] add atom_indices to titles of plot columns --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 2f6f4808..3fd3687a 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -607,7 +607,7 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0. b = list(ab_pairs[name[0]][1]) im = Chem.Draw.MolToImage(mol=mol, highlightAtoms=a, highlightBonds=b) plot = dihedral_violins(name[1], im=im, width=width, solvents=solvents) - plot.set_titles(f'{molname},{name[0]}, ''{col_name}') + plot.set_titles(f'{molname},{list(ab_pairs[name[0]][0])},{name[0]}, ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, # especially when generated for a projecct iteratively From 2f8c042f4556e311d2ee8225e63ddb2ce934b0d2 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 11:28:38 -0700 Subject: [PATCH 08/81] add atom_indices to label Mol object on plots --- mdpow/workflows/dihedrals.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 3fd3687a..3bab2052 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -177,6 +177,10 @@ def rdkit_conversion(u, resname): mol = solute.convert_to('RDKIT') rdCoordGen.AddCoords(mol) + #rdDepictor.Compute2DCoords(mol), not sure how this is different from above^ + + for atom in mol.GetAtoms(): + atom.SetProp("atomNote", str(atom.GetIdx())) return mol, solute From 844b29f6a92fd6b680584a1fc7eacf946b94b1d6 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 12:01:59 -0700 Subject: [PATCH 09/81] cleaner plot titles --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 3bab2052..c37ac250 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -611,7 +611,7 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0. b = list(ab_pairs[name[0]][1]) im = Chem.Draw.MolToImage(mol=mol, highlightAtoms=a, highlightBonds=b) plot = dihedral_violins(name[1], im=im, width=width, solvents=solvents) - plot.set_titles(f'{molname},{list(ab_pairs[name[0]][0])},{name[0]}, ''{col_name}') + plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, # especially when generated for a projecct iteratively From 5fa5f118d0a7666bd6db8bb4c00d10ab28032def Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 21 Feb 2023 13:59:07 -0700 Subject: [PATCH 10/81] current best plot resolution/proportion --- mdpow/workflows/dihedrals.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index c37ac250..90db99ef 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -40,6 +40,11 @@ from rdkit import Chem from rdkit.Chem import rdCoordGen +# for SVG test +from rdkit.Chem.Draw import IPythonConsole +from IPython.display import SVG +from rdkit.Chem.Draw import rdMolDraw2D + import seaborn as sns import matplotlib.pyplot as plt @@ -513,7 +518,7 @@ def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): # (+1 for additional space with axes) width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, len(df[df['interaction'] == "VDW"]["lambda"].unique()), - len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] + len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] # was - 1 solvs = np.asarray(solvents) solv2 = 'octanol' @@ -525,7 +530,7 @@ def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): linewidth=0.5, hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, - height=3.0, aspect=2.0, + height=3.0, aspect=2.0, # height was 3.0, aspect was 2.0 facet_kws={'ylim': (-180, 180), 'gridspec_kws': {'width_ratios': width_ratios, }}) @@ -542,8 +547,10 @@ def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): axV.yaxis.set_visible(False) axV.spines["left"].set_visible(False) - axIM = g.axes_dict['Structure'] - axIM.imshow(im, aspect='equal', origin='upper', extent=(-50.0, 349.5, -180.0, 180.0)) + axIM = g.axes_dict['Structure'] # -50, 349.5 + #axIM.imshow(im, aspect='equal', origin='upper', extent=(-30.0, 329.5, -180.0, 180.0)) + axIM.imshow(im, aspect='equal', origin='upper', extent=(-80.5, 379.5, -222.0, 222.0)) + #axIM.imshow(im, aspect='equal', origin='upper', extent=(-0.5, 299.5, -180.0, 180.0)) axIM.axis('off') return g @@ -609,7 +616,15 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0. for name in section: a = list(ab_pairs[name[0]][0]) b = list(ab_pairs[name[0]][1]) - im = Chem.Draw.MolToImage(mol=mol, highlightAtoms=a, highlightBonds=b) + + #drawer = rdMolDraw2D.MolDraw2DSVG(300, 300) + #drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) + #drawer.FinishDrawing() + #svg = drawer.GetDrawingText().replace('svg:','') + + #im = SVG(svg) + # default x300 + im = Chem.Draw.MolToImage(mol=mol, size=(400,400), highlightAtoms=a, highlightBonds=b) plot = dihedral_violins(name[1], im=im, width=width, solvents=solvents) plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') # plot.set_titles needs to stay here during future development From 82cbaa2ae9d270ae592dcd43e20bde1e31f763aa Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 4 Mar 2023 02:34:47 -0700 Subject: [PATCH 11/81] add svgutils and cairosvg methods to plot svg mol object --- mdpow/workflows/dihedrals.py | 105 +++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 90db99ef..95179eec 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -37,6 +37,12 @@ import numpy as np import pandas as pd +# add to dependencies for install and tests +import cairosvg +import svgutils +import svgutils.compose as sc +import svgutils.transform as sg + from rdkit import Chem from rdkit.Chem import rdCoordGen @@ -473,6 +479,7 @@ def periodic_angle(df, padding=45): ''' + # this method is changed in PR242 df1 = df[df.dihedral > 180 - padding] df1.dihedral -= 360 df2 = df[df.dihedral < -180 + padding] @@ -481,7 +488,7 @@ def periodic_angle(df, padding=45): return df_aug -def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): +def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): '''Plots distributions of dihedral angles for one dihedral atom group as violin plots, using as input the augmented :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle`. @@ -520,6 +527,10 @@ def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): len(df[df['interaction'] == "VDW"]["lambda"].unique()), len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] # was - 1 + # this method allows for one solvent + # to be plotted correctly + # needs to be corrected to fix legend + # for when only one solvent is plotted solvs = np.asarray(solvents) solv2 = 'octanol' if solvs.size > 1: @@ -534,28 +545,28 @@ def dihedral_violins(df, im, width=0.9, solvents=SOLVENTS_DEFAULT): facet_kws={'ylim': (-180, 180), 'gridspec_kws': {'width_ratios': width_ratios, }}) - g.set_xlabels(r"$\lambda$") - g.set_ylabels(r"dihedral angle $\phi$") - g.despine(offset=5) + + _ = g.set_xlabels(r"$\lambda$") + _ = g.set_ylabels(r"dihedral angle $\phi$") + _ = g.despine(offset=5) axC = g.axes_dict['Coulomb'] - axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) - axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) - axC.yaxis.set_major_formatter(plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$")) + _ = axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) + _ = axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) + _ = axC.yaxis.set_major_formatter(plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$")) axV = g.axes_dict['VDW'] - axV.yaxis.set_visible(False) - axV.spines["left"].set_visible(False) + _ = axV.yaxis.set_visible(False) + _ = axV.spines["left"].set_visible(False) - axIM = g.axes_dict['Structure'] # -50, 349.5 - #axIM.imshow(im, aspect='equal', origin='upper', extent=(-30.0, 329.5, -180.0, 180.0)) - axIM.imshow(im, aspect='equal', origin='upper', extent=(-80.5, 379.5, -222.0, 222.0)) - #axIM.imshow(im, aspect='equal', origin='upper', extent=(-0.5, 299.5, -180.0, 180.0)) - axIM.axis('off') + axIM = g.axes_dict['Structure'] + #axIM.imshow(im, aspect='equal', origin='upper', extent=(-80.5, 379.5, -222.0, 222.0)) + _ = axIM.axis('off') return g -def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0.9, solvents=SOLVENTS_DEFAULT): +def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, + width=0.9, plot_pdf_width=190, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and optionally saving figures for all dihedral atom groups. @@ -611,36 +622,58 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0. newdir = os.path.join(figdir, subdir) os.mkdir(newdir) - section = df.groupby(by='selection') + section = df.groupby(by="selection") + # conversion factor: 1 mm = 3.7795275591 px + plot_pdf_width = plot_pdf_width * 3.7795275591 + # DEFAULT: 190 mm = 718.110236229 pixels + + # put this into separate function for name in section: a = list(ab_pairs[name[0]][0]) b = list(ab_pairs[name[0]][1]) - #drawer = rdMolDraw2D.MolDraw2DSVG(300, 300) - #drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) - #drawer.FinishDrawing() - #svg = drawer.GetDrawingText().replace('svg:','') + drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) + drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) + drawer.FinishDrawing() + + svg = drawer.GetDrawingText().replace('svg:','') + mol_svg = sg.fromstring(svg) + mol_svg.save("mol_svg.svg") - #im = SVG(svg) - # default x300 - im = Chem.Draw.MolToImage(mol=mol, size=(400,400), highlightAtoms=a, highlightBonds=b) - plot = dihedral_violins(name[1], im=im, width=width, solvents=solvents) - plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') + plot = dihedral_violins(name[1], width=width, solvents=solvents) + _ = plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, - # especially when generated for a projecct iteratively + # especially when generated for a project iteratively + _ = plot.savefig("tmp_plot.svg") - if figdir is not None: - figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" - plot.savefig(figfile) - logger.info(f'Figure saved as {figfile}') + # 28cm leaves room for lengthier solvent names + # in the legend on the rightmost side + sc.Figure("28cm", "4.2cm", # width and height for SVG render, previously 30, 5 + sc.SVG("tmp_plot.svg").scale(0.02), + sc.SVG("mol_svg.svg").scale(0.0125).move(21.8,0.35) # previously 21.5,0.5 + ).save("plot_svg.svg") + # best pre-pdf size and ratio so far - return plot + # figdir is now necessary and plots are not returned with molecule graphic + figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" + plot_pdf = cairosvg.svg2pdf(url="plot_svg.svg", write_to=str(figfile), + output_width=plot_pdf_width) + + logger.info(f"Figure saved as {figfile}") + + # remove temp svg files + os.system("rm mol_svg.svg") + os.system("rm tmp_plot.svg") + os.system("rm plot_svg.svg") + + return logger.info(f"All figures generated and saved in {figdir}") + def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, resname=None, molname=None, - SMARTS=SMARTS_DEFAULT, + SMARTS=SMARTS_DEFAULT, plot_pdf_width=190, dataframe=None, padding=45, width=0.9, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, @@ -759,7 +792,8 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, atom_indices = get_atom_indices(dirname=dirname, mol=mol, SMARTS=SMARTS) bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) - ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, dihedral_groups=dihedral_groups) + ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, + dihedral_groups=dihedral_groups) df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, solvents=solvents, interactions=interactions, @@ -771,4 +805,7 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, df_aug = periodic_angle(df, padding=padding) - return plot_violins(df_aug, resname=resname, molname=molname, mol=mol, ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) + plot_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, + ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) + + return logger.info(f"DihedralAnalysis completed for all projects in {dirname}") From d26d9f18e0fe96747344e191a58ae3a70cbccc8f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 4 Mar 2023 04:07:13 -0700 Subject: [PATCH 12/81] reimplement DF input option and fix most tests to reflect name changes and altered function definitions --- .../tests/test_automated_dihedral_analysis.py | 65 ++++++++++--------- mdpow/workflows/dihedrals.py | 27 +++----- 2 files changed, 44 insertions(+), 48 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 97ebc6a8..00e605a1 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -29,6 +29,8 @@ RESOURCES = pathlib.PurePath(resource_filename(__name__, 'testing_resources')) MANIFEST = RESOURCES / "manifest.yml" +resname = "UNK" + @pytest.fixture(scope="function") def molname_workflows_directory(tmp_path, molname='SM25'): m = pybol.Manifest(str(MANIFEST)) @@ -43,13 +45,19 @@ def SM25_tmp_dir(self, molname_workflows_directory): return dirname @pytest.fixture(scope="function") - def atom_indices(self, SM25_tmp_dir): - atom_group_indices = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, resname=self.resname) + def mol_sol_data(self, SM25_tmp_dir): + u = dihedrals.build_universe(dirname=SM25_tmp_dir) + mol, solute = dihedrals.rdkit_conversion(u=u, resname=resname) + return mol, solute + + @pytest.fixture(scope="function") + def atom_indices(self, SM25_tmp_dir, mol_sol_data): + mol, _ = mol_sol_data + atom_group_indices = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, mol=mol) # testing optional user input of alternate SMARTS string # for automated dihedral atom group selection - atom_group_indices_alt = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, - resname=self.resname, + atom_group_indices_alt = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, mol=mol, SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') return atom_group_indices, atom_group_indices_alt # fixture output, tuple: @@ -68,8 +76,6 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): # dihedral_data[0]=df # dihedral_data[1]=df_aug - resname = 'UNK' - # tuple-tuples of dihedral atom group indices # collected using mdpow.workflows.dihedrals.SMARTS_DEFAULT check_atom_group_indices = ((0, 1, 2, 3),(0, 1, 12, 13),(1, 2, 3, 11),(1, 2, 3, 10), @@ -130,6 +136,7 @@ def test_build_universe(self, SM25_tmp_dir): solute = u.select_atoms('resname UNK') solute_names = solute.atoms.names assert solute_names.all() == self.universe_solute_atom_names.all() + #change this method # the following 'reason' affects every downstream function that relies # on the atom indices returned for dihedral atom group selections @@ -137,7 +144,7 @@ def test_build_universe(self, SM25_tmp_dir): @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' 'returns incorrect atom_indices for dihedral atom group selections') def test_dihedral_indices(self, atom_indices): - atom_group_indices = atom_indices[0] + atom_group_indices, _ = atom_indices assert atom_group_indices == self.check_atom_group_indices # the following 'reason' affects every downstream function that relies @@ -146,7 +153,7 @@ def test_dihedral_indices(self, atom_indices): @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' 'returns incorrect atom_indices for dihedral atom group selections') def test_SMARTS(self, atom_indices): - atom_group_indices_alt = atom_indices[1] + _, atom_group_indices_alt = atom_indices assert atom_group_indices_alt == self.check_atom_group_indices_alt # the following 'reason' affects every downstream function that relies @@ -154,8 +161,10 @@ def test_SMARTS(self, atom_indices): # issue raised (#239) to identify and resolve exact package/version responsible @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' 'returns incorrect atom_indices for dihedral atom group selections') - def test_dihedral_groups(self, SM25_tmp_dir): - groups = dihedrals.dihedral_groups(dirname=SM25_tmp_dir, resname=self.resname) + def test_dihedral_groups(self, SM25_tmp_dir, mol_sol_data, atom_indices): + _, solute = mol_sol_data + atom_group_indices, _ = atom_indices + groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=atom_group_indices) i = 0 while i < len(groups): assert groups[i].all() == self.check_groups[i].all() @@ -168,7 +177,7 @@ def test_dihedral_groups(self, SM25_tmp_dir): 'returns incorrect atom_indices for dihedral atom group selections') def test_dihedral_groups_ensemble(self, dihedral_data): - df = dihedral_data[0] + df, _ = dihedral_data dh1_result = df.loc[df['selection'] == 'O1-C2-N3-S4']['dihedral'] dh1_mean = circmean(dh1_result, high=180, low=-180) @@ -186,13 +195,15 @@ def test_dihedral_groups_ensemble(self, dihedral_data): dh2_var == pytest.approx(self.DG_C13141520_var) def test_save_df(self, dihedral_data, SM25_tmp_dir): - dihedrals.save_df(df=dihedral_data[0], df_save_dir=SM25_tmp_dir, molname='SM25') + df, _ = dihedral_data + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, molname='SM25') assert (SM25_tmp_dir / 'SM25' / 'SM25_full_df.csv.bz2').exists(), 'Compressed csv file not saved' def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): + df, _ = dihedral_data caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - dihedrals.save_df(df=dihedral_data[0], df_save_dir=SM25_tmp_dir, molname='SM25') + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, molname='SM25') assert f'Results DataFrame saved as {SM25_tmp_dir}/SM25/SM25_full_df.csv.bz2' in caplog.text, 'Save location not logged or returned' # the following 'reason' affects every downstream function that relies @@ -202,7 +213,7 @@ def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): 'returns incorrect atom_indices for dihedral atom group selections') def test_periodic_angle(self, dihedral_data): - df_aug = dihedral_data[1] + _, df_aug = dihedral_data aug_dh2_result = df_aug.loc[df_aug['selection'] == 'C13-C14-C15-C20']['dihedral'] @@ -219,7 +230,7 @@ def test_periodic_angle(self, dihedral_data): 'returns incorrect atom_indices for dihedral atom group selections') def test_save_fig(self, SM25_tmp_dir): dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, molname='SM25', + resname=resname, molname='SM25', solvents=('water',)) assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' @@ -232,26 +243,20 @@ def test_save_fig_info(self, SM25_tmp_dir, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, molname='SM25', + resname=resname, molname='SM25', solvents=('water',)) assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' - def test_DataFrame_input(self, SM25_tmp_dir): - test_df = pd.DataFrame([['C1-C2-C3-C4', 'water', 'Coulomb', 0, 0, 60.0], - ['C1-C2-C3-C5', 'water', 'Coulomb', 0, 0, 60.0]], - [1,2],['selection', 'solvent', 'interaction', 'lambda', 'time', 'dihedral']) - plot = dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, - solvents=('water',), dataframe=test_df) - assert isinstance(plot, seaborn.axisgrid.FacetGrid) + def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): + df, _ = dihedral_data + dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, + resname=resname, solvents=('water',), dataframe=df) + assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' - def test_DataFrame_input_info(self, SM25_tmp_dir, caplog): + def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - test_df = pd.DataFrame([['C1-C2-C3-C4', 'water', 'Coulomb', 0, 0, 60.0], - ['C1-C2-C3-C5', 'water', 'Coulomb', 0, 0, 60.0]], - [1,2],['selection', 'solvent', 'interaction', 'lambda', 'time', 'dihedral']) + df, _ = dihedral_data dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=self.resname, - solvents=('water',), dataframe=test_df) + resname=resname, solvents=('water',), dataframe=df) assert 'Proceeding with results DataFrame provided.' in caplog.text, 'No dataframe provided or dataframe not recognized' diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 95179eec..11468ed7 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -772,33 +772,24 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, start=0, stop=100, step=10) ''' - '''if dataframe is not None: + u = build_universe(dirname=dirname) + mol, solute = rdkit_conversion(u=u, resname=resname) + atom_indices = get_atom_indices(dirname=dirname, mol=mol, SMARTS=SMARTS) + bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) + dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) + ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, + dihedral_groups=dihedral_groups) + + if dataframe is not None: df = dataframe logger.info(f'Proceeding with results DataFrame provided.') else: - atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step) - ''' - # ^reincorporate this option once plots are done - # maybe add ab_pairs dict info in saved DF? - - u = build_universe(dirname=dirname) - mol, solute = rdkit_conversion(u=u, resname=resname) - atom_indices = get_atom_indices(dirname=dirname, mol=mol, SMARTS=SMARTS) - bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) - dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) - ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, - dihedral_groups=dihedral_groups) - - df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, - solvents=solvents, interactions=interactions, - start=start, stop=stop, step=step) - if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) From d80ae4ccfcd41df06861ed415e42092107c81f12 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 4 Mar 2023 04:23:49 -0700 Subject: [PATCH 13/81] add svgutils and cairosvg to dependencies, install, requirements lists, remove broken test, add reminder to update func list in docs --- devtools/conda-envs/test_env.yaml | 2 ++ doc/requirements.txt | 2 ++ mdpow/tests/test_automated_dihedral_analysis.py | 10 +++++----- mdpow/workflows/dihedrals.py | 2 +- setup.py | 2 ++ 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 02eeef0a..04d0f29f 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -17,6 +17,8 @@ dependencies: - alchemlyb <2 - rdkit - seaborn +- svgutils +- cairosvg # Testing - pytest diff --git a/doc/requirements.txt b/doc/requirements.txt index 37f3c3f6..3725e194 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -9,3 +9,5 @@ mdanalysis rdkit seaborn matplotlib +svgutils +cairosvg diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 00e605a1..a78f8186 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -247,11 +247,11 @@ def test_save_fig_info(self, SM25_tmp_dir, caplog): solvents=('water',)) assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' - def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): - df, _ = dihedral_data - dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, solvents=('water',), dataframe=df) - assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' + #def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): + # df, _ = dihedral_data + # dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, + # resname=resname, solvents=('water',), dataframe=df) + # assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 11468ed7..879d9665 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -31,7 +31,7 @@ .. autofunction:: plot_violins """ - +#^need to update function list import os import pathlib import numpy as np diff --git a/setup.py b/setup.py index 43a900ce..feaa00a7 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,8 @@ 'matplotlib', 'seaborn', 'rdkit', + 'svgutils', + 'cairosvg', ], #setup_requires=['pytest-runner',], tests_require=['pytest', 'pybol', 'py'], From a8f5b5e4652407f31f5fa66dff683dceb9f3c8f5 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 4 Mar 2023 04:29:00 -0700 Subject: [PATCH 14/81] remove unneccessary import --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 879d9665..fb514297 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -48,7 +48,7 @@ # for SVG test from rdkit.Chem.Draw import IPythonConsole -from IPython.display import SVG +#from IPython.display import SVG from rdkit.Chem.Draw import rdMolDraw2D import seaborn as sns From 2522b5c38034174a6b5f0c156881f654cdd15ebd Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 4 Mar 2023 04:34:14 -0700 Subject: [PATCH 15/81] remove import --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index fb514297..0fa7b50f 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -47,7 +47,7 @@ from rdkit.Chem import rdCoordGen # for SVG test -from rdkit.Chem.Draw import IPythonConsole +#from rdkit.Chem.Draw import IPythonConsole #from IPython.display import SVG from rdkit.Chem.Draw import rdMolDraw2D From d54cd869de8d708d13a0a2513242551b9e296af9 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 20 Mar 2023 22:21:02 -0700 Subject: [PATCH 16/81] remove temp svg files and build, convert, and save as pdf through mem --- mdpow/workflows/dihedrals.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 0fa7b50f..c401eb81 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -617,6 +617,7 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, if molname is None: molname = resname + # figdir is now necessary?? if figdir is not None: subdir = molname newdir = os.path.join(figdir, subdir) @@ -636,38 +637,39 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) drawer.FinishDrawing() - svg = drawer.GetDrawingText().replace('svg:','') + mol_svg = sg.fromstring(svg) - mol_svg.save("mol_svg.svg") + m = mol_svg.getroot() + m.scale(0.0125).moveto(21.8, 0.35) plot = dihedral_violins(name[1], width=width, solvents=solvents) _ = plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, # especially when generated for a project iteratively - _ = plot.savefig("tmp_plot.svg") + + plot_svg = sg.from_mpl(plot) + p = plot_svg.getroot() + p.scale(0.02) + + mol_svg = sg.fromstring(svg) + m = mol_svg.getroot() + m.scale(0.0125).moveto(21.8, 0.35) # 28cm leaves room for lengthier solvent names # in the legend on the rightmost side - sc.Figure("28cm", "4.2cm", # width and height for SVG render, previously 30, 5 - sc.SVG("tmp_plot.svg").scale(0.02), - sc.SVG("mol_svg.svg").scale(0.0125).move(21.8,0.35) # previously 21.5,0.5 - ).save("plot_svg.svg") - # best pre-pdf size and ratio so far + # width and height for SVG render, previously 30, 5 + # the order matters here, plot down first, mol on top + fig = sc.Figure("28cm", "4.2cm", p, m) # figdir is now necessary and plots are not returned with molecule graphic figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" - plot_pdf = cairosvg.svg2pdf(url="plot_svg.svg", write_to=str(figfile), + plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), output_width=plot_pdf_width) logger.info(f"Figure saved as {figfile}") - # remove temp svg files - os.system("rm mol_svg.svg") - os.system("rm tmp_plot.svg") - os.system("rm plot_svg.svg") - return logger.info(f"All figures generated and saved in {figdir}") From 193dc848b56b0501da99f4682ed7293e0517bd1f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 20 Mar 2023 22:43:32 -0700 Subject: [PATCH 17/81] fix legend issue when plotting one solvent --- mdpow/workflows/dihedrals.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index c401eb81..95888447 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -529,17 +529,15 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): # this method allows for one solvent # to be plotted correctly - # needs to be corrected to fix legend - # for when only one solvent is plotted - solvs = np.asarray(solvents) - solv2 = 'octanol' - if solvs.size > 1: - solv2 = solvs[1] + solvs = list(solvents) + if len(solvs) < 2: + solvs.append('N/A') g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=True, width=width, inner=None, cut=0, linewidth=0.5, - hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW", "Structure"], + #hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW", "Structure"], + hue_order=[solvs[0], solvs[1]], col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, height=3.0, aspect=2.0, # height was 3.0, aspect was 2.0 facet_kws={'ylim': (-180, 180), From fda320677bd40044733c14dfeb3535fe9fb72c85 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 28 Mar 2023 08:00:56 -0700 Subject: [PATCH 18/81] cleanup --- mdpow/workflows/dihedrals.py | 80 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 52 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 95888447..f4baab8a 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -13,6 +13,10 @@ depending on the desired results. Details of the completely automated workflow are discussed under :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`. +Atom indices obtained by :func:`get_atom_indices` are 0-based, +atom index labels on the molecule in the plots are 0-based, +but atom `names` in plots and file names are 1-based. + .. autodata:: SOLVENTS_DEFAULT :annotation: = ('water', 'octanol') .. autodata:: INTERACTIONS_DEFAULT @@ -37,18 +41,13 @@ import numpy as np import pandas as pd -# add to dependencies for install and tests import cairosvg import svgutils -import svgutils.compose as sc -import svgutils.transform as sg +import svgutils.compose +import svgutils.transform from rdkit import Chem from rdkit.Chem import rdCoordGen - -# for SVG test -#from rdkit.Chem.Draw import IPythonConsole -#from IPython.display import SVG from rdkit.Chem.Draw import rdMolDraw2D import seaborn as sns @@ -142,8 +141,9 @@ def rdkit_conversion(u, resname): :func:`~mdpow.workflows.dihedrals.build_universe` and a `resname` as input. Uses `resname` to select the solute for conversion by :class:`~MDAnalysis.converters.RDKit.RDKitConverter` to :class:`rdkit.Chem.rdchem.Mol`, - and will add element attributes for Hydrogen if not listed in the topology. - + and will add element attributes for Hydrogen if not listed in the topology, + using :func:`MDAnalysis.topology.guessers.guess_atom_element`. + :keywords: *u* @@ -167,28 +167,14 @@ def rdkit_conversion(u, resname): """ - # i think somewhere we decided to ditch try/except method, will check notes - # notes pertaining to testing topology attributes for explicit Hs - - #try: - # solute = u.select_atoms(f'resname {resname}') - # mol = solute.convert_to('RDKIT') - #except AttributeError: - # elements = [guess_atom_element(name) for name in u.atoms.names] - # u.add_TopologyAttr("elements", elements) - # solute = u.select_atoms(f'resname {resname}') - # mol = solute.convert_to('RDKIT') - elements = [guess_atom_element(name) for name in u.atoms.names] u.add_TopologyAttr("elements", elements) - # com or cog? solute = u.select_atoms(f'resname {resname}') solute.unwrap(compound='residues', reference='com') mol = solute.convert_to('RDKIT') rdCoordGen.AddCoords(mol) - #rdDepictor.Compute2DCoords(mol), not sure how this is different from above^ for atom in mol.GetAtoms(): atom.SetProp("atomNote", str(atom.GetIdx())) @@ -232,8 +218,6 @@ def get_atom_indices(dirname, mol, SMARTS=SMARTS_DEFAULT): ''' - #u = build_universe(dirname=dirname) - #mol = rdkit_conversion(u=u, resname=resname)[0] pattern = Chem.MolFromSmarts(SMARTS) atom_indices = mol.GetSubstructMatches(pattern) @@ -296,9 +280,6 @@ def get_dihedral_groups(solute, atom_indices): ''' - #u = build_universe(dirname=dirname) - #_, solute = rdkit_conversion(u=u, resname=resname) - #atom_indices = get_atom_indices(dirname=dirname, resname=resname, SMARTS=SMARTS) dihedral_groups = [solute.atoms[list(a_ind)].names for a_ind in atom_indices] @@ -312,7 +293,7 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): dg_list.append(group) all_dgs = tuple(dg_list) - if (len(atom_indices) == len(bond_indices) == len(all_dgs)): + if len(atom_indices) == len(bond_indices) == len(all_dgs): ab_pairs = {} i = 0 while i < len(all_dgs): @@ -431,7 +412,7 @@ def save_df(df, df_save_dir, resname=None, molname=None): newdir = os.path.join(df_save_dir, subdir) os.mkdir(newdir) - # time and compress level can be adjusted as kwargs + # time and compression level can be adjusted as kwargs df.to_csv(f'{newdir}/{molname}_full_df.csv.bz2', index=False, compression='bz2') @@ -479,7 +460,6 @@ def periodic_angle(df, padding=45): ''' - # this method is changed in PR242 df1 = df[df.dihedral > 180 - padding] df1.dihedral -= 360 df2 = df[df.dihedral < -180 + padding] @@ -525,10 +505,9 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): # (+1 for additional space with axes) width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, len(df[df['interaction'] == "VDW"]["lambda"].unique()), - len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] # was - 1 + len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] - # this method allows for one solvent - # to be plotted correctly + # this method allows for one solvent to be plotted correctly solvs = list(solvents) if len(solvs) < 2: solvs.append('N/A') @@ -536,30 +515,28 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=True, width=width, inner=None, cut=0, linewidth=0.5, - #hue_order=[solvs[0], solv2], col_order=["Coulomb", "VDW", "Structure"], hue_order=[solvs[0], solvs[1]], col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, - height=3.0, aspect=2.0, # height was 3.0, aspect was 2.0 + height=3.0, aspect=2.0, facet_kws={'ylim': (-180, 180), 'gridspec_kws': {'width_ratios': width_ratios, }}) - _ = g.set_xlabels(r"$\lambda$") - _ = g.set_ylabels(r"dihedral angle $\phi$") - _ = g.despine(offset=5) + g.set_xlabels(r"$\lambda$") + g.set_ylabels(r"dihedral angle $\phi$") + g.despine(offset=5) axC = g.axes_dict['Coulomb'] - _ = axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) - _ = axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) - _ = axC.yaxis.set_major_formatter(plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$")) + axC.yaxis.set_major_locator(plt.matplotlib.ticker.MultipleLocator(60)) + axC.yaxis.set_minor_locator(plt.matplotlib.ticker.MultipleLocator(30)) + axC.yaxis.set_major_formatter(plt.matplotlib.ticker.FormatStrFormatter(r"$%g^\circ$")) axV = g.axes_dict['VDW'] - _ = axV.yaxis.set_visible(False) - _ = axV.spines["left"].set_visible(False) + axV.yaxis.set_visible(False) + axV.spines["left"].set_visible(False) axIM = g.axes_dict['Structure'] - #axIM.imshow(im, aspect='equal', origin='upper', extent=(-80.5, 379.5, -222.0, 222.0)) - _ = axIM.axis('off') + axIM.axis('off') return g @@ -615,7 +592,6 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, if molname is None: molname = resname - # figdir is now necessary?? if figdir is not None: subdir = molname newdir = os.path.join(figdir, subdir) @@ -637,21 +613,21 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, drawer.FinishDrawing() svg = drawer.GetDrawingText().replace('svg:','') - mol_svg = sg.fromstring(svg) + mol_svg = svgutils.transform.fromstring(svg) m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) plot = dihedral_violins(name[1], width=width, solvents=solvents) - _ = plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') + plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') # plot.set_titles needs to stay here during future development # this locale ensures that plots are properly named, # especially when generated for a project iteratively - plot_svg = sg.from_mpl(plot) + plot_svg = svgutils.transform.from_mpl(plot) p = plot_svg.getroot() p.scale(0.02) - mol_svg = sg.fromstring(svg) + mol_svg = svgutils.transform.fromstring(svg) m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) @@ -659,7 +635,7 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, # in the legend on the rightmost side # width and height for SVG render, previously 30, 5 # the order matters here, plot down first, mol on top - fig = sc.Figure("28cm", "4.2cm", p, m) + fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) # figdir is now necessary and plots are not returned with molecule graphic figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" From e5a440e14a1f4453206a05ffb7fd5419f8b74742 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 28 Mar 2023 08:17:29 -0700 Subject: [PATCH 19/81] move title setting and cleanup --- mdpow/workflows/dihedrals.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index f4baab8a..00d472a8 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -468,7 +468,7 @@ def periodic_angle(df, padding=45): return df_aug -def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): +def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): '''Plots distributions of dihedral angles for one dihedral atom group as violin plots, using as input the augmented :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle`. @@ -538,6 +538,8 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT): axIM = g.axes_dict['Structure'] axIM.axis('off') + g.set_titles(plot_title) + return g def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, @@ -617,11 +619,8 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) - plot = dihedral_violins(name[1], width=width, solvents=solvents) - plot.set_titles(f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}') - # plot.set_titles needs to stay here during future development - # this locale ensures that plots are properly named, - # especially when generated for a project iteratively + plot_title = f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}' + plot = dihedral_violins(name[1], width=width, solvents=solvents, plot_title=plot_title) plot_svg = svgutils.transform.from_mpl(plot) p = plot_svg.getroot() @@ -637,15 +636,15 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, # the order matters here, plot down first, mol on top fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) - # figdir is now necessary and plots are not returned with molecule graphic figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), output_width=plot_pdf_width) logger.info(f"Figure saved as {figfile}") - return logger.info(f"All figures generated and saved in {figdir}") - + logger.info(f"All figures generated and saved in {figdir}") + + return None def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, resname=None, molname=None, @@ -775,4 +774,6 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, plot_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) - return logger.info(f"DihedralAnalysis completed for all projects in {dirname}") + logger.info(f"DihedralAnalysis completed for all projects in {dirname}") + + return From c71d3d8360cb95d818847fbef8c83f6d7f5f237a Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Tue, 28 Mar 2023 08:51:34 -0700 Subject: [PATCH 20/81] split plot_violins into new build_svg function --- mdpow/workflows/dihedrals.py | 65 +++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 00d472a8..cfaa3284 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -32,6 +32,7 @@ .. autofunction:: save_df .. autofunction:: periodic_angle .. autofunction:: dihedral_violins +.. autofunction:: build_svg .. autofunction:: plot_violins """ @@ -542,6 +543,35 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): return g +def build_svg(mol, molname=None, name=None, ab_pairs=None, + solvents=SOLVENTS_DEFAULT, width=0.9): + # atoms + a = list(ab_pairs[name[0]][0]) + # bonds + b = list(ab_pairs[name[0]][1]) + + drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) + drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) + drawer.FinishDrawing() + svg = drawer.GetDrawingText().replace('svg:','') + + mol_svg = svgutils.transform.fromstring(svg) + m = mol_svg.getroot() + m.scale(0.0125).moveto(21.8, 0.35) + + plot_title = f'{molname}, {name[0]} {a} | ''{col_name}' + plot = dihedral_violins(name[1], width=width, solvents=solvents, plot_title=plot_title) + + plot_svg = svgutils.transform.from_mpl(plot) + p = plot_svg.getroot() + p.scale(0.02) + + # 28cm leaves room for lengthier solvent names in the legend + # order matters here, plot down first, mol on top (p, m) + fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) + + return fig + def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0.9, plot_pdf_width=190, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and optionally saving figures for all dihedral @@ -602,43 +632,18 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, section = df.groupby(by="selection") # conversion factor: 1 mm = 3.7795275591 px - plot_pdf_width = plot_pdf_width * 3.7795275591 # DEFAULT: 190 mm = 718.110236229 pixels + plot_pdf_width_px = plot_pdf_width * 3.7795275591 # put this into separate function for name in section: - a = list(ab_pairs[name[0]][0]) - b = list(ab_pairs[name[0]][1]) - - drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) - drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) - drawer.FinishDrawing() - svg = drawer.GetDrawingText().replace('svg:','') - - mol_svg = svgutils.transform.fromstring(svg) - m = mol_svg.getroot() - m.scale(0.0125).moveto(21.8, 0.35) - - plot_title = f'{molname}, {name[0]} {list(ab_pairs[name[0]][0])} | ''{col_name}' - plot = dihedral_violins(name[1], width=width, solvents=solvents, plot_title=plot_title) - - plot_svg = svgutils.transform.from_mpl(plot) - p = plot_svg.getroot() - p.scale(0.02) - - mol_svg = svgutils.transform.fromstring(svg) - m = mol_svg.getroot() - m.scale(0.0125).moveto(21.8, 0.35) - - # 28cm leaves room for lengthier solvent names - # in the legend on the rightmost side - # width and height for SVG render, previously 30, 5 - # the order matters here, plot down first, mol on top - fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) + + fig = build_svg(mol=mol, molname=molname, name=name, ab_pairs=ab_pairs, + solvents=solvents, width=width) figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), - output_width=plot_pdf_width) + output_width=plot_pdf_width_px) logger.info(f"Figure saved as {figfile}") From cc673febd033027164723fc8aeda51046f6db712 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 1 Apr 2023 22:43:31 -0700 Subject: [PATCH 21/81] change, better function names for dihedrals workflow module --- .../tests/test_automated_dihedral_analysis.py | 4 +-- mdpow/workflows/dihedrals.py | 26 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index a78f8186..dc9a3b1c 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -70,7 +70,7 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): df = dihedrals.dihedral_groups_ensemble(atom_indices=atom_group_indices, dirname=SM25_tmp_dir, solvents=('water',)) - df_aug = dihedrals.periodic_angle(df) + df_aug = dihedrals.periodic_angle_padding(df) return df, df_aug # fixture output, tuple: # dihedral_data[0]=df @@ -211,7 +211,7 @@ def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): # issue raised (#239) to identify and resolve exact package/version responsible @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' 'returns incorrect atom_indices for dihedral atom group selections') - def test_periodic_angle(self, dihedral_data): + def test_periodic_angle_padding(self, dihedral_data): _, df_aug = dihedral_data diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 4dbe36ec..1d940862 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -27,13 +27,15 @@ .. autofunction:: build_universe .. autofunction:: rdkit_conversion .. autofunction:: get_atom_indices -.. autofunction:: dihedral_groups +.. autofunction:: get_bond_indices +.. autofunction:: get_dihedral_groups +.. autofunction:: get_paired_indices .. autofunction:: dihedral_groups_ensemble .. autofunction:: save_df -.. autofunction:: periodic_angle +.. autofunction:: periodic_angle_padding .. autofunction:: dihedral_violins .. autofunction:: build_svg -.. autofunction:: plot_violins +.. autofunction:: plot_dihedral_violins """ #^need to update function list @@ -419,7 +421,7 @@ def save_df(df, df_save_dir, resname=None, molname=None): logger.info(f'Results DataFrame saved as {newdir}/{molname}_full_df.csv.bz2') -def periodic_angle(df, padding=45): +def periodic_angle_padding(df, padding=45): '''Pads the angles from the results :class:`~pandas.DataFrame` to maintain periodicity in the violin plots. @@ -456,7 +458,7 @@ def periodic_angle(df, padding=45): da = DihedralAnalysis(all_dihedrals) da.run(start=0, stop=100, step=10) df = da.results - df_aug = periodic_angle(df, padding=45) + df_aug = periodic_angle_padding(df, padding=45) plot = dihedral_violins(df_aug, width=0.9) ''' @@ -472,13 +474,13 @@ def periodic_angle(df, padding=45): def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): '''Plots distributions of dihedral angles for one dihedral atom group as violin plots, using as input the augmented :class:`pandas.DataFrame` - from :func:`~mdpow.workflows.dihedrals.periodic_angle`. + from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. :keywords: *df* augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle` + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) @@ -572,7 +574,7 @@ def build_svg(mol, molname=None, name=None, ab_pairs=None, return fig -def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, +def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0.9, plot_pdf_width=190, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and optionally saving figures for all dihedral atom groups. @@ -590,7 +592,7 @@ def plot_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, *df* augmented results :class:`pandas.DataFrame` from - :func:`~mdpow.workflows.dihedrals.periodic_angle` + :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *resname* `resname` for the molecule as defined in @@ -710,7 +712,7 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, value in degrees default: 45 - .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle` + .. seealso:: :func:`~mdpow.workflows.dihedrals.periodic_angle_padding` *width* width of the violin element (>1 overlaps) @@ -774,9 +776,9 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, if df_save_dir is not None: save_df(df=df, df_save_dir=df_save_dir, resname=resname, molname=molname) - df_aug = periodic_angle(df, padding=padding) + df_aug = periodic_angle_padding(df, padding=padding) - plot_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, + plot_dihedral_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) logger.info(f"DihedralAnalysis completed for all projects in {dirname}") From 348495dc945d43c192ee26df0386a3f754f263fc Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 1 Apr 2023 22:53:46 -0700 Subject: [PATCH 22/81] cleanup --- mdpow/workflows/dihedrals.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 1d940862..371c3483 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -38,7 +38,7 @@ .. autofunction:: plot_dihedral_violins """ -#^need to update function list + import os import pathlib import numpy as np @@ -637,7 +637,6 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, # DEFAULT: 190 mm = 718.110236229 pixels plot_pdf_width_px = plot_pdf_width * 3.7795275591 - # put this into separate function for name in section: fig = build_svg(mol=mol, molname=molname, name=name, ab_pairs=ab_pairs, From 4ad68bb2e6c10cc60520b25077e02d357e0264f6 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 2 Apr 2023 23:48:41 -0700 Subject: [PATCH 23/81] docs and cleanup, plot width docs, dict comprehension for ab_pairs --- mdpow/workflows/dihedrals.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 371c3483..1169d5d2 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -23,6 +23,8 @@ :annotation: = ('Coulomb', 'VDW') .. autodata:: SMARTS_DEFAULT :annotation: = [!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1] +.. autodata:: PLOT_WIDTH_DEFAULT + :annotation: = 190 .. autofunction:: automated_dihedral_analysis .. autofunction:: build_universe .. autofunction:: rdkit_conversion @@ -101,6 +103,14 @@ """ +PLOT_WIDTH_DEFAULT = 190 +"""Plot width (:code:`plot_pdf_width`) should be provided in millimeters (mm), + and is converted to pixels (px) for use with :mod:`cairosvg`. + + conversion factor: 1 mm = 3.7795275591 px + default value: 190 mm = 718.110236229 pixels +""" + def build_universe(dirname): """Builds :class:`~MDAnalysis.core.universe.Universe` from ``water/Coulomb/0000`` topology and trajectory for the specified project. @@ -296,12 +306,11 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): dg_list.append(group) all_dgs = tuple(dg_list) - if len(atom_indices) == len(bond_indices) == len(all_dgs): - ab_pairs = {} - i = 0 - while i < len(all_dgs): - ab_pairs[f'{all_dgs[i]}'] = (atom_indices[i], bond_indices[i]) - i += 1 + assert (len(atom_indices) == len(bond_indices) == len(all_dgs), + "atom_indices, bond_indices, and dihedral_groups are out of sync") + + ab_pairs = {} + ab_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} return ab_pairs @@ -575,7 +584,7 @@ def build_svg(mol, molname=None, name=None, ab_pairs=None, return fig def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, - width=0.9, plot_pdf_width=190, solvents=SOLVENTS_DEFAULT): + width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and optionally saving figures for all dihedral atom groups. @@ -614,6 +623,10 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + :returns: *violin plot* @@ -654,7 +667,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, resname=None, molname=None, - SMARTS=SMARTS_DEFAULT, plot_pdf_width=190, + SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, solvents=SOLVENTS_DEFAULT, interactions=INTERACTIONS_DEFAULT, @@ -725,6 +738,10 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + *start, stop, step* arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, as parameters for iterating through the trajectories of the current ensemble From 9609022ac62aa30234c09cd3e70a9d012e2c8515 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 00:04:07 -0700 Subject: [PATCH 24/81] cleanup and add list and dict comprehension methods for get_paired_indices function --- mdpow/workflows/dihedrals.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 1169d5d2..cb1edf06 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -300,16 +300,13 @@ def get_dihedral_groups(solute, atom_indices): def get_paired_indices(atom_indices, bond_indices, dihedral_groups): - dg_list = [] - for dg in dihedral_groups: - group = (f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}') - dg_list.append(group) - all_dgs = tuple(dg_list) + all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] assert (len(atom_indices) == len(bond_indices) == len(all_dgs), "atom_indices, bond_indices, and dihedral_groups are out of sync") ab_pairs = {} + # check Oliver's other comment for this ab_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} return ab_pairs @@ -557,9 +554,9 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): def build_svg(mol, molname=None, name=None, ab_pairs=None, solvents=SOLVENTS_DEFAULT, width=0.9): # atoms - a = list(ab_pairs[name[0]][0]) + a = ab_pairs[name[0]][0] # bonds - b = list(ab_pairs[name[0]][1]) + b = ab_pairs[name[0]][1] drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) From 42847f8043fa98ddde9f531fbe459c1eff2b98e5 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 00:34:22 -0700 Subject: [PATCH 25/81] intersphinx mapping --- doc/sphinx/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index a8000fbe..82f97694 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -248,6 +248,8 @@ 'https://www.rdkit.org/docs/': None, 'https://pandas.pydata.org/docs/': None, 'https://seaborn.pydata.org': None, + 'https://cairosvg.org/documentation/': None, + 'https://svgutils.readthedocs.io/en/latest/#': None, } From 60ec5225a29bebf6dde6e23fff449c5362107c30 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 00:41:50 -0700 Subject: [PATCH 26/81] remove assert method for notebook test --- mdpow/workflows/dihedrals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index cb1edf06..d2cb2b70 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -302,8 +302,8 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] - assert (len(atom_indices) == len(bond_indices) == len(all_dgs), - "atom_indices, bond_indices, and dihedral_groups are out of sync") + #assert len(atom_indices) == len(bond_indices) == len(all_dgs), + # "atom_indices, bond_indices, and dihedral_groups are out of sync" ab_pairs = {} # check Oliver's other comment for this From 52791617f93e7e47990f95dd5fb3beb909ca383f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 01:47:46 -0700 Subject: [PATCH 27/81] tests: new fixtures and tests for bond_indices and ab_pairs --- .../tests/test_automated_dihedral_analysis.py | 56 +++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index dc9a3b1c..ea4a2b0f 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -64,6 +64,20 @@ def atom_indices(self, SM25_tmp_dir, mol_sol_data): # atom_indices[0]=atom_group_indices # atom_indices[1]=atom_group_indices_alt + @pytest.fixture(scope="function") + def bond_indices(self, mol_sol_data, atom_indices): + mol, _ = mol_sol_data + aix, _ = atom_indices + bond_indices = dihedrals.get_bond_indices(mol=mol, atom_indices=aix) + return bond_indices + + @pytest.fixture(scope="function") + def dihedral_groups(self, mol_sol_data, atom_indices): + _, solute = mol_sol_data + aix, _ = atom_indices + dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=aix) + return dihedral_groups + @pytest.fixture(scope="function") def dihedral_data(self, SM25_tmp_dir, atom_indices): atom_group_indices, _ = atom_indices @@ -82,7 +96,29 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): (1, 2, 3, 4),(1, 12, 13, 14),(2, 3, 4, 5),(2, 3, 4, 9), (2, 1, 12, 13),(3, 2, 1, 12),(5, 4, 3, 11),(5, 4, 3, 10), (9, 4, 3, 11),(9, 4, 3, 10),(12, 13, 14, 15),(12, 13, 14, 19)) - + + check_bond_indices = ((8, 4, 2), (8, 9, 14), (4, 2, 0), (4, 2, 1), + (4, 2, 3), (9, 14, 21), (2, 3, 6), (2, 3, 7), + (4, 9, 14), (2, 4, 9), (6, 3, 0), (6, 3, 1), + (7, 3, 0), (7, 3, 1), (14, 21, 25), (14, 21, 26)) + + check_ab_pairs = {'O1-C2-N3-S4': ((0, 1, 2, 3), (8, 4, 2)), + 'O1-C2-C13-C14': ((0, 1, 12, 13), (8, 9, 14)), + 'C2-N3-S4-O12': ((1, 2, 3, 11), (4, 2, 0)), + 'C2-N3-S4-O11': ((1, 2, 3, 10), (4, 2, 1)), + 'C2-N3-S4-C5': ((1, 2, 3, 4), (4, 2, 3)), + 'C2-C13-C14-C15': ((1, 12, 13, 14), (9, 14, 21)), + 'N3-S4-C5-C6': ((2, 3, 4, 5), (2, 3, 6)), + 'N3-S4-C5-C10': ((2, 3, 4, 9), (2, 3, 7)), + 'N3-C2-C13-C14': ((2, 1, 12, 13), (4, 9, 14)), + 'S4-N3-C2-C13': ((3, 2, 1, 12), (2, 4, 9)), + 'C6-C5-S4-O12': ((5, 4, 3, 11), (6, 3, 0)), + 'C6-C5-S4-O11': ((5, 4, 3, 10), (6, 3, 1)), + 'C10-C5-S4-O12': ((9, 4, 3, 11), (7, 3, 0)), + 'C10-C5-S4-O11': ((9, 4, 3, 10), (7, 3, 1)), + 'C13-C14-C15-C16': ((12, 13, 14, 15), (14, 21, 25)), + 'C13-C14-C15-C20': ((12, 13, 14, 19), (14, 21, 26))} + # tuple-tuples of dihedral atom group indices # collected using alternate SMARTS input (explicitly defined) # see: fixture - atom_indices().atom_group_indices_alt @@ -147,6 +183,10 @@ def test_dihedral_indices(self, atom_indices): atom_group_indices, _ = atom_indices assert atom_group_indices == self.check_atom_group_indices + def test_bond_indices(self, bond_indices): + bix = bond_indices + assert bix == self.check_bond_indices + # the following 'reason' affects every downstream function that relies # on the atom indices returned for dihedral atom group selections # issue raised (#239) to identify and resolve exact package/version responsible @@ -161,15 +201,21 @@ def test_SMARTS(self, atom_indices): # issue raised (#239) to identify and resolve exact package/version responsible @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' 'returns incorrect atom_indices for dihedral atom group selections') - def test_dihedral_groups(self, SM25_tmp_dir, mol_sol_data, atom_indices): - _, solute = mol_sol_data - atom_group_indices, _ = atom_indices - groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=atom_group_indices) + def test_dihedral_groups(self, dihedral_groups): + groups = dihedral_groups i = 0 while i < len(groups): assert groups[i].all() == self.check_groups[i].all() i+=1 + def test_ab_pairs(self, atom_indices, bond_indices, dihedral_groups): + aix, _ = atom_indices + bix = bond_indices + groups = dihedral_groups + ab_pairs = dihedrals.get_paired_indices(atom_indices=aix, bond_indices=bix, + dihedral_groups=groups) + assert ab_pairs == self.check_ab_pairs + # the following 'reason' affects every downstream function that relies # on the atom indices returned for dihedral atom group selections # issue raised (#239) to identify and resolve exact package/version responsible From 181c012e69ff49d490170363a68bfc73c511b92d Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 02:00:37 -0700 Subject: [PATCH 28/81] tests: new fixtures and tests for bond_indices and ab_pairs, skip 3.7 --- mdpow/tests/test_automated_dihedral_analysis.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index ea4a2b0f..31022663 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -183,6 +183,11 @@ def test_dihedral_indices(self, atom_indices): atom_group_indices, _ = atom_indices assert atom_group_indices == self.check_atom_group_indices + # the following 'reason' affects every downstream function that relies + # on the atom indices returned for dihedral atom group selections + # issue raised (#239) to identify and resolve exact package/version responsible + @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' + 'returns incorrect atom_indices for dihedral atom group selections') def test_bond_indices(self, bond_indices): bix = bond_indices assert bix == self.check_bond_indices @@ -208,6 +213,11 @@ def test_dihedral_groups(self, dihedral_groups): assert groups[i].all() == self.check_groups[i].all() i+=1 + # the following 'reason' affects every downstream function that relies + # on the atom indices returned for dihedral atom group selections + # issue raised (#239) to identify and resolve exact package/version responsible + @pytest.mark.skipif(sys.version_info < (3, 8), reason='pytest=7.2.0, build=py37h89c1867_0, ' + 'returns incorrect atom_indices for dihedral atom group selections') def test_ab_pairs(self, atom_indices, bond_indices, dihedral_groups): aix, _ = atom_indices bix = bond_indices From a69f4da655b170eeb4682e4518b9a783ece989e5 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 02:08:42 -0700 Subject: [PATCH 29/81] test_build_universe method --- mdpow/tests/test_automated_dihedral_analysis.py | 3 +-- mdpow/workflows/dihedrals.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 31022663..edff4239 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -171,8 +171,7 @@ def test_build_universe(self, SM25_tmp_dir): u = dihedrals.build_universe(dirname=SM25_tmp_dir) solute = u.select_atoms('resname UNK') solute_names = solute.atoms.names - assert solute_names.all() == self.universe_solute_atom_names.all() - #change this method + assert solute_names == self.universe_solute_atom_names # the following 'reason' affects every downstream function that relies # on the atom indices returned for dihedral atom group selections diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index d2cb2b70..fbef41ed 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -104,7 +104,7 @@ """ PLOT_WIDTH_DEFAULT = 190 -"""Plot width (:code:`plot_pdf_width`) should be provided in millimeters (mm), +"""Plot width (`plot_pdf_width`) should be provided in millimeters (mm), and is converted to pixels (px) for use with :mod:`cairosvg`. conversion factor: 1 mm = 3.7795275591 px From 4df1d88c430250cbad8d6e4069121294b2d67025 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 02:15:01 -0700 Subject: [PATCH 30/81] confirm build universe test --- mdpow/tests/test_automated_dihedral_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index edff4239..bf28f8aa 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -171,7 +171,7 @@ def test_build_universe(self, SM25_tmp_dir): u = dihedrals.build_universe(dirname=SM25_tmp_dir) solute = u.select_atoms('resname UNK') solute_names = solute.atoms.names - assert solute_names == self.universe_solute_atom_names + assert solute_names.all() == self.universe_solute_atom_names.all() # the following 'reason' affects every downstream function that relies # on the atom indices returned for dihedral atom group selections From c53cd1a210e4b3e1ccffb33fb4d3d91a5a7cda25 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Apr 2023 02:33:31 -0700 Subject: [PATCH 31/81] fix df input test --- mdpow/tests/test_automated_dihedral_analysis.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index bf28f8aa..117be0ef 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -30,6 +30,7 @@ MANIFEST = RESOURCES / "manifest.yml" resname = "UNK" +molname = "SM25" @pytest.fixture(scope="function") def molname_workflows_directory(tmp_path, molname='SM25'): @@ -302,16 +303,18 @@ def test_save_fig_info(self, SM25_tmp_dir, caplog): solvents=('water',)) assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' - #def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): - # df, _ = dihedral_data - # dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - # resname=resname, solvents=('water',), dataframe=df) - # assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' + def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): + df, _ = dihedral_data + dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, + resname=resname, molname=molname, + solvents=('water',), dataframe=df) + assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') df, _ = dihedral_data dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, - resname=resname, solvents=('water',), dataframe=df) + resname=resname, molname=molname, + solvents=('water',), dataframe=df) assert 'Proceeding with results DataFrame provided.' in caplog.text, 'No dataframe provided or dataframe not recognized' From 8135b51aea7b286a404535bf572a81303a1745f9 Mon Sep 17 00:00:00 2001 From: Cade Duckworth <104414304+cadeduckworth@users.noreply.github.com> Date: Mon, 3 Apr 2023 06:06:26 -0500 Subject: [PATCH 32/81] Update dihedrals.py cleanup '#' inline comments --- mdpow/workflows/dihedrals.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index fbef41ed..805fd0f9 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -302,11 +302,7 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] - #assert len(atom_indices) == len(bond_indices) == len(all_dgs), - # "atom_indices, bond_indices, and dihedral_groups are out of sync" - ab_pairs = {} - # check Oliver's other comment for this ab_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} return ab_pairs @@ -510,13 +506,10 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): "interaction", "lambda"]).reset_index(drop=True) - # number of Coul windows + 1 / number of VDW windows - # (+1 for additional space with axes) width_ratios = [len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) + 1, len(df[df['interaction'] == "VDW"]["lambda"].unique()), len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] - # this method allows for one solvent to be plotted correctly solvs = list(solvents) if len(solvs) < 2: solvs.append('N/A') @@ -574,7 +567,6 @@ def build_svg(mol, molname=None, name=None, ab_pairs=None, p = plot_svg.getroot() p.scale(0.02) - # 28cm leaves room for lengthier solvent names in the legend # order matters here, plot down first, mol on top (p, m) fig = svgutils.compose.Figure("28cm", "4.2cm", p, m) @@ -643,8 +635,6 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, section = df.groupby(by="selection") - # conversion factor: 1 mm = 3.7795275591 px - # DEFAULT: 190 mm = 718.110236229 pixels plot_pdf_width_px = plot_pdf_width * 3.7795275591 for name in section: From bf1b05dd42a7581ce797f929c983eed2461a4b8d Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 21:34:40 -0700 Subject: [PATCH 33/81] remove unused kwargs and update docs for new functions --- mdpow/workflows/dihedrals.py | 145 +++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 68 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 805fd0f9..debacf8f 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -115,7 +115,7 @@ def build_universe(dirname): """Builds :class:`~MDAnalysis.core.universe.Universe` from ``water/Coulomb/0000`` topology and trajectory for the specified project. - Used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` + Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices for each dihedral atom group. @@ -194,32 +194,18 @@ def rdkit_conversion(u, resname): return mol, solute -def get_atom_indices(dirname, mol, SMARTS=SMARTS_DEFAULT): - '''Uses a SMARTS selection string to identify indices for - relevant dihedral atom groups. +def get_atom_indices(mol, SMARTS=SMARTS_DEFAULT): + '''Uses a SMARTS selection string to identify atom indices + for relevant dihedral atom groups. - Requires an MDPOW project directory and `resname` - as input. With :func:`~mdpow.workflows.dihedrals.build_universe` and - :func:`~mdpow.workflows.dihedrals.rdkit_conversion`, uses the topology - and trajectory from ``water/Coulomb/0000`` and creates a - :class:`~MDAnalysis.core.universe.Universe` object. - Uses a SMARTS string to obtain all relevant dihedral atom groups. + Requires a :class:`rdkit.Chem.rdchem.Mol` object as input + for the :data:`SMARTS_DEFAULT` kwarg to match patterns to and + identify relevant dihedral atom groups. :keywords: - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. - - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. @@ -237,6 +223,24 @@ def get_atom_indices(dirname, mol, SMARTS=SMARTS_DEFAULT): return atom_indices def get_bond_indices(mol, atom_indices): + '''From the :class:`rdkit.Chem.rdchem.Mol` object, uses + `atom_indices` to determine the indices of the bonds + between those atoms for each dihedral atom group. + + :keywords: + + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + + *atom_indices* + tuple of tuples of indices for each dihedral atom group + + :returns: + + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group + + ''' bonds = [] @@ -254,38 +258,24 @@ def get_bond_indices(mol, atom_indices): return bond_indices def get_dihedral_groups(solute, atom_indices): - '''Uses the indices of the relevant dihedral atom groups determined - by :func:`~mdpow.workflows.dihedral.get_atom_indices` - and returns the names for each atom in each group. - - Requires an MDPOW project directory and `resname` - as input. Expands upon usage of - :func:`~mdpow.workflows.dihedral.get_atom_indices` - to return an array of the names of each atom within - its respective dihedral atom group as identified by - the SMARTS selection string. Not necessary - for automation, useful for verifying all atom groups of interest - are properly identified before analysis. + '''Uses the 0-based `atom_indices` of the relevant dihedral atom groups + determined by :func:`~mdpow.workflows.dihedral.get_atom_indices` + and returns the 1-based index names for each atom in each group. + + Requires the `atom_indices` from :func:`~mdpow.workflows.dihedral.get_atom_indices` + to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` + and return an array of the names of each atom within its respective + dihedral atom group as identified by the SMARTS selection string. :keywords: - *dirname* - Molecule Simulation directory. Loads simulation files present in - lambda directories into the new instance. With this method for - generating an :class:`~mdpow.analysis.ensemble.Ensemble` the - lambda directories are explored and - :meth:`~mdpow.analysis.ensemble.Ensemble._load_universe_from_dir` - searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, - and .xtc files for trajectory. It will default to using the tpr file - available. + *solute* + molecule specified by :func:`~MDAnalysis.core.groups.select_atoms` + for :class:`~MDAnalysis.core.universe.Universe` object - *resname* - `resname` for the molecule as defined in - the topology and trajectory + *atom_indices* + tuple of tuples of indices for each dihedral atom group - *SMARTS* - The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. - :returns: *dihedral_groups* @@ -299,6 +289,33 @@ def get_dihedral_groups(solute, atom_indices): return dihedral_groups def get_paired_indices(atom_indices, bond_indices, dihedral_groups): + '''Combines `atom_indices` and `bond_indices` in tuples + to be paired with their respective `dihedral_groups` in + :func:`~MDAnalysis.core.groups.select_atoms` selection string format. + + A dictionary is created with key-value pairs as follows: + `atom_indices` and `bond_indices` are joined in a tuple + as the value, with the key being the respective member + of `dihedral_groups` to facilitate highlighting the + relevant dihedral atom group when generating violin plots. + + :keywords: + + *atom_indices* + tuple of tuples of indices for each dihedral atom group + + *bond_indices* + tuple of tuples of indices for the bonds in each dihedral atom group + + *dihedral_groups* + list of :func:`numpy.array` for atom names in each dihedral atom group + + :returns: + + *ab_pairs* + dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + + ''' all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] @@ -336,7 +353,7 @@ def dihedral_groups_ensemble(dirname, atom_indices, *atom_indices* tuples of atom indices for dihedral atom groups - .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices` + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_atom_indices`, :data:`SMARTS_DEFAULT` *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. @@ -373,7 +390,7 @@ def dihedral_groups_ensemble(dirname, atom_indices, return df -def save_df(df, df_save_dir, resname=None, molname=None): +def save_df(df, df_save_dir, resname, molname=None): '''Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` as input before padding the angles to optionaly save the raw @@ -429,13 +446,15 @@ def periodic_angle_padding(df, padding=45): Takes a :class:`pandas.DataFrame` of results from :class:`~mdpow.analysis.dihedral.DihedralAnalysis` + or :func:`~mdpow.workflows.dihedrals.dihedral_groups_ensemble` as input and pads the angles to maintain periodicity for properly plotting dihedral angle frequencies as KDE violins - with :func:`~mdpow.workflows.dihedrals.dihedral_violins`. + with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and + :func:`~mdpow.workflows.dihedrals.plot_violins`. Creates two new :class:`pandas.DataFrame` based on the - cutoff value specified, adds to the angle values, concatenates + `padding` value specified, pads the angle values, concatenates all three :class:`pandas.DataFrame`, maintaining original data and - adding padding, and returns new augmented :class:`pandas.DataFrame`. + adding padded values, and returns new augmented :class:`pandas.DataFrame`. :keywords: @@ -444,7 +463,7 @@ def periodic_angle_padding(df, padding=45): results, including all dihedral atom groups for molecule of current project *padding* - value in degrees + value in degrees to specify angle augmentation threshold default: 45 :returns: @@ -453,16 +472,6 @@ def periodic_angle_padding(df, padding=45): augmented results :class:`pandas.DataFrame` containing padded dihedral angles as specified by `padding` - .. rubric:: Example - - Typical Workflow:: - - da = DihedralAnalysis(all_dihedrals) - da.run(start=0, stop=100, step=10) - df = da.results - df_aug = periodic_angle_padding(df, padding=45) - plot = dihedral_violins(df_aug, width=0.9) - ''' df1 = df[df.dihedral > 180 - padding].copy(deep=True) @@ -746,9 +755,9 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, Typical Workflow:: - import automated_dihedral_analysis as ada + import dihedrals - ada.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', + dihedrals.automated_dihedral_analysis(dirname='/foo/bar/MDPOW_project_data', figdir='/foo/bar/MDPOW_figure_directory', resname='UNK', molname='benzene', padding=45, width=0.9, From 97267ac72a1b09e0672f391b8aabc39b84131abd Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 22:37:31 -0700 Subject: [PATCH 34/81] rewrite docs to cover new functions and kwarg changes --- mdpow/workflows/dihedrals.py | 144 ++++++++++++++++++++++------------- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index debacf8f..5a7d4742 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -483,9 +483,12 @@ def periodic_angle_padding(df, padding=45): return df_aug def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): - '''Plots distributions of dihedral angles for one dihedral atom group - as violin plots, using as input the augmented :class:`pandas.DataFrame` - from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. + '''Plots kernel density estimates (KDE) of dihedral angle frequencies for + one dihedral atom group as violin plots, using as input the augmented + :class:`pandas.DataFrame` from :func:`~mdpow.workflows.dihedrals.periodic_angle_padding`. + + Output is converted to SVG by :func:`~mdpow.workflows.dihedrals.build_svg` + and final output is saved as PDF by :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` :keywords: @@ -499,13 +502,20 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + + *plot_title* + format: f'{molname}, {name[0]} {a} | ''{col_name}' + determined by, + + .. seealso:: :func:`~mdpow.workflows.dihedrals.build_svg` + :returns: - *violin plot* + *g (violin plot)* returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project + kernel density estimates (KDE) of the dihedral angle frequencies for each + dihedral atom group identified by :data:`SMARTS_DEFAULT` ''' @@ -553,12 +563,50 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): return g -def build_svg(mol, molname=None, name=None, ab_pairs=None, +def build_svg(mol, molname, ab_pairs, atom_group_selection, solvents=SOLVENTS_DEFAULT, width=0.9): + '''Converts and combines figure components into an + SVG object to be converted and saved as a publication + quality PDF. + + :keywords: + + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + + *molname* + molecule name to be used for labelling + plots, if different from `resname` + (in this case, carried over from an upstream + decision between the two) + + *ab_pairs* + dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + + *atom_group_selection* + `name` of each section in the `groupby` series of atom group selections + + .. seealso:: :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins` + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + + *width* + width of the violin element (>1 overlaps) + default: 0.9 + + :returns: + + *fig* + :mod:`svgutils` SVG figure object + + ''' # atoms - a = ab_pairs[name[0]][0] + a = ab_pairs[atom_group_selection[0]][0] # bonds - b = ab_pairs[name[0]][1] + b = ab_pairs[atom_group_selection[0]][1] drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) @@ -569,8 +617,8 @@ def build_svg(mol, molname=None, name=None, ab_pairs=None, m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) - plot_title = f'{molname}, {name[0]} {a} | ''{col_name}' - plot = dihedral_violins(name[1], width=width, solvents=solvents, plot_title=plot_title) + plot_title = f'{molname}, {atom_group_selection[0]} {a} | ''{col_name}' + plot = dihedral_violins(atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title) plot_svg = svgutils.transform.from_mpl(plot) p = plot_svg.getroot() @@ -581,19 +629,19 @@ def build_svg(mol, molname=None, name=None, ab_pairs=None, return fig -def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, +def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir, molname=None, width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): - '''Coordinates plotting and optionally saving figures for all dihedral - atom groups. + '''Coordinates plotting and saving figures for all dihedral atom groups. - Makes a subdirectory within the specified - `figdir` using `resname` or `molname` provided and saves violin plot - figur for each dihedral atom group separately. + Makes a subdirectory for the current project within the specified + `figdir` using `resname` or `molname` as title and saves production + quality PDFs for each dihedral atom group separately. .. seealso:: :func:`~mdpow.workflows.dihedrals.automated_dihedral_analysis`, - :func:`~mdpow.workflows.dihedrals.dihedral_violins` + :func:`~mdpow.workflows.dihedrals.dihedral_violins`, + :func:`~mdpow.workflows.dihedrals.build_svg` :keywords: @@ -605,8 +653,16 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, `resname` for the molecule as defined in the topology and trajectory + *mol* + :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` + + *ab_pairs* + dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + + .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` + *figdir* - optional, path to the location to save figures + path to the location to save figures *molname* molecule name to be used for labelling @@ -618,19 +674,12 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, .. seealso:: :func:`~mdpow.workflows.dihedrals.dihedral_violins` - *solvents* - The default solvents are documented under :data:`SOLVENTS_DEFAULT`. - *plot_pdf_width* The default value for width of plot output is described in detail under :data:`PLOT_WIDTH_DEFAULT`. - :returns: - - *violin plot* - returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. ''' @@ -648,7 +697,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, for name in section: - fig = build_svg(mol=mol, molname=molname, name=name, ab_pairs=ab_pairs, + fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, solvents=solvents, width=width) figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" @@ -661,8 +710,8 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, return None -def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, - resname=None, molname=None, +def automated_dihedral_analysis(dirname, figdir, resname, + df_save_dir=None, molname=None, SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, solvents=SOLVENTS_DEFAULT, @@ -674,13 +723,11 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, For one MDPOW project, automatically determines all relevant dihedral atom groups in the molecule, runs :class:`~mdpow.analysis.dihedral.DihedralAnalysis` for each group, - pads the dihedral angles from analysis results for all groups to maintain periodicity, - creates violin plots of dihedral angle frequencies for each group, separately, from the - padded results. + pads the dihedral angles to maintain periodicity, creates violin plots of dihedral angle + frequencies (KDEs), and saves publication quality PDF figures for each group, separately. Optionally saves all pre-padded :class:`~mdpow.analysis.dihedral.DihedralAnalysis` results - as a single :class:`pandas.DataFrame`, and separate violin plots for each dihedral atom group - in `df_save_dir`, and `figdir` directories provided, respectively. + as a single :class:`pandas.DataFrame` in `df_save_dir` provided. :keywords: @@ -694,23 +741,27 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, and .xtc files for trajectory. It will default to using the tpr file available. - *df_save_dir* - optional, path to the location to save results :class:`pandas.DataFrame` - *figdir* - optional, path to the location to save figures + path to the location to save figures *resname* `resname` for the molecule as defined in the topology and trajectory + *df_save_dir* + optional, path to the location to save results :class:`pandas.DataFrame` + *molname* molecule name to be used for labelling plots, if different from `resname` - + *SMARTS* The default SMARTS string is described in detail under :data:`SMARTS_DEFAULT`. + *plot_pdf_width* + The default value for width of plot output is described in detail under + :data:`PLOT_WIDTH_DEFAULT`. + *dataframe* optional, if :class:`~mdpow.analysis.dihedral.DihedralAnalysis` was done prior, then results :class:`pandas.DataFrame` can be @@ -734,23 +785,12 @@ def automated_dihedral_analysis(dirname=None, df_save_dir=None, figdir=None, *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. - *plot_pdf_width* - The default value for width of plot output is described in detail under - :data:`PLOT_WIDTH_DEFAULT`. - *start, stop, step* arguments passed to :func:`~mdpow.analysis.ensemble.EnsembleAnalysis.run`, as parameters for iterating through the trajectories of the current ensemble .. seealso:: :class:`~mdpow.analysis.ensemble.EnsembleAnalysis` - :returns: - - *violin plot* - returns a :class:`seaborn.FacetGrid` object containing a violin plot of the - kernel density estimations (KDE) of the dihedral angle frequencies for each - relevant dihedral atom group in the molecule from the current MDPOW project - .. rubric:: Example Typical Workflow:: From a25c97d1688639d1ce3c56e4963ac763c316f38a Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:01:37 -0700 Subject: [PATCH 35/81] fix tests to accommodate kwarg updates in dihedrals module --- mdpow/tests/test_automated_dihedral_analysis.py | 11 +++++------ mdpow/workflows/dihedrals.py | 5 +++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 117be0ef..bcb36174 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -52,14 +52,13 @@ def mol_sol_data(self, SM25_tmp_dir): return mol, solute @pytest.fixture(scope="function") - def atom_indices(self, SM25_tmp_dir, mol_sol_data): + def atom_indices(self, mol_sol_data): mol, _ = mol_sol_data - atom_group_indices = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, mol=mol) + atom_group_indices = dihedrals.get_atom_indices(mol=mol) # testing optional user input of alternate SMARTS string # for automated dihedral atom group selection - atom_group_indices_alt = dihedrals.get_atom_indices(dirname=SM25_tmp_dir, mol=mol, - SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') + atom_group_indices_alt = dihedrals.get_atom_indices(mol=mol, SMARTS='[!$(*#*)&!D1]-!@[!$(*#*)&!D1]') return atom_group_indices, atom_group_indices_alt # fixture output, tuple: # atom_indices[0]=atom_group_indices @@ -252,14 +251,14 @@ def test_dihedral_groups_ensemble(self, dihedral_data): def test_save_df(self, dihedral_data, SM25_tmp_dir): df, _ = dihedral_data - dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, molname='SM25') + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') assert (SM25_tmp_dir / 'SM25' / 'SM25_full_df.csv.bz2').exists(), 'Compressed csv file not saved' def test_save_df_info(self, dihedral_data, SM25_tmp_dir, caplog): df, _ = dihedral_data caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') - dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, molname='SM25') + dihedrals.save_df(df=df, df_save_dir=SM25_tmp_dir, resname='UNK', molname='SM25') assert f'Results DataFrame saved as {SM25_tmp_dir}/SM25/SM25_full_df.csv.bz2' in caplog.text, 'Save location not logged or returned' # the following 'reason' affects every downstream function that relies diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 5a7d4742..6c1593fe 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -808,7 +808,7 @@ def automated_dihedral_analysis(dirname, figdir, resname, u = build_universe(dirname=dirname) mol, solute = rdkit_conversion(u=u, resname=resname) - atom_indices = get_atom_indices(dirname=dirname, mol=mol, SMARTS=SMARTS) + atom_indices = get_atom_indices(mol=mol, SMARTS=SMARTS) bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, @@ -821,7 +821,7 @@ def automated_dihedral_analysis(dirname, figdir, resname, else: - df = dihedral_groups_ensemble(atom_indices=atom_indices, dirname=dirname, + df = dihedral_groups_ensemble(dirname=dirname, atom_indices=atom_indices, solvents=solvents, interactions=interactions, start=start, stop=stop, step=step) @@ -830,6 +830,7 @@ def automated_dihedral_analysis(dirname, figdir, resname, df_aug = periodic_angle_padding(df, padding=padding) + #kwargs in weird order plot_dihedral_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) From 9175fda19348e3503b31cde47f7477e5e3b953d3 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:16:44 -0700 Subject: [PATCH 36/81] explanation of why figdir is a kwarg at top level of dihedrals module but a positional argument elsewhere - workflows base **kwargs, issue #244, see in-line comment in dihedrals.py --- mdpow/workflows/dihedrals.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 6c1593fe..1bcf4017 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -710,7 +710,15 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir, molname=None, return None -def automated_dihedral_analysis(dirname, figdir, resname, + #figdir=None is a temporary way to satisfy + #workflows base tests until issue #244 is resolved + #because it currently uses a **kwargs convention and the + #positional argument figdir will not carry over nicely, + #or requires changes that will result in wasted time + + #for this current version, figdir is required and will + #cause issues if not specified +def automated_dihedral_analysis(dirname, figdir=None, resname, df_save_dir=None, molname=None, SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, From 45cf29d8239439c738ec8083cb81b5adffafe76f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:18:27 -0700 Subject: [PATCH 37/81] reorder positional and key word args --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 1bcf4017..042cd7d0 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -718,7 +718,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir, molname=None, #for this current version, figdir is required and will #cause issues if not specified -def automated_dihedral_analysis(dirname, figdir=None, resname, +def automated_dihedral_analysis(dirname, resname, figdir=None, df_save_dir=None, molname=None, SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, From 71da645c3479ef2d43e5429f92e22ab35a00ed39 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:33:46 -0700 Subject: [PATCH 38/81] accommodation for figdir kwarg --- mdpow/tests/test_workflows_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index bf259048..dcf82993 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -67,14 +67,15 @@ def test_project_paths_csv_input(self, csv_input_data): pd.testing.assert_frame_equal(project_paths, csv_df) - def test_automated_project_analysis(self, project_paths_data, caplog): + def test_automated_project_analysis(self, SM_tmp_dir, project_paths_data, caplog): project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use project_paths['resname'] = 'UNK' base.automated_project_analysis(project_paths, solvents=('water',), - ensemble_analysis='DihedralAnalysis') + ensemble_analysis='DihedralAnalysis', + figdir=SM_tmp_dir) assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' 'did not iteratively run to completion for the provided project') From 2c21a7c1986fe916505572bb14b3635f4f44844f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:34:02 -0700 Subject: [PATCH 39/81] docs typo --- mdpow/workflows/dihedrals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 042cd7d0..df85cdc3 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -259,10 +259,10 @@ def get_bond_indices(mol, atom_indices): def get_dihedral_groups(solute, atom_indices): '''Uses the 0-based `atom_indices` of the relevant dihedral atom groups - determined by :func:`~mdpow.workflows.dihedral.get_atom_indices` + determined by :func:`~mdpow.workflows.dihedrals.get_atom_indices` and returns the 1-based index names for each atom in each group. - Requires the `atom_indices` from :func:`~mdpow.workflows.dihedral.get_atom_indices` + Requires the `atom_indices` from :func:`~mdpow.workflows.dihedrals.get_atom_indices` to index the `solute` specified by :func:`~MDAnalysis.core.groups.select_atoms` and return an array of the names of each atom within its respective dihedral atom group as identified by the SMARTS selection string. From 83c4c639f534b7e202d5192a9efcbd27bf8f82c0 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 14 Apr 2023 23:44:40 -0700 Subject: [PATCH 40/81] docs typos --- mdpow/workflows/dihedrals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index df85cdc3..bff213ee 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -204,7 +204,7 @@ def get_atom_indices(mol, SMARTS=SMARTS_DEFAULT): :keywords: - *mol* + *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *SMARTS* @@ -450,7 +450,7 @@ def periodic_angle_padding(df, padding=45): as input and pads the angles to maintain periodicity for properly plotting dihedral angle frequencies as KDE violins with :func:`~mdpow.workflows.dihedrals.dihedral_violins` and - :func:`~mdpow.workflows.dihedrals.plot_violins`. + :func:`~mdpow.workflows.dihedrals.plot_dihedral_violins`. Creates two new :class:`pandas.DataFrame` based on the `padding` value specified, pads the angle values, concatenates all three :class:`pandas.DataFrame`, maintaining original data and From 60636b41bc99a6c2b49b0eeee0e79774ed84282f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 15 Apr 2023 00:09:56 -0700 Subject: [PATCH 41/81] temporary fix for figdir issue which should currently be a positional argument, but would require redundant rewrite of workflows base module, pending issue #244 --- mdpow/tests/test_workflows_base.py | 5 ++--- mdpow/workflows/dihedrals.py | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index dcf82993..35b24659 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -67,15 +67,14 @@ def test_project_paths_csv_input(self, csv_input_data): pd.testing.assert_frame_equal(project_paths, csv_df) - def test_automated_project_analysis(self, SM_tmp_dir, project_paths_data, caplog): + def test_automated_project_analysis(self, project_paths_data, caplog): project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use project_paths['resname'] = 'UNK' base.automated_project_analysis(project_paths, solvents=('water',), - ensemble_analysis='DihedralAnalysis', - figdir=SM_tmp_dir) + ensemble_analysis='DihedralAnalysis') assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' 'did not iteratively run to completion for the provided project') diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index bff213ee..3f6e9615 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -629,7 +629,7 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, return fig -def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir, molname=None, +def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and saving figures for all dihedral atom groups. @@ -691,22 +691,23 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir, molname=None, newdir = os.path.join(figdir, subdir) os.mkdir(newdir) - section = df.groupby(by="selection") + section = df.groupby(by="selection") - plot_pdf_width_px = plot_pdf_width * 3.7795275591 + plot_pdf_width_px = plot_pdf_width * 3.7795275591 - for name in section: + for name in section: - fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, - solvents=solvents, width=width) + fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, + solvents=solvents, width=width) - figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" - plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), - output_width=plot_pdf_width_px) + figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" + if figdir is not None: + plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), + output_width=plot_pdf_width_px) - logger.info(f"Figure saved as {figfile}") + logger.info(f"Figure saved as {figfile}") - logger.info(f"All figures generated and saved in {figdir}") + logger.info(f"All figures generated and saved in {figdir}") return None From 473abaf081504baec9d5462c71467f359f3c9468 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 15 Apr 2023 00:45:43 -0700 Subject: [PATCH 42/81] upcoming CHANGES --- CHANGES | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES b/CHANGES index e1166f48..71aee90f 100644 --- a/CHANGES +++ b/CHANGES @@ -23,6 +23,9 @@ Changes Enhancements +* convert figure components to SVG and save as scalable PDFs (#243) +* add RDKit mol object to dihedrals plot with dihedral atom indices + labeled and dihedral atom group bonds highlighted (#243) * new workflows registry that contains each EnsembleAnalysis for which a workflows module exists, for use with workflows base module (#229) * new workflows base module that provides iterative workflow use for From 6660c98b7dc92e5334941af9feea37681cfc47db Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 9 Jun 2023 04:39:12 +0000 Subject: [PATCH 43/81] update to specify where enhancements are occurring --- CHANGES | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 71aee90f..daea607e 100644 --- a/CHANGES +++ b/CHANGES @@ -23,9 +23,11 @@ Changes Enhancements -* convert figure components to SVG and save as scalable PDFs (#243) -* add RDKit mol object to dihedrals plot with dihedral atom indices - labeled and dihedral atom group bonds highlighted (#243) +* convert figure components to SVG and save as scalable PDFs + for workflows dihedrals module (#243) +* add RDKit mol object to dihedrals plot with dihedral atom + indices labeled and dihedral atom group bonds highlighted + for workflows dihedrals module (#243) * new workflows registry that contains each EnsembleAnalysis for which a workflows module exists, for use with workflows base module (#229) * new workflows base module that provides iterative workflow use for From 554bdd386a74eb730c1e04ed2a3378a5fe66bcd9 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 9 Jun 2023 04:47:01 +0000 Subject: [PATCH 44/81] remove dafault scope specification for defined functions --- mdpow/tests/test_automated_dihedral_analysis.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index bcb36174..3f0f21b4 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -32,7 +32,7 @@ resname = "UNK" molname = "SM25" -@pytest.fixture(scope="function") +@pytest.fixture def molname_workflows_directory(tmp_path, molname='SM25'): m = pybol.Manifest(str(MANIFEST)) m.assemble('workflows', tmp_path) @@ -40,18 +40,18 @@ def molname_workflows_directory(tmp_path, molname='SM25'): class TestAutomatedDihedralAnalysis(object): - @pytest.fixture(scope="function") + @pytest.fixture def SM25_tmp_dir(self, molname_workflows_directory): dirname = molname_workflows_directory return dirname - @pytest.fixture(scope="function") + @pytest.fixture def mol_sol_data(self, SM25_tmp_dir): u = dihedrals.build_universe(dirname=SM25_tmp_dir) mol, solute = dihedrals.rdkit_conversion(u=u, resname=resname) return mol, solute - @pytest.fixture(scope="function") + @pytest.fixture def atom_indices(self, mol_sol_data): mol, _ = mol_sol_data atom_group_indices = dihedrals.get_atom_indices(mol=mol) @@ -64,21 +64,21 @@ def atom_indices(self, mol_sol_data): # atom_indices[0]=atom_group_indices # atom_indices[1]=atom_group_indices_alt - @pytest.fixture(scope="function") + @pytest.fixture def bond_indices(self, mol_sol_data, atom_indices): mol, _ = mol_sol_data aix, _ = atom_indices bond_indices = dihedrals.get_bond_indices(mol=mol, atom_indices=aix) return bond_indices - @pytest.fixture(scope="function") + @pytest.fixture def dihedral_groups(self, mol_sol_data, atom_indices): _, solute = mol_sol_data aix, _ = atom_indices dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=aix) return dihedral_groups - @pytest.fixture(scope="function") + @pytest.fixture def dihedral_data(self, SM25_tmp_dir, atom_indices): atom_group_indices, _ = atom_indices df = dihedrals.dihedral_groups_ensemble(atom_indices=atom_group_indices, From 9de6a4d12a340c9760b25082efe869abe85c280b Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 9 Jun 2023 05:13:07 +0000 Subject: [PATCH 45/81] reimplement try/except method for rdkit conversion topology element guessing --- mdpow/workflows/dihedrals.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 3f6e9615..4808ec24 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -180,13 +180,16 @@ def rdkit_conversion(u, resname): """ - elements = [guess_atom_element(name) for name in u.atoms.names] - u.add_TopologyAttr("elements", elements) + try: + solute = u.select_atoms(f'resname {resname}') + mol = solute.convert_to('RDKIT') + except AttributeError: + elements = [guess_atom_element(name) for name in u.atoms.names] + u.add_TopologyAttr("elements", elements) + solute = u.select_atoms(f'resname {resname}') + mol = solute.convert_to('RDKIT') - solute = u.select_atoms(f'resname {resname}') solute.unwrap(compound='residues', reference='com') - - mol = solute.convert_to('RDKIT') rdCoordGen.AddCoords(mol) for atom in mol.GetAtoms(): From d62061975713bbad920ea7fc02772b1730469d6b Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 07:32:58 +0000 Subject: [PATCH 46/81] generate combined plots pdf for automated dihedral analysis --- mdpow/workflows/dihedrals.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 4808ec24..d890da7a 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -46,6 +46,7 @@ import numpy as np import pandas as pd +import pypdf import cairosvg import svgutils import svgutils.compose @@ -698,6 +699,9 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, plot_pdf_width_px = plot_pdf_width * 3.7795275591 + # create PDF file list + pdf_list = [] + for name in section: fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, @@ -707,11 +711,23 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, if figdir is not None: plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), output_width=plot_pdf_width_px) + + # add PDF for each dihedral atom group to all_PDFs list + pdf_list.append(f'{figfile}') logger.info(f"Figure saved as {figfile}") logger.info(f"All figures generated and saved in {figdir}") + # create a combined PDF for all dihedral atom groups + merger = pypdf.PdfWriter() + + for pdf in pdf_list: + merger.append(pdf) + # name chosen so that this is the first PDF file in the figure directory + merger.write(f"{figdir}/{molname}/{molname}_all_figures.pdf") + merger.close() + return None #figdir=None is a temporary way to satisfy From bd02eb74fd86f2f5169487c89047737600e9a832 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 07:46:06 +0000 Subject: [PATCH 47/81] updates for implementation of pypdf in workflows dihedrals module: CHANGES, testing environment, requirements, sphinx source configuration --- CHANGES | 3 ++- devtools/conda-envs/test_env.yaml | 1 + doc/requirements.txt | 1 + doc/sphinx/source/conf.py | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index daea607e..3c7ca4a2 100644 --- a/CHANGES +++ b/CHANGES @@ -23,7 +23,8 @@ Changes Enhancements -* convert figure components to SVG and save as scalable PDFs +* convert figure components to SVG, save as individual PDFs, + and generate PDF of all figures combined in one file, for workflows dihedrals module (#243) * add RDKit mol object to dihedrals plot with dihedral atom indices labeled and dihedral atom group bonds highlighted diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 04d0f29f..fb21fa4d 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -19,6 +19,7 @@ dependencies: - seaborn - svgutils - cairosvg +- pypdf # Testing - pytest diff --git a/doc/requirements.txt b/doc/requirements.txt index 3725e194..47b9f293 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -11,3 +11,4 @@ seaborn matplotlib svgutils cairosvg +pypdf diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index 82f97694..4da897b5 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -250,6 +250,7 @@ 'https://seaborn.pydata.org': None, 'https://cairosvg.org/documentation/': None, 'https://svgutils.readthedocs.io/en/latest/#': None, + 'https://pypdf.readthedocs.io/en/stable/': None, } From ba053112402d80a704d50aeec15d4efee33175ea Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 07:48:58 +0000 Subject: [PATCH 48/81] intersphinx mapping, docs --- doc/sphinx/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index 4da897b5..d32857d0 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -249,7 +249,7 @@ 'https://pandas.pydata.org/docs/': None, 'https://seaborn.pydata.org': None, 'https://cairosvg.org/documentation/': None, - 'https://svgutils.readthedocs.io/en/latest/#': None, + 'https://svgutils.readthedocs.io/en/latest/': None, 'https://pypdf.readthedocs.io/en/stable/': None, } From f435207039fe191925bf8eb846d3498af37649c5 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 07:55:11 +0000 Subject: [PATCH 49/81] fix solute kwarg description for workflows dihedrals module --- mdpow/workflows/dihedrals.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index d890da7a..160e160c 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -176,8 +176,7 @@ def rdkit_conversion(u, resname): :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *solute* - molecule specified by :func:`~MDAnalysis.core.groups.select_atoms` - for :class:`~MDAnalysis.core.universe.Universe` object + the :any:`MDAnalysis` `AtomGroup` for the solute """ @@ -274,8 +273,7 @@ def get_dihedral_groups(solute, atom_indices): :keywords: *solute* - molecule specified by :func:`~MDAnalysis.core.groups.select_atoms` - for :class:`~MDAnalysis.core.universe.Universe` object + the :any:`MDAnalysis` `AtomGroup` for the solute *atom_indices* tuple of tuples of indices for each dihedral atom group From 3a85dde8901cbc7114b5e9f41519ec456282c615 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:00:43 +0000 Subject: [PATCH 50/81] fix ab_pairs kwarg description for workflows dihedrals module --- mdpow/workflows/dihedrals.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 160e160c..524b37e1 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -315,7 +315,8 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): :returns: *ab_pairs* - dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair + example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) ''' @@ -583,7 +584,8 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, decision between the two) *ab_pairs* - dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair + example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` @@ -659,7 +661,8 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *ab_pairs* - dictionary with key-value pair: '*#-*#-*#-*#': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair + example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` From 7f32b93bc8f8bebc6356570957f8231717447554 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:06:24 +0000 Subject: [PATCH 51/81] documentation for dihedral_violins function in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 524b37e1..e984a8f2 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -515,7 +515,7 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): :returns: - *g (violin plot)* + *g* returns a :class:`seaborn.FacetGrid` object containing a violin plot of the kernel density estimates (KDE) of the dihedral angle frequencies for each dihedral atom group identified by :data:`SMARTS_DEFAULT` From 87f393055f84379c96a75a083ce275c7870eaebb Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:10:28 +0000 Subject: [PATCH 52/81] documentation for get_paired_indices function in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index e984a8f2..49601389 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -292,8 +292,7 @@ def get_dihedral_groups(solute, atom_indices): def get_paired_indices(atom_indices, bond_indices, dihedral_groups): '''Combines `atom_indices` and `bond_indices` in tuples - to be paired with their respective `dihedral_groups` in - :func:`~MDAnalysis.core.groups.select_atoms` selection string format. + to be paired with their respective dihedral atom groups. A dictionary is created with key-value pairs as follows: `atom_indices` and `bond_indices` are joined in a tuple From 34c63ec48a1984400ed29d052501be9177659335 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:21:43 +0000 Subject: [PATCH 53/81] documentation and kwarg definition for get_paired_indices function and ab_pairs dictionary object in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 49601389..c08aef08 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -299,6 +299,8 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): as the value, with the key being the respective member of `dihedral_groups` to facilitate highlighting the relevant dihedral atom group when generating violin plots. + As an example, `'C1-N2-O3-S4': ((0, 1, 2, 3), (0, 1, 2))`, + would be one key-value pair in the dictionary. :keywords: @@ -314,8 +316,8 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): :returns: *ab_pairs* - dictionary with key-value pair - example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices ''' @@ -583,8 +585,8 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, decision between the two) *ab_pairs* - dictionary with key-value pair - example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` @@ -660,8 +662,8 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` *ab_pairs* - dictionary with key-value pair - example: 'C1-N2-O3-S4': (atom_indices[i], bond_indices[i]) + dictionary with key-value pair for dihedral atom group, + atom indices, and bond indices .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` From bf98c2354045225ad3a5542067062ec094057fa1 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:30:50 +0000 Subject: [PATCH 54/81] kwarg definition for plot_title for dihedral_violins function in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index c08aef08..ded443e9 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -508,11 +508,9 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): The default solvents are documented under :data:`SOLVENTS_DEFAULT`. *plot_title* - format: f'{molname}, {name[0]} {a} | ''{col_name}' - determined by, - - .. seealso:: :func:`~mdpow.workflows.dihedrals.build_svg` - + generated by :func:`~mdpow.workflows.dihedrals.build_svg` using + `molname`, `dihedral_groups`, `atom_indices`, and `interactions` + in this order and format: f'{molname}, {name[0]} {a} | ''{col_name}' :returns: From a3bcddccf88d825917299572da89f91235e0d7c0 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:36:08 +0000 Subject: [PATCH 55/81] move in-line comments explaining figdir kward for workflows dihedrals module --- mdpow/workflows/dihedrals.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index ded443e9..4ec4b1f5 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -730,15 +730,13 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, return None - #figdir=None is a temporary way to satisfy - #workflows base tests until issue #244 is resolved - #because it currently uses a **kwargs convention and the - #positional argument figdir will not carry over nicely, - #or requires changes that will result in wasted time - - #for this current version, figdir is required and will - #cause issues if not specified -def automated_dihedral_analysis(dirname, resname, figdir=None, +def automated_dihedral_analysis(dirname, resname, + figdir=None, + # figdir is required and will cause issues if not specified + # figdir=None is a temporary way to satisfy + # workflows base tests until issue #244 is resolved + # because it currently uses a **kwargs convention and the + # positional argument figdir will not carry over nicely df_save_dir=None, molname=None, SMARTS=SMARTS_DEFAULT, plot_pdf_width=PLOT_WIDTH_DEFAULT, dataframe=None, padding=45, width=0.9, From c0f9550807a24bb30732c151d7bca30c108d9461 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 08:51:27 +0000 Subject: [PATCH 56/81] reorganize kwargs for plot_dihedral_violins in top-level automated_dihedral_analysis function call in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 4ec4b1f5..dc5ca6b1 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -856,9 +856,8 @@ def automated_dihedral_analysis(dirname, resname, df_aug = periodic_angle_padding(df, padding=padding) - #kwargs in weird order - plot_dihedral_violins(df_aug, resname=resname, molname=molname, mol=mol, plot_pdf_width=plot_pdf_width, - ab_pairs=ab_pairs, figdir=figdir, width=width, solvents=solvents) + plot_dihedral_violins(df=df_aug, resname=resname, mol=mol, ab_pairs=ab_pairs, figdir=figdir, molname=molname, + width=width, plot_pdf_width=plot_pdf_width, solvents=solvents) logger.info(f"DihedralAnalysis completed for all projects in {dirname}") From c47e0550190bc39326eb0545640db87666f8c1c7 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 11 Jun 2023 09:22:47 +0000 Subject: [PATCH 57/81] add assert method to make figdir kwarg required in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 39 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index dc5ca6b1..24be2d6c 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -687,46 +687,45 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, ''' + assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" + if molname is None: molname = resname - if figdir is not None: - subdir = molname - newdir = os.path.join(figdir, subdir) - os.mkdir(newdir) + subdir = molname + newdir = os.path.join(figdir, subdir) + os.mkdir(newdir) - section = df.groupby(by="selection") + section = df.groupby(by="selection") - plot_pdf_width_px = plot_pdf_width * 3.7795275591 + plot_pdf_width_px = plot_pdf_width * 3.7795275591 - # create PDF file list - pdf_list = [] + pdf_list = [] - for name in section: + for name in section: - fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, - solvents=solvents, width=width) + fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, + solvents=solvents, width=width) - figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" - if figdir is not None: - plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), - output_width=plot_pdf_width_px) + figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" + if figdir is not None: + plot_pdf = cairosvg.svg2pdf(bytestring=fig.tostr(), write_to=str(figfile), + output_width=plot_pdf_width_px) # add PDF for each dihedral atom group to all_PDFs list - pdf_list.append(f'{figfile}') + pdf_list.append(f'{figfile}') - logger.info(f"Figure saved as {figfile}") + logger.info(f"Figure saved as {figfile}") - logger.info(f"All figures generated and saved in {figdir}") + logger.info(f"All figures generated and saved in {figdir}") - # create a combined PDF for all dihedral atom groups merger = pypdf.PdfWriter() for pdf in pdf_list: merger.append(pdf) - # name chosen so that this is the first PDF file in the figure directory merger.write(f"{figdir}/{molname}/{molname}_all_figures.pdf") merger.close() + logger.info(f"PDF of combined figures generated and saved as {figdir}/{molname}/{molname}_all_figures.pdf") return None From 5376eead25341a75d1260b69dc187903ff09c7e2 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 20:20:25 -0700 Subject: [PATCH 58/81] temporarily remove figdir required assertion --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 24be2d6c..60489644 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -687,7 +687,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, ''' - assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" + #assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" if molname is None: molname = resname From 110b3f90910c562276c2251461f0b978c0b74524 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 20:40:27 -0700 Subject: [PATCH 59/81] change MDA guess_atom_element to MDA guess_types for RDKit conversion in workflows dihedrals module --- mdpow/workflows/dihedrals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 60489644..f7dfd7fc 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -64,6 +64,7 @@ import MDAnalysis as mda from MDAnalysis.topology.guessers import guess_atom_element +from MDAnalysis.topology.guessers import guess_types import logging @@ -184,8 +185,7 @@ def rdkit_conversion(u, resname): solute = u.select_atoms(f'resname {resname}') mol = solute.convert_to('RDKIT') except AttributeError: - elements = [guess_atom_element(name) for name in u.atoms.names] - u.add_TopologyAttr("elements", elements) + u.add_TopologyAttr("elements", guess_types(u.atoms.names)) solute = u.select_atoms(f'resname {resname}') mol = solute.convert_to('RDKIT') From f5ead512e2b44dc5f16ec6e791621bd10af1343c Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 21:06:04 -0700 Subject: [PATCH 60/81] fix registry import error for workflows base, close #245 --- mdpow/workflows/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/base.py b/mdpow/workflows/base.py index 6e7018b7..17c8d442 100644 --- a/mdpow/workflows/base.py +++ b/mdpow/workflows/base.py @@ -24,7 +24,7 @@ import re import pandas as pd -from mdpow.workflows import registry +import registry import logging From a74403306fb9213dc43a337a042a84028f6319b0 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 21:06:30 -0700 Subject: [PATCH 61/81] remove guess_atom_element import --- mdpow/workflows/dihedrals.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index f7dfd7fc..0fcd3e0b 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -63,7 +63,6 @@ from mdpow.analysis.dihedral import DihedralAnalysis import MDAnalysis as mda -from MDAnalysis.topology.guessers import guess_atom_element from MDAnalysis.topology.guessers import guess_types import logging From 228b7c0c30668630d5cac0c913ff1a8458c6872e Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 21:10:02 -0700 Subject: [PATCH 62/81] fix registry import for workflows base module --- mdpow/workflows/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/base.py b/mdpow/workflows/base.py index 17c8d442..a44fb545 100644 --- a/mdpow/workflows/base.py +++ b/mdpow/workflows/base.py @@ -24,7 +24,7 @@ import re import pandas as pd -import registry +from ..workflows import registry import logging From 7c9f669e6a4e2c7fe01d79c98e1516f485812226 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 21:19:46 -0700 Subject: [PATCH 63/81] reimplement assert figdir reuired for workflows dihedrals module --- mdpow/workflows/dihedrals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 0fcd3e0b..8577ed84 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -686,7 +686,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, ''' - #assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" + assert figdir is not None, "figdir MUST be set, even though it is a kwarg. Will be changed with #244" if molname is None: molname = resname From b8236a60b824911b68c691e9ab02e9351ef37da5 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 22:21:41 -0700 Subject: [PATCH 64/81] require RDKit>=2023 for testing environment to investigate failed tests for workflows dihedrals module --- devtools/conda-envs/test_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index fb21fa4d..6d34f715 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -15,7 +15,7 @@ dependencies: - numkit - gromacswrapper - alchemlyb <2 -- rdkit +- rdkit >=2023 - seaborn - svgutils - cairosvg From f45f073483b156eb8e6cb7a99985fcac08d70fb0 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 22:30:55 -0700 Subject: [PATCH 65/81] require RDKit<2023 for testing environment to investigate failed tests for workflows dihedrals module --- devtools/conda-envs/test_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 6d34f715..6743c6f3 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -15,7 +15,7 @@ dependencies: - numkit - gromacswrapper - alchemlyb <2 -- rdkit >=2023 +- rdkit <2023 - seaborn - svgutils - cairosvg From adcecc628f0f5f8d44d929f6cc26c22471eebd2d Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Wed, 21 Jun 2023 22:47:23 -0700 Subject: [PATCH 66/81] require RDKit<=2022.09.1 for testing environment to investigate failed tests for workflows dihedrals module --- devtools/conda-envs/test_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 6743c6f3..a3e0e68e 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -15,7 +15,7 @@ dependencies: - numkit - gromacswrapper - alchemlyb <2 -- rdkit <2023 +- rdkit <=2022.09.1 - seaborn - svgutils - cairosvg From 9f751a8f6efb6c3b99521b5684943ba6d5a355a6 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Thu, 29 Jun 2023 20:56:10 -0700 Subject: [PATCH 67/81] fix function name in dihedral workflows tests --- mdpow/tests/test_automated_dihedral_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 8deaba21..7bfbd416 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -165,7 +165,7 @@ def test_SMARTS(self, atom_indices): # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible def test_dihedral_groups(self, SM25_tmp_dir): - groups = dihedrals.dihedral_groups(dirname=SM25_tmp_dir, resname=self.resname) + groups = dihedrals.get_dihedral_groups(dirname=SM25_tmp_dir, resname=self.resname) values = [g.all() for g in groups] reference = [g.all() for g in self.check_groups] From 13231a25126d9ef8cf10d0ca19f4735906c8a9f3 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Thu, 29 Jun 2023 21:05:03 -0700 Subject: [PATCH 68/81] fix variable reference in dihedral workflows test --- mdpow/tests/test_automated_dihedral_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 7bfbd416..17858bfe 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -165,7 +165,7 @@ def test_SMARTS(self, atom_indices): # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible def test_dihedral_groups(self, SM25_tmp_dir): - groups = dihedrals.get_dihedral_groups(dirname=SM25_tmp_dir, resname=self.resname) + groups = dihedrals.get_dihedral_groups(dirname=SM25_tmp_dir, resname=resname) values = [g.all() for g in groups] reference = [g.all() for g in self.check_groups] From 0db3c5a521a637b5e6fc89825f9152e7875b92b2 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Thu, 29 Jun 2023 21:18:00 -0700 Subject: [PATCH 69/81] fix fixture call and variable assignment in dihedral groups test for workflows dihedrals --- mdpow/tests/test_automated_dihedral_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 17858bfe..b99eb230 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -164,8 +164,8 @@ def test_SMARTS(self, atom_indices): # Use set comparison because ordering of indices appears to change # between RDKIT versions; issue raised (#239) to identify and # resolve exact package/version responsible - def test_dihedral_groups(self, SM25_tmp_dir): - groups = dihedrals.get_dihedral_groups(dirname=SM25_tmp_dir, resname=resname) + def test_dihedral_groups(self, dihedral_groups): + groups = dihedral_groups values = [g.all() for g in groups] reference = [g.all() for g in self.check_groups] From c540c8b92e7132834e16f80a64a8761fbce597f3 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Thu, 29 Jun 2023 21:26:20 -0700 Subject: [PATCH 70/81] modify workflows base test for assertion of figure directory requirement in dihedrals workflow --- mdpow/tests/test_workflows_base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index 14485b76..6f2078f2 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -63,13 +63,19 @@ def test_project_paths_csv_input(self, csv_input_data): pd.testing.assert_frame_equal(project_paths, csv_df) def test_automated_project_analysis(self, project_paths_data, caplog): + caplog.clear() + caplog.set_level(logging.ERROR, logger='mdpow.workflows.base') + project_paths = project_paths_data # change resname to match topology (every SAMPL7 resname is 'UNK') # only necessary for this dataset, not necessary for normal use project_paths['resname'] = 'UNK' - base.automated_project_analysis(project_paths, solvents=('water',), - ensemble_analysis='DihedralAnalysis') + with pytest.raises(AssertionError, + match="figdir MUST be set, even though it is a kwarg. Will be changed with #244"): + + base.automated_project_analysis(project_paths, solvents=('water',), + ensemble_analysis='DihedralAnalysis') assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' 'did not iteratively run to completion for the provided project') From eca078a74212d47c95cbc33d4f19eb944da638f2 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Thu, 29 Jun 2023 22:24:54 -0700 Subject: [PATCH 71/81] add name_index pairing tests for dihedral workflows atom indices consistency check --- .../tests/test_automated_dihedral_analysis.py | 44 +++++++++++++++-- mdpow/workflows/dihedrals.py | 47 ++++++++++--------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index b99eb230..8bc27acd 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -64,15 +64,15 @@ def atom_indices(self, mol_sol_data): @pytest.fixture def bond_indices(self, mol_sol_data, atom_indices): mol, _ = mol_sol_data - aix, _ = atom_indices - bond_indices = dihedrals.get_bond_indices(mol=mol, atom_indices=aix) + atom_index, _ = atom_indices + bond_indices = dihedrals.get_bond_indices(mol=mol, atom_indices=atom_index) return bond_indices @pytest.fixture def dihedral_groups(self, mol_sol_data, atom_indices): _, solute = mol_sol_data - aix, _ = atom_indices - dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=aix) + atom_index, _ = atom_indices + dihedral_groups = dihedrals.get_dihedral_groups(solute=solute, atom_indices=atom_index) return dihedral_groups @pytest.fixture @@ -99,6 +99,23 @@ def dihedral_data(self, SM25_tmp_dir, atom_indices): # see: fixture - atom_indices().atom_group_indices_alt check_atom_group_indices_alt = ((1, 2), (1, 12), (2, 3), (3, 4), (12, 13), (13, 14)) + check_atom_name_index_pairs = {'O1-C2-N3-S4': (0, 1, 2, 3), + 'O1-C2-C13-C14': (0, 1, 12, 13), + 'C2-N3-S4-O12': (1, 2, 3, 11), + 'C2-N3-S4-O11': (1, 2, 3, 10), + 'C2-N3-S4-C5': (1, 2, 3, 4), + 'C2-C13-C14-C15': (1, 12, 13, 14), + 'N3-S4-C5-C6': (2, 3, 4, 5), + 'N3-S4-C5-C10': (2, 3, 4, 9), + 'N3-C2-C13-C14': (2, 1, 12, 13), + 'S4-N3-C2-C13': (3, 2, 1, 12), + 'C6-C5-S4-O12': (5, 4, 3, 11), + 'C6-C5-S4-O11': (5, 4, 3, 10), + 'C10-C5-S4-O12': (9, 4, 3, 11), + 'C10-C5-S4-O11': (9, 4, 3, 10), + 'C13-C14-C15-C16': (12, 13, 14, 15), + 'C13-C14-C15-C20': (12, 13, 14, 19)} + check_groups = [np.array(['O1', 'C2', 'N3', 'S4'], dtype=object), np.array(['O1', 'C2', 'C13', 'C14'], dtype=object), np.array(['C2', 'N3', 'S4', 'O12'], dtype=object), @@ -172,6 +189,25 @@ def test_dihedral_groups(self, dihedral_groups): assert set(values) == set(reference) + # atom indices are determined by RDKit Mol object + # bond indices are determined by atom indices and are subsequently self-consistent + # dihedral group names are determined by the MDAnalysis solute object from RDKit-derived atom indices + # this test checks if indexing schemes for RDKit and MDAnalysis are consistent + def test_RDKit_MDAnalysis_atom_index_consistency(self, atom_indices, bond_indices, dihedral_groups): + atom_index, _ = atom_indices + bond_index = bond_indices + groups = dihedral_groups + + name_index_pairs = dihedrals.get_paired_indices(atom_indices=atom_index, bond_indices=bond_index, + dihedral_groups=groups) + + atom_name_index_pairs = {} + + for key in name_index_pairs.keys(): + atom_name_index_pairs[key] = name_index_pairs[key][0] + + assert atom_name_index_pairs == self.check_atom_name_index_pairs + # Possible ordering issue (#239) def test_dihedral_groups_ensemble(self, dihedral_data): diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 8577ed84..8ad00607 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -246,11 +246,11 @@ def get_bond_indices(mol, atom_indices): bonds = [] - for aix in atom_indices: + for atom_index in atom_indices: - x = mol.GetBondBetweenAtoms(aix[0], aix[1]).GetIdx() - y = mol.GetBondBetweenAtoms(aix[1], aix[2]).GetIdx() - z = mol.GetBondBetweenAtoms(aix[2], aix[3]).GetIdx() + x = mol.GetBondBetweenAtoms(atom_index[0], atom_index[1]).GetIdx() + y = mol.GetBondBetweenAtoms(atom_index[1], atom_index[2]).GetIdx() + z = mol.GetBondBetweenAtoms(atom_index[2], atom_index[3]).GetIdx() bix = (x, y, z) bonds.append(bix) @@ -283,8 +283,10 @@ def get_dihedral_groups(solute, atom_indices): list of :func:`numpy.array` for atom names in each dihedral atom group ''' - - dihedral_groups = [solute.atoms[list(a_ind)].names for a_ind + # currently uses RDKit Mol object atom indices to retrieve + # atom names from the MDAnalysis solute object + # RDKit-MDAnalysis index consistency is currently tested + dihedral_groups = [solute.atoms[list(atom_index)].names for atom_index in atom_indices] return dihedral_groups @@ -314,7 +316,7 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): :returns: - *ab_pairs* + *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices @@ -322,10 +324,10 @@ def get_paired_indices(atom_indices, bond_indices, dihedral_groups): all_dgs = [f'{dg[0]}-{dg[1]}-{dg[2]}-{dg[3]}' for dg in dihedral_groups] - ab_pairs = {} - ab_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} + name_index_pairs = {} + name_index_pairs = {all_dgs[i]: (atom_indices[i], bond_indices[i]) for i in range(len(all_dgs))} - return ab_pairs + return name_index_pairs def dihedral_groups_ensemble(dirname, atom_indices, solvents=SOLVENTS_DEFAULT, @@ -564,7 +566,7 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): return g -def build_svg(mol, molname, ab_pairs, atom_group_selection, +def build_svg(mol, molname, name_index_pairs, atom_group_selection, solvents=SOLVENTS_DEFAULT, width=0.9): '''Converts and combines figure components into an SVG object to be converted and saved as a publication @@ -581,7 +583,7 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, (in this case, carried over from an upstream decision between the two) - *ab_pairs* + *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices @@ -605,13 +607,12 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, :mod:`svgutils` SVG figure object ''' - # atoms - a = ab_pairs[atom_group_selection[0]][0] - # bonds - b = ab_pairs[atom_group_selection[0]][1] + + atom_index = name_index_pairs[atom_group_selection[0]][0] + bond_index = name_index_pairs[atom_group_selection[0]][1] drawer = rdMolDraw2D.MolDraw2DSVG(250, 250) - drawer.DrawMolecule(mol=mol, highlightAtoms=a, highlightBonds=b) + drawer.DrawMolecule(mol=mol, highlightAtoms=atom_index, highlightBonds=bond_index) drawer.FinishDrawing() svg = drawer.GetDrawingText().replace('svg:','') @@ -619,7 +620,7 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, m = mol_svg.getroot() m.scale(0.0125).moveto(21.8, 0.35) - plot_title = f'{molname}, {atom_group_selection[0]} {a} | ''{col_name}' + plot_title = f'{molname}, {atom_group_selection[0]} {atom_index} | ''{col_name}' plot = dihedral_violins(atom_group_selection[1], width=width, solvents=solvents, plot_title=plot_title) plot_svg = svgutils.transform.from_mpl(plot) @@ -631,7 +632,7 @@ def build_svg(mol, molname, ab_pairs, atom_group_selection, return fig -def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, +def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molname=None, width=0.9, plot_pdf_width=PLOT_WIDTH_DEFAULT, solvents=SOLVENTS_DEFAULT): '''Coordinates plotting and saving figures for all dihedral atom groups. @@ -658,7 +659,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, *mol* :class:`rdkit.Chem.rdchem.Mol` object converted from `solute` - *ab_pairs* + *name_index_pairs* dictionary with key-value pair for dihedral atom group, atom indices, and bond indices @@ -703,7 +704,7 @@ def plot_dihedral_violins(df, resname, mol, ab_pairs, figdir=None, molname=None, for name in section: - fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, ab_pairs=ab_pairs, + fig = build_svg(mol=mol, molname=molname, atom_group_selection=name, name_index_pairs=name_index_pairs, solvents=solvents, width=width) figfile = pathlib.Path(newdir) / f"{molname}_{name[0]}_violins.pdf" @@ -835,7 +836,7 @@ def automated_dihedral_analysis(dirname, resname, atom_indices = get_atom_indices(mol=mol, SMARTS=SMARTS) bond_indices = get_bond_indices(mol=mol, atom_indices=atom_indices) dihedral_groups = get_dihedral_groups(solute=solute, atom_indices=atom_indices) - ab_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, + name_index_pairs = get_paired_indices(atom_indices=atom_indices, bond_indices=bond_indices, dihedral_groups=dihedral_groups) if dataframe is not None: @@ -854,7 +855,7 @@ def automated_dihedral_analysis(dirname, resname, df_aug = periodic_angle_padding(df, padding=padding) - plot_dihedral_violins(df=df_aug, resname=resname, mol=mol, ab_pairs=ab_pairs, figdir=figdir, molname=molname, + plot_dihedral_violins(df=df_aug, resname=resname, mol=mol, name_index_pairs=name_index_pairs, figdir=figdir, molname=molname, width=width, plot_pdf_width=plot_pdf_width, solvents=solvents) logger.info(f"DihedralAnalysis completed for all projects in {dirname}") From 34f3e18b30cf641c143ede2714f7098ae469c291 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 30 Jun 2023 21:31:02 -0700 Subject: [PATCH 72/81] modify workflows base test for dihedral workflow figure directory requirement --- mdpow/tests/test_workflows_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index 6f2078f2..917f7e45 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -62,7 +62,7 @@ def test_project_paths_csv_input(self, csv_input_data): pd.testing.assert_frame_equal(project_paths, csv_df) - def test_automated_project_analysis(self, project_paths_data, caplog): + def test_dihedral_analysis_figdir_requirement(self, project_paths_data, caplog): caplog.clear() caplog.set_level(logging.ERROR, logger='mdpow.workflows.base') @@ -77,8 +77,8 @@ def test_automated_project_analysis(self, project_paths_data, caplog): base.automated_project_analysis(project_paths, solvents=('water',), ensemble_analysis='DihedralAnalysis') - assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' - 'did not iteratively run to completion for the provided project') + assert 'all analyses completed' in caplog.text, ('automated_dihedral_analysis ' + 'did not iteratively run to completion for the provided project') def test_automated_project_analysis_KeyError(self, project_paths_data, caplog): caplog.clear() From 2b5d7afbf11dce957bfd8f3e259536a5c8ad32a8 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Fri, 30 Jun 2023 22:34:34 -0700 Subject: [PATCH 73/81] add pypdf to setup.py install_requires for dihedrals workflow --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e263e846..262f113c 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ 'rdkit', 'svgutils', 'cairosvg', + 'pypdf' ], #setup_requires=['pytest-runner',], tests_require=['pytest', 'pybol', 'py'], From 7843fce626b1dfee0e3e9cf84500a94e8d55a565 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 1 Jul 2023 20:58:12 -0700 Subject: [PATCH 74/81] change imports to follow PEP 8 --- .../tests/test_automated_dihedral_analysis.py | 19 ++++++---------- mdpow/tests/test_workflows_base.py | 11 ++++------ mdpow/workflows/base.py | 6 ++--- mdpow/workflows/dihedrals.py | 22 ++++++++----------- 4 files changed, 23 insertions(+), 35 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 8bc27acd..bde10be3 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -1,27 +1,22 @@ import os import sys import yaml -import pybol -import pytest +import py.path import pathlib import logging import scipy -import numpy as np -import pandas as pd - +from scipy.stats import circvar, circmean import seaborn - +import numpy as np from numpy.testing import assert_almost_equal -from scipy.stats import circvar, circmean +import pandas as pd +import pybol +import pytest from . import RESOURCES - -import py.path - -from mdpow.workflows import dihedrals - from pkg_resources import resource_filename +from mdpow.workflows import dihedrals RESOURCES = pathlib.PurePath(resource_filename(__name__, 'testing_resources')) MANIFEST = RESOURCES / "manifest.yml" diff --git a/mdpow/tests/test_workflows_base.py b/mdpow/tests/test_workflows_base.py index 917f7e45..3d33875b 100644 --- a/mdpow/tests/test_workflows_base.py +++ b/mdpow/tests/test_workflows_base.py @@ -2,19 +2,16 @@ import os import sys import yaml -import pybol -import pytest import pathlib import logging +import pybol +import pytest import pandas as pd -from mdpow.workflows import base - -from pkg_resources import resource_filename - from . import RESOURCES, MANIFEST, STATES - +from pkg_resources import resource_filename +from mdpow.workflows import base @pytest.fixture(scope='function') def molname_workflows_directory(tmp_path): diff --git a/mdpow/workflows/base.py b/mdpow/workflows/base.py index e4c0a790..d9bc7f32 100644 --- a/mdpow/workflows/base.py +++ b/mdpow/workflows/base.py @@ -22,11 +22,11 @@ import os import re -import pandas as pd +import logging -from ..workflows import registry +import pandas as pd -import logging +from . import registry logger = logging.getLogger('mdpow.workflows.base') diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 8ad00607..d8023655 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -42,31 +42,27 @@ """ import os +import logging import pathlib + import numpy as np import pandas as pd - +import MDAnalysis as mda +from MDAnalysis.topology.guessers import guess_types +from rdkit import Chem +from rdkit.Chem import rdCoordGen +from rdkit.Chem.Draw import rdMolDraw2D +import matplotlib.pyplot as plt +import seaborn as sns import pypdf import cairosvg import svgutils import svgutils.compose import svgutils.transform -from rdkit import Chem -from rdkit.Chem import rdCoordGen -from rdkit.Chem.Draw import rdMolDraw2D - -import seaborn as sns -import matplotlib.pyplot as plt - import mdpow from mdpow.analysis.dihedral import DihedralAnalysis -import MDAnalysis as mda -from MDAnalysis.topology.guessers import guess_types - -import logging - logger = logging.getLogger('mdpow.workflows.dihedrals') SOLVENTS_DEFAULT = ('water', 'octanol') From 0cb2ddc80eafc516145937782f57a20f5d5ac95f Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 1 Jul 2023 21:25:05 -0700 Subject: [PATCH 75/81] modify dihedrals workflow docs to explain figdir kwarg requirement --- mdpow/workflows/dihedrals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index d8023655..d593abeb 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -662,7 +662,8 @@ def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molna .. seealso:: :func:`~mdpow.workflows.dihedrals.get_paired_indices` *figdir* - path to the location to save figures + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) *molname* molecule name to be used for labelling @@ -763,7 +764,8 @@ def automated_dihedral_analysis(dirname, resname, available. *figdir* - path to the location to save figures + path to the location to save figures (REQUIRED but marked + as a kwarg for technical reasons; will be changed in #244) *resname* `resname` for the molecule as defined in From 84aed57ff65336f7d2a5b8464b29d55e522a3122 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sat, 1 Jul 2023 21:49:32 -0700 Subject: [PATCH 76/81] use first solvent specified to build MDAnalysis Universe --- mdpow/workflows/dihedrals.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index d593abeb..42b22e5b 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -108,9 +108,10 @@ default value: 190 mm = 718.110236229 pixels """ -def build_universe(dirname): - """Builds :class:`~MDAnalysis.core.universe.Universe` from - ``water/Coulomb/0000`` topology and trajectory for the specified project. +def build_universe(dirname, solvents=SOLVENTS_DEFAULT): + """Builds :class:`~MDAnalysis.core.universe.Universe` from the + ``./Coulomb/0000`` topology and trajectory of the project for + the first solvent specified. Output used by :func:`~mdpow.workflows.dihedrals.rdkit_conversion` and :func:`~mdpow.workflows.dihedrals.get_atom_indices` to obtain atom indices @@ -127,6 +128,9 @@ def build_universe(dirname): searches for .gro, .gro.bz2, .gro.gz, and .tpr files for topology, and .xtc files for trajectory. It will default to using the tpr file available. + + *solvents* + The default solvents are documented under :data:`SOLVENTS_DEFAULT`. :returns: @@ -136,8 +140,8 @@ def build_universe(dirname): """ path = pathlib.Path(dirname) - topology = path / 'FEP/water/Coulomb/0000' / 'md.tpr' - trajectory = path / 'FEP/water/Coulomb/0000' / 'md.xtc' + topology = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.tpr' + trajectory = path / f'FEP/{solvents[0]}/Coulomb/0000' / 'md.xtc' u = mda.Universe(str(topology), str(trajectory)) return u From 2970b24ba708a02ca8635b45c25af7404128bae6 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 2 Jul 2023 21:06:46 -0700 Subject: [PATCH 77/81] modify single solvent plotting method, add solvent count assertion --- mdpow/workflows/dihedrals.py | 46 ++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 636dcfc7..31216e54 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -130,6 +130,8 @@ def build_universe(dirname, solvents=SOLVENTS_DEFAULT): *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. :returns: @@ -361,6 +363,8 @@ def dihedral_groups_ensemble(dirname, atom_indices, *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. @@ -506,6 +510,8 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *plot_title* generated by :func:`~mdpow.workflows.dihedrals.build_svg` using @@ -531,19 +537,27 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): len(df[df['interaction'] == "VDW"]["lambda"].unique()), len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] - solvs = list(solvents) - if len(solvs) < 2: - solvs.append('N/A') - - g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", - kind="violin", split=True, width=width, inner=None, cut=0, - linewidth=0.5, - hue_order=[solvs[0], solvs[1]], col_order=["Coulomb", "VDW", "Structure"], - sharex=False, sharey=True, - height=3.0, aspect=2.0, - facet_kws={'ylim': (-180, 180), - 'gridspec_kws': {'width_ratios': width_ratios, - }}) + assert 0 < len(solvents) < 3, "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" + if len(list(solvents)) < 2: + g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", + kind="violin", split=False, width=width, inner=None, cut=0, + linewidth=0.5, + hue_order=list(solvents[0]), col_order=["Coulomb", "VDW", "Structure"], + sharex=False, sharey=True, + height=3.0, aspect=2.0, + facet_kws={'ylim': (-180, 180), + 'gridspec_kws': {'width_ratios': width_ratios, + }}) + else: + g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", + kind="violin", split=True, width=width, inner=None, cut=0, + linewidth=0.5, + hue_order=list(solvents[:2]), col_order=["Coulomb", "VDW", "Structure"], + sharex=False, sharey=True, + height=3.0, aspect=2.0, + facet_kws={'ylim': (-180, 180), + 'gridspec_kws': {'width_ratios': width_ratios, + }}) g.set_xlabels(r"$\lambda$") g.set_ylabels(r"dihedral angle $\phi$") @@ -595,6 +609,8 @@ def build_svg(mol, molname, name_index_pairs, atom_group_selection, *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *width* width of the violin element (>1 overlaps) @@ -684,6 +700,8 @@ def plot_dihedral_violins(df, resname, mol, name_index_pairs, figdir=None, molna *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. ''' @@ -807,6 +825,8 @@ def automated_dihedral_analysis(dirname, resname, *solvents* The default solvents are documented under :data:`SOLVENTS_DEFAULT`. + Normally takes a two-tuple, but analysis is compatible with single solvent selections. + Single solvent analyses will result in a figure with fully filled violins for the single solvent. *interactions* The default interactions are documented under :data:`INTERACTIONS_DEFAULT`. From 9801848493e1c6361af6f1727c65aa7037d00890 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 2 Jul 2023 21:13:23 -0700 Subject: [PATCH 78/81] comment expected fixture scope changes, reference issue #235 --- mdpow/tests/test_automated_dihedral_analysis.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index bde10be3..00ed56a8 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -249,6 +249,8 @@ def test_periodic_angle(self, dihedral_data): aug_dh2_var == pytest.approx(self.ADG_C13141520_var) # Possible ordering issue (#239) + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_save_fig(self, SM25_tmp_dir): dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, resname=resname, molname='SM25', @@ -256,6 +258,8 @@ def test_save_fig(self, SM25_tmp_dir): assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' # Possible ordering issue (#239) + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_save_fig_info(self, SM25_tmp_dir, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') @@ -264,6 +268,8 @@ def test_save_fig_info(self, SM25_tmp_dir, caplog): solvents=('water',)) assert f'Figure saved as {SM25_tmp_dir}/SM25/SM25_C10-C5-S4-O11_violins.pdf' in caplog.text, 'PDF file not saved' + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): df, _ = dihedral_data dihedrals.automated_dihedral_analysis(dirname=SM25_tmp_dir, figdir=SM25_tmp_dir, @@ -271,6 +277,8 @@ def test_DataFrame_input(self, SM25_tmp_dir, dihedral_data): solvents=('water',), dataframe=df) assert (SM25_tmp_dir / 'SM25' / 'SM25_C10-C5-S4-O11_violins.pdf').exists(), 'PDF file not generated' + # Tests using similar instances of the automated analyses + # will use module or class-scoped fixtures, pending #235 def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): caplog.clear() caplog.set_level(logging.INFO, logger='mdpow.workflows.dihedrals') From 4ebf3c0dff79eec03345b707a3cbd450fcbed92c Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 2 Jul 2023 21:53:17 -0700 Subject: [PATCH 79/81] remove solute.unwrap, not needed --- mdpow/workflows/dihedrals.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 31216e54..239de14f 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -189,7 +189,6 @@ def rdkit_conversion(u, resname): solute = u.select_atoms(f'resname {resname}') mol = solute.convert_to('RDKIT') - solute.unwrap(compound='residues', reference='com') rdCoordGen.AddCoords(mol) for atom in mol.GetAtoms(): @@ -542,7 +541,7 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=False, width=width, inner=None, cut=0, linewidth=0.5, - hue_order=list(solvents[0]), col_order=["Coulomb", "VDW", "Structure"], + hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, height=3.0, aspect=2.0, facet_kws={'ylim': (-180, 180), @@ -552,7 +551,7 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", kind="violin", split=True, width=width, inner=None, cut=0, linewidth=0.5, - hue_order=list(solvents[:2]), col_order=["Coulomb", "VDW", "Structure"], + hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], sharex=False, sharey=True, height=3.0, aspect=2.0, facet_kws={'ylim': (-180, 180), From 292a8bd13d100e0e4834e9e6f6a692f0133abdfe Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Sun, 2 Jul 2023 22:20:37 -0700 Subject: [PATCH 80/81] reference issue #260 to fix jupyter notebook figure output --- mdpow/workflows/dihedrals.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 239de14f..9c1ee1e1 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -536,6 +536,8 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): len(df[df['interaction'] == "VDW"]["lambda"].unique()), len(df[df['interaction'] == "Coulomb"]["lambda"].unique()) - 1] + # Usage in Jupyter causes matplotlib figure object output, not the completed figure + # Upcoming fix in issue #260 assert 0 < len(solvents) < 3, "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" if len(list(solvents)) < 2: g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", From 4af6de636cdea60a6c0080c0e2281d9db238ec60 Mon Sep 17 00:00:00 2001 From: cadeduckworth Date: Mon, 3 Jul 2023 17:31:08 -0700 Subject: [PATCH 81/81] finalize single solvent figure modifications and add test --- .../tests/test_automated_dihedral_analysis.py | 9 ++++++ mdpow/workflows/dihedrals.py | 32 +++++++------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/mdpow/tests/test_automated_dihedral_analysis.py b/mdpow/tests/test_automated_dihedral_analysis.py index 00ed56a8..b5f70139 100644 --- a/mdpow/tests/test_automated_dihedral_analysis.py +++ b/mdpow/tests/test_automated_dihedral_analysis.py @@ -287,3 +287,12 @@ def test_DataFrame_input_info(self, SM25_tmp_dir, dihedral_data, caplog): resname=resname, molname=molname, solvents=('water',), dataframe=df) assert 'Proceeding with results DataFrame provided.' in caplog.text, 'No dataframe provided or dataframe not recognized' + + # testing resources only contain analyses with single solvent input + def test_single_solvent(self, dihedral_data): + df, _ = dihedral_data + # all analysis data in one violin plot + g = dihedrals.dihedral_violins(df=df, width=0.9, solvents=('water',), plot_title='test') + # number of solvents in DataFrame used to generate plot + number_of_solvents = g.data['solvent'].nunique() + assert number_of_solvents == 1 \ No newline at end of file diff --git a/mdpow/workflows/dihedrals.py b/mdpow/workflows/dihedrals.py index 9c1ee1e1..de3178ad 100644 --- a/mdpow/workflows/dihedrals.py +++ b/mdpow/workflows/dihedrals.py @@ -539,26 +539,18 @@ def dihedral_violins(df, width=0.9, solvents=SOLVENTS_DEFAULT, plot_title=None): # Usage in Jupyter causes matplotlib figure object output, not the completed figure # Upcoming fix in issue #260 assert 0 < len(solvents) < 3, "one or two solvents must be specified, otherwise SOLVENTS_DEFAULT is used" - if len(list(solvents)) < 2: - g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", - kind="violin", split=False, width=width, inner=None, cut=0, - linewidth=0.5, - hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], - sharex=False, sharey=True, - height=3.0, aspect=2.0, - facet_kws={'ylim': (-180, 180), - 'gridspec_kws': {'width_ratios': width_ratios, - }}) - else: - g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", - kind="violin", split=True, width=width, inner=None, cut=0, - linewidth=0.5, - hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], - sharex=False, sharey=True, - height=3.0, aspect=2.0, - facet_kws={'ylim': (-180, 180), - 'gridspec_kws': {'width_ratios': width_ratios, - }}) + split = len(solvents) > 1 + g = sns.catplot(data=df, x="lambda", y="dihedral", hue="solvent", col="interaction", + kind="violin", split=split, width=width, inner=None, cut=0, + linewidth=0.5, + hue_order=list(solvents), col_order=["Coulomb", "VDW", "Structure"], + sharex=False, sharey=True, + height=3.0, aspect=2.0, + facet_kws={'ylim': (-180, 180), + 'gridspec_kws': {'width_ratios': width_ratios, + } + } + ) g.set_xlabels(r"$\lambda$") g.set_ylabels(r"dihedral angle $\phi$")