-
Notifications
You must be signed in to change notification settings - Fork 1
[DNM] Refactor rmsd.py #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4b257b2
236ff72
d274b0c
f3634dd
e92adb3
b528ca2
a477bc1
ad84082
8ba8087
ead7951
f898a35
c675a5c
88e456d
73a8e4d
43aaca2
c165525
5f17770
c28286e
197b6ba
b45390a
1d70936
20084c3
1a1c916
59c7392
5e135ab
a9a8780
92af45b
8ea3585
220d504
8c44cb2
d13495f
c34c97c
0161673
9b6ca69
1d5c849
f4e88e2
bd0c8ee
ba4c912
157c02f
98ea023
c5b2d70
54576ab
3aa52a5
ff6991a
7a30f69
ac1fe7b
67a0913
e706b11
260d72c
5e7a037
4fac0a0
c329ba9
b5cc7e0
56b0e61
6c1a6c5
f4637f4
e7d6935
deb5126
fa3227e
43eb039
73fe2ee
6793f1d
7be4c53
68a2aab
198156b
6f85466
1d19473
6cb52af
2ccc7a8
24163e5
5994774
b36a8e3
750543f
d2b2c34
592149c
bb54931
61d3e46
52792ac
250dd9f
6544450
01e3d15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,22 +15,54 @@ | |
| from .transformations import Aligner, ClosestImageShift, NoJump | ||
|
|
||
|
|
||
| def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe: | ||
| """Makes a Universe and applies some transformations | ||
| def _select_protein_and_ligands( | ||
| u: mda.Universe, | ||
| protein_selection: str, | ||
| ligand_selection: str, | ||
| ) -> tuple[mda.core.groups.AtomGroup, list[mda.core.groups.AtomGroup]]: | ||
| protein = u.select_atoms(protein_selection) | ||
| lig_atoms = u.select_atoms(ligand_selection) | ||
| # split ligands by fragment | ||
| ligands = lig_atoms.fragments | ||
| return protein, ligands | ||
|
|
||
|
|
||
| def make_Universe( | ||
| top: pathlib.Path, | ||
| trj: nc.Dataset, | ||
| state: int, | ||
| ligand_selection: str = "resname UNK", | ||
| protein_selection: str = "protein and name CA", | ||
| ) -> mda.Universe: | ||
| """ | ||
| Creates a Universe and applies transformations for protein and ligands. | ||
|
|
||
| Identifies two AtomGroups: | ||
| - protein, defined as having standard amino acid names, then filtered | ||
| down to CA | ||
| - ligand, defined as resname UNK | ||
| Parameters | ||
| ---------- | ||
| top : pathlib.Path | ||
| Path to the topology file. | ||
| trj : nc.Dataset | ||
| Trajectory dataset. | ||
| state : int | ||
| State index in the trajectory. | ||
| ligand_selection : str, default 'resname UNK' | ||
| MDAnalysis selection string for ligands. Supports multiple ligands. | ||
| protein_selection : str, default 'protein and name CA' | ||
| MDAnalysis selection string for the protein atoms to consider. | ||
|
|
||
| Then applies some transformations. | ||
| Returns | ||
| ------- | ||
| mda.Universe | ||
| Universe with transformations applied. | ||
|
|
||
| Notes | ||
| ----- | ||
| If a protein is present: | ||
| - prevents the protein from jumping between periodic images | ||
| - moves the ligand to the image closest to the protein | ||
| - aligns the entire system to minimise the protein RMSD | ||
|
|
||
| If only a ligand: | ||
| If only a ligand is present: | ||
| - prevents the ligand from jumping between periodic images | ||
| """ | ||
| u = mda.Universe( | ||
|
|
@@ -40,18 +72,21 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers | |
| index_method="state", | ||
| format=FEReader, | ||
| ) | ||
| prot = u.select_atoms("protein and name CA") | ||
| ligand = u.select_atoms("resname UNK") | ||
|
|
||
| if prot: | ||
| protein, ligands = _select_protein_and_ligands(u, protein_selection, ligand_selection) | ||
|
|
||
| if protein: | ||
| # Unwrap all atoms | ||
| unwrap_tr = unwrap(prot + ligand) | ||
| complex = protein | ||
| for ligand in ligands: | ||
| complex += ligand | ||
| unwrap_tr = unwrap(complex) | ||
|
|
||
| # Shift chains + ligand | ||
| chains = [seg.atoms for seg in prot.segments] | ||
| shift = ClosestImageShift(chains[0], [*chains[1:], ligand]) | ||
| chains = [seg.atoms for seg in protein.segments] | ||
| shift = ClosestImageShift(chains[0], [*chains[1:], *ligands]) | ||
|
|
||
| align = Aligner(prot) | ||
| align = Aligner(protein) | ||
|
|
||
| u.trajectory.add_transformations( | ||
| unwrap_tr, | ||
|
|
@@ -60,21 +95,134 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers | |
| ) | ||
| else: | ||
| # if there's no protein | ||
| # - make the ligand not jump periodic images between frames | ||
| # - align the ligand to minimise its RMSD | ||
| nope = NoJump(ligand) | ||
| align = Aligner(ligand) | ||
|
|
||
| u.trajectory.add_transformations( | ||
| nope, | ||
| align, | ||
| ) | ||
| # - make the ligands not jump periodic images between frames | ||
| # - align the ligands to minimise its RMSD | ||
| for lig in ligands: | ||
| u.trajectory.add_transformations(NoJump(lig), Aligner(lig)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This would align the trajectory to minimize the RMSD of ligand 1 and then re-align it to ligand 2. I'm not sure this is going to be done correctly - would it not effectively mess up the RMSD for the first ligand? |
||
|
|
||
| return u | ||
|
|
||
|
|
||
| def twoD_RMSD(positions: np.ndarray, w: Optional[npt.NDArray]) -> list[float]: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is fine, but please open an issue to switch this to using something better, like this: https://userguide.mdanalysis.org/1.1.1/examples/analysis/alignment_and_rms/pairwise_rmsd.html#Pairwise-RMSD-of-a-trajectory-to-itself |
||
| """2 dimensions RMSD | ||
|
|
||
| Parameters | ||
| ---------- | ||
| positions : np.ndarray | ||
| the protein positions for the entire trajectory | ||
| w : np.ndarray, optional | ||
| weights array | ||
|
|
||
| Returns | ||
| ------- | ||
| rmsd_matrix : list | ||
| Flattened list of RMSD values between all frame pairs. | ||
| """ | ||
| nframes, _, _ = positions.shape | ||
|
|
||
| output = [] | ||
|
|
||
| for i, j in itertools.combinations(range(nframes), 2): | ||
| posi, posj = positions[i], positions[j] | ||
|
|
||
| rmsd = rms.rmsd(posi, posj, w, center=True, superposition=True) | ||
|
|
||
| output.append(rmsd) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
def analyze_state(
    u: mda.Universe,
    prot: Optional[mda.core.groups.AtomGroup],
    ligands: list[mda.core.groups.AtomGroup],
    skip: int,
) -> tuple[
    Optional[list[float]],
    Optional[list[float]],
    Optional[list[list[float]]],
    Optional[list[list[float]]],
]:
    """
    Compute RMSD and COM drift for a single lambda state.

    Parameters
    ----------
    u : mda.Universe
        Universe containing the trajectory.
    prot : AtomGroup or None
        Protein atoms to compute RMSD for. ``None`` or an empty group
        disables the protein analysis.
    ligands : list of AtomGroups
        Ligands to compute RMSD and COM drift for.
    skip : int
        Step size to skip frames (e.g., every `skip`-th frame).

    Returns
    -------
    protein_rmsd : list[float] or None
        RMSD of protein per frame relative to its positions when this
        function is entered; ``None`` if no protein is present.
    protein_2D_rmsd : list[float] or None
        Flattened 2D RMSD between all pairs of analysed protein frames;
        ``None`` if no protein is present.
    ligand_rmsd : list of list[float]
        RMSD of each ligand per frame (one inner list per ligand).
    ligand_com_drift : list of list[float]
        COM drift of each ligand per frame (one inner list per ligand).
    """
    # Prepare storage
    if prot:
        # coordinates of every analysed frame, kept for the 2D RMSD
        prot_positions = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)
        prot_start = prot.positions
        prot_rmsd = []
    else:
        prot_positions = None
        prot_rmsd = None

    lig_starts = [lig.positions for lig in ligands]
    lig_initial_coms = [lig.center_of_mass() for lig in ligands]
    # mass weights do not change between frames, so compute them once
    lig_weights = [lig.masses / np.mean(lig.masses) for lig in ligands]
    lig_rmsd: list[list[float]] = [[] for _ in ligands]
    lig_com_drift: list[list[float]] = [[] for _ in ligands]

    for ts_i, ts in enumerate(u.trajectory[::skip]):
        if prot:
            prot_positions[ts_i, :, :] = prot.positions
            prot_rmsd.append(
                rms.rmsd(
                    prot.positions,
                    prot_start,
                    None,  # unweighted
                    center=False,
                    superposition=False,
                )
            )
        for i, lig in enumerate(ligands):
            lig_rmsd[i].append(
                rms.rmsd(
                    lig.positions,
                    lig_starts[i],
                    lig_weights[i],
                    center=False,
                    superposition=False,
                )
            )
            lig_com_drift[i].append(
                # distance between start and current ligand position
                # ignores PBC, but we've already centered the traj
                mda.lib.distances.calc_bonds(lig.center_of_mass(), lig_initial_coms[i])
            )

    # Always bind rmsd2d: without a protein there is nothing to compare, and
    # leaving the name unset made the return below raise UnboundLocalError.
    if prot:
        # can ignore weights here as it's all Ca
        rmsd2d = twoD_RMSD(prot_positions, w=None)
    else:
        rmsd2d = None

    return prot_rmsd, rmsd2d, lig_rmsd, lig_com_drift
|
|
||
|
|
||
| def gather_rms_data( | ||
| pdb_topology: pathlib.Path, dataset: pathlib.Path, skip: Optional[int] = None | ||
| pdb_topology: pathlib.Path, | ||
| dataset: pathlib.Path, | ||
| skip: Optional[int] = None, | ||
| ligand_selection: str = "resname UNK", | ||
| protein_selection: str = "protein and name CA", | ||
| ) -> dict[str, list[float]]: | ||
| """Generate structural analysis of RBFE simulation | ||
|
|
||
|
|
@@ -87,17 +235,24 @@ def gather_rms_data( | |
| skip : int, optional | ||
| step at which to progress through the trajectory. by default, selects a | ||
| step that produces roughly 500 frames of analysis per replicate | ||
| ligand_selection : str, optional | ||
| MDAnalysis selection string for ligands (default "resname UNK"). | ||
| protein_selection : str, optional | ||
| MDAnalysis selection string for protein (default "protein and name CA"). | ||
|
|
||
| Produces, for each lambda state: | ||
| - 1D protein RMSD timeseries 'protein_RMSD' | ||
| - ligand RMSD timeseries | ||
| - ligand COM motion 'ligand_wander' | ||
| - 2D protein RMSD plot | ||
| Returns | ||
| ------- | ||
| output : dict[str, list] | ||
| Dictionary containing: | ||
| - 'protein_RMSD': list of protein RMSD per state | ||
| - 'protein_2D_RMSD': list of 2D RMSD per state | ||
| - 'ligand_RMSD': list of ligand RMSD per state | ||
| - 'ligand_COM_drift': list of ligand COM drift per state | ||
| """ | ||
| output = { | ||
| "protein_RMSD": [], | ||
| "ligand_RMSD": [], | ||
| "ligand_wander": [], | ||
| "ligand_COM_drift": [], | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note - this will be a breaking change downstream, please don't put this in the next release. |
||
| "protein_2D_RMSD": [], | ||
| } | ||
|
|
||
|
|
@@ -121,95 +276,28 @@ def gather_rms_data( | |
|
|
||
| u_top = mda.Universe(pdb_topology) | ||
|
|
||
| for i in range(n_lambda): | ||
| for state in range(n_lambda): | ||
| # cheeky, but we can read the PDB topology once and reuse per universe | ||
| # this then only hits the PDB file once for all replicas | ||
| u = make_Universe(u_top._topology, ds, state=i) | ||
|
|
||
| prot = u.select_atoms("protein and name CA") | ||
| ligand = u.select_atoms("resname UNK") | ||
|
|
||
| # save coordinates for 2D RMSD matrix | ||
| # TODO: Some smart guard to avoid allocating a silly amount of memory? | ||
| prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32) | ||
|
|
||
| prot_start = prot.positions | ||
| ligand_start = ligand.positions | ||
| ligand_initial_com = ligand.center_of_mass() | ||
| ligand_weights = ligand.masses / np.mean(ligand.masses) | ||
|
|
||
| this_protein_rmsd = [] | ||
| this_ligand_rmsd = [] | ||
| this_ligand_wander = [] | ||
|
|
||
| for ts_i, ts in enumerate(u.trajectory[::skip]): | ||
| pb.update() | ||
|
|
||
| if prot: | ||
| prot2d[ts_i, :, :] = prot.positions | ||
| this_protein_rmsd.append( | ||
| rms.rmsd( | ||
| prot.positions, | ||
| prot_start, | ||
| None, # prot_weights, | ||
| center=False, | ||
| superposition=False, | ||
| ) | ||
| ) | ||
| if ligand: | ||
| this_ligand_rmsd.append( | ||
| rms.rmsd( | ||
| ligand.positions, | ||
| ligand_start, | ||
| ligand_weights, | ||
| center=False, | ||
| superposition=False, | ||
| ) | ||
| ) | ||
| this_ligand_wander.append( | ||
| # distance between start and current ligand position | ||
| # ignores PBC, but we've already centered the traj | ||
| mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com) | ||
| ) | ||
| u = make_Universe( | ||
| u_top._topology, | ||
| ds, | ||
| state=state, | ||
| ligand_selection=ligand_selection, | ||
| protein_selection=protein_selection, | ||
| ) | ||
| prot, ligands = _select_protein_and_ligands(u, protein_selection, ligand_selection) | ||
| prot_rmsd, rmsd2d, lig_rmsd, lig_com_drift = analyze_state(u, prot, ligands, skip) | ||
|
|
||
| if prot: | ||
| # can ignore weights here as it's all Ca | ||
| rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights) | ||
| output["protein_RMSD"].append(this_protein_rmsd) | ||
| output["protein_RMSD"].append(prot_rmsd) | ||
| output["protein_2D_RMSD"].append(rmsd2d) | ||
| if ligand: | ||
| output["ligand_RMSD"].append(this_ligand_rmsd) | ||
| output["ligand_wander"].append(this_ligand_wander) | ||
|
|
||
| output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def twoD_RMSD(positions, w: Optional[npt.NDArray]) -> list[float]: | ||
| """2 dimensions RMSD | ||
| if ligands: | ||
| output["ligand_RMSD"].append(lig_rmsd) | ||
| output["ligand_COM_drift"].append(lig_com_drift) | ||
|
|
||
| Parameters | ||
| ---------- | ||
| positions : np.ndarray | ||
| the protein positions for the entire trajectory | ||
| w : np.ndarray, optional | ||
| weights array | ||
|
|
||
| Returns | ||
| ------- | ||
| rmsd_matrix : list | ||
| a flattened version of the 2d | ||
| """ | ||
| nframes, _, _ = positions.shape | ||
|
|
||
| output = [] | ||
|
|
||
| for i, j in itertools.combinations(range(nframes), 2): | ||
| posi, posj = positions[i], positions[j] | ||
|
|
||
| rmsd = rms.rmsd(posi, posj, w, center=True, superposition=True) | ||
|
|
||
| output.append(rmsd) | ||
| output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt) | ||
| pb.update(len(u.trajectory[::skip])) | ||
|
|
||
| return output | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is fine for now, but in the refactor just take in atomgroups since we can't guarantee residue / atom names going forward, especially with rosemary.