Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
4b257b2
First stab at fixing multi-chain RMSD analysis
hannahbaumann Dec 18, 2025
236ff72
Some updates
hannahbaumann Dec 18, 2025
d274b0c
Add tests
hannahbaumann Dec 18, 2025
f3634dd
Some fixes
hannahbaumann Dec 19, 2025
e92adb3
Add another test
hannahbaumann Dec 19, 2025
b528ca2
Move some tests to use skipped smaller data
hannahbaumann Jan 16, 2026
a477bc1
Test out zenodo dealings
hannahbaumann Jan 16, 2026
ad84082
Try to improve speed
hannahbaumann Jan 16, 2026
8ba8087
Try removing locking
hannahbaumann Jan 16, 2026
ead7951
Run downloads before the testing to have a single download for all th…
hannahbaumann Jan 19, 2026
f898a35
add import pooch
hannahbaumann Jan 19, 2026
c675a5c
Test out more
hannahbaumann Jan 19, 2026
88e456d
Ensure datasets get closed
hannahbaumann Jan 19, 2026
73a8e4d
Move to per test download again
hannahbaumann Jan 19, 2026
43aaca2
Remove commented out lines
hannahbaumann Jan 21, 2026
c165525
Test out adding an extra slash
hannahbaumann Jan 21, 2026
5f17770
Switch to all version doi
hannahbaumann Jan 21, 2026
c28286e
Download url directly
hannahbaumann Jan 21, 2026
197b6ba
Small fix
hannahbaumann Jan 21, 2026
b45390a
Change url
hannahbaumann Jan 21, 2026
1d70936
Add missing s
hannahbaumann Jan 21, 2026
20084c3
Switch to api url
hannahbaumann Jan 21, 2026
1a1c916
Revert to old cli
hannahbaumann Jan 22, 2026
59c7392
Update cli.py
hannahbaumann Jan 22, 2026
5e135ab
Update cli.py
hannahbaumann Jan 22, 2026
a9a8780
Update tests for new results
hannahbaumann Jan 23, 2026
92af45b
Change shift to enable other boxes
hannahbaumann Jan 23, 2026
8ea3585
Update multichain code
hannahbaumann Jan 26, 2026
220d504
Add ligand in shifting
hannahbaumann Jan 26, 2026
8c44cb2
Use new shift class instead of old minimiser since that one is no lon…
hannahbaumann Jan 26, 2026
d13495f
Update some tests
hannahbaumann Jan 26, 2026
c34c97c
Update conftest
hannahbaumann Jan 26, 2026
0161673
Update to v2
hannahbaumann Jan 26, 2026
9b6ca69
Update tests
hannahbaumann Jan 26, 2026
1d5c849
Update rmsd test, currently large rmsd till rmsd fix comes in
hannahbaumann Jan 26, 2026
f4e88e2
Make last test pass
hannahbaumann Jan 26, 2026
bd0c8ee
Switch to zenodo fetch
hannahbaumann Jan 26, 2026
ba4c912
remove lines
hannahbaumann Jan 26, 2026
157c02f
Update tests with large errors multichain failure
hannahbaumann Jan 27, 2026
98ea023
Apply suggestion from @hannahbaumann
hannahbaumann Jan 28, 2026
c5b2d70
Reuse zenodo specification
hannahbaumann Jan 28, 2026
54576ab
reorder install
hannahbaumann Jan 28, 2026
3aa52a5
Small fix
hannahbaumann Jan 28, 2026
ff6991a
Remove flaky retries
hannahbaumann Jan 28, 2026
7a30f69
Small fix
hannahbaumann Jan 28, 2026
ac1fe7b
Merge in the fix flakiness PR and update tests
hannahbaumann Jan 28, 2026
67a0913
Add wrapping to get positions to be greater than 0
hannahbaumann Jan 29, 2026
e706b11
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 29, 2026
260d72c
Small fix
hannahbaumann Jan 29, 2026
5e7a037
fix
hannahbaumann Jan 29, 2026
4fac0a0
Use ligand selection string instead of resname
hannahbaumann Jan 29, 2026
c329ba9
Update test files
hannahbaumann Jan 30, 2026
b5cc7e0
Rename ligand_wander to ligand_COM_drift
hannahbaumann Jan 30, 2026
56b0e61
split out per state analysis into its own function
hannahbaumann Jan 30, 2026
6c1a6c5
Apply suggestion from @hannahbaumann
hannahbaumann Feb 2, 2026
f4637f4
Remove unnecessary make_whole
hannahbaumann Feb 2, 2026
e7d6935
Merge branch 'main' into fix_rmsd_multichain
hannahbaumann Feb 3, 2026
deb5126
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 3, 2026
fa3227e
Merge branch 'main' into fix_rmsd_multichain
hannahbaumann Feb 6, 2026
43eb039
Small fix
hannahbaumann Feb 6, 2026
73fe2ee
Merge branch 'main' into fix_rmsd_multichain
hannahbaumann Feb 6, 2026
6793f1d
Merge branch 'fix_rmsd_multichain' into rmsd_refactor
hannahbaumann Feb 10, 2026
7be4c53
Update tests
hannahbaumann Feb 10, 2026
68a2aab
Apply suggestion from @hannahbaumann
hannahbaumann Feb 10, 2026
198156b
Update src/openfe_analysis/transformations.py
hannahbaumann Feb 16, 2026
6f85466
Update src/openfe_analysis/transformations.py
hannahbaumann Feb 16, 2026
1d19473
Update src/openfe_analysis/transformations.py
hannahbaumann Feb 16, 2026
6cb52af
Update src/openfe_analysis/rmsd.py
hannahbaumann Feb 16, 2026
2ccc7a8
Update src/openfe_analysis/rmsd.py
hannahbaumann Feb 16, 2026
24163e5
Modify test for closest image shift
hannahbaumann Feb 16, 2026
5994774
Small fix
hannahbaumann Feb 16, 2026
b36a8e3
Merge branch 'fix_rmsd_multichain' into rmsd_refactor
hannahbaumann Feb 16, 2026
750543f
address review comments
hannahbaumann Feb 16, 2026
d2b2c34
Alternate ClosestImageShift
hannahbaumann Feb 16, 2026
592149c
Add caveat to doc string
hannahbaumann Feb 16, 2026
bb54931
Merge branch 'fix_rmsd_multichain' into rmsd_refactor
hannahbaumann Feb 16, 2026
61d3e46
Get ligands by fragment
hannahbaumann Feb 16, 2026
52792ac
Merge branch 'rmsd_refactor' of https://github.com/OpenFreeEnergy/ope…
hannahbaumann Feb 16, 2026
250dd9f
Small fix
hannahbaumann Feb 16, 2026
6544450
Merge branch 'main' into rmsd_refactor
hannahbaumann Feb 16, 2026
01e3d15
Merge branch 'main' into rmsd_refactor
hannahbaumann Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
~/.cache/openfe_analysis
# macOS cache location
~/Library/Caches/openfe_analysis
key: pooch-${{ matrix.os }}-v2
key: pooch-${{ matrix.os }}-v1

- name: "Download Zenodo data"
run: |
Expand Down
316 changes: 202 additions & 114 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,54 @@
from .transformations import Aligner, ClosestImageShift, NoJump


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
"""Makes a Universe and applies some transformations
def _select_protein_and_ligands(
    u: mda.Universe,
    protein_selection: str,
    ligand_selection: str,
) -> tuple[mda.core.groups.AtomGroup, list[mda.core.groups.AtomGroup]]:
    """Select the protein atoms and the individual ligands of a Universe.

    Parameters
    ----------
    u : mda.Universe
        Universe to run both selections on.
    protein_selection : str
        MDAnalysis selection string for the protein atoms.
    ligand_selection : str
        MDAnalysis selection string covering all ligand atoms.

    Returns
    -------
    protein : AtomGroup
        Result of ``protein_selection``.
    ligands : collection of AtomGroup
        The ``fragments`` of the atoms matched by ``ligand_selection``,
        so that every ligand can be treated on its own.
    """
    # NOTE(review): `.fragments` presumably yields one AtomGroup per bonded
    # molecule and therefore needs bond information in the topology - confirm.
    return (
        u.select_atoms(protein_selection),
        u.select_atoms(ligand_selection).fragments,
    )


def make_Universe(
top: pathlib.Path,
trj: nc.Dataset,
state: int,
ligand_selection: str = "resname UNK",
protein_selection: str = "protein and name CA",
Comment on lines +34 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine for now, but in the refactor just take in atomgroups since we can't guarantee residue / atom names going forward, especially with rosemary.

) -> mda.Universe:
"""
Creates a Universe and applies transformations for protein and ligands.

Identifies two AtomGroups:
- protein, defined as having standard amino acid names, then filtered
down to CA
- ligand, defined as resname UNK
Parameters
----------
top : pathlib.Path
Path to the topology file.
trj : nc.Dataset
Trajectory dataset.
state : int
State index in the trajectory.
ligand_selection : str, default 'resname UNK'
MDAnalysis selection string for ligands. Supports multiple ligands.
protein_selection : str, default 'protein and name CA'
MDAnalysis selection string for the protein atoms to consider.

Then applies some transformations.
Returns
-------
mda.Universe
Universe with transformations applied.

Notes
-----
If a protein is present:
- prevents the protein from jumping between periodic images
- moves the ligand to the image closest to the protein
- aligns the entire system to minimise the protein RMSD

If only a ligand:
If only a ligand is present:
- prevents the ligand from jumping between periodic images
"""
u = mda.Universe(
Expand All @@ -40,18 +72,21 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers
index_method="state",
format=FEReader,
)
prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

if prot:
protein, ligands = _select_protein_and_ligands(u, protein_selection, ligand_selection)

if protein:
# Unwrap all atoms
unwrap_tr = unwrap(prot + ligand)
complex = protein
for ligand in ligands:
complex += ligand
unwrap_tr = unwrap(complex)

# Shift chains + ligand
chains = [seg.atoms for seg in prot.segments]
shift = ClosestImageShift(chains[0], [*chains[1:], ligand])
chains = [seg.atoms for seg in protein.segments]
shift = ClosestImageShift(chains[0], [*chains[1:], *ligands])

align = Aligner(prot)
align = Aligner(protein)

u.trajectory.add_transformations(
unwrap_tr,
Expand All @@ -60,21 +95,134 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers
)
else:
# if there's no protein
# - make the ligand not jump periodic images between frames
# - align the ligand to minimise its RMSD
nope = NoJump(ligand)
align = Aligner(ligand)

u.trajectory.add_transformations(
nope,
align,
)
# - make the ligands not jump periodic images between frames
# - align the ligands to minimise its RMSD
for lig in ligands:
u.trajectory.add_transformations(NoJump(lig), Aligner(lig))
Copy link
Member

@IAlibay IAlibay Feb 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would align the trajectory to minimize ligand 1 and then re-align to ligand 2. I'm not sure this will be done correctly - would it not effectively mess up the RMSD for the first ligand?


return u


def twoD_RMSD(positions: np.ndarray, w: Optional[npt.NDArray]) -> list[float]:
    """Pairwise ("2D") RMSD between all frames of a trajectory.

    Parameters
    ----------
    positions : np.ndarray
        Positions for the entire trajectory. Must be three-dimensional,
        with the frames along the first axis.
    w : np.ndarray, optional
        Weights array, handed on unchanged to ``rms.rmsd``.

    Returns
    -------
    rmsd_matrix : list
        Flattened list of RMSD values, one per unordered frame pair
        ``(i, j)`` with ``i < j``, in ``itertools.combinations`` order.
    """
    # TODO (from review): open an issue to switch this to something better,
    # e.g. the MDAnalysis pairwise RMSD analysis:
    # https://userguide.mdanalysis.org/1.1.1/examples/analysis/alignment_and_rms/pairwise_rmsd.html#Pairwise-RMSD-of-a-trajectory-to-itself
    n_frames, _, _ = positions.shape
    frame_pairs = itertools.combinations(range(n_frames), 2)

    # every pair is centred and optimally superposed before measuring
    return [
        rms.rmsd(positions[a], positions[b], w, center=True, superposition=True)
        for a, b in frame_pairs
    ]


def analyze_state(
    u: mda.Universe,
    prot: Optional[mda.core.groups.AtomGroup],
    ligands: list[mda.core.groups.AtomGroup],
    skip: int,
) -> tuple[
    Optional[list[float]],
    Optional[list[float]],
    Optional[list[list[float]]],
    Optional[list[list[float]]],
]:
    """
    Compute RMSD and COM drift for a single lambda state.

    Parameters
    ----------
    u : mda.Universe
        Universe containing the trajectory.
    prot : AtomGroup or None
        Protein atoms to compute RMSD for. ``None`` or an empty group
        means that no protein analysis is performed.
    ligands : list of AtomGroups
        Ligands to compute RMSD and COM drift for.
    skip : int
        Step size to skip frames (e.g., every `skip`-th frame).

    Returns
    -------
    protein_rmsd : list[float] or None
        RMSD of protein per frame relative to the positions held when this
        function is entered; ``None`` if no protein is present.
    protein_2D_rmsd : list[float] or None
        Flattened 2D RMSD between all analysed protein frames; ``None`` if
        no protein is present.
    ligand_rmsd : list of list[float]
        RMSD of each ligand per frame (one inner list per ligand).
    ligand_com_drift : list of list[float]
        COM drift of each ligand per frame (one inner list per ligand).
    """
    has_protein = bool(prot)

    # Both protein results default to None so that a ligand-only state
    # returns cleanly instead of hitting an unassigned `rmsd2d`.
    prot_rmsd: Optional[list[float]] = None
    rmsd2d: Optional[list[float]] = None
    if has_protein:
        # coordinates of every analysed frame, kept for the 2D RMSD
        prot_positions = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)
        prot_start = prot.positions
        prot_rmsd = []

    # per-ligand references, taken before iterating the trajectory
    lig_starts = [lig.positions for lig in ligands]
    lig_initial_coms = [lig.center_of_mass() for lig in ligands]
    # the mass weights do not depend on the frame, so compute them once
    lig_weights = [lig.masses / np.mean(lig.masses) for lig in ligands]
    lig_rmsd: list[list[float]] = [[] for _ in ligands]
    lig_com_drift: list[list[float]] = [[] for _ in ligands]

    for ts_i, _ in enumerate(u.trajectory[::skip]):
        if has_protein:
            prot_positions[ts_i, :, :] = prot.positions
            prot_rmsd.append(
                rms.rmsd(
                    prot.positions,
                    prot_start,
                    None,  # prot_weights,
                    center=False,
                    superposition=False,
                )
            )
        for i, lig in enumerate(ligands):
            lig_rmsd[i].append(
                rms.rmsd(
                    lig.positions,
                    lig_starts[i],
                    lig_weights[i],
                    center=False,
                    superposition=False,
                )
            )
            lig_com_drift[i].append(
                # distance between start and current ligand position
                # ignores PBC, but we've already centered the traj
                mda.lib.distances.calc_bonds(lig.center_of_mass(), lig_initial_coms[i])
            )

    if has_protein:
        # can ignore weights here as it's all Ca
        rmsd2d = twoD_RMSD(prot_positions, w=None)  # prot_weights)

    return prot_rmsd, rmsd2d, lig_rmsd, lig_com_drift


def gather_rms_data(
pdb_topology: pathlib.Path, dataset: pathlib.Path, skip: Optional[int] = None
pdb_topology: pathlib.Path,
dataset: pathlib.Path,
skip: Optional[int] = None,
ligand_selection: str = "resname UNK",
protein_selection: str = "protein and name CA",
) -> dict[str, list[float]]:
"""Generate structural analysis of RBFE simulation

Expand All @@ -87,17 +235,24 @@ def gather_rms_data(
skip : int, optional
step at which to progress through the trajectory. by default, selects a
step that produces roughly 500 frames of analysis per replicate
ligand_selection : str, optional
MDAnalysis selection string for ligands (default "resname UNK").
protein_selection : str, optional
MDAnalysis selection string for protein (default "protein and name CA").

Produces, for each lambda state:
- 1D protein RMSD timeseries 'protein_RMSD'
- ligand RMSD timeseries
- ligand COM motion 'ligand_wander'
- 2D protein RMSD plot
Returns
-------
output : dict[str, list]
Dictionary containing:
- 'protein_RMSD': list of protein RMSD per state
- 'protein_2D_RMSD': list of 2D RMSD per state
- 'ligand_RMSD': list of ligand RMSD per state
- 'ligand_COM_drift': list of ligand COM drift per state
"""
output = {
"protein_RMSD": [],
"ligand_RMSD": [],
"ligand_wander": [],
"ligand_COM_drift": [],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note - this will be a breaking change downstream, please don't put this in the next release.

"protein_2D_RMSD": [],
}

Expand All @@ -121,95 +276,28 @@ def gather_rms_data(

u_top = mda.Universe(pdb_topology)

for i in range(n_lambda):
for state in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

# save coordinates for 2D RMSD matrix
# TODO: Some smart guard to avoid allocating a silly amount of memory?
prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)

prot_start = prot.positions
ligand_start = ligand.positions
ligand_initial_com = ligand.center_of_mass()
ligand_weights = ligand.masses / np.mean(ligand.masses)

this_protein_rmsd = []
this_ligand_rmsd = []
this_ligand_wander = []

for ts_i, ts in enumerate(u.trajectory[::skip]):
pb.update()

if prot:
prot2d[ts_i, :, :] = prot.positions
this_protein_rmsd.append(
rms.rmsd(
prot.positions,
prot_start,
None, # prot_weights,
center=False,
superposition=False,
)
)
if ligand:
this_ligand_rmsd.append(
rms.rmsd(
ligand.positions,
ligand_start,
ligand_weights,
center=False,
superposition=False,
)
)
this_ligand_wander.append(
# distance between start and current ligand position
# ignores PBC, but we've already centered the traj
mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com)
)
u = make_Universe(
u_top._topology,
ds,
state=state,
ligand_selection=ligand_selection,
protein_selection=protein_selection,
)
prot, ligands = _select_protein_and_ligands(u, protein_selection, ligand_selection)
prot_rmsd, rmsd2d, lig_rmsd, lig_com_drift = analyze_state(u, prot, ligands, skip)

if prot:
# can ignore weights here as it's all Ca
rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights)
output["protein_RMSD"].append(this_protein_rmsd)
output["protein_RMSD"].append(prot_rmsd)
output["protein_2D_RMSD"].append(rmsd2d)
if ligand:
output["ligand_RMSD"].append(this_ligand_rmsd)
output["ligand_wander"].append(this_ligand_wander)

output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)

return output


def twoD_RMSD(positions, w: Optional[npt.NDArray]) -> list[float]:
"""2 dimensions RMSD
if ligands:
output["ligand_RMSD"].append(lig_rmsd)
output["ligand_COM_drift"].append(lig_com_drift)

Parameters
----------
positions : np.ndarray
the protein positions for the entire trajectory
w : np.ndarray, optional
weights array

Returns
-------
rmsd_matrix : list
a flattened version of the 2d
"""
nframes, _, _ = positions.shape

output = []

for i, j in itertools.combinations(range(nframes), 2):
posi, posj = positions[i], positions[j]

rmsd = rms.rmsd(posi, posj, w, center=True, superposition=True)

output.append(rmsd)
output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)
pb.update(len(u.trajectory[::skip]))

return output
Loading