From fe79f171acf0852c4f6544ade130026e49b770f8 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 09:41:52 -0400 Subject: [PATCH 01/23] Add plumbdb package files --- pkg/README.md | 12 ++++++++++++ pkg/env.yaml | 6 ++++++ pkg/pyproject.toml | 27 +++++++++++++++++++++++++++ pkg/src/plumbdb/__init__.py | 0 pkg/src/plumbdb/cli.py | 9 +++++++++ 5 files changed, 54 insertions(+) create mode 100644 pkg/README.md create mode 100644 pkg/env.yaml create mode 100644 pkg/pyproject.toml create mode 100644 pkg/src/plumbdb/__init__.py create mode 100644 pkg/src/plumbdb/cli.py diff --git a/pkg/README.md b/pkg/README.md new file mode 100644 index 0000000..2b305d2 --- /dev/null +++ b/pkg/README.md @@ -0,0 +1,12 @@ +# plumbdb + +## Installation + +`plumbdb` can be installed via its Python source code with dependencies defined in a `conda` environment file. + +## Usage + +## Contributing + +## Citation + diff --git a/pkg/env.yaml b/pkg/env.yaml new file mode 100644 index 0000000..bf6b65c --- /dev/null +++ b/pkg/env.yaml @@ -0,0 +1,6 @@ +name: plumbdb +channels: + - conda-forge + +dependencies: + - click diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml new file mode 100644 index 0000000..f214cda --- /dev/null +++ b/pkg/pyproject.toml @@ -0,0 +1,27 @@ +[build-system] +requires = [ + "setuptools>=77.0.3", + "setuptools-scm>=8", +] +build-backend = "setuptools.build_meta" + +[project] +name = "plumbdb" +description = "" +readme = "README.md" +authors = [ + {name = "Ariana Brenner Clerkin", email = "ariana.clerkin@choderlab.org"} +] +license = "MIT" +classifiers = [ + "Intended Audience :: Science/Research", + "Operating System :: POSIX", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Chemistry", +] +requires-python = ">= 3.13" +dynamic = ["version"] + +[project.optional-dependencies] +test = ["pytest"] diff --git a/pkg/src/plumbdb/__init__.py b/pkg/src/plumbdb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py new file mode 100644 index 0000000..eb110e9 --- /dev/null +++ b/pkg/src/plumbdb/cli.py @@ -0,0 +1,9 @@ +import click + +@click.group() +def cli(): + pass + +@cli.command() +def bind_db(): + pass From 180023faa1ea626ed63ca99b8a78f1eadb6b72bc Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:15:57 -0400 Subject: [PATCH 02/23] Add asapdiscovery dependency --- pkg/env.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/env.yaml b/pkg/env.yaml index bf6b65c..5e6d371 100644 --- a/pkg/env.yaml +++ b/pkg/env.yaml @@ -3,4 +3,5 @@ channels: - conda-forge dependencies: + - asapdiscovery - click From 9a7be7089292466505c64ed64383aaf921f7c298 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:16:12 -0400 Subject: [PATCH 03/23] Expose cli entrypoint: plumbline --- pkg/pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml index f214cda..cc4656b 100644 --- a/pkg/pyproject.toml +++ b/pkg/pyproject.toml @@ -23,5 +23,8 @@ classifiers = [ requires-python = ">= 3.13" dynamic = ["version"] +[project.scripts] +plumbline = "plumbdb.cli:cli" + [project.optional-dependencies] test = ["pytest"] From a7b78f9c2e52428f4000dc0bb5d6e2ebf09f9a7f Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:44:18 -0400 Subject: [PATCH 04/23] Lower Python version constraint --- pkg/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml index cc4656b..3558ee4 100644 --- a/pkg/pyproject.toml +++ b/pkg/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Chemistry", ] -requires-python = ">= 3.13" +requires-python = ">= 3.11" dynamic = ["version"] [project.scripts] From 37acd2d7df2e764442a6c699477c9b6f7b2cc41a Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:45:16 -0400 Subject: [PATCH 05/23] Add test module --- pkg/src/plumbdb/tests/test_plumb.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pkg/src/plumbdb/tests/test_plumb.py diff --git a/pkg/src/plumbdb/tests/test_plumb.py b/pkg/src/plumbdb/tests/test_plumb.py new file mode 100644 index 0000000..e59fd58 --- /dev/null +++ b/pkg/src/plumbdb/tests/test_plumb.py @@ -0,0 +1,2 @@ +def test_import(): + import plumbdb From ce3972ce0a5e68cfa13ea3f67a8a125fdb2cbbbb Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:45:29 -0400 Subject: [PATCH 06/23] Add developer docs --- pkg/docs/developer.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 pkg/docs/developer.md diff --git a/pkg/docs/developer.md b/pkg/docs/developer.md new file mode 100644 index 0000000..8b99e46 --- /dev/null +++ b/pkg/docs/developer.md @@ -0,0 +1,19 @@ +# Developer documentation + +## Installation + +`plumbdb` can currently only be installed via its python source with its dependencies handled by `conda`. + +```bash +conda create -n plumbdb_dev -f env.yaml +conda activate plumbdb_dev +pip install -e ".[test]" +``` + +## Running tests + +Tests are performed with [`pytest`](https://docs.pytest.org/), which is automatically installed as part of the `test` dependency group specified in `pyproject.toml`. With the test environment active and from within `plumb/pkg/`, run: + +```bash +python -m pytest src/ +``` \ No newline at end of file From 487dd06c3be3195465b252d1c815c95f980e6f05 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 10:52:17 -0400 Subject: [PATCH 07/23] Trim package README --- pkg/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/README.md b/pkg/README.md index 2b305d2..107f408 100644 --- a/pkg/README.md +++ b/pkg/README.md @@ -2,11 +2,10 @@ ## Installation -`plumbdb` can be installed via its Python source code with dependencies defined in a `conda` environment file. +Since `plumbdb` is not yet registered with conda-forge, follow the developer installation instructions at [docs/developer.md](docs/developer.md#installation). ## Usage -## Contributing - -## Citation +### Command line +`plumbdb` is usable via command line through the `plumbline` CLI application. From c1b9beb18ec48b1ac3958da786f80910ec1fccef Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 11:35:09 -0400 Subject: [PATCH 08/23] Add ruff to dev tools --- pkg/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml index 3558ee4..1c00003 100644 --- a/pkg/pyproject.toml +++ b/pkg/pyproject.toml @@ -28,3 +28,4 @@ plumbline = "plumbdb.cli:cli" [project.optional-dependencies] test = ["pytest"] +dev = ["pytest", "ruff"] \ No newline at end of file From b83f2e9ee498a3f845f0640001e452438b5586bb Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 11:35:49 -0400 Subject: [PATCH 09/23] Replicate commands from old bin/ directory - download_pdb_structure - assess_prepped_protein --- pkg/src/plumbdb/cli.py | 69 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index eb110e9..e0681b3 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -1,9 +1,72 @@ +import json +import pathlib + import click + @click.group() def cli(): pass -@cli.command() -def bind_db(): - pass + +@cli.command(name="download-pdb", help="Download PDB files from the PDB database.") +@click.option("-i", "--input-file", required=True) +@click.option( + "output_directory", + "-d", + "--output-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, +) +def download_pdb_structure(input_file, output_directory): + from asapdiscovery.data.services.rcsb.rcsb_download import download_pdb_structure + + output_directory.mkdir(exist_ok=True, parents=True) + with open(input_file, "r") as f: + record_dict = json.load(f) + downloaded = download_pdb_structure( + record_dict["pdb_id"], output_dir, file_format="cif1" + ) + record_dict["cif"] = downloaded + with open(output_dir / "record.json", "w") as f: + json.dump(record_dict, f) + + +@cli.command( + "assess-prepped-protein", + help="Assess the quality of the prepped protein structure.", +) +@click.option( + "output_directory", + "-d", + "--output-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, +) +@click.option("input_openeye_du", "-i", "--input-file", type=str, required=True) +def assess_prepped_protein(output_directory, input_openeye_du): + try: + from openeye import oespruce + except ModuleNotFoundError: + raise click.UsageError("Could not import openeye") + + from asapdiscovery.data.backend.openeye import load_openeye_design_unit + + output_directory.mkdir(exist_ok=True, parents=True) + + du = load_openeye_design_unit(input_openeye_du) + + # should use a different id method + stem = Path(input_openeye_du).stem + validator = oespruce.OEValidateDesignUnit() + err_msgs = validator.GetMessages(validator.Validate(du)) + sq = du.GetStructureQuality() + + report = { + "errors": err_msgs, + "warnings": [], + "has_iridium_data": sq.HasIridiumData(), + } + + with open(output_directory / f"{stem}_quality_report.json", "w") as f: + json.dump(report, f, indent=4) From b453eefbeb0372db12c821d406f40705c423ac76 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 11:49:50 -0400 Subject: [PATCH 10/23] Add generate_constrained_ligand_poses command to cli --- pkg/src/plumbdb/cli.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index e0681b3..0eecdf2 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -70,3 +70,44 @@ def assess_prepped_protein(output_directory, input_openeye_du): with open(output_directory / f"{stem}_quality_report.json", "w") as f: json.dump(report, f, indent=4) + + +# TODO: check for openeye installation, maybe make it a decorator +@cli.command( + "generate-constrained-ligand-poses", + help="Generate constrained ligand poses using OpenEye tooling.", +) +@click.option("input_sdf", "-i", "--input-sdf", required=True, type=str) +@click.option("prepped_schema", "-s", "--prepped-schema", type=str) +@click.option("output_directory", "-d", "--output-directory") +def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_directory): + try: + from asapdiscovery.data.schema.complex import PreppedComplex + from asapdiscovery.data.readers.molfile import MolFileFactory + from asapdiscovery.data.schema.ligand import Ligand + from asapdiscovery.docking.schema.pose_generation import ( + OpenEyeConstrainedPoseGenerator, + ) + from asapdiscovery.data.backend.openeye import save_openeye_sdfs + except ModuleNotFoundError: + raise click.UsageError("Could not import openeye") + + raw_ligands = MolFileFactory(filename=input_sdf).load() + + # reconstruct ligands from smiles because of some weirdness with the sdf files + ligs = [ + Ligand.from_smiles( + compound_name=lig.tags["BindingDB monomerid"], + smiles=lig.smiles, + tags=lig.dict(), + ) + for lig in raw_ligands + ] + + prepped_complex = PreppedComplex.parse_file(prepped_schema) + + poser = OpenEyeConstrainedPoseGenerator() + poses = poser.generate_poses(prepped_complex, ligands=ligs) + oemols = [ligand.to_oemol() for ligand in poses.posed_ligands] + # save to sdf file + save_openeye_sdfs(oemols, output_directory / "poses.sdf") From 9ce41a7ec30a1fa3bc9577942359ce3d30dd833a Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Mon, 27 Oct 2025 11:53:31 -0400 Subject: [PATCH 11/23] Add stubs for unimplemented CLI commands --- pkg/src/plumbdb/cli.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 0eecdf2..35a19b8 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -111,3 +111,18 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director oemols = [ligand.to_oemol() for ligand in poses.posed_ligands] # save to sdf file save_openeye_sdfs(oemols, output_directory / "poses.sdf") + + +@cli.command("prep-cif") +def prep_cif(): + raise NotImplementedError + + +@cli.command("process-bindingdb") +def process_bindingdb(): + raise NotImplementedError + + +@cli.command("visualize-network") +def visualize_network(): + raise NotImplementedError From cc6bf5478db5ad51dceddb2c5310f984b0052c30 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 14:01:23 -0400 Subject: [PATCH 12/23] Fix variable name usage --- pkg/docs/developer.md | 2 +- pkg/env.yaml | 1 + pkg/src/plumbdb/cli.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/docs/developer.md b/pkg/docs/developer.md index 8b99e46..29326e9 100644 --- a/pkg/docs/developer.md +++ b/pkg/docs/developer.md @@ -7,7 +7,7 @@ ```bash conda create -n plumbdb_dev -f env.yaml conda activate plumbdb_dev -pip install -e ".[test]" +pip install -e ".[dev]" ``` ## Running tests diff --git a/pkg/env.yaml b/pkg/env.yaml index 5e6d371..6448b3e 100644 --- a/pkg/env.yaml +++ b/pkg/env.yaml @@ -4,4 +4,5 @@ channels: dependencies: - asapdiscovery + - openfe - click diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 35a19b8..c93a318 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -25,10 +25,10 @@ def download_pdb_structure(input_file, output_directory): with open(input_file, "r") as f: record_dict = json.load(f) downloaded = download_pdb_structure( - record_dict["pdb_id"], output_dir, file_format="cif1" + record_dict["pdb_id"], output_directory, file_format="cif1" ) record_dict["cif"] = downloaded - with open(output_dir / "record.json", "w") as f: + with open(output_directory / "record.json", "w") as f: json.dump(record_dict, f) From e2518b665aef6d87e1bc4032529f8fee6366d08f Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 14:01:50 -0400 Subject: [PATCH 13/23] Implement prep_cif command --- pkg/src/plumbdb/cli.py | 66 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index c93a318..f0d9246 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -114,8 +114,70 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director @cli.command("prep-cif") -def prep_cif(): - raise NotImplementedError +@click.option("input_json", "-j", "--input-json", type=str, required=True) +@click.option("input_cif", "-c", "--input-cif", type=str, required=True) +@click.option("fasta_sequence", "-f", "--fasta-sequence", type=str, required=True) +@click.option("loop_db", "--loopdb", type=str, required=True) +@click.option( + "output_directory", + "-d", + "--output-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + default="./", + required=True, +) +def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): + from asapdiscovery.data.backend.openeye import load_openeye_cif1 + from asapdiscovery.modeling.modeling import split_openeye_mol + from asapdiscovery.data.schema.ligand import Ligand, Complex + from plumbdb.oespruce import spruce_protein + from asapdiscovery.data.schema.target import Target + + output_directory.mkdir(exist_ok=True, parents=True) + + with open(input_json, "r") as f: + record_dict = json.load(f) + + graphmol = load_openeye_cif1(args.input_cif) + + # this is what you would do if you didn't want to use whatever ligand is in the protein + # split_dict = split_openeye_mol(graphmol, keep_one_lig=False) + split_dict = split_openeye_mol(graphmol, keep_one_lig=True) + + # # Save initial protein as pdb file + target = Target.from_oemol( + split_dict["prot"], + target_name=record_dict["pdb_id"], + ) + # target.to_pdb("protein.pdb") + + # this is what you would do if you didn't want to use whatever ligand is in the protein + ligand = Ligand.from_oemol(split_dict["lig"]) + # ligand = Ligand.from_sdf(f'{record_dict["compound_name"]}.sdf') + + combined_complex = Complex(target=target, ligand=ligand, ligand_chain="L") + + oemol = combined_complex.to_combined_oemol() + + results, spruced = spruce_protein( + initial_prot=oemol, + protein_sequence=args.fasta_sequence, + loop_db=args.loop_db, + ) + + split_dict = split_openeye_mol(spruced) + prepped_target = Target.from_oemol(split_dict["prot"], **target.dict()) + prepped_ligand = Ligand.from_oemol(split_dict["lig"], **ligand.dict()) + prepped_ligand.to_sdf(output_directory / f"{ligand.compound_name}_ligand.sdf") + prepped_target.to_pdb(output_directory / f"{target.target_name}_spruced.pdb") + + prepped_complex = Complex( + target=prepped_target, ligand=prepped_ligand, ligand_chain="L" + ) + + filename = f"{target.target_name}_{ligand.compound_name}_spruced_complex.pdb" + prepped_complex.to_pdb(output_directory / filename) + results.to_json_file(output_directory / f"{target.target_name}_spruce_results.json") @cli.command("process-bindingdb") From c50aeccf2a8b4e3ebaa43bb8c0dc3479b9d66372 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 14:02:13 -0400 Subject: [PATCH 14/23] Implement process_bindingdb command --- pkg/src/plumbdb/cli.py | 79 +++++++++++++++- pkg/src/plumbdb/oespruce.py | 180 ++++++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 pkg/src/plumbdb/oespruce.py diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index f0d9246..71f79ae 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -181,8 +181,83 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): @cli.command("process-bindingdb") -def process_bindingdb(): - raise NotImplementedError +@click.option( + "input_directory", + "-i", + "--input-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, + help="SDF file to process", +) +@click.option( + "output_directory", + "-o", + "--output-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, + help="Directory to write output files", +) +def process_bindingdb(input_directory, output_directory): + from asapdiscovery.data.schema.ligand import Ligand + from asapdiscovery.data.readers.molfile import MolFileFactory + import pandas as pd + import math + + output_directory.mkdir(exist_ok=True, parents=True) + + # get all sdf files + sdfs = list(input_directory.glob("*3D.sdf")) + + for sdf in sdfs: + # asap function to read separate ligands from a multi-ligand sdf file + mols: list[Ligand] = MolFileFactory(filename=sdf).load() + + # create a dictionary for each ligand containing various relevant information + # there are some hidden choices here, for instance OpenEye is adding hydrogens which you might not want + + for mol in mols: + mol_dict = { + "compound_name": mol.compound_name, + "filename": sdf.name, + "has_3d": mol.to_oemol().GetDimension() == 3, + "num_atoms": mol.to_oemol().NumAtoms(), + "smiles": mol.smiles, + "pdb_id": mol.tags.get("PDB ID ")[:4] + if mol.tags.get("PDB ID ") + else "", + } + + # any data in the SDF file is saved to the 'tags' attribute of an asapdiscovery Ligand object + mol_dict.update(mol.tags) + + # write out sdf file + if mol_dict["has_3d"]: + mol.to_sdf(output_directory / f"{mol.compound_name}.sdf") + + output.append(mol_dict) + + df = pd.DataFrame.from_records(output) + df.to_csv(output_directory / "processed_bindingdb.csv", index=False) + + # write separate csvs for 2D and 3D + df_2d = df[~df["has_3d"]] + df_3d = df[df["has_3d"]] + df_2d.to_csv(output_directory / "2d_bindingdb.csv", index=False) + df_3d.to_csv(output_directory / "3d_bindingdb.csv", index=False) + + unique_sdf_filenames = df_3d["filename"].unique() + with open(output_directory / "unique_3D_sdf_filenames.txt", "w") as f: + for filename in unique_sdf_filenames: + f.write(f"{filename}\n") + + # write out separate json records + for record in df_3d.to_dict(orient="records"): + with open(output_directory / f"{record['compound_name']}.json", "w") as f: + f.write( + json.dumps( + record, indent=4, default=lambda x: None if math.isnan(x) else x + ) + ) @cli.command("visualize-network") diff --git a/pkg/src/plumbdb/oespruce.py b/pkg/src/plumbdb/oespruce.py new file mode 100644 index 0000000..3d24acf --- /dev/null +++ b/pkg/src/plumbdb/oespruce.py @@ -0,0 +1,180 @@ +from pathlib import Path +import json + +from asapdiscovery.data.schema.schema_base import DataModelAbstractBase +from asapdiscovery.data.backend.openeye import ( + oespruce, + oechem, + oegrid, + load_openeye_cif1, +) +from asapdiscovery.modeling.modeling import ( + split_openeye_mol, + spruce_protein, + get_oe_structure_metadata_from_sequence, + openeye_perceive_residues, +) +from asapdiscovery.data.schema.target import Target +from asapdiscovery.data.schema.ligand import Ligand + +from asapdiscovery.data.schema.complex import Complex + + +class SpruceResults(DataModelAbstractBase): + build_loops_success: bool + build_sidechains_success: bool + add_caps_success: bool = None + place_hydrogens_success: bool + error_message: str + + +def spruce_protein( + initial_prot: oechem.OEGraphMol, + protein_sequence: str = None, + loop_db: Path = None, +) -> oechem.OEDesignUnit or oechem.OEGraphMol: + """ + Applies the OESpruce protein preparation pipeline to the given protein structure. + + Parameters + ---------- + initial_prot : oechem.OEMol + The input protein structure to be prepared. + + protein_sequence : str, optional + The sequence of the protein for a single chain. If provided, this will be added to the Structure Metadata before applying the OESpruce pipeline. + Default is None. + + loop_db : str, optional + The filename of the loop database to be used by the OESpruce pipeline. If provided, the pipeline will include the loop building step. + Default is None. + + Returns + ------- + (success: bool, spruce_error_msg: str, initial_prot: oechem.OEMol) + Returns a tuple of: + a boolean for whether sprucing was successful + a string of the error message if sprucing failed + the prepared protein structure. + """ + + # Add Hs to prep protein and ligand + # oechem.OEAddExplicitHydrogens(initial_prot) + + # Even though we aren't making DUs, we still need to set up the options + opts = oespruce.OEMakeDesignUnitOptions() + opts.SetSuperpose(False) + opts.GetPrepOptions().SetStrictProtonationMode(True) + + # Add caps when needed + # Allow truncation in case adding a cap causes a clash + cap_opts = oespruce.OECapBuilderOptions() + cap_opts.SetAllowTruncate(True) + is_terminal_predicate = oechem.OEOrAtom( + oechem.OEIsNTerminalAtom(), oechem.OEIsCTerminalAtom() + ) + + # Set Build Loop and Sidechain Opts + sc_opts = oespruce.OESidechainBuilderOptions() + + loop_opts = oespruce.OELoopBuilderOptions() + loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity) + loop_opts.SetSeqAlignGapPenalty(-1) + loop_opts.SetSeqAlignExtendPenalty(0) + + # Don't build tails, too much work for little gain + loop_opts.SetBuildTails(False) + + if loop_db is not None: + print(f"Adding loop db {loop_db}") + loop_opts.SetLoopDBFilename(str(loop_db)) + + # Construct spruce filter + spruce_opts = oespruce.OESpruceFilterOptions() + spruce = oespruce.OESpruceFilter(spruce_opts, opts) + + # Spruce! + + # These objects are for some reason needed in order to run spruce + grid = oegrid.OESkewGrid() + + if protein_sequence: + # convert fasta sequence to 3-letter codes + try: + protein_sequence = " ".join(convert_to_three_letter_codes(protein_sequence)) + print(type(protein_sequence)) + print("Adding sequence metadata from sequence: ", protein_sequence) + metadata = get_oe_structure_metadata_from_sequence( + initial_prot, protein_sequence + ) + except KeyError as e: + print( + f"Error converting protein sequence to 3-letter codes: {e}. Skipping sequence metadata." + ) + protein_sequence = None + + if not protein_sequence: + metadata = oespruce.OEStructureMetadata() + + # Building the loops actually does use the sequence metadata + build_loops_success = oespruce.OEBuildLoops( + initial_prot, metadata, sc_opts, loop_opts + ) + + build_sidechains_success = oespruce.OEBuildSidechains(initial_prot, sc_opts) + print(type(initial_prot), type(is_terminal_predicate), type(cap_opts)) + add_caps_success = oespruce.OECapTermini( + initial_prot, is_terminal_predicate, cap_opts + ) + print(add_caps_success) + place_hydrogens_success = oechem.OEPlaceHydrogens(initial_prot) + spruce_error_code = spruce.StandardizeAndFilter(initial_prot, grid, metadata) + spruce_error_msg = spruce.GetMessages(spruce_error_code) + + # Re-percieve residues so that atom number and connect records dont get screwed up + initial_prot = openeye_perceive_residues(initial_prot, preserve_all=False) + return ( + SpruceResults( + build_loops_success=build_loops_success, + build_sidechains_success=build_sidechains_success, + # add_caps_success=None, # add_caps_success, + place_hydrogens_success=place_hydrogens_success, + error_message=spruce_error_msg, + ), + initial_prot, + ) + + +def convert_to_three_letter_codes(sequence) -> list[str]: + """ + Convert a protein sequence from 1-letter codes to 3-letter codes. + """ + # Dictionary to map 1-letter codes to 3-letter codes + amino_acid_dict = { + "A": "ALA", + "R": "ARG", + "N": "ASN", + "D": "ASP", + "C": "CYS", + "Q": "GLN", + "E": "GLU", + "G": "GLY", + "H": "HIS", + "I": "ILE", + "L": "LEU", + "K": "LYS", + "M": "MET", + "F": "PHE", + "P": "PRO", + "S": "SER", + "T": "THR", + "W": "TRP", + "Y": "TYR", + "V": "VAL", + } + + # Convert the sequence to 3-letter codes + three_letter_sequence = [amino_acid_dict[aa] for aa in sequence] + + # Join the 3-letter codes with a space + return three_letter_sequence From c66bdaf65aa2926e9b1da616331e18a7294bfa7b Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 14:03:23 -0400 Subject: [PATCH 15/23] Implement visualize-network command --- pkg/src/plumbdb/cli.py | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 71f79ae..82bd684 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -260,6 +260,40 @@ def process_bindingdb(input_directory, output_directory): ) -@cli.command("visualize-network") -def visualize_network(): - raise NotImplementedError +@cli.command( + "visualize-network", + help="Generate a network plot of the proposed alchemical network", +) +@click.option( + "network_graphml", + "-n", + "--network-graphml", + type=str, + required=True, + help="Path to the input JSON file containing the atom mapping network.", +) +@click.option( + "output_directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, + default=pathlib.Path("./"), + help="Path to the output directory where the results will be stored", +) +def visualize_network(network_graphml, output_directory): + from openfe.utils.atommapping_network_plotting import plot_atommapping_network + from openfe.setup import LigandNetwork + + output_directory.mkdir(exist_ok=True, parents=True) + ligand_network = args.network_graphml + + if not ligand_network.exists(): + raise FileNotFoundError( + f"Could not find the ligand network file at {ligand_network}" + ) + + with open(ligand_network) as f: + graphml = f.read() + + network = LigandNetwork.from_graphml(graphml) + fig = plot_atommapping_network(network) + fig.savefig(output_directory / "network_plot.png") From 86466e99b5a41da892a613ced090754f7a99bf62 Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 16:39:00 -0400 Subject: [PATCH 16/23] Updated nextflow to use new plumbline command --- modules.nf | 39 ++++++++++++++++++++++++++----------- pkg/src/plumbdb/cli.py | 37 +++++++++++++++++++++++------------ pkg/src/plumbdb/oespruce.py | 13 +++---------- 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/modules.nf b/modules.nf index 1f3dcb7..25fee54 100644 --- a/modules.nf +++ b/modules.nf @@ -12,7 +12,9 @@ process PROCESS_BINDINGDB { script: """ - python "${params.scripts}/process_bindingdb.py" --input-dir "${params.bindingDB}" + plumbline process-bindingdb \ + --input-directory "${params.bindingDB}" \ + --output-directory "./" """ } process DOWNLOAD_PDB { @@ -30,7 +32,8 @@ process DOWNLOAD_PDB { script: """ - python "${params.scripts}/download_pdb.py" --input-json "${input_json}" + plumbline download-pdb \ + --input-file "${input_json}" """ } process PREP_CIF { @@ -51,7 +54,11 @@ process PREP_CIF { script: """ - python "${params.scripts}/prep_cif.py" --input-json "${input_json}" --input-cif "${input_cif}" --fasta-sequence "${params.fasta}" + plumbline prep-cif \ + --input-json "${input_json}" \ + --input-cif "${input_cif}" \ + --fasta-sequence "${params.fasta}" \ + --output-directory "./" """ } process PREP_FOR_DOCKING { @@ -71,7 +78,10 @@ process PREP_FOR_DOCKING { script: """ - asap-cli protein-prep --target SARS-CoV-2-Mpro --pdb-file "${prepped_pdb}" --output-dir "./" + asap-cli protein-prep \ + --target SARS-CoV-2-Mpro \ + --pdb-file "${prepped_pdb}" \ + --output-dir "./" """ } process ASSESS_PREPPED_PROTEIN { @@ -88,7 +98,9 @@ process ASSESS_PREPPED_PROTEIN { script: """ - python "${params.scripts}/assess_prepped_protein.py" --input-oedu "${design_unit}" + plumbline assess-prepped-protein \ + --input-file "${design_unit}" \ + --output-directory "./" """ } process GENERATE_CONSTRAINED_LIGAND_POSES { @@ -105,7 +117,10 @@ process GENERATE_CONSTRAINED_LIGAND_POSES { script: """ - python "${params.scripts}/generate_constrained_ligand_poses.py" --input-sdf "${params.congenericSeries}" --prepped-schema "${prepped_complex_json_schema}" + plumbline generate-constrained-ligand-poses \ + --input-sdf "${params.congenericSeries}" \ + --prepped-schema "${prepped_complex_json_schema}" \ + --output-directory "./" """ } @@ -127,10 +142,10 @@ process MAKE_FEC_INPUTS { asap-cli alchemy create fecs-workflow.json asap-cli alchemy plan \ - -f fecs-workflow.json \ - --name ${uuid}_plumb_alchemiscale_network \ - --receptor "${prepped_complex}" \ - --ligands "${posed_ligands}" \ + -f fecs-workflow.json \ + --name ${uuid}_plumb_alchemiscale_network \ + --receptor "${prepped_complex}" \ + --ligands "${posed_ligands}" \ """ } process VISUALIZE_NETWORK { @@ -147,6 +162,8 @@ process VISUALIZE_NETWORK { script: """ - python "${params.scripts}/visualize_network.py" --network-graphml "${network_graph}" + plumbline visualize-network \ + --network-graphml "${network_graph}" \ + --output-directory "./" """ } \ No newline at end of file diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 82bd684..fe41370 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -9,11 +9,12 @@ def cli(): pass +# TODO: input file type can be better defined @cli.command(name="download-pdb", help="Download PDB files from the PDB database.") -@click.option("-i", "--input-file", required=True) +@click.option("-i", "--input-file", type=str, required=True) @click.option( "output_directory", - "-d", + "-o", "--output-directory", type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), required=True, @@ -38,7 +39,7 @@ def download_pdb_structure(input_file, output_directory): ) @click.option( "output_directory", - "-d", + "-o", "--output-directory", type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), required=True, @@ -57,7 +58,7 @@ def assess_prepped_protein(output_directory, input_openeye_du): du = load_openeye_design_unit(input_openeye_du) # should use a different id method - stem = Path(input_openeye_du).stem + stem = pathlib.Path(input_openeye_du).stem validator = oespruce.OEValidateDesignUnit() err_msgs = validator.GetMessages(validator.Validate(du)) sq = du.GetStructureQuality() @@ -79,7 +80,13 @@ def assess_prepped_protein(output_directory, input_openeye_du): ) @click.option("input_sdf", "-i", "--input-sdf", required=True, type=str) @click.option("prepped_schema", "-s", "--prepped-schema", type=str) -@click.option("output_directory", "-d", "--output-directory") +@click.option( + "output_directory", + "-o", + "--output-directory", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, +) def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_directory): try: from asapdiscovery.data.schema.complex import PreppedComplex @@ -116,11 +123,13 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director @cli.command("prep-cif") @click.option("input_json", "-j", "--input-json", type=str, required=True) @click.option("input_cif", "-c", "--input-cif", type=str, required=True) -@click.option("fasta_sequence", "-f", "--fasta-sequence", type=str, required=True) +@click.option( + "fasta_sequence", "-f", "--fasta-sequence", type=str, default=None, required=True +) @click.option("loop_db", "--loopdb", type=str, required=True) @click.option( "output_directory", - "-d", + "-o", "--output-directory", type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), default="./", @@ -138,7 +147,7 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): with open(input_json, "r") as f: record_dict = json.load(f) - graphmol = load_openeye_cif1(args.input_cif) + graphmol = load_openeye_cif1(input_cif) # this is what you would do if you didn't want to use whatever ligand is in the protein # split_dict = split_openeye_mol(graphmol, keep_one_lig=False) @@ -161,8 +170,8 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): results, spruced = spruce_protein( initial_prot=oemol, - protein_sequence=args.fasta_sequence, - loop_db=args.loop_db, + protein_sequence=fasta_sequence, + loop_db=loop_db, ) split_dict = split_openeye_mol(spruced) @@ -180,7 +189,10 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): results.to_json_file(output_directory / f"{target.target_name}_spruce_results.json") -@cli.command("process-bindingdb") +# TODO: help string +@cli.command( + "process-bindingdb", help="Parse and verify SDF files downloaded from bindingdb." +) @click.option( "input_directory", "-i", @@ -208,6 +220,7 @@ def process_bindingdb(input_directory, output_directory): # get all sdf files sdfs = list(input_directory.glob("*3D.sdf")) + output = [] for sdf in sdfs: # asap function to read separate ligands from a multi-ligand sdf file mols: list[Ligand] = MolFileFactory(filename=sdf).load() @@ -284,7 +297,7 @@ def visualize_network(network_graphml, output_directory): from openfe.setup import LigandNetwork output_directory.mkdir(exist_ok=True, parents=True) - ligand_network = args.network_graphml + ligand_network = network_graphml if not ligand_network.exists(): raise FileNotFoundError( diff --git a/pkg/src/plumbdb/oespruce.py b/pkg/src/plumbdb/oespruce.py index 3d24acf..7f986fe 100644 --- a/pkg/src/plumbdb/oespruce.py +++ b/pkg/src/plumbdb/oespruce.py @@ -1,23 +1,15 @@ -from pathlib import Path -import json +import pathlib from asapdiscovery.data.schema.schema_base import DataModelAbstractBase from asapdiscovery.data.backend.openeye import ( oespruce, oechem, oegrid, - load_openeye_cif1, ) from asapdiscovery.modeling.modeling import ( - split_openeye_mol, - spruce_protein, get_oe_structure_metadata_from_sequence, openeye_perceive_residues, ) -from asapdiscovery.data.schema.target import Target -from asapdiscovery.data.schema.ligand import Ligand - -from asapdiscovery.data.schema.complex import Complex class SpruceResults(DataModelAbstractBase): @@ -28,10 +20,11 @@ class SpruceResults(DataModelAbstractBase): error_message: str +# TODO: this was originally defined in asapdiscovery.modeling.modeling? def spruce_protein( initial_prot: oechem.OEGraphMol, protein_sequence: str = None, - loop_db: Path = None, + loop_db: pathlib.Path = None, ) -> oechem.OEDesignUnit or oechem.OEGraphMol: """ Applies the OESpruce protein preparation pipeline to the given protein structure. From a68166e016fd76de2a92b5a78479a722f067100c Mon Sep 17 00:00:00 2001 From: Ian Kenney Date: Tue, 28 Oct 2025 16:45:02 -0400 Subject: [PATCH 17/23] Replace tabs with spaces --- modules.nf | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/modules.nf b/modules.nf index 25fee54..db0663e 100644 --- a/modules.nf +++ b/modules.nf @@ -33,7 +33,7 @@ process DOWNLOAD_PDB { script: """ plumbline download-pdb \ - --input-file "${input_json}" + --input-file "${input_json}" """ } process PREP_CIF { @@ -55,10 +55,10 @@ process PREP_CIF { script: """ plumbline prep-cif \ - --input-json "${input_json}" \ - --input-cif "${input_cif}" \ - --fasta-sequence "${params.fasta}" \ - --output-directory "./" + --input-json "${input_json}" \ + --input-cif "${input_cif}" \ + --fasta-sequence "${params.fasta}" \ + --output-directory "./" """ } process PREP_FOR_DOCKING { @@ -79,9 +79,9 @@ process PREP_FOR_DOCKING { script: """ asap-cli protein-prep \ - --target SARS-CoV-2-Mpro \ - --pdb-file "${prepped_pdb}" \ - --output-dir "./" + --target SARS-CoV-2-Mpro \ + --pdb-file "${prepped_pdb}" \ + --output-dir "./" """ } process ASSESS_PREPPED_PROTEIN { @@ -99,8 +99,8 @@ process ASSESS_PREPPED_PROTEIN { script: """ plumbline assess-prepped-protein \ - --input-file "${design_unit}" \ - --output-directory "./" + --input-file "${design_unit}" \ + --output-directory "./" """ } process GENERATE_CONSTRAINED_LIGAND_POSES { @@ -118,9 +118,9 @@ process GENERATE_CONSTRAINED_LIGAND_POSES { script: """ plumbline generate-constrained-ligand-poses \ - --input-sdf "${params.congenericSeries}" \ - --prepped-schema "${prepped_complex_json_schema}" \ - --output-directory "./" + --input-sdf "${params.congenericSeries}" \ + --prepped-schema "${prepped_complex_json_schema}" \ + --output-directory "./" """ } @@ -142,10 +142,10 @@ process MAKE_FEC_INPUTS { asap-cli alchemy create fecs-workflow.json asap-cli alchemy plan \ - -f fecs-workflow.json \ - --name ${uuid}_plumb_alchemiscale_network \ - --receptor "${prepped_complex}" \ - --ligands "${posed_ligands}" \ + -f fecs-workflow.json \ + --name ${uuid}_plumb_alchemiscale_network \ + --receptor "${prepped_complex}" \ + --ligands "${posed_ligands}" \ """ } process VISUALIZE_NETWORK { @@ -163,7 +163,7 @@ process VISUALIZE_NETWORK { script: """ plumbline visualize-network \ - --network-graphml "${network_graph}" \ - --output-directory "./" + --network-graphml "${network_graph}" \ + --output-directory "./" """ } \ No newline at end of file From 7e36e282c90077d96ae3f971a1db775b0f1d17fb Mon Sep 17 00:00:00 2001 From: Ariana Brenner Clerkin Date: Fri, 14 Nov 2025 16:14:31 -0500 Subject: [PATCH 18/23] added openeye to conda env --- pkg/env.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/env.yaml b/pkg/env.yaml index 6448b3e..6aadb94 100644 --- a/pkg/env.yaml +++ b/pkg/env.yaml @@ -1,8 +1,11 @@ name: plumbdb channels: - conda-forge + - openeye dependencies: + - python=3.11 + - openeye-toolkits - asapdiscovery - openfe - click From 4135d700dc48068859de8cbd39da5f9b4ec7d646 Mon Sep 17 00:00:00 2001 From: Ariana Brenner Clerkin Date: Fri, 14 Nov 2025 16:14:52 -0500 Subject: [PATCH 19/23] added output option --- pkg/src/plumbdb/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index fe41370..8e97562 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -287,6 +287,8 @@ def process_bindingdb(input_directory, output_directory): ) @click.option( "output_directory", + "-o", + "--output-directory", type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), required=True, default=pathlib.Path("./"), From cbc386693f0cc91aff1c3531c3d32856858e517a Mon Sep 17 00:00:00 2001 From: Ariana Brenner Clerkin Date: Thu, 11 Dec 2025 13:52:16 -0500 Subject: [PATCH 20/23] renamed PDBID field --- pkg/src/plumbdb/cli.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 8e97562..ebfcb41 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -138,7 +138,8 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory): from asapdiscovery.data.backend.openeye import load_openeye_cif1 from asapdiscovery.modeling.modeling import split_openeye_mol - from asapdiscovery.data.schema.ligand import Ligand, Complex + from asapdiscovery.data.schema.ligand import Ligand + from asapdiscovery.data.schema.complex import Complex from plumbdb.oespruce import spruce_protein from asapdiscovery.data.schema.target import Target @@ -235,8 +236,10 @@ def process_bindingdb(input_directory, output_directory): "has_3d": mol.to_oemol().GetDimension() == 3, "num_atoms": mol.to_oemol().NumAtoms(), "smiles": mol.smiles, - "pdb_id": mol.tags.get("PDB ID ")[:4] - if mol.tags.get("PDB ID ") + # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space + # if mol.tags.get("PDB ID") # removed trailing space + "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space + if mol.tags.get("PDB ID(s) for Ligand-Target Complex") else "", } From 7ef7ca907fc53b063761752110f5e2189df57378 Mon Sep 17 00:00:00 2001 From: apayne97 Date: Thu, 11 Dec 2025 13:58:38 -0500 Subject: [PATCH 21/23] add prep-protein-for-docking step to avoid using the asapdiscovery cli which has ML import problems --- modules.nf | 4 +- pkg/src/plumbdb/cli.py | 352 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 351 insertions(+), 5 deletions(-) diff --git a/modules.nf b/modules.nf index db0663e..3cafef4 100644 --- a/modules.nf +++ b/modules.nf @@ -78,7 +78,7 @@ process PREP_FOR_DOCKING { script: """ - asap-cli protein-prep \ + plumbline prep-protein-for-docking \ --target SARS-CoV-2-Mpro \ --pdb-file "${prepped_pdb}" \ --output-dir "./" @@ -166,4 +166,4 @@ process VISUALIZE_NETWORK { --network-graphml "${network_graph}" \ --output-directory "./" """ -} \ No newline at end of file +} diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index ebfcb41..7cdff9a 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -72,6 +72,352 @@ def assess_prepped_protein(output_directory, input_openeye_du): with open(output_directory / f"{stem}_quality_report.json", "w") as f: json.dump(report, f, indent=4) +# Prep for docking +from typing import TYPE_CHECKING, Optional +# copying everything +# TODO delete everything from here that isn't needed + +def postera(func): + return click.option( + "--postera", + is_flag=True, + default=False, + help="Whether to download complexes from Postera.", + )(func) + + +def postera_molset_name(func): + return click.option( + "--postera-molset-name", + type=str, + default=None, + help="The name of the Postera molecule set to use.", + )(func) + + +def postera_upload(func): + return click.option( + "--postera-upload", + is_flag=True, + default=False, + help="Whether to upload results to Postera.", + )(func) + + +def postera_args(func): + return postera(postera_molset_name(postera_upload(func))) + + +def use_dask(func): + return click.option( + "--use-dask", + is_flag=True, + default=False, + help="Whether to use dask for parallelism.", + )(func) + + +def dask_type(func): + return click.option( + "--dask-type", + type=click.Choice(DaskType.get_values(), case_sensitive=False), + default=DaskType.LOCAL, + help="The type of dask cluster to use. Local mode is reccommended for most use cases.", + )(func) + + +def failure_mode(func): + return click.option( + "--failure-mode", + type=click.Choice(FailureMode.get_values(), case_sensitive=False), + default=FailureMode.SKIP, + help="The failure mode for dask. Can be 'raise' or 'skip'.", + show_default=True, + )(func) + + +def dask_n_workers(func): + return click.option( + "--dask-n-workers", + type=int, + default=None, + help="The number of workers to use with dask.", + )(func) + + +def dask_args(func): + return use_dask(dask_type(dask_n_workers(failure_mode(func)))) + + +def target(func): + from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags + return click.option( + "--target", + type=click.Choice(TargetTags.get_values(), case_sensitive=True), + help="The target for the workflow", + required=True, + )(func) + + +def ligands(func): + return click.option( + "-l", + "--ligands", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="File containing ligands", + )(func) + + +def output_dir(func): + return click.option( + "--output-dir", + type=click.Path( + resolve_path=True, exists=False, file_okay=False, dir_okay=True + ), + help="The directory to output results to.", + default="output", + )(func) + + +def overwrite(func): + return click.option( + "--overwrite/--no-overwrite", + default=True, + help="Whether to overwrite the output directory if it exists.", + )(func) + + +def input_json(func): + return click.option( + "--input-json", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="Path to a json file containing the inputs to the workflow, WARNING: overrides all other inputs.", + )(func) + +# flag to run all ml scorers +def ml_score(func): + return click.option( + "--ml-score", + is_flag=True, + default=True, + help="Whether to run all ml scorers", + )(func) + + +def fragalysis_dir(func): + return click.option( + "--fragalysis-dir", + type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True), + help="Path to a directory containing fragments to dock.", + )(func) + + +def structure_dir(func): + return click.option( + "--structure-dir", + type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True), + help="Path to a directory containing structures.", + )(func) + + +def pdb_file(func): + return click.option( + "--pdb-file", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="Path to a pdb file containing a structure", + )(func) + + +def cache_dir(func): + return click.option( + "--cache-dir", + type=click.Path( + resolve_path=True, exists=False, file_okay=False, dir_okay=True + ), + help="Path to a directory where design units are cached.", + )(func) + + +def use_only_cache(func): + return click.option( + "--use-only-cache", + is_flag=True, + default=False, + help="Whether to only use the cache.", + )(func) + + +def gen_cache_w_default(func): + return click.option( + "--gen-cache", + type=click.Path( + resolve_path=False, exists=False, file_okay=False, dir_okay=True + ), + help="Path to a directory where a design unit cache should be generated.", + default="prepped_structure_cache", + )(func) + + +def md(func): + return click.option( + "--md", + is_flag=True, + default=False, + help="Whether to run MD", + )(func) + + +def md_steps(func): + return click.option( + "--md-steps", + type=int, + default=2500000, + help="Number of MD steps", + )(func) + +def core_smarts(func): + return click.option( + "-cs", + "--core-smarts", + type=click.STRING, + help="The SMARTS which should be used to select which atoms to constrain to the reference structure.", + )(func) + + +def save_to_cache(func): + return click.option( + "--save-to-cache/--no-save-to-cache", + help="If the newly generated structures should be saved to the cache folder.", + default=True, + )(func) + + +def loglevel(func): + return click.option( + "--loglevel", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + help="The log level to use.", + default="INFO", + show_default=True, + )(func) + + +def ref_chain(func): + return click.option( + "--ref-chain", + type=str, + default=None, + help="Chain ID to align to in reference structure containing the active site.", + )(func) + + +def active_site_chain(func): + return click.option( + "--active-site-chain", + type=str, + default=None, + help="Active site chain ID to align to ref_chain in reference structure", + )(func) + +from asapdiscovery.data.util.dask_utils import DaskType, FailureMode + +if TYPE_CHECKING: + from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags + + +@cli.command( + "prep-protein-for-docking", + help="Prep protein to make OE Design Units and corresponding schema.", +) +@target +@click.option( + "--align", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="Path to a reference structure to align to", +) +@ref_chain +@active_site_chain +@click.option( + "--seqres-yaml", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="Path to a seqres yaml file to mutate to, if not specified will use the default for the target", +) +@click.option( + "--loop-db", + type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False), + help="Path to a loop database to use for prepping", +) +@click.option( + "--oe-active-site-residue", + type=str, + help="OE formatted string of active site residue to use if not ligand bound", +) +@pdb_file +@fragalysis_dir +@structure_dir +@click.option( + "--cache-dir", + help="The path to cached prepared complexes which can be used again.", + type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True), +) +@save_to_cache +@dask_args +@output_dir +@input_json +def protein_prep( + target: "TargetTags", + align: Optional[str] = None, + ref_chain: Optional[str] = None, + active_site_chain: Optional[str] = None, + seqres_yaml: Optional[str] = None, + loop_db: Optional[str] = None, + oe_active_site_residue: Optional[str] = None, + pdb_file: Optional[str] = None, + fragalysis_dir: Optional[str] = None, + structure_dir: Optional[str] = None, + cache_dir: Optional[str] = None, + save_to_cache: bool = True, + use_dask: bool = False, + dask_type: DaskType = DaskType.LOCAL, + dask_n_workers: Optional[int] = None, + failure_mode: FailureMode = FailureMode.SKIP, + output_dir: str = "output", + input_json: Optional[str] = None, +): + """ + Run protein prep on a set of structures. + """ + from asapdiscovery.workflows.prep_workflows.protein_prep import ( + ProteinPrepInputs, + protein_prep_workflow, + ) + + if input_json is not None: + print("Loading inputs from json file... Will override all other inputs.") + inputs = ProteinPrepInputs.from_json_file(input_json) + + else: + inputs = ProteinPrepInputs( + target=target, + align=align, + ref_chain=ref_chain, + active_site_chain=active_site_chain, + seqres_yaml=seqres_yaml, + loop_db=loop_db, + oe_active_site_residue=oe_active_site_residue, + pdb_file=pdb_file, + fragalysis_dir=fragalysis_dir, + structure_dir=structure_dir, + cache_dir=cache_dir, + save_to_cache=save_to_cache, + use_dask=use_dask, + dask_type=dask_type, + dask_n_workers=dask_n_workers, + failure_mode=failure_mode, + output_dir=output_dir, + ) + + protein_prep_workflow(inputs) # TODO: check for openeye installation, maybe make it a decorator @cli.command( @@ -236,10 +582,10 @@ def process_bindingdb(input_directory, output_directory): "has_3d": mol.to_oemol().GetDimension() == 3, "num_atoms": mol.to_oemol().NumAtoms(), "smiles": mol.smiles, - # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space + # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space # if mol.tags.get("PDB ID") # removed trailing space - "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space - if mol.tags.get("PDB ID(s) for Ligand-Target Complex") + "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space + if mol.tags.get("PDB ID(s) for Ligand-Target Complex") else "", } From 1ffd0b8381538248b252312d377b1b133ebaf6e3 Mon Sep 17 00:00:00 2001 From: apayne97 Date: Thu, 11 Dec 2025 16:40:34 -0500 Subject: [PATCH 22/23] add small command to rename title of ligands so that the openfe cli works --- pkg/src/plumbdb/cli.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 7cdff9a..d5f3004 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -621,6 +621,30 @@ def process_bindingdb(input_directory, output_directory): ) ) +@cli.command("rename-ligands-for-openfe") +@click.option("-i", + "--input-sdf", + type=click.Path(file_okay=True, dir_okay=False, path_type=pathlib.Path),) +@click.option( + "-o", + "--output-dir", + type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path), + required=True, + default=pathlib.Path("./"), + help="Path to the output directory where the results will be stored", +) +def rename_ligands_for_sdf(input_sdf, output_dir): + from asapdiscovery.data.schema.ligand import Ligand + from asapdiscovery.data.readers.molfile import MolFileFactory + from asapdiscovery.data.schema.ligand import write_ligands_to_multi_sdf + mols: list[Ligand] = MolFileFactory(filename=input_sdf).load() + new_ligands = [] + for mol in mols: + oemol = mol.to_oemol() + oemol.SetTitle(mol.compound_name) + new_ligands.append(Ligand.from_oemol(oemol)) + output_sdf = output_dir / "renamed.sdf" + write_ligands_to_multi_sdf(output_sdf, new_ligands, overwrite=True) @cli.command( "visualize-network", From aa4fa6c38b6b189e81dabf327d975e8460b4c392 Mon Sep 17 00:00:00 2001 From: Ariana Brenner Clerkin Date: Thu, 11 Dec 2025 16:45:49 -0500 Subject: [PATCH 23/23] renamed field BindingDB monomerid to BindingDB MonomerID --- pkg/src/plumbdb/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py index 7cdff9a..1d44b0d 100644 --- a/pkg/src/plumbdb/cli.py +++ b/pkg/src/plumbdb/cli.py @@ -450,7 +450,7 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director # reconstruct ligands from smiles because of some weirdness with the sdf files ligs = [ Ligand.from_smiles( - compound_name=lig.tags["BindingDB monomerid"], + compound_name=lig.tags["BindingDB MonomerID"], # Changed capitalization smiles=lig.smiles, tags=lig.dict(), )