From fe79f171acf0852c4f6544ade130026e49b770f8 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 09:41:52 -0400
Subject: [PATCH 01/23] Add plumbdb package files

---
 pkg/README.md               | 12 ++++++++++++
 pkg/env.yaml                |  6 ++++++
 pkg/pyproject.toml          | 27 +++++++++++++++++++++++++++
 pkg/src/plumbdb/__init__.py |  0
 pkg/src/plumbdb/cli.py      |  9 +++++++++
 5 files changed, 54 insertions(+)
 create mode 100644 pkg/README.md
 create mode 100644 pkg/env.yaml
 create mode 100644 pkg/pyproject.toml
 create mode 100644 pkg/src/plumbdb/__init__.py
 create mode 100644 pkg/src/plumbdb/cli.py

diff --git a/pkg/README.md b/pkg/README.md
new file mode 100644
index 0000000..2b305d2
--- /dev/null
+++ b/pkg/README.md
@@ -0,0 +1,12 @@
+# plumbdb
+
+## Installation
+
+`plumbdb` can be installed via its Python source code with dependencies defined in a `conda` environment file.
+
+## Usage
+
+## Contributing
+
+## Citation
+
diff --git a/pkg/env.yaml b/pkg/env.yaml
new file mode 100644
index 0000000..bf6b65c
--- /dev/null
+++ b/pkg/env.yaml
@@ -0,0 +1,6 @@
+name: plumbdb
+channels:
+  - conda-forge
+
+dependencies:
+  - click
diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml
new file mode 100644
index 0000000..f214cda
--- /dev/null
+++ b/pkg/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+requires = [
+    "setuptools>=77.0.3",
+    "setuptools-scm>=8",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "plumbdb"
+description = ""
+readme = "README.md"
+authors = [
+	{name = "Ariana Brenner Clerkin", email = "ariana.clerkin@choderlab.org"}
+]
+license = "MIT"
+classifiers = [
+    "Intended Audience :: Science/Research",
+    "Operating System :: POSIX",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Chemistry",
+]
+requires-python = ">= 3.13"
+dynamic = ["version"]
+
+[project.optional-dependencies]
+test = ["pytest"]
diff --git a/pkg/src/plumbdb/__init__.py b/pkg/src/plumbdb/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
new file mode 100644
index 0000000..eb110e9
--- /dev/null
+++ b/pkg/src/plumbdb/cli.py
@@ -0,0 +1,9 @@
+import click
+
+@click.group()
+def cli():
+    pass
+
+@cli.command()
+def bind_db():
+    pass

From 180023faa1ea626ed63ca99b8a78f1eadb6b72bc Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:15:57 -0400
Subject: [PATCH 02/23] Add asapdiscovery dependency

---
 pkg/env.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/env.yaml b/pkg/env.yaml
index bf6b65c..5e6d371 100644
--- a/pkg/env.yaml
+++ b/pkg/env.yaml
@@ -3,4 +3,5 @@ channels:
   - conda-forge
 
 dependencies:
+  - asapdiscovery
   - click

From 9a7be7089292466505c64ed64383aaf921f7c298 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:16:12 -0400
Subject: [PATCH 03/23] Expose cli entrypoint: plumbline

---
 pkg/pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml
index f214cda..cc4656b 100644
--- a/pkg/pyproject.toml
+++ b/pkg/pyproject.toml
@@ -23,5 +23,8 @@ classifiers = [
 requires-python = ">= 3.13"
 dynamic = ["version"]
 
+[project.scripts]
+plumbline = "plumbdb.cli:cli"
+
 [project.optional-dependencies]
 test = ["pytest"]

From a7b78f9c2e52428f4000dc0bb5d6e2ebf09f9a7f Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:44:18 -0400
Subject: [PATCH 04/23] Lower Python version constraint

---
 pkg/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml
index cc4656b..3558ee4 100644
--- a/pkg/pyproject.toml
+++ b/pkg/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Topic :: Scientific/Engineering :: Chemistry",
 ]
-requires-python = ">= 3.13"
+requires-python = ">= 3.11"
 dynamic = ["version"]
 
 [project.scripts]

From 37acd2d7df2e764442a6c699477c9b6f7b2cc41a Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:45:16 -0400
Subject: [PATCH 05/23] Add test module

---
 pkg/src/plumbdb/tests/test_plumb.py | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 pkg/src/plumbdb/tests/test_plumb.py

diff --git a/pkg/src/plumbdb/tests/test_plumb.py b/pkg/src/plumbdb/tests/test_plumb.py
new file mode 100644
index 0000000..e59fd58
--- /dev/null
+++ b/pkg/src/plumbdb/tests/test_plumb.py
@@ -0,0 +1,2 @@
+def test_import():
+    import plumbdb

From ce3972ce0a5e68cfa13ea3f67a8a125fdb2cbbbb Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:45:29 -0400
Subject: [PATCH 06/23] Add developer docs

---
 pkg/docs/developer.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 pkg/docs/developer.md

diff --git a/pkg/docs/developer.md b/pkg/docs/developer.md
new file mode 100644
index 0000000..8b99e46
--- /dev/null
+++ b/pkg/docs/developer.md
@@ -0,0 +1,19 @@
+# Developer documentation
+
+## Installation
+
+`plumbdb` can currently only be installed from its Python source, with its dependencies handled by `conda`.
+
+```bash
+conda create -n plumbdb_dev -f env.yaml
+conda activate plumbdb_dev
+pip install -e ".[test]"
+```
+
+## Running tests
+
+Tests are performed with [`pytest`](https://docs.pytest.org/), which is automatically installed as part of the `test` dependency group specified in `pyproject.toml`. With the test environment active and from within `plumb/pkg/`, run:
+
+```bash
+python -m pytest src/
+```
\ No newline at end of file

From 487dd06c3be3195465b252d1c815c95f980e6f05 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 10:52:17 -0400
Subject: [PATCH 07/23] Trim package README

---
 pkg/README.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pkg/README.md b/pkg/README.md
index 2b305d2..107f408 100644
--- a/pkg/README.md
+++ b/pkg/README.md
@@ -2,11 +2,10 @@
 
 ## Installation
 
-`plumbdb` can be installed via its Python source code with dependencies defined in a `conda` environment file.
+Since `plumbdb` is not yet registered with conda-forge, follow the developer installation instructions at [docs/developer.md](docs/developer.md#installation).
 
 ## Usage
 
-## Contributing
-
-## Citation
+### Command line
 
+`plumbdb` is usable via command line through the `plumbline` CLI application.

From c1b9beb18ec48b1ac3958da786f80910ec1fccef Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 11:35:09 -0400
Subject: [PATCH 08/23] Add ruff to dev tools

---
 pkg/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml
index 3558ee4..1c00003 100644
--- a/pkg/pyproject.toml
+++ b/pkg/pyproject.toml
@@ -28,3 +28,4 @@ plumbline = "plumbdb.cli:cli"
 
 [project.optional-dependencies]
 test = ["pytest"]
+dev = ["pytest", "ruff"]
\ No newline at end of file

From b83f2e9ee498a3f845f0640001e452438b5586bb Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 11:35:49 -0400
Subject: [PATCH 09/23] Replicate commands from old bin/ directory

- download_pdb_structure
- assess_prepped_protein
---
 pkg/src/plumbdb/cli.py | 69 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index eb110e9..e0681b3 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -1,9 +1,72 @@
+import json
+import pathlib
+
 import click
 
+
 @click.group()
 def cli():
     pass
 
-@cli.command()
-def bind_db():
-    pass
+
+@cli.command(name="download-pdb", help="Download PDB files from the PDB database.")
+@click.option("-i", "--input-file", required=True)
+@click.option(
+    "output_directory",
+    "-d",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
+def download_pdb_structure(input_file, output_directory):
+    from asapdiscovery.data.services.rcsb.rcsb_download import download_pdb_structure
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+    with open(input_file, "r") as f:
+        record_dict = json.load(f)
+    downloaded = download_pdb_structure(
+        record_dict["pdb_id"], output_dir, file_format="cif1"
+    )
+    record_dict["cif"] = downloaded
+    with open(output_dir / "record.json", "w") as f:
+        json.dump(record_dict, f)
+
+
+@cli.command(
+    "assess-prepped-protein",
+    help="Assess the quality of the prepped protein structure.",
+)
+@click.option(
+    "output_directory",
+    "-d",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
+@click.option("input_openeye_du", "-i", "--input-file", type=str, required=True)
+def assess_prepped_protein(output_directory, input_openeye_du):
+    try:
+        from openeye import oespruce
+    except ModuleNotFoundError:
+        raise click.UsageError("Could not import openeye")
+
+    from asapdiscovery.data.backend.openeye import load_openeye_design_unit
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    du = load_openeye_design_unit(input_openeye_du)
+
+    # should use a different id method
+    stem = Path(input_openeye_du).stem
+    validator = oespruce.OEValidateDesignUnit()
+    err_msgs = validator.GetMessages(validator.Validate(du))
+    sq = du.GetStructureQuality()
+
+    report = {
+        "errors": err_msgs,
+        "warnings": [],
+        "has_iridium_data": sq.HasIridiumData(),
+    }
+
+    with open(output_directory / f"{stem}_quality_report.json", "w") as f:
+        json.dump(report, f, indent=4)

From b453eefbeb0372db12c821d406f40705c423ac76 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 11:49:50 -0400
Subject: [PATCH 10/23] Add generate_constrained_ligand_poses command to cli

---
 pkg/src/plumbdb/cli.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index e0681b3..0eecdf2 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -70,3 +70,44 @@ def assess_prepped_protein(output_directory, input_openeye_du):
 
     with open(output_directory / f"{stem}_quality_report.json", "w") as f:
         json.dump(report, f, indent=4)
+
+
+# TODO: check for openeye installation, maybe make it a decorator
+@cli.command(
+    "generate-constrained-ligand-poses",
+    help="Generate constrained ligand poses using OpenEye tooling.",
+)
+@click.option("input_sdf", "-i", "--input-sdf", required=True, type=str)
+@click.option("prepped_schema", "-s", "--prepped-schema", type=str)
+@click.option("output_directory", "-d", "--output-directory")
+def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_directory):
+    try:
+        from asapdiscovery.data.schema.complex import PreppedComplex
+        from asapdiscovery.data.readers.molfile import MolFileFactory
+        from asapdiscovery.data.schema.ligand import Ligand
+        from asapdiscovery.docking.schema.pose_generation import (
+            OpenEyeConstrainedPoseGenerator,
+        )
+        from asapdiscovery.data.backend.openeye import save_openeye_sdfs
+    except ModuleNotFoundError:
+        raise click.UsageError("Could not import openeye")
+
+    raw_ligands = MolFileFactory(filename=input_sdf).load()
+
+    # reconstruct ligands from smiles because of some weirdness with the sdf files
+    ligs = [
+        Ligand.from_smiles(
+            compound_name=lig.tags["BindingDB monomerid"],
+            smiles=lig.smiles,
+            tags=lig.dict(),
+        )
+        for lig in raw_ligands
+    ]
+
+    prepped_complex = PreppedComplex.parse_file(prepped_schema)
+
+    poser = OpenEyeConstrainedPoseGenerator()
+    poses = poser.generate_poses(prepped_complex, ligands=ligs)
+    oemols = [ligand.to_oemol() for ligand in poses.posed_ligands]
+    # save to sdf file
+    save_openeye_sdfs(oemols, output_directory / "poses.sdf")

From 9ce41a7ec30a1fa3bc9577942359ce3d30dd833a Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Mon, 27 Oct 2025 11:53:31 -0400
Subject: [PATCH 11/23] Add stubs for unimplemented CLI commands

---
 pkg/src/plumbdb/cli.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 0eecdf2..35a19b8 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -111,3 +111,18 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director
     oemols = [ligand.to_oemol() for ligand in poses.posed_ligands]
     # save to sdf file
     save_openeye_sdfs(oemols, output_directory / "poses.sdf")
+
+
+@cli.command("prep-cif")
+def prep_cif():
+    raise NotImplementedError
+
+
+@cli.command("process-bindingdb")
+def process_bindingdb():
+    raise NotImplementedError
+
+
+@cli.command("visualize-network")
+def visualize_network():
+    raise NotImplementedError

From cc6bf5478db5ad51dceddb2c5310f984b0052c30 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 14:01:23 -0400
Subject: [PATCH 12/23] Fix variable name usage

---
 pkg/docs/developer.md  | 2 +-
 pkg/env.yaml           | 1 +
 pkg/src/plumbdb/cli.py | 4 ++--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pkg/docs/developer.md b/pkg/docs/developer.md
index 8b99e46..29326e9 100644
--- a/pkg/docs/developer.md
+++ b/pkg/docs/developer.md
@@ -7,7 +7,7 @@
 ```bash
 conda create -n plumbdb_dev -f env.yaml
 conda activate plumbdb_dev
-pip install -e ".[test]"
+pip install -e ".[dev]"
 ```
 
 ## Running tests
diff --git a/pkg/env.yaml b/pkg/env.yaml
index 5e6d371..6448b3e 100644
--- a/pkg/env.yaml
+++ b/pkg/env.yaml
@@ -4,4 +4,5 @@ channels:
 
 dependencies:
   - asapdiscovery
+  - openfe
   - click
diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 35a19b8..c93a318 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -25,10 +25,10 @@ def download_pdb_structure(input_file, output_directory):
     with open(input_file, "r") as f:
         record_dict = json.load(f)
     downloaded = download_pdb_structure(
-        record_dict["pdb_id"], output_dir, file_format="cif1"
+        record_dict["pdb_id"], output_directory, file_format="cif1"
     )
     record_dict["cif"] = downloaded
-    with open(output_dir / "record.json", "w") as f:
+    with open(output_directory / "record.json", "w") as f:
         json.dump(record_dict, f)
 
 

From e2518b665aef6d87e1bc4032529f8fee6366d08f Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 14:01:50 -0400
Subject: [PATCH 13/23] Implement prep_cif command

---
 pkg/src/plumbdb/cli.py | 66 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index c93a318..f0d9246 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -114,8 +114,70 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director
 
 
 @cli.command("prep-cif")
-def prep_cif():
-    raise NotImplementedError
+@click.option("input_json", "-j", "--input-json", type=str, required=True)
+@click.option("input_cif", "-c", "--input-cif", type=str, required=True)
+@click.option("fasta_sequence", "-f", "--fasta-sequence", type=str, required=True)
+@click.option("loop_db", "--loopdb", type=str, required=True)
+@click.option(
+    "output_directory",
+    "-d",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    default="./",
+    required=True,
+)
+def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
+    from asapdiscovery.data.backend.openeye import load_openeye_cif1
+    from asapdiscovery.modeling.modeling import split_openeye_mol
+    from asapdiscovery.data.schema.ligand import Ligand, Complex
+    from plumbdb.oespruce import spruce_protein
+    from asapdiscovery.data.schema.target import Target
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    with open(input_json, "r") as f:
+        record_dict = json.load(f)
+
+    graphmol = load_openeye_cif1(args.input_cif)
+
+    # this is what you would do if you didn't want to use whatever ligand is in the protein
+    # split_dict = split_openeye_mol(graphmol, keep_one_lig=False)
+    split_dict = split_openeye_mol(graphmol, keep_one_lig=True)
+
+    # # Save initial protein as pdb file
+    target = Target.from_oemol(
+        split_dict["prot"],
+        target_name=record_dict["pdb_id"],
+    )
+    # target.to_pdb("protein.pdb")
+
+    # use the ligand found in the structure; the commented-out line below would load it from an SDF file instead
+    ligand = Ligand.from_oemol(split_dict["lig"])
+    # ligand = Ligand.from_sdf(f'{record_dict["compound_name"]}.sdf')
+
+    combined_complex = Complex(target=target, ligand=ligand, ligand_chain="L")
+
+    oemol = combined_complex.to_combined_oemol()
+
+    results, spruced = spruce_protein(
+        initial_prot=oemol,
+        protein_sequence=args.fasta_sequence,
+        loop_db=args.loop_db,
+    )
+
+    split_dict = split_openeye_mol(spruced)
+    prepped_target = Target.from_oemol(split_dict["prot"], **target.dict())
+    prepped_ligand = Ligand.from_oemol(split_dict["lig"], **ligand.dict())
+    prepped_ligand.to_sdf(output_directory / f"{ligand.compound_name}_ligand.sdf")
+    prepped_target.to_pdb(output_directory / f"{target.target_name}_spruced.pdb")
+
+    prepped_complex = Complex(
+        target=prepped_target, ligand=prepped_ligand, ligand_chain="L"
+    )
+
+    filename = f"{target.target_name}_{ligand.compound_name}_spruced_complex.pdb"
+    prepped_complex.to_pdb(output_directory / filename)
+    results.to_json_file(output_directory / f"{target.target_name}_spruce_results.json")
 
 
 @cli.command("process-bindingdb")

From c50aeccf2a8b4e3ebaa43bb8c0dc3479b9d66372 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 14:02:13 -0400
Subject: [PATCH 14/23] Implement process_bindingdb command

---
 pkg/src/plumbdb/cli.py      |  79 +++++++++++++++-
 pkg/src/plumbdb/oespruce.py | 180 ++++++++++++++++++++++++++++++++++++
 2 files changed, 257 insertions(+), 2 deletions(-)
 create mode 100644 pkg/src/plumbdb/oespruce.py

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index f0d9246..71f79ae 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -181,8 +181,83 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
 
 
 @cli.command("process-bindingdb")
-def process_bindingdb():
-    raise NotImplementedError
+@click.option(
+    "input_directory",
+    "-i",
+    "--input-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    help="SDF file to process",
+)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    help="Directory to write output files",
+)
+def process_bindingdb(input_directory, output_directory):
+    from asapdiscovery.data.schema.ligand import Ligand
+    from asapdiscovery.data.readers.molfile import MolFileFactory
+    import pandas as pd
+    import math
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    # get all sdf files
+    sdfs = list(input_directory.glob("*3D.sdf"))
+
+    for sdf in sdfs:
+        # asap function to read separate ligands from a multi-ligand sdf file
+        mols: list[Ligand] = MolFileFactory(filename=sdf).load()
+
+        # create a dictionary for each ligand containing various relevant information
+        # there are some hidden choices here, for instance OpenEye is adding hydrogens which you might not want
+
+        for mol in mols:
+            mol_dict = {
+                "compound_name": mol.compound_name,
+                "filename": sdf.name,
+                "has_3d": mol.to_oemol().GetDimension() == 3,
+                "num_atoms": mol.to_oemol().NumAtoms(),
+                "smiles": mol.smiles,
+                "pdb_id": mol.tags.get("PDB ID ")[:4]
+                if mol.tags.get("PDB ID ")
+                else "",
+            }
+
+            # any data in the SDF file is saved to the 'tags' attribute of an asapdiscovery Ligand object
+            mol_dict.update(mol.tags)
+
+            # write out sdf file
+            if mol_dict["has_3d"]:
+                mol.to_sdf(output_directory / f"{mol.compound_name}.sdf")
+
+            output.append(mol_dict)
+
+    df = pd.DataFrame.from_records(output)
+    df.to_csv(output_directory / "processed_bindingdb.csv", index=False)
+
+    # write separate csvs for 2D and 3D
+    df_2d = df[~df["has_3d"]]
+    df_3d = df[df["has_3d"]]
+    df_2d.to_csv(output_directory / "2d_bindingdb.csv", index=False)
+    df_3d.to_csv(output_directory / "3d_bindingdb.csv", index=False)
+
+    unique_sdf_filenames = df_3d["filename"].unique()
+    with open(output_directory / "unique_3D_sdf_filenames.txt", "w") as f:
+        for filename in unique_sdf_filenames:
+            f.write(f"{filename}\n")
+
+    # write out separate json records
+    for record in df_3d.to_dict(orient="records"):
+        with open(output_directory / f"{record['compound_name']}.json", "w") as f:
+            f.write(
+                json.dumps(
+                    record, indent=4, default=lambda x: None if math.isnan(x) else x
+                )
+            )
 
 
 @cli.command("visualize-network")
diff --git a/pkg/src/plumbdb/oespruce.py b/pkg/src/plumbdb/oespruce.py
new file mode 100644
index 0000000..3d24acf
--- /dev/null
+++ b/pkg/src/plumbdb/oespruce.py
@@ -0,0 +1,180 @@
+from pathlib import Path
+import json
+
+from asapdiscovery.data.schema.schema_base import DataModelAbstractBase
+from asapdiscovery.data.backend.openeye import (
+    oespruce,
+    oechem,
+    oegrid,
+    load_openeye_cif1,
+)
+from asapdiscovery.modeling.modeling import (
+    split_openeye_mol,
+    spruce_protein,
+    get_oe_structure_metadata_from_sequence,
+    openeye_perceive_residues,
+)
+from asapdiscovery.data.schema.target import Target
+from asapdiscovery.data.schema.ligand import Ligand
+
+from asapdiscovery.data.schema.complex import Complex
+
+
+class SpruceResults(DataModelAbstractBase):
+    build_loops_success: bool
+    build_sidechains_success: bool
+    add_caps_success: bool = None
+    place_hydrogens_success: bool
+    error_message: str
+
+
+def spruce_protein(
+    initial_prot: oechem.OEGraphMol,
+    protein_sequence: str = None,
+    loop_db: Path = None,
+) -> oechem.OEDesignUnit or oechem.OEGraphMol:
+    """
+    Applies the OESpruce protein preparation pipeline to the given protein structure.
+
+    Parameters
+    ----------
+    initial_prot : oechem.OEMol
+        The input protein structure to be prepared.
+
+    protein_sequence : str, optional
+        The sequence of the protein for a single chain. If provided, this will be added to the Structure Metadata before applying the OESpruce pipeline.
+        Default is None.
+
+    loop_db : str, optional
+        The filename of the loop database to be used by the OESpruce pipeline. If provided, the pipeline will include the loop building step.
+        Default is None.
+
+    Returns
+    -------
+    (results: SpruceResults, initial_prot)
+        Returns a tuple of:
+        a SpruceResults holding the loop, sidechain and hydrogen-placement
+        success flags plus the message reported by the spruce filter, and
+        the prepared protein structure with its residues re-perceived.
+    """
+
+    # Add Hs to prep protein and ligand
+    # oechem.OEAddExplicitHydrogens(initial_prot)
+
+    # Even though we aren't making DUs, we still need to set up the options
+    opts = oespruce.OEMakeDesignUnitOptions()
+    opts.SetSuperpose(False)
+    opts.GetPrepOptions().SetStrictProtonationMode(True)
+
+    # Add caps when needed
+    # Allow truncation in case adding a cap causes a clash
+    cap_opts = oespruce.OECapBuilderOptions()
+    cap_opts.SetAllowTruncate(True)
+    is_terminal_predicate = oechem.OEOrAtom(
+        oechem.OEIsNTerminalAtom(), oechem.OEIsCTerminalAtom()
+    )
+
+    # Set Build Loop and Sidechain Opts
+    sc_opts = oespruce.OESidechainBuilderOptions()
+
+    loop_opts = oespruce.OELoopBuilderOptions()
+    loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity)
+    loop_opts.SetSeqAlignGapPenalty(-1)
+    loop_opts.SetSeqAlignExtendPenalty(0)
+
+    # Don't build tails, too much work for little gain
+    loop_opts.SetBuildTails(False)
+
+    if loop_db is not None:
+        print(f"Adding loop db {loop_db}")
+        loop_opts.SetLoopDBFilename(str(loop_db))
+
+    # Construct spruce filter
+    spruce_opts = oespruce.OESpruceFilterOptions()
+    spruce = oespruce.OESpruceFilter(spruce_opts, opts)
+
+    # Spruce!
+
+    # These objects are for some reason needed in order to run spruce
+    grid = oegrid.OESkewGrid()
+
+    if protein_sequence:
+        # convert fasta sequence to 3-letter codes
+        try:
+            protein_sequence = " ".join(convert_to_three_letter_codes(protein_sequence))
+            print(type(protein_sequence))
+            print("Adding sequence metadata from sequence: ", protein_sequence)
+            metadata = get_oe_structure_metadata_from_sequence(
+                initial_prot, protein_sequence
+            )
+        except KeyError as e:
+            print(
+                f"Error converting protein sequence to 3-letter codes: {e}. Skipping sequence metadata."
+            )
+            protein_sequence = None
+
+    if not protein_sequence:
+        metadata = oespruce.OEStructureMetadata()
+
+    # Building the loops actually does use the sequence metadata
+    build_loops_success = oespruce.OEBuildLoops(
+        initial_prot, metadata, sc_opts, loop_opts
+    )
+
+    build_sidechains_success = oespruce.OEBuildSidechains(initial_prot, sc_opts)
+    print(type(initial_prot), type(is_terminal_predicate), type(cap_opts))
+    add_caps_success = oespruce.OECapTermini(
+        initial_prot, is_terminal_predicate, cap_opts
+    )
+    print(add_caps_success)
+    place_hydrogens_success = oechem.OEPlaceHydrogens(initial_prot)
+    spruce_error_code = spruce.StandardizeAndFilter(initial_prot, grid, metadata)
+    spruce_error_msg = spruce.GetMessages(spruce_error_code)
+
+    # Re-perceive residues so that atom number and connect records don't get screwed up
+    initial_prot = openeye_perceive_residues(initial_prot, preserve_all=False)
+    return (
+        SpruceResults(
+            build_loops_success=build_loops_success,
+            build_sidechains_success=build_sidechains_success,
+            # add_caps_success=None,  # add_caps_success,
+            place_hydrogens_success=place_hydrogens_success,
+            error_message=spruce_error_msg,
+        ),
+        initial_prot,
+    )
+
+
+def convert_to_three_letter_codes(sequence) -> list[str]:
+    """
+    Convert a protein sequence from 1-letter codes to 3-letter codes.
+    """
+    # Dictionary to map 1-letter codes to 3-letter codes
+    amino_acid_dict = {
+        "A": "ALA",
+        "R": "ARG",
+        "N": "ASN",
+        "D": "ASP",
+        "C": "CYS",
+        "Q": "GLN",
+        "E": "GLU",
+        "G": "GLY",
+        "H": "HIS",
+        "I": "ILE",
+        "L": "LEU",
+        "K": "LYS",
+        "M": "MET",
+        "F": "PHE",
+        "P": "PRO",
+        "S": "SER",
+        "T": "THR",
+        "W": "TRP",
+        "Y": "TYR",
+        "V": "VAL",
+    }
+
+    # Convert the sequence to 3-letter codes
+    three_letter_sequence = [amino_acid_dict[aa] for aa in sequence]
+
+    # Return the codes as a list; joining them is left to the caller
+    return three_letter_sequence

From c66bdaf65aa2926e9b1da616331e18a7294bfa7b Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 14:03:23 -0400
Subject: [PATCH 15/23] Implement visualize-network command

---
 pkg/src/plumbdb/cli.py | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 71f79ae..82bd684 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -260,6 +260,40 @@ def process_bindingdb(input_directory, output_directory):
             )
 
 
-@cli.command("visualize-network")
-def visualize_network():
-    raise NotImplementedError
+@cli.command(
+    "visualize-network",
+    help="Generate a network plot of the proposed alchemical network",
+)
+@click.option(
+    "network_graphml",
+    "-n",
+    "--network-graphml",
+    type=str,
+    required=True,
+    help="Path to the input JSON file containing the atom mapping network.",
+)
+@click.option(
+    "output_directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    default=pathlib.Path("./"),
+    help="Path to the output directory where the results will be stored",
+)
+def visualize_network(network_graphml, output_directory):
+    from openfe.utils.atommapping_network_plotting import plot_atommapping_network
+    from openfe.setup import LigandNetwork
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+    ligand_network = args.network_graphml
+
+    if not ligand_network.exists():
+        raise FileNotFoundError(
+            f"Could not find the ligand network file at {ligand_network}"
+        )
+
+    with open(ligand_network) as f:
+        graphml = f.read()
+
+    network = LigandNetwork.from_graphml(graphml)
+    fig = plot_atommapping_network(network)
+    fig.savefig(output_directory / "network_plot.png")

From 86466e99b5a41da892a613ced090754f7a99bf62 Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 16:39:00 -0400
Subject: [PATCH 16/23] Updated nextflow to use new plumbline command

---
 modules.nf                  | 39 ++++++++++++++++++++++++++-----------
 pkg/src/plumbdb/cli.py      | 37 +++++++++++++++++++++++------------
 pkg/src/plumbdb/oespruce.py | 13 +++----------
 3 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/modules.nf b/modules.nf
index 1f3dcb7..25fee54 100644
--- a/modules.nf
+++ b/modules.nf
@@ -12,7 +12,9 @@ process PROCESS_BINDINGDB {
 
     script:
     """
-    python "${params.scripts}/process_bindingdb.py" --input-dir "${params.bindingDB}"
+    plumbline process-bindingdb \
+    	      --input-directory "${params.bindingDB}" \
+	      --output-directory "./"
     """
 }
 process DOWNLOAD_PDB {
@@ -30,7 +32,8 @@ process DOWNLOAD_PDB {
 
     script:
     """
-    python "${params.scripts}/download_pdb.py" --input-json "${input_json}"
+    plumbline download-pdb \
+    	      --input-file "${input_json}"
     """
 }
 process PREP_CIF {
@@ -51,7 +54,11 @@ process PREP_CIF {
 
     script:
     """
-    python "${params.scripts}/prep_cif.py" --input-json "${input_json}" --input-cif "${input_cif}" --fasta-sequence "${params.fasta}"
+    plumbline prep-cif \
+    	      --input-json "${input_json}" \
+    	      --input-cif "${input_cif}" \
+	      --fasta-sequence "${params.fasta}" \
+	      --output-directory "./"
     """
 }
 process PREP_FOR_DOCKING {
@@ -71,7 +78,10 @@ process PREP_FOR_DOCKING {
 
     script:
     """
-    asap-cli protein-prep --target SARS-CoV-2-Mpro --pdb-file "${prepped_pdb}" --output-dir "./"
+    asap-cli protein-prep \
+    	     --target SARS-CoV-2-Mpro \
+	     --pdb-file "${prepped_pdb}" \
+	     --output-dir "./"
     """
 }
 process ASSESS_PREPPED_PROTEIN {
@@ -88,7 +98,9 @@ process ASSESS_PREPPED_PROTEIN {
 
     script:
     """
-    python "${params.scripts}/assess_prepped_protein.py" --input-oedu "${design_unit}"
+    plumbline assess-prepped-protein \
+    	      --input-file "${design_unit}" \
+	      --output-directory "./"
     """
 }
 process GENERATE_CONSTRAINED_LIGAND_POSES {
@@ -105,7 +117,10 @@ process GENERATE_CONSTRAINED_LIGAND_POSES {
 
     script:
     """
-    python "${params.scripts}/generate_constrained_ligand_poses.py" --input-sdf "${params.congenericSeries}" --prepped-schema "${prepped_complex_json_schema}"
+    plumbline generate-constrained-ligand-poses \
+    	      --input-sdf "${params.congenericSeries}" \
+	      --prepped-schema "${prepped_complex_json_schema}" \
+	      --output-directory "./"
     """
 
 }
@@ -127,10 +142,10 @@ process MAKE_FEC_INPUTS {
     asap-cli alchemy create fecs-workflow.json
 
     asap-cli alchemy plan \
-    -f fecs-workflow.json \
-    --name ${uuid}_plumb_alchemiscale_network \
-    --receptor "${prepped_complex}" \
-    --ligands "${posed_ligands}" \
+    	     -f fecs-workflow.json \
+    	     --name ${uuid}_plumb_alchemiscale_network \
+    	     --receptor "${prepped_complex}" \
+    	     --ligands "${posed_ligands}" \
     """
 }
 process VISUALIZE_NETWORK {
@@ -147,6 +162,8 @@ process VISUALIZE_NETWORK {
 
     script:
     """
-    python "${params.scripts}/visualize_network.py" --network-graphml "${network_graph}"
+    plumbline visualize-network \
+    	      --network-graphml "${network_graph}" \
+	      --output-directory "./"
     """
 }
\ No newline at end of file
diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 82bd684..fe41370 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -9,11 +9,12 @@ def cli():
     pass
 
 
+# TODO: input file type can be better defined
 @cli.command(name="download-pdb", help="Download PDB files from the PDB database.")
-@click.option("-i", "--input-file", required=True)
+@click.option("-i", "--input-file", type=str, required=True)
 @click.option(
     "output_directory",
-    "-d",
+    "-o",
     "--output-directory",
     type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
     required=True,
@@ -38,7 +39,7 @@ def download_pdb_structure(input_file, output_directory):
 )
 @click.option(
     "output_directory",
-    "-d",
+    "-o",
     "--output-directory",
     type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
     required=True,
@@ -57,7 +58,7 @@ def assess_prepped_protein(output_directory, input_openeye_du):
     du = load_openeye_design_unit(input_openeye_du)
 
     # should use a different id method
-    stem = Path(input_openeye_du).stem
+    stem = pathlib.Path(input_openeye_du).stem
     validator = oespruce.OEValidateDesignUnit()
     err_msgs = validator.GetMessages(validator.Validate(du))
     sq = du.GetStructureQuality()
@@ -79,7 +80,13 @@ def assess_prepped_protein(output_directory, input_openeye_du):
 )
 @click.option("input_sdf", "-i", "--input-sdf", required=True, type=str)
 @click.option("prepped_schema", "-s", "--prepped-schema", type=str)
-@click.option("output_directory", "-d", "--output-directory")
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
 def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_directory):
     try:
         from asapdiscovery.data.schema.complex import PreppedComplex
@@ -116,11 +123,13 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director
 @cli.command("prep-cif")
 @click.option("input_json", "-j", "--input-json", type=str, required=True)
 @click.option("input_cif", "-c", "--input-cif", type=str, required=True)
-@click.option("fasta_sequence", "-f", "--fasta-sequence", type=str, required=True)
+@click.option(
+    "fasta_sequence", "-f", "--fasta-sequence", type=str, default=None, required=True
+)
 @click.option("loop_db", "--loopdb", type=str, required=True)
 @click.option(
     "output_directory",
-    "-d",
+    "-o",
     "--output-directory",
     type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
     default="./",
@@ -138,7 +147,7 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
     with open(input_json, "r") as f:
         record_dict = json.load(f)
 
-    graphmol = load_openeye_cif1(args.input_cif)
+    graphmol = load_openeye_cif1(input_cif)
 
     # this is what you would do if you didn't want to use whatever ligand is in the protein
     # split_dict = split_openeye_mol(graphmol, keep_one_lig=False)
@@ -161,8 +170,8 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
 
     results, spruced = spruce_protein(
         initial_prot=oemol,
-        protein_sequence=args.fasta_sequence,
-        loop_db=args.loop_db,
+        protein_sequence=fasta_sequence,
+        loop_db=loop_db,
     )
 
     split_dict = split_openeye_mol(spruced)
@@ -180,7 +189,10 @@ def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
     results.to_json_file(output_directory / f"{target.target_name}_spruce_results.json")
 
 
-@cli.command("process-bindingdb")
+# TODO: help string
+@cli.command(
+    "process-bindingdb", help="Parse and verify SDF files downloaded from bindingdb."
+)
 @click.option(
     "input_directory",
     "-i",
@@ -208,6 +220,7 @@ def process_bindingdb(input_directory, output_directory):
     # get all sdf files
     sdfs = list(input_directory.glob("*3D.sdf"))
 
+    output = []
     for sdf in sdfs:
         # asap function to read separate ligands from a multi-ligand sdf file
         mols: list[Ligand] = MolFileFactory(filename=sdf).load()
@@ -284,7 +297,7 @@ def visualize_network(network_graphml, output_directory):
     from openfe.setup import LigandNetwork
 
     output_directory.mkdir(exist_ok=True, parents=True)
-    ligand_network = args.network_graphml
+    ligand_network = network_graphml
 
     if not ligand_network.exists():
         raise FileNotFoundError(
diff --git a/pkg/src/plumbdb/oespruce.py b/pkg/src/plumbdb/oespruce.py
index 3d24acf..7f986fe 100644
--- a/pkg/src/plumbdb/oespruce.py
+++ b/pkg/src/plumbdb/oespruce.py
@@ -1,23 +1,15 @@
-from pathlib import Path
-import json
+import pathlib
 
 from asapdiscovery.data.schema.schema_base import DataModelAbstractBase
 from asapdiscovery.data.backend.openeye import (
     oespruce,
     oechem,
     oegrid,
-    load_openeye_cif1,
 )
 from asapdiscovery.modeling.modeling import (
-    split_openeye_mol,
-    spruce_protein,
     get_oe_structure_metadata_from_sequence,
     openeye_perceive_residues,
 )
-from asapdiscovery.data.schema.target import Target
-from asapdiscovery.data.schema.ligand import Ligand
-
-from asapdiscovery.data.schema.complex import Complex
 
 
 class SpruceResults(DataModelAbstractBase):
@@ -28,10 +20,11 @@ class SpruceResults(DataModelAbstractBase):
     error_message: str
 
 
+# TODO: this was originally defined in asapdiscovery.modeling.modeling?
 def spruce_protein(
     initial_prot: oechem.OEGraphMol,
     protein_sequence: str = None,
-    loop_db: Path = None,
+    loop_db: pathlib.Path = None,
 ) -> oechem.OEDesignUnit or oechem.OEGraphMol:
     """
     Applies the OESpruce protein preparation pipeline to the given protein structure.

From a68166e016fd76de2a92b5a78479a722f067100c Mon Sep 17 00:00:00 2001
From: Ian Kenney <ianmichaelkenney@gmail.com>
Date: Tue, 28 Oct 2025 16:45:02 -0400
Subject: [PATCH 17/23] Replace tabs with spaces

---
 modules.nf | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/modules.nf b/modules.nf
index 25fee54..db0663e 100644
--- a/modules.nf
+++ b/modules.nf
@@ -33,7 +33,7 @@ process DOWNLOAD_PDB {
     script:
     """
     plumbline download-pdb \
-    	      --input-file "${input_json}"
+              --input-file "${input_json}"
     """
 }
 process PREP_CIF {
@@ -55,10 +55,10 @@ process PREP_CIF {
     script:
     """
     plumbline prep-cif \
-    	      --input-json "${input_json}" \
-    	      --input-cif "${input_cif}" \
-	      --fasta-sequence "${params.fasta}" \
-	      --output-directory "./"
+              --input-json "${input_json}" \
+              --input-cif "${input_cif}" \
+              --fasta-sequence "${params.fasta}" \
+              --output-directory "./"
     """
 }
 process PREP_FOR_DOCKING {
@@ -79,9 +79,9 @@ process PREP_FOR_DOCKING {
     script:
     """
     asap-cli protein-prep \
-    	     --target SARS-CoV-2-Mpro \
-	     --pdb-file "${prepped_pdb}" \
-	     --output-dir "./"
+             --target SARS-CoV-2-Mpro \
+             --pdb-file "${prepped_pdb}" \
+             --output-dir "./"
     """
 }
 process ASSESS_PREPPED_PROTEIN {
@@ -99,8 +99,8 @@ process ASSESS_PREPPED_PROTEIN {
     script:
     """
     plumbline assess-prepped-protein \
-    	      --input-file "${design_unit}" \
-	      --output-directory "./"
+              --input-file "${design_unit}" \
+              --output-directory "./"
     """
 }
 process GENERATE_CONSTRAINED_LIGAND_POSES {
@@ -118,9 +118,9 @@ process GENERATE_CONSTRAINED_LIGAND_POSES {
     script:
     """
     plumbline generate-constrained-ligand-poses \
-    	      --input-sdf "${params.congenericSeries}" \
-	      --prepped-schema "${prepped_complex_json_schema}" \
-	      --output-directory "./"
+              --input-sdf "${params.congenericSeries}" \
+              --prepped-schema "${prepped_complex_json_schema}" \
+              --output-directory "./"
     """
 
 }
@@ -142,10 +142,10 @@ process MAKE_FEC_INPUTS {
     asap-cli alchemy create fecs-workflow.json
 
     asap-cli alchemy plan \
-    	     -f fecs-workflow.json \
-    	     --name ${uuid}_plumb_alchemiscale_network \
-    	     --receptor "${prepped_complex}" \
-    	     --ligands "${posed_ligands}" \
+             -f fecs-workflow.json \
+             --name ${uuid}_plumb_alchemiscale_network \
+             --receptor "${prepped_complex}" \
+             --ligands "${posed_ligands}" \
     """
 }
 process VISUALIZE_NETWORK {
@@ -163,7 +163,7 @@ process VISUALIZE_NETWORK {
     script:
     """
     plumbline visualize-network \
-    	      --network-graphml "${network_graph}" \
-	      --output-directory "./"
+              --network-graphml "${network_graph}" \
+              --output-directory "./"
     """
 }
\ No newline at end of file

From 7e36e282c90077d96ae3f971a1db775b0f1d17fb Mon Sep 17 00:00:00 2001
From: Ariana Brenner Clerkin <ariana.clerkin@choderalab.org>
Date: Fri, 14 Nov 2025 16:14:31 -0500
Subject: [PATCH 18/23] added openeye to conda env

---
 pkg/env.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/env.yaml b/pkg/env.yaml
index 6448b3e..6aadb94 100644
--- a/pkg/env.yaml
+++ b/pkg/env.yaml
@@ -1,8 +1,11 @@
 name: plumbdb
 channels:
   - conda-forge
+  - openeye
 
 dependencies:
+  - python=3.11
+  - openeye-toolkits
   - asapdiscovery
   - openfe
   - click

From 4135d700dc48068859de8cbd39da5f9b4ec7d646 Mon Sep 17 00:00:00 2001
From: Ariana Brenner Clerkin <ariana.clerkin@choderalab.org>
Date: Fri, 14 Nov 2025 16:14:52 -0500
Subject: [PATCH 19/23] added output option

---
 pkg/src/plumbdb/cli.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index fe41370..8e97562 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -287,6 +287,8 @@ def process_bindingdb(input_directory, output_directory):
 )
 @click.option(
     "output_directory",
+    "-o",
+    "--output-directory",
     type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
     required=True,
     default=pathlib.Path("./"),

From cbc386693f0cc91aff1c3531c3d32856858e517a Mon Sep 17 00:00:00 2001
From: Ariana Brenner Clerkin <ariana.clerkin@choderalab.org>
Date: Thu, 11 Dec 2025 13:52:16 -0500
Subject: [PATCH 20/23] renamed PDBID field

---
 pkg/src/plumbdb/cli.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 8e97562..ebfcb41 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -138,7 +138,8 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director
 def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
     from asapdiscovery.data.backend.openeye import load_openeye_cif1
     from asapdiscovery.modeling.modeling import split_openeye_mol
-    from asapdiscovery.data.schema.ligand import Ligand, Complex
+    from asapdiscovery.data.schema.ligand import Ligand
+    from asapdiscovery.data.schema.complex import Complex
     from plumbdb.oespruce import spruce_protein
     from asapdiscovery.data.schema.target import Target
 
@@ -235,8 +236,10 @@ def process_bindingdb(input_directory, output_directory):
                 "has_3d": mol.to_oemol().GetDimension() == 3,
                 "num_atoms": mol.to_oemol().NumAtoms(),
                 "smiles": mol.smiles,
-                "pdb_id": mol.tags.get("PDB ID ")[:4]
-                if mol.tags.get("PDB ID ")
+                # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space 
+                # if mol.tags.get("PDB ID") # removed trailing space
+                "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space 
+                if mol.tags.get("PDB ID(s) for Ligand-Target Complex") 
                 else "",
             }
 

From 7ef7ca907fc53b063761752110f5e2189df57378 Mon Sep 17 00:00:00 2001
From: apayne97 <alex.payne@choderalab.org>
Date: Thu, 11 Dec 2025 13:58:38 -0500
Subject: [PATCH 21/23] add prep-protein-for-docking step to avoid using the
 asapdiscovery cli which has ML import problems

---
 modules.nf             |   4 +-
 pkg/src/plumbdb/cli.py | 352 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 351 insertions(+), 5 deletions(-)

diff --git a/modules.nf b/modules.nf
index db0663e..3cafef4 100644
--- a/modules.nf
+++ b/modules.nf
@@ -78,7 +78,7 @@ process PREP_FOR_DOCKING {
 
     script:
     """
-    asap-cli protein-prep \
+    plumbline prep-protein-for-docking \
              --target SARS-CoV-2-Mpro \
              --pdb-file "${prepped_pdb}" \
              --output-dir "./"
@@ -166,4 +166,4 @@ process VISUALIZE_NETWORK {
               --network-graphml "${network_graph}" \
               --output-directory "./"
     """
-}
\ No newline at end of file
+}
diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index ebfcb41..7cdff9a 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -72,6 +72,352 @@ def assess_prepped_protein(output_directory, input_openeye_du):
     with open(output_directory / f"{stem}_quality_report.json", "w") as f:
         json.dump(report, f, indent=4)
 
+# Prep for docking
+from typing import TYPE_CHECKING, Optional
+# copying everything
+# TODO delete everything from here that isn't needed
+
+def postera(func):
+    return click.option(
+        "--postera",
+        is_flag=True,
+        default=False,
+        help="Whether to download complexes from Postera.",
+    )(func)
+
+
+def postera_molset_name(func):
+    return click.option(
+        "--postera-molset-name",
+        type=str,
+        default=None,
+        help="The name of the Postera molecule set to use.",
+    )(func)
+
+
+def postera_upload(func):
+    return click.option(
+        "--postera-upload",
+        is_flag=True,
+        default=False,
+        help="Whether to upload results to Postera.",
+    )(func)
+
+
+def postera_args(func):
+    return postera(postera_molset_name(postera_upload(func)))
+
+
+def use_dask(func):
+    return click.option(
+        "--use-dask",
+        is_flag=True,
+        default=False,
+        help="Whether to use dask for parallelism.",
+    )(func)
+
+
+def dask_type(func):
+    """Attach the ``--dask-type`` option (case-insensitive ``DaskType`` choice) to ``func``."""
+    return click.option(
+        "--dask-type", type=click.Choice(DaskType.get_values(), case_sensitive=False),
+        default=DaskType.LOCAL,
+        help="The type of dask cluster to use. Local mode is recommended for most use cases.",
+    )(func)
+
+
+def failure_mode(func):
+    return click.option(
+        "--failure-mode",
+        type=click.Choice(FailureMode.get_values(), case_sensitive=False),
+        default=FailureMode.SKIP,
+        help="The failure mode for dask. Can be 'raise' or 'skip'.",
+        show_default=True,
+    )(func)
+
+
+def dask_n_workers(func):
+    return click.option(
+        "--dask-n-workers",
+        type=int,
+        default=None,
+        help="The number of workers to use with dask.",
+    )(func)
+
+
+def dask_args(func):
+    return use_dask(dask_type(dask_n_workers(failure_mode(func))))
+
+
+def target(func):
+    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags
+    return click.option(
+        "--target",
+        type=click.Choice(TargetTags.get_values(), case_sensitive=True),
+        help="The target for the workflow",
+        required=True,
+    )(func)
+
+
+def ligands(func):
+    return click.option(
+        "-l",
+        "--ligands",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="File containing ligands",
+    )(func)
+
+
+def output_dir(func):
+    return click.option(
+        "--output-dir",
+        type=click.Path(
+            resolve_path=True, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="The directory to output results to.",
+        default="output",
+    )(func)
+
+
+def overwrite(func):
+    return click.option(
+        "--overwrite/--no-overwrite",
+        default=True,
+        help="Whether to overwrite the output directory if it exists.",
+    )(func)
+
+
+def input_json(func):
+    return click.option(
+        "--input-json",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="Path to a json file containing the inputs to the workflow,  WARNING: overrides all other inputs.",
+    )(func)
+
+# flag to run all ml scorers
+def ml_score(func):
+    return click.option(
+        "--ml-score",
+        is_flag=True,
+        default=True,
+        help="Whether to run all ml scorers",
+    )(func)
+
+
+def fragalysis_dir(func):
+    return click.option(
+        "--fragalysis-dir",
+        type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+        help="Path to a directory containing fragments to dock.",
+    )(func)
+
+
+def structure_dir(func):
+    return click.option(
+        "--structure-dir",
+        type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+        help="Path to a directory containing structures.",
+    )(func)
+
+
+def pdb_file(func):
+    return click.option(
+        "--pdb-file",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="Path to a pdb file containing a structure",
+    )(func)
+
+
+def cache_dir(func):
+    return click.option(
+        "--cache-dir",
+        type=click.Path(
+            resolve_path=True, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="Path to a directory where design units are cached.",
+    )(func)
+
+
+def use_only_cache(func):
+    return click.option(
+        "--use-only-cache",
+        is_flag=True,
+        default=False,
+        help="Whether to only use the cache.",
+    )(func)
+
+
+def gen_cache_w_default(func):
+    return click.option(
+        "--gen-cache",
+        type=click.Path(
+            resolve_path=False, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="Path to a directory where a design unit cache should be generated.",
+        default="prepped_structure_cache",
+    )(func)
+
+
+def md(func):
+    return click.option(
+        "--md",
+        is_flag=True,
+        default=False,
+        help="Whether to run MD",
+    )(func)
+
+
+def md_steps(func):
+    return click.option(
+        "--md-steps",
+        type=int,
+        default=2500000,
+        help="Number of MD steps",
+    )(func)
+
+def core_smarts(func):
+    return click.option(
+        "-cs",
+        "--core-smarts",
+        type=click.STRING,
+        help="The SMARTS which should be used to select which atoms to constrain to the reference structure.",
+    )(func)
+
+
+def save_to_cache(func):
+    return click.option(
+        "--save-to-cache/--no-save-to-cache",
+        help="If the newly generated structures should be saved to the cache folder.",
+        default=True,
+    )(func)
+
+
+def loglevel(func):
+    return click.option(
+        "--loglevel",
+        type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]),
+        help="The log level to use.",
+        default="INFO",
+        show_default=True,
+    )(func)
+
+
+def ref_chain(func):
+    return click.option(
+        "--ref-chain",
+        type=str,
+        default=None,
+        help="Chain ID to align to in reference structure containing the active site.",
+    )(func)
+
+
+def active_site_chain(func):
+    return click.option(
+        "--active-site-chain",
+        type=str,
+        default=None,
+        help="Active site chain ID to align to ref_chain in reference structure",
+    )(func)
+
+from asapdiscovery.data.util.dask_utils import DaskType, FailureMode
+
+if TYPE_CHECKING:
+    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags
+
+
+@cli.command(
+    "prep-protein-for-docking",
+    help="Prep protein to make OE Design Units and corresponding schema.",
+)
+@target
+@click.option(
+    "--align",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a reference structure to align to",
+)
+@ref_chain
+@active_site_chain
+@click.option(
+    "--seqres-yaml",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a seqres yaml file to mutate to, if not specified will use the default for the target",
+)
+@click.option(
+    "--loop-db",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a loop database to use for prepping",
+)
+@click.option(
+    "--oe-active-site-residue",
+    type=str,
+    help="OE formatted string of active site residue to use if not ligand bound",
+)
+@pdb_file
+@fragalysis_dir
+@structure_dir
+@click.option(
+    "--cache-dir",
+    help="The path to cached prepared complexes which can be used again.",
+    type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+)
+@save_to_cache
+@dask_args
+@output_dir
+@input_json
+def protein_prep(
+    target: "TargetTags",
+    align: Optional[str] = None,
+    ref_chain: Optional[str] = None,
+    active_site_chain: Optional[str] = None,
+    seqres_yaml: Optional[str] = None,
+    loop_db: Optional[str] = None,
+    oe_active_site_residue: Optional[str] = None,
+    pdb_file: Optional[str] = None,
+    fragalysis_dir: Optional[str] = None,
+    structure_dir: Optional[str] = None,
+    cache_dir: Optional[str] = None,
+    save_to_cache: bool = True,
+    use_dask: bool = False,
+    dask_type: DaskType = DaskType.LOCAL,
+    dask_n_workers: Optional[int] = None,
+    failure_mode: FailureMode = FailureMode.SKIP,
+    output_dir: str = "output",
+    input_json: Optional[str] = None,
+):
+    """
+    Run protein prep on a set of structures.
+    """
+    from asapdiscovery.workflows.prep_workflows.protein_prep import (
+        ProteinPrepInputs,
+        protein_prep_workflow,
+    )
+
+    if input_json is not None:
+        print("Loading inputs from json file... Will override all other inputs.")
+        inputs = ProteinPrepInputs.from_json_file(input_json)
+
+    else:
+        inputs = ProteinPrepInputs(
+            target=target,
+            align=align,
+            ref_chain=ref_chain,
+            active_site_chain=active_site_chain,
+            seqres_yaml=seqres_yaml,
+            loop_db=loop_db,
+            oe_active_site_residue=oe_active_site_residue,
+            pdb_file=pdb_file,
+            fragalysis_dir=fragalysis_dir,
+            structure_dir=structure_dir,
+            cache_dir=cache_dir,
+            save_to_cache=save_to_cache,
+            use_dask=use_dask,
+            dask_type=dask_type,
+            dask_n_workers=dask_n_workers,
+            failure_mode=failure_mode,
+            output_dir=output_dir,
+        )
+
+    protein_prep_workflow(inputs)
 
 # TODO: check for openeye installation, maybe make it a decorator
 @cli.command(
@@ -236,10 +582,10 @@ def process_bindingdb(input_directory, output_directory):
                 "has_3d": mol.to_oemol().GetDimension() == 3,
                 "num_atoms": mol.to_oemol().NumAtoms(),
                 "smiles": mol.smiles,
-                # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space 
+                # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space
                 # if mol.tags.get("PDB ID") # removed trailing space
-                "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space 
-                if mol.tags.get("PDB ID(s) for Ligand-Target Complex") 
+                "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4]  # keep only the first 4 characters of the tag value
+                if mol.tags.get("PDB ID(s) for Ligand-Target Complex")
                 else "",
             }
 

From 1ffd0b8381538248b252312d377b1b133ebaf6e3 Mon Sep 17 00:00:00 2001
From: apayne97 <alex.payne@choderalab.org>
Date: Thu, 11 Dec 2025 16:40:34 -0500
Subject: [PATCH 22/23] add small command to rename title of ligands so that
 the openfe cli works

---
 pkg/src/plumbdb/cli.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 7cdff9a..d5f3004 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -621,6 +621,30 @@ def process_bindingdb(input_directory, output_directory):
                 )
             )
 
+@cli.command("rename-ligands-for-openfe")
+@click.option(
+    "-i", "--input-sdf", required=True,
+    type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path),
+)
+@click.option(
+    "-o",
+    "--output-dir",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    default=pathlib.Path("./"),
+    help="Path to the output directory where the results will be stored",
+)
+def rename_ligands_for_sdf(input_sdf, output_dir):
+    """Set each ligand's title to its compound name and write them to ``renamed.sdf``."""
+    from asapdiscovery.data.readers.molfile import MolFileFactory
+    from asapdiscovery.data.schema.ligand import Ligand, write_ligands_to_multi_sdf
+
+    output_dir.mkdir(parents=True, exist_ok=True)  # create the output directory if missing
+    new_ligands = []
+    for mol in MolFileFactory(filename=input_sdf).load():
+        oemol = mol.to_oemol()  # the title is set on the OpenEye molecule, then wrapped again
+        oemol.SetTitle(mol.compound_name)
+        new_ligands.append(Ligand.from_oemol(oemol))
+    write_ligands_to_multi_sdf(output_dir / "renamed.sdf", new_ligands, overwrite=True)
 
 @cli.command(
     "visualize-network",

From aa4fa6c38b6b189e81dabf327d975e8460b4c392 Mon Sep 17 00:00:00 2001
From: Ariana Brenner Clerkin <ariana.clerkin@choderalab.org>
Date: Thu, 11 Dec 2025 16:45:49 -0500
Subject: [PATCH 23/23] renamed field BindingDB monomerid to BindingDB
 MonomerID

---
 pkg/src/plumbdb/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
index 7cdff9a..1d44b0d 100644
--- a/pkg/src/plumbdb/cli.py
+++ b/pkg/src/plumbdb/cli.py
@@ -450,7 +450,7 @@ def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_director
     # reconstruct ligands from smiles because of some weirdness with the sdf files
     ligs = [
         Ligand.from_smiles(
-            compound_name=lig.tags["BindingDB monomerid"],
+            compound_name=lig.tags["BindingDB MonomerID"], # Changed capitalization
             smiles=lig.smiles,
             tags=lig.dict(),
         )