diff --git a/modules.nf b/modules.nf
index 1f3dcb7..3cafef4 100644
--- a/modules.nf
+++ b/modules.nf
@@ -12,7 +12,9 @@ process PROCESS_BINDINGDB {
 
     script:
     """
-    python "${params.scripts}/process_bindingdb.py" --input-dir "${params.bindingDB}"
+    plumbline process-bindingdb \
+        --input-directory "${params.bindingDB}" \
+        --output-directory "./"
     """
 }
 process DOWNLOAD_PDB {
@@ -30,7 +32,8 @@ process DOWNLOAD_PDB {
 
     script:
     """
-    python "${params.scripts}/download_pdb.py" --input-json "${input_json}"
+    plumbline download-pdb \
+        --input-file "${input_json}"
     """
 }
 process PREP_CIF {
@@ -51,7 +54,11 @@ process PREP_CIF {
 
     script:
    """
-    python "${params.scripts}/prep_cif.py" --input-json "${input_json}" --input-cif "${input_cif}" --fasta-sequence "${params.fasta}"
+    plumbline prep-cif \
+        --input-json "${input_json}" \
+        --input-cif "${input_cif}" \
+        --fasta-sequence "${params.fasta}" \
+        --output-directory "./"
     """
 }
 process PREP_FOR_DOCKING {
@@ -71,7 +78,10 @@ process PREP_FOR_DOCKING {
 
     script:
     """
-    asap-cli protein-prep --target SARS-CoV-2-Mpro --pdb-file "${prepped_pdb}" --output-dir "./"
+    plumbline prep-protein-for-docking \
+        --target SARS-CoV-2-Mpro \
+        --pdb-file "${prepped_pdb}" \
+        --output-dir "./"
     """
 }
 process ASSESS_PREPPED_PROTEIN {
@@ -88,7 +98,9 @@ process ASSESS_PREPPED_PROTEIN {
 
     script:
     """
-    python "${params.scripts}/assess_prepped_protein.py" --input-oedu "${design_unit}"
+    plumbline assess-prepped-protein \
+        --input-file "${design_unit}" \
+        --output-directory "./"
     """
 }
 process GENERATE_CONSTRAINED_LIGAND_POSES {
@@ -105,7 +117,10 @@ process GENERATE_CONSTRAINED_LIGAND_POSES {
 
     script:
     """
-    python "${params.scripts}/generate_constrained_ligand_poses.py" --input-sdf "${params.congenericSeries}" --prepped-schema "${prepped_complex_json_schema}"
+    plumbline generate-constrained-ligand-poses \
+        --input-sdf "${params.congenericSeries}" \
+        --prepped-schema "${prepped_complex_json_schema}" \
+        --output-directory "./"
     """
 }
 
@@ -127,10 +142,10 @@ process MAKE_FEC_INPUTS {
     asap-cli alchemy create fecs-workflow.json
 
     asap-cli alchemy plan \
-      -f fecs-workflow.json \
-      --name ${uuid}_plumb_alchemiscale_network \
-      --receptor "${prepped_complex}" \
-      --ligands "${posed_ligands}" \
+        -f fecs-workflow.json \
+        --name ${uuid}_plumb_alchemiscale_network \
+        --receptor "${prepped_complex}" \
+        --ligands "${posed_ligands}"
     """
 }
 process VISUALIZE_NETWORK {
@@ -147,6 +162,8 @@ process VISUALIZE_NETWORK {
 
     script:
     """
-    python "${params.scripts}/visualize_network.py" --network-graphml "${network_graph}"
+    plumbline visualize-network \
+        --network-graphml "${network_graph}" \
+        --output-directory "./"
     """
-}
\ No newline at end of file
+}
diff --git a/pkg/README.md b/pkg/README.md
new file mode 100644
index 0000000..107f408
--- /dev/null
+++ b/pkg/README.md
@@ -0,0 +1,18 @@
+# plumbdb
+
+## Installation
+
+Since `plumbdb` is not yet registered with conda-forge, follow the developer installation instructions at [docs/developer.md](docs/developer.md#installation).
+
+## Usage
+
+### Command line
+
+`plumbdb` is usable from the command line through the `plumbline` CLI application.
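+
+For example (a representative session; `record.json` is a placeholder for a JSON file containing at least a `pdb_id` key):
+
+```bash
+plumbline --help
+plumbline download-pdb --input-file record.json --output-directory pdb/
+```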
diff --git a/pkg/docs/developer.md b/pkg/docs/developer.md
new file mode 100644
index 0000000..29326e9
--- /dev/null
+++ b/pkg/docs/developer.md
@@ -0,0 +1,19 @@
+# Developer documentation
+
+## Installation
+
+`plumbdb` can currently only be installed from its Python source, with its dependencies handled by `conda`.
+
+```bash
+conda env create -n plumbdb_dev -f env.yaml
+conda activate plumbdb_dev
+pip install -e ".[dev]"
+```
+
+## Running tests
+
+Tests are performed with [`pytest`](https://docs.pytest.org/), which is automatically installed as part of the `test` dependency group specified in `pyproject.toml`. With the test environment active and from within `plumb/pkg/`, run:
+
+```bash
+python -m pytest src/
+```
\ No newline at end of file
diff --git a/pkg/env.yaml b/pkg/env.yaml
new file mode 100644
index 0000000..6aadb94
--- /dev/null
+++ b/pkg/env.yaml
@@ -0,0 +1,11 @@
+name: plumbdb
+channels:
+  - conda-forge
+  - openeye
+
+dependencies:
+  - python=3.11
+  - openeye-toolkits
+  - asapdiscovery
+  - openfe
+  - click
diff --git a/pkg/pyproject.toml b/pkg/pyproject.toml
new file mode 100644
index 0000000..1c00003
--- /dev/null
+++ b/pkg/pyproject.toml
@@ -0,0 +1,31 @@
+[build-system]
+requires = [
+    "setuptools>=77.0.3",
+    "setuptools-scm>=8",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "plumbdb"
+description = ""
+readme = "README.md"
+authors = [
+    {name = "Ariana Brenner Clerkin", email = "ariana.clerkin@choderlab.org"}
+]
+license = "MIT"
+classifiers = [
+    "Intended Audience :: Science/Research",
+    "Operating System :: POSIX",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Chemistry",
+]
+requires-python = ">= 3.11"
+dynamic = ["version"]
+
+[project.scripts]
+plumbline = "plumbdb.cli:cli"
+
+[project.optional-dependencies]
+test = ["pytest"]
+dev = ["pytest", "ruff"]
\ No newline at end of file
diff --git a/pkg/src/plumbdb/__init__.py b/pkg/src/plumbdb/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pkg/src/plumbdb/cli.py b/pkg/src/plumbdb/cli.py
new file mode 100644
index 0000000..09bbad3
--- /dev/null
+++ b/pkg/src/plumbdb/cli.py
@@ -0,0 +1,687 @@
+import json
+import pathlib
+
+import click
+
+
+@click.group()
+def cli():
+    pass
+
+
+# TODO: input file type can be better defined
+@cli.command(name="download-pdb", help="Download PDB files from the PDB database.")
+@click.option("-i", "--input-file", type=str, required=True)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
+def download_pdb_structure(input_file, output_directory):
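+    # The input JSON is expected to carry at least a "pdb_id" key, e.g.
+    # {"pdb_id": "1ABC"} (illustrative); the path of the downloaded CIF is
+    # written back to the record under the "cif" key.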
+    from asapdiscovery.data.services.rcsb.rcsb_download import download_pdb_structure
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+    with open(input_file, "r") as f:
+        record_dict = json.load(f)
+    downloaded = download_pdb_structure(
+        record_dict["pdb_id"], output_directory, file_format="cif1"
+    )
+    record_dict["cif"] = downloaded
+    with open(output_directory / "record.json", "w") as f:
+        json.dump(record_dict, f)
+
+
+@cli.command(
+    "assess-prepped-protein",
+    help="Assess the quality of the prepped protein structure.",
+)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
+@click.option("input_openeye_du", "-i", "--input-file", type=str, required=True)
+def assess_prepped_protein(output_directory, input_openeye_du):
+    try:
+        from openeye import oespruce
+    except ModuleNotFoundError:
+        raise click.UsageError("Could not import openeye")
+
+    from asapdiscovery.data.backend.openeye import load_openeye_design_unit
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    du = load_openeye_design_unit(input_openeye_du)
+
+    # should use a different id method
+    stem = pathlib.Path(input_openeye_du).stem
+    validator = oespruce.OEValidateDesignUnit()
+    err_msgs = validator.GetMessages(validator.Validate(du))
+    sq = du.GetStructureQuality()
+
+    report = {
+        "errors": err_msgs,
+        "warnings": [],
+        "has_iridium_data": sq.HasIridiumData(),
+    }
+
+    with open(output_directory / f"{stem}_quality_report.json", "w") as f:
+        json.dump(report, f, indent=4)
+
+
+# Prep for docking
+from typing import TYPE_CHECKING, Optional
+
+# copying everything
+# TODO: delete everything from here that isn't needed
+
+
+def postera(func):
+    return click.option(
+        "--postera",
+        is_flag=True,
+        default=False,
+        help="Whether to download complexes from Postera.",
+    )(func)
+
+
+def postera_molset_name(func):
+    return click.option(
+        "--postera-molset-name",
+        type=str,
+        default=None,
+        help="The name of the Postera molecule set to use.",
+    )(func)
+
+
+def postera_upload(func):
+    return click.option(
+        "--postera-upload",
+        is_flag=True,
+        default=False,
+        help="Whether to upload results to Postera.",
+    )(func)
+
+
+def postera_args(func):
+    return postera(postera_molset_name(postera_upload(func)))
+
+
+def use_dask(func):
+    return click.option(
+        "--use-dask",
+        is_flag=True,
+        default=False,
+        help="Whether to use dask for parallelism.",
+    )(func)
+
+
+def dask_type(func):
+    return click.option(
+        "--dask-type",
+        type=click.Choice(DaskType.get_values(), case_sensitive=False),
+        default=DaskType.LOCAL,
+        help="The type of dask cluster to use. Local mode is recommended for most use cases.",
+    )(func)
+
+
+def failure_mode(func):
+    return click.option(
+        "--failure-mode",
+        type=click.Choice(FailureMode.get_values(), case_sensitive=False),
+        default=FailureMode.SKIP,
+        help="The failure mode for dask. Can be 'raise' or 'skip'.",
+        show_default=True,
+    )(func)
+
+
+def dask_n_workers(func):
+    return click.option(
+        "--dask-n-workers",
+        type=int,
+        default=None,
+        help="The number of workers to use with dask.",
+    )(func)
+
+
+def dask_args(func):
+    return use_dask(dask_type(dask_n_workers(failure_mode(func))))
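+
+
+# The option decorators above compose; a hypothetical command using the dask
+# options would look like:
+#
+#     @cli.command("my-command")
+#     @dask_args
+#     def my_command(use_dask, dask_type, dask_n_workers, failure_mode):
+#         ...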
+
+
+def target(func):
+    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags
+
+    return click.option(
+        "--target",
+        type=click.Choice(TargetTags.get_values(), case_sensitive=True),
+        help="The target for the workflow",
+        required=True,
+    )(func)
+
+
+def ligands(func):
+    return click.option(
+        "-l",
+        "--ligands",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="File containing ligands",
+    )(func)
+
+
+def output_dir(func):
+    return click.option(
+        "--output-dir",
+        type=click.Path(
+            resolve_path=True, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="The directory to output results to.",
+        default="output",
+    )(func)
+
+
+def overwrite(func):
+    return click.option(
+        "--overwrite/--no-overwrite",
+        default=True,
+        help="Whether to overwrite the output directory if it exists.",
+    )(func)
+
+
+def input_json(func):
+    return click.option(
+        "--input-json",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="Path to a json file containing the inputs to the workflow, WARNING: overrides all other inputs.",
+    )(func)
+
+
+# flag to run all ml scorers
+def ml_score(func):
+    return click.option(
+        "--ml-score",
+        is_flag=True,
+        default=True,
+        help="Whether to run all ml scorers",
+    )(func)
+
+
+def fragalysis_dir(func):
+    return click.option(
+        "--fragalysis-dir",
+        type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+        help="Path to a directory containing fragments to dock.",
+    )(func)
+
+
+def structure_dir(func):
+    return click.option(
+        "--structure-dir",
+        type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+        help="Path to a directory containing structures.",
+    )(func)
+
+
+def pdb_file(func):
+    return click.option(
+        "--pdb-file",
+        type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+        help="Path to a pdb file containing a structure",
+    )(func)
+
+
+def cache_dir(func):
+    return click.option(
+        "--cache-dir",
+        type=click.Path(
+            resolve_path=True, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="Path to a directory where design units are cached.",
+    )(func)
+
+
+def use_only_cache(func):
+    return click.option(
+        "--use-only-cache",
+        is_flag=True,
+        default=False,
+        help="Whether to only use the cache.",
+    )(func)
+
+
+def gen_cache_w_default(func):
+    return click.option(
+        "--gen-cache",
+        type=click.Path(
+            resolve_path=False, exists=False, file_okay=False, dir_okay=True
+        ),
+        help="Path to a directory where a design unit cache should be generated.",
+        default="prepped_structure_cache",
+    )(func)
+
+
+def md(func):
+    return click.option(
+        "--md",
+        is_flag=True,
+        default=False,
+        help="Whether to run MD",
+    )(func)
+
+
+def md_steps(func):
+    return click.option(
+        "--md-steps",
+        type=int,
+        default=2500000,
+        help="Number of MD steps",
+    )(func)
+
+
+def core_smarts(func):
+    return click.option(
+        "-cs",
+        "--core-smarts",
+        type=click.STRING,
+        help="The SMARTS which should be used to select which atoms to constrain to the reference structure.",
+    )(func)
+
+
+def save_to_cache(func):
+    return click.option(
+        "--save-to-cache/--no-save-to-cache",
+        help="If the newly generated structures should be saved to the cache folder.",
+        default=True,
+    )(func)
+
+
+def loglevel(func):
+    return click.option(
+        "--loglevel",
+        type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]),
+        help="The log level to use.",
+        default="INFO",
+        show_default=True,
+    )(func)
+
+
+def ref_chain(func):
+    return click.option(
+        "--ref-chain",
+        type=str,
+        default=None,
+        help="Chain ID to align to in reference structure containing the active site.",
+    )(func)
+
+
+def active_site_chain(func):
+    return click.option(
+        "--active-site-chain",
+        type=str,
+        default=None,
+        help="Active site chain ID to align to ref_chain in reference structure",
+    )(func)
+
+
+from asapdiscovery.data.util.dask_utils import DaskType, FailureMode
+
+if TYPE_CHECKING:
+    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags
+
+
+@cli.command(
+    "prep-protein-for-docking",
+    help="Prep protein to make OE Design Units and corresponding schema.",
+)
+@target
+@click.option(
+    "--align",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a reference structure to align to",
+)
+@ref_chain
+@active_site_chain
+@click.option(
+    "--seqres-yaml",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a seqres yaml file to mutate to; if not specified, will use the default for the target",
+)
+@click.option(
+    "--loop-db",
+    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
+    help="Path to a loop database to use for prepping",
+)
+@click.option(
+    "--oe-active-site-residue",
+    type=str,
+    help="OE-formatted string of the active site residue to use if not ligand bound",
+)
+@pdb_file
+@fragalysis_dir
+@structure_dir
+@click.option(
+    "--cache-dir",
+    help="The path to cached prepared complexes which can be used again.",
+    type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
+)
+@save_to_cache
+@dask_args
+@output_dir
+@input_json
+def protein_prep(
+    target: "TargetTags",
+    align: Optional[str] = None,
+    ref_chain: Optional[str] = None,
+    active_site_chain: Optional[str] = None,
+    seqres_yaml: Optional[str] = None,
+    loop_db: Optional[str] = None,
+    oe_active_site_residue: Optional[str] = None,
+    pdb_file: Optional[str] = None,
+    fragalysis_dir: Optional[str] = None,
+    structure_dir: Optional[str] = None,
+    cache_dir: Optional[str] = None,
+    save_to_cache: bool = True,
+    use_dask: bool = False,
+    dask_type: DaskType = DaskType.LOCAL,
+    dask_n_workers: Optional[int] = None,
+    failure_mode: FailureMode = FailureMode.SKIP,
+    output_dir: str = "output",
+    input_json: Optional[str] = None,
+):
+    """
+    Run protein prep on a set of structures.
+    """
+    from asapdiscovery.workflows.prep_workflows.protein_prep import (
+        ProteinPrepInputs,
+        protein_prep_workflow,
+    )
+
+    if input_json is not None:
+        print("Loading inputs from json file... Will override all other inputs.")
+        inputs = ProteinPrepInputs.from_json_file(input_json)
+
+    else:
+        inputs = ProteinPrepInputs(
+            target=target,
+            align=align,
+            ref_chain=ref_chain,
+            active_site_chain=active_site_chain,
+            seqres_yaml=seqres_yaml,
+            loop_db=loop_db,
+            oe_active_site_residue=oe_active_site_residue,
+            pdb_file=pdb_file,
+            fragalysis_dir=fragalysis_dir,
+            structure_dir=structure_dir,
+            cache_dir=cache_dir,
+            save_to_cache=save_to_cache,
+            use_dask=use_dask,
+            dask_type=dask_type,
+            dask_n_workers=dask_n_workers,
+            failure_mode=failure_mode,
+            output_dir=output_dir,
+        )
+
+    protein_prep_workflow(inputs)
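+
+
+# Example invocation (mirrors the PREP_FOR_DOCKING process in modules.nf;
+# "prepped.pdb" is a placeholder):
+#
+#     plumbline prep-protein-for-docking \
+#         --target SARS-CoV-2-Mpro \
+#         --pdb-file prepped.pdb \
+#         --output-dir ./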
+
+
+# TODO: check for openeye installation, maybe make it a decorator
+@cli.command(
+    "generate-constrained-ligand-poses",
+    help="Generate constrained ligand poses using OpenEye tooling.",
+)
+@click.option("input_sdf", "-i", "--input-sdf", required=True, type=str)
+@click.option("prepped_schema", "-s", "--prepped-schema", type=str)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+)
+def generate_constrained_ligand_poses(input_sdf, prepped_schema, output_directory):
+    try:
+        from asapdiscovery.data.schema.complex import PreppedComplex
+        from asapdiscovery.data.readers.molfile import MolFileFactory
+        from asapdiscovery.data.schema.ligand import Ligand
+        from asapdiscovery.docking.schema.pose_generation import (
+            OpenEyeConstrainedPoseGenerator,
+        )
+        from asapdiscovery.data.backend.openeye import save_openeye_sdfs
+    except ModuleNotFoundError:
+        raise click.UsageError("Could not import openeye")
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    raw_ligands = MolFileFactory(filename=input_sdf).load()
+
+    # reconstruct ligands from smiles because of some weirdness with the sdf files
+    ligs = [
+        Ligand.from_smiles(
+            compound_name=lig.tags["BindingDB MonomerID"],  # Changed capitalization
+            smiles=lig.smiles,
+            tags=lig.dict(),
+        )
+        for lig in raw_ligands
+    ]
+
+    prepped_complex = PreppedComplex.parse_file(prepped_schema)
+
+    poser = OpenEyeConstrainedPoseGenerator()
+    poses = poser.generate_poses(prepped_complex, ligands=ligs)
+    oemols = [ligand.to_oemol() for ligand in poses.posed_ligands]
+    # save to sdf file
+    save_openeye_sdfs(oemols, output_directory / "poses.sdf")
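+
+
+# Example invocation (mirrors GENERATE_CONSTRAINED_LIGAND_POSES in modules.nf;
+# the file names are placeholders):
+#
+#     plumbline generate-constrained-ligand-poses \
+#         --input-sdf congeneric_series.sdf \
+#         --prepped-schema prepped_complex.json \
+#         --output-directory ./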
+
+
+@cli.command("prep-cif")
+@click.option("input_json", "-j", "--input-json", type=str, required=True)
+@click.option("input_cif", "-c", "--input-cif", type=str, required=True)
+@click.option(
+    "fasta_sequence", "-f", "--fasta-sequence", type=str, default=None, required=True
+)
+@click.option("loop_db", "--loopdb", type=str, default=None)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    default="./",
+    required=True,
+)
+def prep_cif(input_json, input_cif, fasta_sequence, loop_db, output_directory):
+    from asapdiscovery.data.backend.openeye import load_openeye_cif1
+    from asapdiscovery.modeling.modeling import split_openeye_mol
+    from asapdiscovery.data.schema.ligand import Ligand
+    from asapdiscovery.data.schema.complex import Complex
+    from plumbdb.oespruce import spruce_protein
+    from asapdiscovery.data.schema.target import Target
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    with open(input_json, "r") as f:
+        record_dict = json.load(f)
+
+    graphmol = load_openeye_cif1(input_cif)
+
+    # this is what you would do if you didn't want to use whatever ligand is
+    # in the protein:
+    # split_dict = split_openeye_mol(graphmol, keep_one_lig=False)
+    split_dict = split_openeye_mol(graphmol, keep_one_lig=True)
+
+    target = Target.from_oemol(
+        split_dict["prot"],
+        target_name=record_dict["pdb_id"],
+    )
+    # Save the initial protein as a pdb file, if needed:
+    # target.to_pdb("protein.pdb")
+
+    # likewise, to use a ligand from an external SDF instead of the one found
+    # in the structure:
+    # ligand = Ligand.from_sdf(f'{record_dict["compound_name"]}.sdf')
+    ligand = Ligand.from_oemol(split_dict["lig"])
+
+    combined_complex = Complex(target=target, ligand=ligand, ligand_chain="L")
+
+    oemol = combined_complex.to_combined_oemol()
+
+    results, spruced = spruce_protein(
+        initial_prot=oemol,
+        protein_sequence=fasta_sequence,
+        loop_db=loop_db,
+    )
+
+    split_dict = split_openeye_mol(spruced)
+    prepped_target = Target.from_oemol(split_dict["prot"], **target.dict())
+    prepped_ligand = Ligand.from_oemol(split_dict["lig"], **ligand.dict())
+    prepped_ligand.to_sdf(output_directory / f"{ligand.compound_name}_ligand.sdf")
+    prepped_target.to_pdb(output_directory / f"{target.target_name}_spruced.pdb")
+
+    prepped_complex = Complex(
+        target=prepped_target, ligand=prepped_ligand, ligand_chain="L"
+    )
+
+    filename = f"{target.target_name}_{ligand.compound_name}_spruced_complex.pdb"
+    prepped_complex.to_pdb(output_directory / filename)
+    results.to_json_file(output_directory / f"{target.target_name}_spruce_results.json")
+
+
+# TODO: help string
+@cli.command(
+    "process-bindingdb", help="Parse and verify SDF files downloaded from bindingdb."
+)
+@click.option(
+    "input_directory",
+    "-i",
+    "--input-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    help="Directory containing the SDF files to process",
+)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    help="Directory to write output files",
+)
+def process_bindingdb(input_directory, output_directory):
+    from asapdiscovery.data.schema.ligand import Ligand
+    from asapdiscovery.data.readers.molfile import MolFileFactory
+    import pandas as pd
+    import math
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+
+    # get all sdf files
+    sdfs = list(input_directory.glob("*3D.sdf"))
+
+    output = []
+    for sdf in sdfs:
+        # asap function to read separate ligands from a multi-ligand sdf file
+        mols: list[Ligand] = MolFileFactory(filename=sdf).load()
+
+        # create a dictionary for each ligand containing various relevant
+        # information; there are some hidden choices here, for instance
+        # OpenEye is adding hydrogens, which you might not want
+        for mol in mols:
+            mol_dict = {
+                "compound_name": mol.compound_name,
+                "filename": sdf.name,
+                "has_3d": mol.to_oemol().GetDimension() == 3,
+                "num_atoms": mol.to_oemol().NumAtoms(),
+                "smiles": mol.smiles,
+                # "pdb_id": mol.tags.get("PDB ID")[:4]  # removed trailing space
+                # if mol.tags.get("PDB ID")  # removed trailing space
+                "pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4]  # removed trailing space
+                if mol.tags.get("PDB ID(s) for Ligand-Target Complex")
+                else "",
+            }
+
+            # any data in the SDF file is saved to the 'tags' attribute of an
+            # asapdiscovery Ligand object
+            mol_dict.update(mol.tags)
+
+            # write out sdf file
+            if mol_dict["has_3d"]:
+                mol.to_sdf(output_directory / f"{mol.compound_name}.sdf")
+
+            output.append(mol_dict)
+
+    df = pd.DataFrame.from_records(output)
+    df.to_csv(output_directory / "processed_bindingdb.csv", index=False)
+
+    # write separate csvs for 2D and 3D
+    df_2d = df[~df["has_3d"]]
+    df_3d = df[df["has_3d"]]
+    df_2d.to_csv(output_directory / "2d_bindingdb.csv", index=False)
+    df_3d.to_csv(output_directory / "3d_bindingdb.csv", index=False)
+
+    unique_sdf_filenames = df_3d["filename"].unique()
+    with open(output_directory / "unique_3D_sdf_filenames.txt", "w") as f:
+        for filename in unique_sdf_filenames:
+            f.write(f"{filename}\n")
+
+    # write out separate json records; pandas leaves NaN in empty cells, which
+    # is not valid JSON, so replace NaN values with None before serializing
+    for record in df_3d.to_dict(orient="records"):
+        record = {
+            k: None if isinstance(v, float) and math.isnan(v) else v
+            for k, v in record.items()
+        }
+        with open(output_directory / f"{record['compound_name']}.json", "w") as f:
+            f.write(json.dumps(record, indent=4))
+
+
+@cli.command("rename-ligands-for-openfe")
+@click.option(
+    "-i",
+    "--input-sdf",
+    type=click.Path(file_okay=True, dir_okay=False, path_type=pathlib.Path),
+    required=True,
+)
+@click.option(
+    "-o",
+    "--output-dir",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    default=pathlib.Path("./"),
+    help="Path to the output directory where the results will be stored",
+)
+def rename_ligands_for_sdf(input_sdf, output_dir):
+    from asapdiscovery.data.readers.molfile import MolFileFactory
+    from asapdiscovery.data.schema.ligand import Ligand, write_ligands_to_multi_sdf
+
+    # copy each ligand's compound name onto its OEMol title before writing
+    # the combined sdf, so downstream openfe tooling can identify ligands
+    mols: list[Ligand] = MolFileFactory(filename=input_sdf).load()
+    new_ligands = []
+    for mol in mols:
+        oemol = mol.to_oemol()
+        oemol.SetTitle(mol.compound_name)
+        new_ligands.append(Ligand.from_oemol(oemol))
+    output_sdf = output_dir / "renamed.sdf"
+    write_ligands_to_multi_sdf(output_sdf, new_ligands, overwrite=True)
+
+
+@cli.command(
+    "visualize-network",
+    help="Generate a network plot of the proposed alchemical network",
+)
+@click.option(
+    "network_graphml",
+    "-n",
+    "--network-graphml",
+    type=str,
+    required=True,
+    help="Path to the input GraphML file containing the atom mapping network.",
+)
+@click.option(
+    "output_directory",
+    "-o",
+    "--output-directory",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=pathlib.Path),
+    required=True,
+    default=pathlib.Path("./"),
+    help="Path to the output directory where the results will be stored",
+)
+def visualize_network(network_graphml, output_directory):
+    from openfe.utils.atommapping_network_plotting import plot_atommapping_network
+    from openfe.setup import LigandNetwork
+
+    output_directory.mkdir(exist_ok=True, parents=True)
+    # --network-graphml is received as a plain string, so wrap it in a Path
+    ligand_network = pathlib.Path(network_graphml)
+
+    if not ligand_network.exists():
+        raise FileNotFoundError(
+            f"Could not find the ligand network file at {ligand_network}"
+        )
+
+    with open(ligand_network) as f:
+        graphml = f.read()
+
+    network = LigandNetwork.from_graphml(graphml)
+    fig = plot_atommapping_network(network)
+    fig.savefig(output_directory / "network_plot.png")
diff --git a/pkg/src/plumbdb/oespruce.py b/pkg/src/plumbdb/oespruce.py
new file mode 100644
index 0000000..7f986fe
--- /dev/null
+++ b/pkg/src/plumbdb/oespruce.py
@@ -0,0 +1,173 @@
+import pathlib
+from typing import Optional
+
+from asapdiscovery.data.schema.schema_base import DataModelAbstractBase
+from asapdiscovery.data.backend.openeye import (
+    oespruce,
+    oechem,
+    oegrid,
+)
+from asapdiscovery.modeling.modeling import (
+    get_oe_structure_metadata_from_sequence,
+    openeye_perceive_residues,
+)
+
+
+class SpruceResults(DataModelAbstractBase):
+    build_loops_success: bool
+    build_sidechains_success: bool
+    add_caps_success: Optional[bool] = None
+    place_hydrogens_success: bool
+    error_message: str
+
+
+# TODO: this was originally defined in asapdiscovery.modeling.modeling?
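+# Illustrative usage (see prep_cif in cli.py for the real call site; the
+# variable names are placeholders):
+#
+#     results, spruced = spruce_protein(
+#         initial_prot=oemol,
+#         protein_sequence=fasta_sequence,
+#         loop_db=loop_db,
+#     )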
+def spruce_protein(
+    initial_prot: oechem.OEGraphMol,
+    protein_sequence: str = None,
+    loop_db: pathlib.Path = None,
+) -> tuple[SpruceResults, oechem.OEGraphMol]:
+    """
+    Applies the OESpruce protein preparation pipeline to the given protein structure.
+
+    Parameters
+    ----------
+    initial_prot : oechem.OEGraphMol
+        The input protein structure to be prepared.
+
+    protein_sequence : str, optional
+        The sequence of the protein for a single chain. If provided, this will be
+        added to the Structure Metadata before applying the OESpruce pipeline.
+        Default is None.
+
+    loop_db : pathlib.Path, optional
+        The filename of the loop database to be used by the OESpruce pipeline. If
+        provided, the pipeline will include the loop building step.
+        Default is None.
+
+    Returns
+    -------
+    (results: SpruceResults, prepared_prot: oechem.OEGraphMol)
+        A SpruceResults record of which pipeline stages succeeded (including any
+        Spruce error message), and the prepared protein structure.
+    """
+
+    # Add Hs to prep protein and ligand, if desired:
+    # oechem.OEAddExplicitHydrogens(initial_prot)
+
+    # Even though we aren't making DUs, we still need to set up the options
+    opts = oespruce.OEMakeDesignUnitOptions()
+    opts.SetSuperpose(False)
+    opts.GetPrepOptions().SetStrictProtonationMode(True)
+
+    # Add caps when needed
+    # Allow truncation in case adding a cap causes a clash
+    cap_opts = oespruce.OECapBuilderOptions()
+    cap_opts.SetAllowTruncate(True)
+    is_terminal_predicate = oechem.OEOrAtom(
+        oechem.OEIsNTerminalAtom(), oechem.OEIsCTerminalAtom()
+    )
+
+    # Set Build Loop and Sidechain Opts
+    sc_opts = oespruce.OESidechainBuilderOptions()
+
+    loop_opts = oespruce.OELoopBuilderOptions()
+    loop_opts.SetSeqAlignMethod(oechem.OESeqAlignmentMethod_Identity)
+    loop_opts.SetSeqAlignGapPenalty(-1)
+    loop_opts.SetSeqAlignExtendPenalty(0)
+
+    # Don't build tails, too much work for little gain
+    loop_opts.SetBuildTails(False)
+
+    if loop_db is not None:
+        print(f"Adding loop db {loop_db}")
+        loop_opts.SetLoopDBFilename(str(loop_db))
+
+    # Construct spruce filter
+    spruce_opts = oespruce.OESpruceFilterOptions()
+    spruce = oespruce.OESpruceFilter(spruce_opts, opts)
+
+    # Spruce!
+
+    # These objects are for some reason needed in order to run spruce
+    grid = oegrid.OESkewGrid()
+
+    if protein_sequence:
+        # convert fasta sequence to 3-letter codes
+        try:
+            protein_sequence = " ".join(convert_to_three_letter_codes(protein_sequence))
+            print("Adding sequence metadata from sequence: ", protein_sequence)
+            metadata = get_oe_structure_metadata_from_sequence(
+                initial_prot, protein_sequence
+            )
+        except KeyError as e:
+            print(
+                f"Error converting protein sequence to 3-letter codes: {e}. "
+                "Skipping sequence metadata."
+            )
+            protein_sequence = None
+
+    if not protein_sequence:
+        metadata = oespruce.OEStructureMetadata()
+
+    # Building the loops actually does use the sequence metadata
+    build_loops_success = oespruce.OEBuildLoops(
+        initial_prot, metadata, sc_opts, loop_opts
+    )
+
+    build_sidechains_success = oespruce.OEBuildSidechains(initial_prot, sc_opts)
+    add_caps_success = oespruce.OECapTermini(
+        initial_prot, is_terminal_predicate, cap_opts
+    )
+    place_hydrogens_success = oechem.OEPlaceHydrogens(initial_prot)
+    spruce_error_code = spruce.StandardizeAndFilter(initial_prot, grid, metadata)
+    spruce_error_msg = spruce.GetMessages(spruce_error_code)
+
+    # Re-perceive residues so that atom numbering and CONECT records don't get
+    # scrambled
+    initial_prot = openeye_perceive_residues(initial_prot, preserve_all=False)
+    return (
+        SpruceResults(
+            build_loops_success=build_loops_success,
+            build_sidechains_success=build_sidechains_success,
+            add_caps_success=add_caps_success,
+            place_hydrogens_success=place_hydrogens_success,
+            error_message=spruce_error_msg,
+        ),
+        initial_prot,
+    )
+
+
+def convert_to_three_letter_codes(sequence) -> list[str]:
+    """
+    Convert a protein sequence from 1-letter codes to 3-letter codes.
+    """
+    # Dictionary to map 1-letter codes to 3-letter codes
+    amino_acid_dict = {
+        "A": "ALA",
+        "R": "ARG",
+        "N": "ASN",
+        "D": "ASP",
+        "C": "CYS",
+        "Q": "GLN",
+        "E": "GLU",
+        "G": "GLY",
+        "H": "HIS",
+        "I": "ILE",
+        "L": "LEU",
+        "K": "LYS",
+        "M": "MET",
+        "F": "PHE",
+        "P": "PRO",
+        "S": "SER",
+        "T": "THR",
+        "W": "TRP",
+        "Y": "TYR",
+        "V": "VAL",
+    }
+
+    # Convert the sequence to 3-letter codes; unknown residues raise KeyError,
+    # which spruce_protein catches above
+    three_letter_sequence = [amino_acid_dict[aa] for aa in sequence]
+
+    return three_letter_sequence
diff --git a/pkg/src/plumbdb/tests/test_plumb.py b/pkg/src/plumbdb/tests/test_plumb.py
new file mode 100644
index 0000000..e59fd58
--- /dev/null
+++ b/pkg/src/plumbdb/tests/test_plumb.py
@@ -0,0 +1,15 @@
+def test_import():
+    import plumbdb
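+
+
+def test_cli_help():
+    # Minimal smoke test (a sketch): invoking --help should succeed and list
+    # the registered subcommands without touching any OpenEye-backed logic.
+    from click.testing import CliRunner
+
+    from plumbdb.cli import cli
+
+    runner = CliRunner()
+    result = runner.invoke(cli, ["--help"])
+    assert result.exit_code == 0
+    assert "download-pdb" in result.output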