From 8adf07ee2426fe6ecedab677e2a011f95cb9d6a5 Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Wed, 8 Jan 2025 17:27:54 -0500 Subject: [PATCH 1/4] saving --- README.md | 197 +++++------------ docs/paste3/installation.md | 28 --- docs/source/api.rst | 38 ++-- docs/source/conf.py | 7 +- docs/source/index.rst | 36 ++-- docs/source/installation.md | 82 ++++++++ docs/source/notebooks/paste2_tutorial.ipynb | 33 +-- ...e3_dataset.ipynb => paste3_tutorial.ipynb} | 23 +- docs/source/notebooks/paste_tutorial.ipynb | 199 ++---------------- docs/source/tutorial.rst | 5 +- src/paste3/paste.py | 4 +- 11 files changed, 239 insertions(+), 413 deletions(-) delete mode 100644 docs/paste3/installation.md create mode 100644 docs/source/installation.md rename docs/source/notebooks/{paste3_dataset.ipynb => paste3_tutorial.ipynb} (86%) diff --git a/README.md b/README.md index 7280a3d..4d96200 100644 --- a/README.md +++ b/README.md @@ -2,170 +2,81 @@ [![Coverage Status](https://coveralls.io/repos/github/raphael-group/paste3/badge.svg?branch=main)](https://coveralls.io/github/raphael-group/paste3?branch=main) [![Docs](https://github.com/raphael-group/paste3/actions/workflows/docs.yml/badge.svg)](https://raphael-group.github.io/paste3/) -(Note: This repository integrates Paste and Paste 2, and is a work in progress) -# PASTE +# Paste 3 -![PASTE Overview](https://github.com/raphael-group/paste/blob/main/docs/source/_static/images/paste_overview.png) - -PASTE is a computational method that leverages both gene expression similarity and spatial distances between spots to align and integrate spatial transcriptomics data. In particular, there are two methods: -1. `pairwise_align`: align spots across pairwise slices. -2. `center_align`: integrate multiple slices into one center slice. - -You can read full paper [here](https://www.nature.com/articles/s41592-022-01459-6). - -Auto-generated documentation for this package is available [here](https://raphael-group.github.io/paste3/).
- -Additional examples and the code to reproduce the paper's analyses can be found [here](https://github.com/raphael-group/paste_reproducibility). Preprocessed datasets used in the paper can be found on [zenodo](https://doi.org/10.5281/zenodo.6334774). - -### Recent News - -* PASTE is now published in [Nature Methods](https://www.nature.com/articles/s41592-022-01459-6)! - -* The code to reproduce the analisys can be found [here](https://github.com/raphael-group/paste_reproducibility). - -* As of version 1.2.0, PASTE now supports GPU implementation via Pytorch. For more details, see the GPU section of the [Tutorial notebook](docs/source/notebooks/getting-started.ipynb). - -### Installation - -The easiest way is to install PASTE on pypi: https://pypi.org/project/paste-bio/. - -`pip install paste-bio` - -Or you can install PASTE on bioconda: https://anaconda.org/bioconda/paste-bio. - -`conda install -c bioconda paste-bio` +**Paste 3** (Paste + Paste 2) is a Python package and NAPARI plugin that +provides advanced alignment methods of Spatial Transcriptonomics (ST) data +as detailed in the following publications: -Check out Tutorial.ipynb for an example of how to use PASTE. +### 1. *PASTE* +**Zeira, R., Land, M., Strzalkowski, A., et al.** +*Alignment and integration of spatial transcriptomics data.* +**Nat Methods**, 19, 567–575 (2022). -Alternatively, you can clone the respository and try the following example in a -notebook or the command line. +[Read the publication](https://doi.org/10.1038/s41592-022-01459-6) +[Original PASTE code](https://github.com/raphael-group/paste) -### Quick Start +--- -To use PASTE we require at least two slices of spatial-omics data (both -expression and coordinates) that are in -anndata format (i.e. read in by scanpy/squidpy). We have included a breast -cancer dataset from [1] in the [sample_data folder](tests/data/input/) of this repo -that we will use as an example below to show how to use PASTE. +### 2. 
*PASTE2* +**Liu X, Zeira R, Raphael BJ.** +*Partial alignment of multislice spatially resolved transcriptomics data.* +**Genome Res.** 2023 Jul; 33(7):1124-1132. +[Read the publication](https://doi.org/10.1101/gr.277670.123) +[Original PASTE2 code](https://github.com/raphael-group/paste2) -```python -import matplotlib.pyplot as plt -import matplotlib.patches as mpatches -import numpy as np -import scanpy as sc -import paste as pst +The motivation behind PASTE3 is to provide a NAPARI plugin +for practitioners to experiment with both PASTE and PASTE2 at an operational +level, as well as provide a common codebase for future development of ST +alignment algorithms. (`Paste-N`..) -# Load Slices -data_dir = 'tests/data/input/' # change this path to the data you wish to analyze +PASTE3 is built on `pytorch` and can leverage a GPU for performance if +available, though it is able to run just fine in the absence of a GPU, on all +major platforms. +Auto-generated documentation for the PASTE3 package is available [here](https://raphael-group.github.io/paste3/). -# Assume that the coordinates of slices are named slice_name + "_coor.csv" -def load_slices(data_dir, slice_names=["slice1", "slice2"]): - slices = [] - for slice_name in slice_names: - slice_i = sc.read_csv(data_dir + slice_name + ".csv") - slice_i_coor = np.genfromtxt(data_dir + slice_name + "_coor.csv", delimiter=',') - slice_i.obsm['spatial'] = slice_i_coor - # Preprocess slices - sc.pp.filter_genes(slice_i, min_counts=15) - sc.pp.filter_cells(slice_i, min_counts=100) - slices.append(slice_i) - return slices +Additional examples and the code to reproduce the original PASTE paper's analyses are available [here](https://github.com/raphael-group/paste_reproducibility). Preprocessed datasets used in the paper can be found on [zenodo](https://doi.org/10.5281/zenodo.6334774). 
+## Overview -slices = load_slices(data_dir) -slice1, slice2 = slices - -# Pairwise align the slices -pi12 = pst.pairwise_align(slice1, slice2) - -# To visualize the alignment you can stack the slices -# according to the alignment pi -slices, pis = [slice1, slice2], [pi12] -new_slices = pst.stack_slices_pairwise(slices, pis) - -slice_colors = ['#e41a1c', '#377eb8'] -plt.figure(figsize=(7, 7)) -for i in range(len(new_slices)): - pst.plot_slice(new_slices[i], slice_colors[i], s=400) -plt.legend(handles=[mpatches.Patch(color=slice_colors[0], label='1'), mpatches.Patch(color=slice_colors[1], label='2')]) -plt.gca().invert_yaxis() -plt.axis('off') -plt.show() - -# Center align slices -## We have to reload the slices as pairwise_alignment modifies the slices. -slices = load_slices(data_dir) -slice1, slice2 = slices - -# Construct a center slice -## choose one of the slices as the coordinate reference for the center slice, -## i.e. the center slice will have the same number of spots as this slice and -## the same coordinates. -initial_slice = slice1.copy() -slices = [slice1, slice2] -lmbda = len(slices) * [1 / len(slices)] # set hyperparameter to be uniform - -## Possible to pass in an initial pi (as keyword argument pis_init) -## to improve performance, see Tutorial.ipynb notebook for more details. -center_slice, pis = pst.center_align(initial_slice, slices, lmbda) - -## The low dimensional representation of our center slice is held -## in the matrices W and H, which can be used for downstream analyses -W = center_slice.uns['paste_W'] -H = center_slice.uns['paste_H'] -``` - -### GPU implementation -PASTE now is compatible with gpu via Pytorch. 
All we need to do is add the following two parameters to our main functions: -``` -pi12 = pst.pairwise_align(slice1, slice2, backend = ot.backend.TorchBackend(), use_gpu = True) - -center_slice, pis = pst.center_align(initial_slice, slices, lmbda, backend = ot.backend.TorchBackend(), use_gpu = True) -``` -For more details, see the GPU section of the [Tutorial notebook](docs/source/notebooks/getting-started.ipynb). - -### Command Line - -We provide the option of running PASTE from the command line. - -First, clone the repository: +![PASTE Overview](https://github.com/raphael-group/paste/blob/main/docs/source/_static/images/paste_overview.png) -`git clone https://github.com/raphael-group/paste.git` +The PASTE series of algorithms provide computational methods that leverage both +gene expression similarity and spatial distances between spots to align and +integrate spatial transcriptomics data. In particular, there are two modes of +operation: +1. `Pairwise-Alignment`: align spots between successive pairs of slices. +2. `Center-Alignment`: infer a `center slice` (low sparsity, low variance) and +align all slices with respect to this center slice. -Next, when providing files, you will need to provide two separate files: the gene expression data followed by spatial data (both as .csv) for the code to initialize one slice object. -Sample execution (based on this repo): `python paste-cmd-line.py -m center -f ./sample_data/slice1.csv ./sample_data/slice1_coor.csv ./sample_data/slice2.csv ./sample_data/slice2_coor.csv ./sample_data/slice3.csv ./sample_data/slice3_coor.csv` +### Installation -Note: `pairwise` will return pairwise alignment between each consecutive pair of slices (e.g. \[slice1,slice2\], \[slice2,slice3\]). 
+The easiest way to install PASTE3 is using `pip`: -| Flag | Name | Description | Default Value | -| --- | --- | --- | --- | -| -m | mode | Select either `pairwise` or `center` | (str) `pairwise` | -| -f | files | Path to data files (.csv) | None | -| -d | direc | Directory to store output files | Current Directory | -| -a | alpha | Alpha parameter for PASTE | (float) `0.1` | -| -c | cost | Expression dissimilarity cost (`kl` or `Euclidean`) | (str) `kl` | -| -p | n_components | n_components for NMF step in `center_align` | (int) `15` | -| -l | lmbda | Lambda parameter in `center_align` | (floats) probability vector of length `n` | -| -i | intial_slice | Specify which file is also the intial slice in `center_align` | (int) `1` | -| -t | threshold | Convergence threshold for `center_align` | (float) `0.001` | -| -x | coordinates | Output new coordinates (toggle to turn on) | `False` | -| -w | weights | Weights files of spots in each slice (.csv) | None | -| -s | start | Initial alignments for OT. If not given uses uniform (.csv structure similar to alignment output) | None | +`pip install git+https://github.com/raphael-group/paste3.git` -`pairwise_align` outputs a (.csv) file containing mapping of spots between each consecutive pair of slices. The rows correspond to spots of the first slice, and cols the second. +Developers who wish to work with `paste3` in Python will likely want to review +the detailed [installation](https://raphael-group.github.io/paste3/installation) +page. -`center_align` outputs two files containing the low dimensional representation (NMF decomposition) of the center slice gene expression, and files containing a mapping of spots between the center slice (rows) to each input slice (cols).
-### Sample Dataset +### Getting Started -Added sample spatial transcriptomics dataset consisting of four breast cancer slice courtesy of: +If you intend to use PASTE3 as a `napari` plugin, install `paste3` in a python +environment that has `napari` installed, or install `napari` after having +installed `paste3` as above. -[1] Ståhl, Patrik & Salmén, Fredrik & Vickovic, Sanja & Lundmark, Anna & Fernandez Navarro, Jose & Magnusson, Jens & Giacomello, Stefania & Asp, Michaela & Westholm, Jakub & Huss, Mikael & Mollbrink, Annelie & Linnarsson, Sten & Codeluppi, Simone & Borg, Åke & Pontén, Fredrik & Costea, Paul & Sahlén, Pelin Akan & Mulder, Jan & Bergmann, Olaf & Frisén, Jonas. (2016). Visualization and analysis of gene expression in tissue sections by spatial transcriptomics. Science. 353. 78-82. 10.1126/science.aaf2403. +`pip install napari` -Note: Original data is (.tsv), but we converted it to (.csv). +Open one of the sample datasets we provide (`File->Open Sample->Paste3->SCC Patient..`) +and then select one of the two modes of PASTE3 operations +(`Plugins->Paste3->Center Align` or `Plugins->Paste3->Pairwise Align`). -### References +With the default parameters, alignment should take a couple of minutes, though +you have the option of changing these to suit your needs. -Ron Zeira, Max Land, Alexander Strzalkowski and Benjamin J. Raphael. "Alignment and integration of spatial transcriptomics data". Nature Methods (2022). https://doi.org/10.1038/s41592-022-01459-6 +If you intend to use PASTE3 programmatically in your Python code, follow along +the [Getting Started](https://raphael-group.github.io/paste3/notebooks/paste_tutorial.html) +tutorial. diff --git a/docs/paste3/installation.md b/docs/paste3/installation.md deleted file mode 100644 index 80e8e95..0000000 --- a/docs/paste3/installation.md +++ /dev/null @@ -1,28 +0,0 @@ -# Setting up the environment - -1. 
Clone the repository and enter it: -```shell - git clone git@github.com:raphael-group/paste3.git - cd paste3 -``` -2. Create and activate a conda environment using the provided `environment.yml` file: -```shell - conda env create --file environment.yml - conda activate paste -``` -The command prompt will change to indicate the new conda environment by prepending `(paste)`. -3. When you are done using the package, deactivate the `paste` environment and return to `(base)` by entering the following command: -```shell -conda deactivate -``` - -# Installation - -1. Enter the `paste3` repository cloned before setting up the environment -```shell -cd paste3 -``` -2. Install the package: -```shell -pip install . -``` diff --git a/docs/source/api.rst b/docs/source/api.rst index 601e91d..f4cfd1f 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1,9 +1,9 @@ API === - import paste3 + .. automodule:: paste3 -PASTE Alignment +Alignment ~~~~~~~~~~~~~~~~~~ .. autosummary:: @@ -11,6 +11,10 @@ PASTE Alignment paste.pairwise_align paste.center_align + paste.center_ot + paste.center_NMF + paste.my_fused_gromov_wasserstein + paste.line_search_partial Visualization ~~~~~~~~~~~~~ @@ -21,6 +25,7 @@ Visualization visualization.stack_slices_pairwise visualization.stack_slices_center visualization.plot_slice + visualization.generalized_procrustes_analysis Model Selection ~~~~~~~~~~~~~~~~ @@ -28,24 +33,9 @@ Model Selection .. autosummary:: :toctree: api - model_selection.create_graph - model_selection.generate_graph_from_labels - model_selection.edge_inconsistency_score - model_selection.calculate_convex_hull_edge_inconsistency - model_selection.plot_edge_curve - model_selection.select_overlap_fraction_plotting - -GLMPCA -~~~~~~~ - -.. 
autosummary:: - :toctree: api - - glmpca.ortho - glmpca.mat_binom_dev - glmpca.glmpca_init - glmpca.est_nb_theta - glmpca.glmpca + model_selection.generate_graph + model_selection.convex_hull_edge_inconsistency + model_selection.select_overlap_fraction Miscellaneous @@ -54,5 +44,11 @@ Miscellaneous .. autosummary:: :toctree: api - helper.filter_for_common_genes + helper.kl_divergence + helper.glmpca_distance + helper.pca_distance + helper.high_umi_gene_distance + helper.norm_and_center_coordinates + helper.get_common_genes helper.match_spots_using_spatial_heuristic + helper.dissimilarity_metric diff --git a/docs/source/conf.py b/docs/source/conf.py index 42b606e..41e4ea4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -2,7 +2,8 @@ from pathlib import Path HERE = Path(__file__).parent -sys.path.insert(0, Path.resolve(HERE.parent.parent)) +sys.path.insert(0, str(HERE.parent.parent / "src")) +import paste3 # noqa: E402 # Configuration file for the Sphinx documentation builder. # @@ -13,11 +14,11 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "paste3" -copyright = "2022, Raphael Lab" +copyright = "2024, Raphael Lab" author = "Ron Zeira, Max Land, Alexander Strzalkowski, Benjamin J. Raphael" # The full version, including alpha/beta/rc tags -release = "1.2.0" +release = paste3.__version__ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/index.rst b/docs/source/index.rst index 5a3ef71..53c6db0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,17 +1,11 @@ Welcome to PASTE3 documentation! ================================= -**PASTE3 (WIP)** package that provides combined functionality of PASTE and PASTE2. 
-**PASTE** is a computational method that leverages both gene expression similarity and spatial distances between spots to align and integrate spatial transcriptomics data, and -**PASTE2**, the extension of PASTE, is a method for partial alignment and 3D reconstruction of spatial transcriptomics slices when they do not fully overlap in space. -In particular, PASTE3 combines PASTE and PASTE2 to provide five main functionalities: +**PASTE3** is a package that provides the combined functionality of PASTE and PASTE2. -1. Pairwise Alignment: align spots across pairwise slices. -2. Center Alignment: integrate multiple slices into one center slice. -3. Partial Pairwise Alignment: given a pair of slices and their overlap percentage, find a partial alignment matrix. -4. Select Overlap: decide the overlap percentage between two slices -5. Partial Stack Slices Pairwise: given a sequence of consecutive slices and the partial alignments between them, -project all slices onto the same 2D coordinate system. 3D reconstruction can be done by assigning a z-value to each slice. +PASTE Alignment +--------------- +**PASTE** is a computational method that leverages both gene expression similarity and spatial distances between spots to align and integrate spatial transcriptomics data. .. image:: _static/images/paste_overview.png :alt: PASTE Overview Figure @@ -19,21 +13,31 @@ project all slices onto the same 2D coordinate system. 3D reconstruction can be :align: center | +PASTE2 Alignment +---------------- +**PASTE2**, the extension of PASTE, is a method for partial alignment and 3D reconstruction of spatial transcriptomics slices when they do not fully overlap in space. + .. image:: _static/images/paste2.png :alt: PASTE2 Overview Figure :width: 800px :align: center -| -Manuscript ----------- +PASTE3 Alignment +---------------- + +PASTE3 combines PASTE and PASTE2 to provide five main functionalities: + +1. `Pairwise Alignment`: align spots across pairwise slices. +2.
`Center Alignment`: integrate multiple slices into one center slice. +3. `Partial Pairwise Alignment`: given a pair of slices and their overlap percentage, find a partial alignment matrix. +4. `Select Overlap`: decide the overlap percentage between two slices +5. `Partial Stack Slices Pairwise`: given a sequence of consecutive slices and the partial alignments between them, project all slices onto the same 2D coordinate system. 3D reconstruction can be done by assigning a z-value to each slice. -You can view PASTE `preprint `_ on **bioRxiv**. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Contents: - paste3/installation.md + installation api tutorial diff --git a/docs/source/installation.md b/docs/source/installation.md new file mode 100644 index 0000000..7c4d8f8 --- /dev/null +++ b/docs/source/installation.md @@ -0,0 +1,82 @@ +## Installation + +Before you can use the `paste3` package, ensure that your system meets the following prerequisites: + +- Python 3.12 or later +- Pip package manager + +Clone the repository and enter it: + ``` + git clone https://github.com/raphael-group/paste3.git + cd paste3 + ``` + +### Creating a paste3 environment + +All of `paste3`'s dependencies are on [PyPI](https://pypi.org/). We have developed and tested `paste3` on Python 3.12, but it should work on later Python versions as well. +You can create a new virtual environment using `venv`, and install dependencies using `pip`. + +1. Verify that Python 3.12 or newer is installed on your system. + ``` + python --version + ``` + +2. Create a new environment and activate it. + ``` + python -m venv .venv + source .venv/bin/activate + ``` + +3. 
In the activated environment, install the paste3 package in editable mode, along with its `dev` and `docs` extras: + ``` + pip install -e .[dev,docs] + ``` + +### Using a different Python version than the system default + +If your Python version is not 3.12 or later, or if you're getting errors when using a non-tested Python version, we recommend using the [uv](https://github.com/astral-sh/uv) tool to create a virtual environment with the correct Python version. +`uv` is quick to [install](https://github.com/astral-sh/uv?tab=readme-ov-file#installation) and easy to use, both locally as well as on research clusters. + +Once `uv` is installed: + +1. Create a new environment with Python 3.12 and activate it. + ``` + uv venv --python 3.12 + source .venv/bin/activate + ``` + +2. In the activated environment, install the package in editable mode, along with its `dev` and `docs` extras: + ``` + uv pip install -e .[dev,docs] + ``` + +### paste3 environment using conda + +If you prefer using `conda`, you can use the provided `environment.yml` file to create a new conda environment with all the necessary dependencies pinned to versions that have worked for us in the past. +> **Note** +> +> `paste3` can get all its dependencies from `pypi` using `pip` and does not need [conda](https://docs.anaconda.com/miniconda/) for environment management. +Nevertheless, this might be the easiest option for most users who already have access to the `conda` executable locally or through a research cluster. The provided `environment.yml` file +has the defaults channel disabled, and can be used to create a new conda environment with all the necessary dependencies. +**It can therefore be used without getting a business or enterprise license from Anaconda. (See [Anaconda FAQs](https://www.anaconda.com/pricing/terms-of-service-faqs))** + +1. Create a new conda environment named `paste3` with Python version 3.12. + ``` + conda create --name paste3 python=3.12 pip + ``` + +2. 
Activate the environment. + ``` + conda activate paste3 + ``` + The command prompt will change to indicate the new conda environment by prepending `(paste3)`. + +3. In the activated environment, install the dependencies provided in `environment.yml`: + ``` + conda env update --file environment.yml + ``` + +4. In the activated environment, install the package in editable mode *without dependencies*. + ``` + pip install -e . --no-deps + ``` diff --git a/docs/source/notebooks/paste2_tutorial.ipynb b/docs/source/notebooks/paste2_tutorial.ipynb index 4383abd..e78b947 100644 --- a/docs/source/notebooks/paste2_tutorial.ipynb +++ b/docs/source/notebooks/paste2_tutorial.ipynb @@ -3,34 +3,43 @@ { "cell_type": "code", "execution_count": null, - "id": "56e5cd44", + "id": "30d3de80-424b-48e8-abd6-ef22da03611d", "metadata": {}, "outputs": [], "source": [ - "import matplotlib.patches as mpatches\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import scanpy as sc\n", - "import seaborn as sns" + "# This cell is to allow automatic notebook generation for docs\n", + "# You may want to comment this out if you have paste3 installed\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "sys.path.insert(0, str(Path.cwd().parent.parent.parent / \"src\"))" ] }, { "cell_type": "markdown", - "id": "79453c4b", + "id": "0f721a16-a25c-4c32-b562-d0e454e8f4e8", "metadata": {}, "source": [ - "# Install PASTE2 python package\n", + "# Using the PASTE2 algorithm\n", + "\n", + "This notebook highlights the creation of slices (AnnData objects), usage of the `pairwise_align` and `center_align` functions of `paste3`, along with stacking and plotting functionalities.\n", "\n", - "You can install the paste2 package at https://pypi.org/project/paste2/. We import paste2 as follows:" + "**This notebook primarily highlights how you would use the `paste3` package in `PASTE2` (i.e.
partial alignment) mode, when adjacent slices do not fully overlap in space or have different cell type compositions.**" ] }, { "cell_type": "code", "execution_count": null, - "id": "603868f2", + "id": "56e5cd44", "metadata": {}, "outputs": [], "source": [ + "import matplotlib.patches as mpatches\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import scanpy as sc\n", + "import seaborn as sns\n", + "\n", "from paste3 import paste, visualization" ] }, @@ -145,11 +154,11 @@ "source": [ "# Compute partial pairwise alignment using PASTE2\n", "\n", - "Give a pair of partially overlapping slices, we can use PASTE2.partial_pairwise_align( ) to find an alignment matrix. To call the function, you need to input the AnnData objects of the two slices, as well as a parameter s, which indicates the overlap percentage of the two slices. In this tutorial, each pair of cropped subslices overlap at 70% of the areas, so we set overlap_fraction=0.7. For your own datasets you should visualize the slices and manually determine the approxiamte overlap percentage (this parameter does not have to be very accurate).\n", + "Given a pair of partially overlapping slices, we can use `partial_pairwise_align()` to find an alignment matrix. To call the function, you need to input the AnnData objects of the two slices, as well as a parameter s, which indicates the overlap percentage of the two slices. In this tutorial, each pair of cropped subslices overlap at 70% of the areas, so we set `overlap_fraction=0.7`. For your own datasets you should visualize the slices and manually determine the approximate overlap percentage (this parameter does not have to be very accurate).\n", "\n", "Now we compute an alignment matrix between each pair of slices in our example dataset.\n", "\n", - "**In the calls to `paste3.pairwise_align` below, we're using maxIter=20 here to specify a maximum of 20 iterations for pairwise_align.
This is only to allow this demo to run in a resonable amount of time. In a real alignment scenario, you should not include this argument**." + "**In the calls to `pairwise_align` below, we're using maxIter=20 here to specify a maximum of 20 iterations for pairwise_align. This is only to allow this demo to run in a reasonable amount of time. In a real alignment scenario, you should not include this argument**." ] }, { diff --git a/docs/source/notebooks/paste3_dataset.ipynb b/docs/source/notebooks/paste3_tutorial.ipynb similarity index 86% rename from docs/source/notebooks/paste3_dataset.ipynb rename to docs/source/notebooks/paste3_tutorial.ipynb index 90d837a..e9e8691 100644 --- a/docs/source/notebooks/paste3_dataset.ipynb +++ b/docs/source/notebooks/paste3_tutorial.ipynb @@ -1,11 +1,30 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "be2810c7-7f5c-4a0c-b3cf-5a75e9a45406", + "metadata": {}, + "outputs": [], + "source": [ + "# This cell is to allow automatic notebook generation for docs\n", + "# You may want to comment this out if you have paste3 installed\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "sys.path.insert(0, str(Path.cwd().parent.parent.parent / \"src\"))" + ] + }, { "cell_type": "markdown", "id": "6ed2400f-074c-41c1-9b9c-c4164e3e35ff", "metadata": {}, "source": [ - "# `Slice` and `AlignmentDataset` objects" + "# Using the PASTE/PASTE2 algorithm through a unifying API\n", + "\n", + "This notebook highlights the creation of slices (the `Slice` class) and datasets (the `AlignmentDataset` class), and the usage of the `pairwise_align` and `center_align` methods of the `AlignmentDataset` class.\n", + "\n", + "**This notebook primarily highlights how you would use the `paste3` package in either the `PASTE` (i.e. full alignment) mode, or the `PASTE2` (i.e. partial alignment) mode.**
This API also closely reflects how our napari plugin works under the hood, so getting familiar with this API will also help you get familiar with the various options available to you in the plugin." ] }, { @@ -30,8 +49,6 @@ }, "outputs": [], "source": [ - "from pathlib import Path\n", - "\n", "from paste3.dataset import AlignmentDataset\n", "from paste3.napari.data.ondemand import get_file" ] diff --git a/docs/source/notebooks/paste_tutorial.ipynb b/docs/source/notebooks/paste_tutorial.ipynb index aed7569..712a179 100644 --- a/docs/source/notebooks/paste_tutorial.ipynb +++ b/docs/source/notebooks/paste_tutorial.ipynb @@ -1,24 +1,28 @@ { "cells": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Getting Started" - ] - }, - { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Install paste python package" + "# This cell is to allow automatic notebook generation for docs\n", + "# You may want to comment this out if you have paste3 installed\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "sys.path.insert(0, str(Path.cwd().parent.parent.parent / \"src\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You can install the package on pypi: https://pypi.org/project/paste-bio/" + "# Using the PASTE algorithm\n", + "\n", + "This notebook highlights the creation of slices (AnnData objects), usage of the `pairwise_align` and `center_align` functions of `paste3`, along with stacking and plotting functionalities.\n", + "\n", + "**This notebook primarily highlights how you would use the `paste3` package in `PASTE` (i.e.
full alignment) mode, when the slices overlap over the full 2D assayed region, with a similar field of view and similar number and proportion of cell types.**" ] }, { @@ -45,7 +49,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Read data and create AnnData object" + "## Read data and create AnnData objects" ] }, { @@ -529,179 +533,6 @@ "plot_slice(new_slices[3], slice_colors[3], ax=axs[1, 1])\n", "plt.show()" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Gpu Implementation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "POT allows us to write backend agnostic code, allowing us to use Numpy, Pytorch, etc to calculate our computations (https://pythonot.github.io/gen_modules/ot.backend.html).\n", - "\n", - "We have updated our code to include gpu support for Pytorch." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, you want to make sure you have torch installed. One way to check is by running:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import ot\n", - "\n", - "ot.backend.get_backend_list()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We check to make sure you have access to gpu. PASTE automatically does this check for you, but it is still helpful to know if you want to debug why you can't seem to access your gpu." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "torch.cuda.is_available()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running PASTE with gpu" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note: Since the breast dataset is small, cpu may actually be faster than gpu in this particular case. 
For larger datasets, you will see a greater improvement in gpu vs cpu.**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we read in our data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_dir = \"../../../tests/data/\"\n", - "\n", - "\n", - "# Assume that the coordinates of slices are named slice_name + \"_coor.csv\"\n", - "def load_slices(data_dir, slice_names):\n", - " slices = []\n", - " for slice_name in slice_names:\n", - " slice_i = sc.read_csv(data_dir + slice_name + \".csv\")\n", - " slice_i_coor = np.genfromtxt(data_dir + slice_name + \"_coor.csv\", delimiter=\",\")\n", - " slice_i.obsm[\"spatial\"] = slice_i_coor\n", - " # Preprocess slices\n", - " sc.pp.filter_genes(slice_i, min_counts=15)\n", - " sc.pp.filter_cells(slice_i, min_counts=100)\n", - " slices.append(slice_i)\n", - " return slices\n", - "\n", - "\n", - "slices = load_slices(data_dir, [\"slice1\", \"slice2\", \"slice3\", \"slice4\"])\n", - "slice1, slice2, slice3, slice4 = slices" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, running with gpu is as easy as setting two parameters in our function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start = time.time()\n", - "\n", - "pi12, _ = pairwise_align(slice1, slice2, use_gpu=True)\n", - "pi23, _ = pairwise_align(slice2, slice3, use_gpu=True)\n", - "pi34, _ = pairwise_align(slice3, slice4, use_gpu=True)\n", - "\n", - "print(\"Runtime: \" + str(time.time() - start))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pd.DataFrame(pi12.cpu().numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We do the same with `center_align()`. \n", - "\n", - "Note: This time, we skip providing initial mappings `pi_init = b` as previously done above." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "slices = load_slices(data_dir, [\"slice1\", \"slice2\", \"slice3\", \"slice4\"])\n", - "slice1, slice2, slice3, slice4 = slices\n", - "\n", - "slices = [slice1, slice2, slice3, slice4]\n", - "initial_slice = slice1.copy()\n", - "lmbda = len(slices) * [1 / len(slices)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start = time.time()\n", - "\n", - "center_slice, pis = center_align(\n", - " initial_slice,\n", - " slices,\n", - " lmbda,\n", - " random_seed=5,\n", - " use_gpu=True,\n", - ")\n", - "\n", - "print(\"Runtime: \" + str(time.time() - start))" - ] } ], "metadata": { diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index c53ff11..530f2eb 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -1,8 +1,11 @@ Tutorials ========= -These notebook(s) should serve examples of how to use PASTE/ PASTE2 for your data analysis. +These notebook(s) provide sample code on how to use PASTE (full alignment), +PASTE2 (partial alignment), or PASTE3 (full or partial alignment) for your +data analysis. .. 
nbgallery:: notebooks/paste_tutorial notebooks/paste2_tutorial + notebooks/paste3_tutorial diff --git a/src/paste3/paste.py b/src/paste3/paste.py index 32b3934..1a366e8 100644 --- a/src/paste3/paste.py +++ b/src/paste3/paste.py @@ -443,7 +443,7 @@ def center_ot( spot_weights: list[float] | None = None, numItermax: int = 200, ) -> tuple[list[np.ndarray], np.ndarray]: - r"""Computes the optimal mappings \Pi^{(1)}, \ldots, \Pi^{(t)} given W (specified features) + r"""Computes the optimal mappings :math:`\Pi^{(1)}, \ldots, \Pi^{(t)}` given W (specified features) and H (coefficient matrix) by solving the pairwise slice alignment problem between the center slice and each slices separately @@ -813,7 +813,7 @@ def line_search_partial( ---------- alpha : float Regularization parameter balancing transcriptional dissimilarity and spatial distance among aligned spots. - Setting \alpha = 0 uses only transcriptional information, while \alpha = 1 uses only spatial coordinates. + Setting :math:`\\alpha = 0` uses only transcriptional information, while :math:`\\alpha = 1` uses only spatial coordinates. exp_dissim_matrix : torch.Tensor Expression dissimilarity matrix between two slices. pi : torch.Tensor From a6295f90999147cd8b73ba27e4acaebb51b9da22 Mon Sep 17 00:00:00 2001 From: Vineet Bansal Date: Wed, 8 Jan 2025 18:10:15 -0500 Subject: [PATCH 2/4] Update README.md --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4d96200..664cb10 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ [![Coverage Status](https://coveralls.io/repos/github/raphael-group/paste3/badge.svg?branch=main)](https://coveralls.io/github/raphael-group/paste3?branch=main) [![Docs](https://github.com/raphael-group/paste3/actions/workflows/docs.yml/badge.svg)](https://raphael-group.github.io/paste3/) +