diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index a0935c6d..aedd3d86 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.10.18]
+        python-version: [3.12.12]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/.readthedocs.yml b/.readthedocs.yml
index e841d344..8c918f7a 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -8,7 +8,7 @@ version: 2
 build:
   os: ubuntu-24.04
   tools:
-    python: "3.10"
+    python: "3.12"
 
 # Build documentation in the "docs/" directory with Sphinx
 sphinx:
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 4a9d9091..720229cb 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -66,11 +66,17 @@ Ready to contribute? Here's how to set up `stlearn` for local development.
 3. Install your local copy into a virtualenv. This is how you set up your fork
    for local development::
 
-    $ conda create -n stlearn-dev python=3.10 --y
+    $ conda create -n stlearn-dev python=3.12 -y
     $ conda activate stlearn-dev
     $ cd stlearn/
     $ pip install -e .[dev,test]
 
+    If the louvain package fails to install on macOS, install cmake first (e.g. via Homebrew):
+    $ brew install cmake
+
+    You can also use conda to install these dependencies (after creating the environment):
+    $ conda install -c conda-forge louvain leidenalg python-igraph
+
 Or if you prefer pip/virtualenv::
 
     $ python -m venv stlearn-env
diff --git a/HISTORY.rst b/HISTORY.rst
index 923e63bb..2ed1f11a 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,6 +2,12 @@
 History
 =======
 
+1.2.0 (2025-10-20)
+------------------
+* Added support for Python 3.11 and 3.12.
+* Upgraded scanpy to 1.11; clustering results may differ.
+* Added more CCI tests.
+
 1.1.5 (2025-09-17)
 ------------------
 * Add Leiden clustering wrapper.
diff --git a/docs/installation.rst b/docs/installation.rst
index f46d62f8..93f8beda 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -13,7 +13,7 @@ Install by PyPi
 Prepare conda environment for stLearn
 ::
 
-    conda create -n stlearn python=3.10 --y
+    conda create -n stlearn python=3.12 -y
     conda activate stlearn
 
 **Step 2:**
diff --git a/docs/release_notes/1.2.0.rst b/docs/release_notes/1.2.0.rst
new file mode 100644
index 00000000..4fde7e01
--- /dev/null
+++ b/docs/release_notes/1.2.0.rst
@@ -0,0 +1,8 @@
+1.2.0 `2025-10-20`
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. rubric:: Features
+
+* Added support for Python 3.11 and 3.12.
+* Upgraded scanpy to 1.11; clustering results may differ.
+* Added more CCI tests.
\ No newline at end of file
diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst
index 390df001..c3d4edff 100644
--- a/docs/release_notes/index.rst
+++ b/docs/release_notes/index.rst
@@ -1,6 +1,8 @@
 Release Notes
 ===================================================
 
+.. include:: 1.2.0.rst
+
 .. include:: 1.1.5.rst
 
 .. include:: 1.1.1.rst
diff --git a/pyproject.toml b/pyproject.toml
index a775e514..c828cc9f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "stlearn"
-version = "1.1.5"
+version = "1.2.0"
 authors = [
     {name = "Genomics and Machine Learning lab", email = "andrew.newman@uq.edu.au"},
 ]
 description = "A downstream analysis toolkit for Spatial Transcriptomic data"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {text = "BSD license"}
-requires-python = "~=3.10.0"
+requires-python = ">=3.10,<3.13"
 keywords = ["stlearn"]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
@@ -19,6 +19,8 @@ classifiers = [
     "License :: OSI Approved :: BSD License",
     "Natural Language :: English",
     "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 dynamic = ["dependencies"]
 
@@ -34,6 +36,7 @@ dev = [
     "furo==2024.8.6",
     "myst-parser>=0.18",
     "nbsphinx>=0.9.0",
+    "types-tensorflow>=2.8.0",
     "sphinx-autodoc-typehints>=1.24.0",
     "sphinx-autosummary-accessors>=2023.4.0",
 ]
diff --git a/requirements.txt b/requirements.txt
index 6c059949..dfe0ccfd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,15 @@
-bokeh==3.7.3
-click==8.2.1
-leidenalg==0.10.2
-louvain==0.8.2
-numba==0.58.1
-numpy==1.26.4
-pillow==11.3.0
-scanpy==1.10.4
-scikit-image==0.22.0
-tensorflow==2.14.1
-keras==2.14.0
-types-tensorflow>=2.8.0
-imageio==2.37.0
-scipy==1.11.4
-scikit-learn==1.7.0
\ No newline at end of file
+bokeh>=3.7.0,<4.0
+click>=8.2.0,<9.0
+leidenalg>=0.10.0,<0.11
+louvain>=0.8.2
+numba>=0.58.1
+numpy>=1.26.0,<2.0
+pillow>=11.0.0,<12.0
+scanpy>=1.11.0,<2.0
+scikit-image>=0.22.0,<0.23
+tensorflow>=2.14.1
+keras>=2.14.0
+pandas>=2.3.0
+imageio>=2.37.0,<3.0
+scipy>=1.11.0,<2.0
+scikit-learn>=1.7.0,<2.0
\ No newline at end of file
diff --git a/stlearn/embedding/pca.py b/stlearn/embedding/pca.py
index 8870994e..42a2ab54 100644
--- a/stlearn/embedding/pca.py
+++ b/stlearn/embedding/pca.py
@@ -9,7 +9,7 @@ def run_pca(
     data: AnnData | np.ndarray | spmatrix,
     n_comps: int = 50,
     zero_center: bool | None = True,
-    svd_solver: str = "auto",
+    svd_solver: str = "arpack",
     random_state: int | RandomState | None = 0,
     return_info: bool = False,
     use_highly_variable: bool | None = None,
@@ -38,11 +38,11 @@ def run_pca(
         Passing `None` decides automatically based on sparseness of the data.
     svd_solver
         SVD solver to use:
-        `'arpack'`
+        `'arpack'` (the default; deterministic)
           for the ARPACK wrapper in SciPy (:func:`~scipy.sparse.linalg.svds`)
         `'randomized'`
          for the randomized algorithm due to Halko (2009).
-        `'auto'` (the default)
+        `'auto'`
          chooses automatically depending on the size of the problem.
     random_state
         Change to use different initial states for the optimization.
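Making `arpack` the new default trades a little speed on large problems for reproducibility: unlike `auto`, which may pick the randomized Halko solver, ARPACK yields identical embeddings for a fixed `random_state`. A minimal sketch of the behaviour on synthetic data, calling `scanpy.pp.pca` directly (`run_pca` exposes the same `svd_solver` argument):

```python
import numpy as np
import scanpy as sc
from anndata import AnnData

# Synthetic counts: 100 spots x 50 genes.
rng = np.random.default_rng(0)
adata_a = AnnData(rng.poisson(1.0, (100, 50)).astype(np.float32))
adata_b = adata_a.copy()

# With svd_solver="arpack", repeated runs give identical embeddings;
# "auto" may fall back to the randomized solver on large matrices.
sc.pp.pca(adata_a, n_comps=20, svd_solver="arpack", random_state=0)
sc.pp.pca(adata_b, n_comps=20, svd_solver="arpack", random_state=0)

assert np.allclose(adata_a.obsm["X_pca"], adata_b.obsm["X_pca"])
```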
diff --git a/stlearn/preprocessing/normalize.py b/stlearn/preprocessing/normalize.py
index 376a2f04..e5ecbfad 100644
--- a/stlearn/preprocessing/normalize.py
+++ b/stlearn/preprocessing/normalize.py
@@ -1,6 +1,3 @@
-from collections.abc import Iterable
-from typing import Literal
-
 import numpy as np
 import scanpy
 from anndata import AnnData
@@ -12,8 +9,7 @@ def normalize_total(
     exclude_highly_expressed: bool = False,
     max_fraction: float = 0.05,
     key_added: str | None = None,
-    layers: Literal["all"] | Iterable[str] | None = None,
-    layer_norm: str | None = None,
+    layer: str | None = None,
     inplace: bool = True,
 ) -> dict[str, np.ndarray] | None:
     """\
@@ -48,18 +44,6 @@ def normalize_total(
     key_added
         Name of the field in `adata.obs` where the normalization factor is
         stored.
-    layers
-        List of layers to normalize. Set to `'all'` to normalize all layers.
-    layer_norm
-        Specifies how to normalize layers:
-        * If `None`, after normalization, for each layer in *layers* each cell
-          has a total count equal to the median of the *counts_per_cell* before
-          normalization of the layer.
-        * If `'after'`, for each layer in *layers* each cell has
-          a total count equal to `target_sum`.
-        * If `'X'`, for each layer in *layers* each cell has a total count
-          equal to the median of total counts for observations (cells) of
-          `adata.X` before normalization.
     inplace
         Whether to update `adata` or return dictionary with normalized copies
         of `adata.X` and `adata.layers`.
@@ -76,8 +60,7 @@ def normalize_total(
         exclude_highly_expressed=exclude_highly_expressed,
         max_fraction=max_fraction,
         key_added=key_added,
-        layers=layers,
-        layer_norm=layer_norm,
+        layer=layer,
         inplace=inplace,
     )
diff --git a/stlearn/wrapper/read.py b/stlearn/wrapper/read.py
index 02f75209..f91bf5dc 100644
--- a/stlearn/wrapper/read.py
+++ b/stlearn/wrapper/read.py
@@ -11,8 +11,10 @@
 import pandas as pd
 import scanpy
 from anndata import AnnData
+from h5py import File
 from matplotlib.image import imread
 from PIL import Image
+from scanpy import read_csv
 
 import stlearn
 from stlearn.types import _BACKGROUND, _QUALITY
@@ -87,8 +89,6 @@ def Read10X(
 
     adata.uns["spatial"] = dict()
 
-    from h5py import File
-
     with File(path / count_file, mode="r") as f:
         attrs = dict(f.attrs)
 
@@ -369,7 +369,6 @@ def ReadMERFISH(
     coordinates = pd.read_excel(spatial_file, index_col=0)
     if coordinates.min().min() < 0:
         coordinates = coordinates + np.abs(coordinates.min().min()) + 100
-    from scanpy import read_csv
 
     counts = read_csv(count_matrix_file).transpose()
diff --git a/tests/test_CCI.py b/tests/test_CCI.py
index 7dc639f5..0f9f2cbe 100644
--- a/tests/test_CCI.py
+++ b/tests/test_CCI.py
@@ -12,6 +12,12 @@ import stlearn.tl.cci.het_helpers as het_hs
 
 from tests.utils import read_test_data
 
+# Cell type annotation for each spot (A-G), one entry per spot.
+CELL_TYPE_ANNOTATIONS = ["CT1", "CT2", "CT3", "CT2", "CT1", "CT3", "CT2"]
+
+# 3 cell types: A,E -> CT1; B,D,G -> CT2; C,F -> CT3.
+CELL_TYPE_LABELS = np.array(["CT1", "CT2", "CT3"])
+
 global adata
 adata = read_test_data()
@@ -212,15 +218,17 @@ def test_get_interactions(self):
         3 neighbours express receptor:
             * One is cell type 1, two are cell type 2.
""" - cell_annots = [1, 2, 3, 2, 1, 3, 2] - cell_data = np.zeros((len(cell_annots), 3), dtype=np.float64) - for i, annot in enumerate(cell_annots): - cell_data[i, annot - 1] = 1 - all_set = np.array([str(i) for i in range(1, 4)]) - sig_bool = np.array([True] + ([False] * (len(cell_annots) - 1))) - l_bool = sig_bool - r_bool = np.array([False] * len(cell_annots)) - r_bool[[3, 4, 6]] = True + + # Create 0 matrix using the above annotations to create position. + # i.e. CT1 = 0. + cell_data = TestCCI.create_cci(CELL_TYPE_ANNOTATIONS, CELL_TYPE_LABELS) + + # Create middle ligand interacting with 3 neighbour receptors. + sig_bool = np.array([True] + ([False] * (len(CELL_TYPE_ANNOTATIONS) - 1))) + ligand_boolean = sig_bool.copy() + + receptor_boolean = np.array([False] * len(CELL_TYPE_ANNOTATIONS)) + receptor_boolean[[3, 4, 6]] = True # NOTE that format of output is an edge list for each celltype-celltype # interaction, where edge list represents interactions between: @@ -235,10 +243,10 @@ def test_get_interactions(self): cell_data, self.neighbourhood_bcs, self.neighbourhood_indices, - all_set, + CELL_TYPE_LABELS, sig_bool, - l_bool, - r_bool, + ligand_boolean, + receptor_boolean, 0, ) @@ -253,6 +261,48 @@ def test_get_interactions(self): self.assertEqual(len(observed_edgesi), len(expect_edgesi)) self.assertTrue(np.all(match_bool)) + def test_get_interaction_matrix(self): + """Test getting the interaction matrix for cell type pairs.""" + + # Create 0 matrix using the above annotations to create position. + # i.e. CT1 = 0. + cell_data = TestCCI.create_cci(CELL_TYPE_ANNOTATIONS, CELL_TYPE_LABELS) + + # Middle spot (A) is significant and expresses ligand + sig_bool = np.array([True] + ([False] * 6)) + ligand_bool = sig_bool.copy() + + # Neighbors D, E, G express receptor + receptor_bool = np.array([False] * len(CELL_TYPE_ANNOTATIONS)) + receptor_bool[[3, 4, 6]] = True + + # Get interaction matrix + int_matrix = het.get_interaction_matrix( + cell_data, + self.neighbourhood_bcs, + self.neighbourhood_indices, + CELL_TYPE_LABELS, + sig_bool, + ligand_bool, + receptor_bool, + cell_prop_cutoff=0.2 + ) + + # Expected: CT1 (A) -> CT2 (D,G): 2 interactions, CT1 -> CT1 (E): 1 interaction + # Matrix is [CT1->CT1, CT1->CT2, CT1->CT3, CT2->CT1, ...] + self.assertEqual(int_matrix.shape, (3, 3)) + self.assertEqual(int_matrix[0, 0], 1) # CT1 -> CT1 (A->E) + self.assertEqual(int_matrix[0, 1], 2) # CT1 -> CT2 (A->D, A->G) + self.assertEqual(int_matrix[0, 2], 0) # CT1 -> CT3 None + + @staticmethod + def create_cci(cell_annotations: list[str], unique_cell_type_labels): + cell_data = np.zeros((len(cell_annotations), len(unique_cell_type_labels)), + dtype=np.float64) + for i, annot in enumerate(cell_annotations): + ct_index = np.where(unique_cell_type_labels == annot)[0][0] + cell_data[i, ct_index] = 1 + return cell_data + # TODO next things to test: - # 1. Getting the interaction matrix. - # 2. Getting the LR scores. + # 1. Getting the LR scores.