Add Leiden clustering and update documentation #334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

newmana merged 7 commits into master from newmana/add_features

Sep 17, 2025

HISTORY.rst

-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,11 @@ @@
     History
     =======
+    1.1.2 (2025-09-17)
+    ------------------
+    * Add Leiden clustering wrapper.
+    * Fix documentation, refactor code in spatial.SME.
     1.1.1 (2025-07-07)
     ------------------
     * Support Python 3.10.x
@@ Expand Down @@

docs/release_notes/1.1.2.rst

-Original file line number
+Diff line change
@@ -0,0 +1,6 @@
+    1.1.2 `2025-09-17`
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+    .. rubric:: Features
+    * Add Leiden clustering wrapper.
+    * Fix documentation, refactor code in spatial.SME.

docs/release_notes/index.rst

-Original file line number
+Diff line change
@@ -1,6 +1,8 @@
     Release Notes
     ===================================================
+    .. include:: 1.1.2.rst
     .. include:: 1.1.1.rst
     .. include:: 0.4.6.rst
@@ Expand Down @@

pyproject.toml

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
  
    [project]

    name = "stlearn"

    version = "1.1.1"

    version = "1.1.2"

    authors = [

        {name = "Genomics and Machine Learning lab", email = "andrew.newman@uq.edu.au"},

    ]

    @@ -14,7 +14,7 @@ license = {text = "BSD license"}
  
    requires-python = "~=3.10.0"

    keywords = ["stlearn"]

    classifiers = [

        "Development Status :: 2 - Pre-Alpha",

        "Development Status :: 5 - Production/Stable",

        "Intended Audience :: Developers",

        "License :: OSI Approved :: BSD License",

        "Natural Language :: English",

requirements.txt

-Original file line number
+Diff line change
@@ Expand Up / @@ -11,4 +11,5 @@ tensorflow==2.14.1 @@
     keras==2.14.0
     types-tensorflow>=2.8.0
     imageio==2.37.0
-    scipy==1.11.4
+    scipy==1.11.4
+    scikit-learn==1.7.0

stlearn/spatial/SME/__init__.py

-Original file line number
+Diff line change
@@ -1,5 +1,6 @@
-    from .impute import SME_impute0, pseudo_spot
-    from .normalize import SME_normalize
+    from .pseudo_spot import pseudo_spot
+    from .sme_impute0 import SME_impute0
+    from .sme_normalize import SME_normalize
     __all__ = [
         "SME_normalize",
@@ Expand Down @@

stlearn/spatial/SME/_weighting_matrix.py

-Original file line number
+Diff line change
@@ -1,7 +1,9 @@
+    import math
     from typing import Literal
     import numpy as np
     from anndata import AnnData
+    from sklearn.linear_model import LinearRegression  # type: ignore
     from sklearn.metrics import pairwise_distances
     from tqdm import tqdm
@@ Expand All / @@ -17,16 +19,9 @@ @@
     ]
-    def calculate_weight_matrix(
-        adata: AnnData,
-        adata_imputed: AnnData | None = None,
-        pseudo_spots: bool = False,
-        platform: _PLATFORM = "Visium",
-    ) -> AnnData | None:
-        import math
-        from sklearn.linear_model import LinearRegression
+    def row_col_by_platform(
+        adata, platform
+    ) -> tuple[LinearRegression, LinearRegression, float]:
         rate: float
         if platform == "Visium":
             img_row = adata.obs["imagerow"]
@@ Expand All / @@ -46,64 +41,61 @@ def calculate_weight_matrix( @@
                     {platform!r} does not support.
                     """
             )
-        reg_row = LinearRegression().fit(array_row.values.reshape(-1, 1), img_row)
-        reg_col = LinearRegression().fit(array_col.values.reshape(-1, 1), img_col)
-        if pseudo_spots and adata_imputed:
-            pd = pairwise_distances(
-                adata_imputed.obs[["imagecol", "imagerow"]],
-                adata.obs[["imagecol", "imagerow"]],
-                metric="euclidean",
-            )
-            unit = math.sqrt(reg_row.coef_**2 + reg_col.coef_**2)
-            pd_norm = np.where(pd >= unit, 0, 1)
-            md = 1 - pairwise_distances(
-                adata_imputed.obsm["X_morphology"],
-                adata.obsm["X_morphology"],
-                metric="cosine",
-            )
-            md[md < 0] = 0
-            adata_imputed.uns["physical_distance"] = pd_norm
-            adata_imputed.uns["morphological_distance"] = md
-            adata_imputed.uns["weights_matrix_all"] = (
-                adata_imputed.uns["physical_distance"]
-                * adata_imputed.uns["morphological_distance"]
-            )
-        else:
-            pd = pairwise_distances(adata.obs[["imagecol", "imagerow"]], metric="euclidean")
-            unit = math.sqrt(reg_row.coef_**2 + reg_col.coef_**2)
-            pd_norm = np.where(pd >= rate * unit, 0, 1)
-            md = 1 - pairwise_distances(adata.obsm["X_morphology"], metric="cosine")
-            md[md < 0] = 0
-            gd = 1 - pairwise_distances(adata.obsm["X_pca"], metric="correlation")
-            adata.uns["gene_expression_correlation"] = gd
-            adata.uns["physical_distance"] = pd_norm
-            adata.uns["morphological_distance"] = md
-            adata.uns["weights_matrix_all"] = (
-                adata.uns["physical_distance"]
-                * adata.uns["morphological_distance"]
-                * adata.uns["gene_expression_correlation"]
-            )
-            adata.uns["weights_matrix_pd_gd"] = (
-                adata.uns["physical_distance"] * adata.uns["gene_expression_correlation"]
-            )
-            adata.uns["weights_matrix_pd_md"] = (
-                adata.uns["physical_distance"] * adata.uns["morphological_distance"]
-            )
-            adata.uns["weights_matrix_gd_md"] = (
-                adata.uns["gene_expression_correlation"]
-                * adata.uns["morphological_distance"]
-            )
-        return adata
+        regression = LinearRegression()
+        reg_row: LinearRegression = regression.fit(array_row.values.reshape(-1, 1), img_row)  # type: ignore
+        reg_col: LinearRegression = regression.fit(array_col.values.reshape(-1, 1), img_col)  # type: ignore
+        return reg_col, reg_row, rate
+    def weight_matrix(adata, platform):
+        reg_col, reg_row, rate = row_col_by_platform(adata, platform)
+        pd = pairwise_distances(adata.obs[["imagecol", "imagerow"]], metric="euclidean")
+        unit = math.sqrt(reg_row.coef_[0] ** 2 + reg_col.coef_[0] ** 2)
+        pd_norm = np.where(pd >= rate * unit, 0, 1)
+        md = 1 - pairwise_distances(adata.obsm["X_morphology"], metric="cosine")
+        md[md < 0] = 0
+        gd = 1 - pairwise_distances(adata.obsm["X_pca"], metric="correlation")
+        adata.uns["gene_expression_correlation"] = gd
+        adata.uns["physical_distance"] = pd_norm
+        adata.uns["morphological_distance"] = md
+        adata.uns["weights_matrix_all"] = (
+            adata.uns["physical_distance"]
+            * adata.uns["morphological_distance"]
+            * adata.uns["gene_expression_correlation"]
+        )
+        adata.uns["weights_matrix_pd_gd"] = (
+            adata.uns["physical_distance"] * adata.uns["gene_expression_correlation"]
+        )
+        adata.uns["weights_matrix_pd_md"] = (
+            adata.uns["physical_distance"] * adata.uns["morphological_distance"]
+        )
+        adata.uns["weights_matrix_gd_md"] = (
+            adata.uns["gene_expression_correlation"] * adata.uns["morphological_distance"]
+        )
+    def weight_matrix_imputed(adata, adata_imputed, platform):
+        reg_col, reg_row, _ = row_col_by_platform(adata, platform)
+        pd = pairwise_distances(
+            adata_imputed.obs[["imagecol", "imagerow"]],
+            adata.obs[["imagecol", "imagerow"]],
+            metric="euclidean",
+        )
+        unit = math.sqrt(reg_row.coef_[0] ** 2 + reg_col.coef_[0] ** 2)
+        pd_norm = np.where(pd >= unit, 0, 1)
+        md = 1 - pairwise_distances(
+            adata_imputed.obsm["X_morphology"],
+            adata.obsm["X_morphology"],
+            metric="cosine",
+        )
+        md[md < 0] = 0
+        adata_imputed.uns["physical_distance"] = pd_norm
+        adata_imputed.uns["morphological_distance"] = md
+        adata_imputed.uns["weights_matrix_all"] = (
+            adata_imputed.uns["physical_distance"]
+            * adata_imputed.uns["morphological_distance"]
+        )
     def impute_neighbour(
@@ Expand Down @@

stlearn/spatial/SME/impute.py → stlearn/spatial/SME/pseudo_spot.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -12,79 +12,10 @@ @@
     from ._weighting_matrix import (
         _PLATFORM,
         _WEIGHTING_MATRIX,
-        calculate_weight_matrix,
         impute_neighbour,
+        weight_matrix_imputed,
     )
-    def SME_impute0(
-        adata: AnnData,
-        use_data: str = "raw",
-        weights: _WEIGHTING_MATRIX = "weights_matrix_all",
-        platform: _PLATFORM = "Visium",
-        copy: bool = False,
-    ) -> AnnData | None:
-        """\
-        using spatial location (S), tissue morphological feature (M) and gene
-        expression (E) information to impute missing values
-        Parameters
-        ----------
-        adata
-            Annotated data matrix.
-        use_data
-            input data, can be `raw` counts or log transformed data
-        weights
-            weighting matrix for imputation.
-            if `weights_matrix_all`, matrix combined all information from spatial
-            location (S), tissue morphological feature (M) and gene expression (E)
-            if `weights_matrix_pd_md`, matrix combined information from spatial
-            location (S), tissue morphological feature (M)
-        platform
-            `Visium` or `Old_ST`
-        copy
-            Return a copy instead of writing to adata.
-        Returns
-        -------
-        Anndata
-        """
-        adata = adata.copy() if copy else adata
-        if use_data == "raw":
-            if isinstance(adata.X, csr_matrix):
-                count_embed = adata.X.toarray()
-            elif isinstance(adata.X, np.ndarray):
-                count_embed = adata.X
-            elif isinstance(adata.X, pd.Dataframe):
-                count_embed = adata.X.values
-            else:
-                raise ValueError(
-                    f"""\
-                        {type(adata.X)} is not a valid type.
-                        """
-                )
-        else:
-            count_embed = adata.obsm[use_data]
-        calculate_weight_matrix(adata, platform=platform)
-        impute_neighbour(adata, count_embed=count_embed, weights=weights)
-        imputed_data = adata.obsm["imputed_data"].astype(float)
-        mask = count_embed != 0
-        count_embed_ = count_embed.astype(float)
-        count_embed_[count_embed_ == 0] = np.nan
-        adjusted_count_matrix = np.nanmean(np.array([count_embed_, imputed_data]), axis=0)
-        adjusted_count_matrix[mask] = count_embed[mask]
-        key_added = use_data + "_SME_imputed"
-        adata.obsm[key_added] = adjusted_count_matrix
-        print("The data adjusted by SME is added to adata.obsm['" + key_added + "']")
-        return adata if copy else None
     _COPY = Literal["pseudo_spot_adata", "combined_adata"]
@@ Expand All / @@ -98,9 +29,8 @@ def pseudo_spot( @@
         copy: _COPY = "pseudo_spot_adata",
     ) -> AnnData | None:
         """\
-        using spatial location (S), tissue morphological feature (M) and gene
-        expression (E) information to impute gap between spots and increase resolution
-        for gene detection
+        Improve spatial resolution by imputing (creating) new spots from existing ones
+        using spatial, morphological, and expression (SME) information.
         Parameters
         ----------
@@ Expand Down Expand Up / @@ -306,9 +236,7 @@ def pseudo_spot( @@
         else:
             count_embed = adata.obsm[use_data]
-        calculate_weight_matrix(
-            adata, pseudo_spot_adata, pseudo_spots=True, platform=platform
-        )
+        weight_matrix_imputed(adata, pseudo_spot_adata, platform=platform)
         impute_neighbour(pseudo_spot_adata, count_embed=count_embed, weights=weights)
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add Leiden clustering and update documentation #334

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!