From 26bf957c001a10e5f4187ce539cb35902679834a Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Fri, 23 Jan 2026 14:59:26 +0100 Subject: [PATCH 1/9] added two window calculations --- src/squidpy/tl/_sliding_window.py | 237 ++++++++++++++++++++++++++---- 1 file changed, 207 insertions(+), 30 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index a3a5840e0..a01cdd9d1 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -1,6 +1,7 @@ from __future__ import annotations from itertools import product +from typing import Literal import numpy as np import pandas as pd @@ -23,7 +24,8 @@ def sliding_window( coord_columns: tuple[str, str] = ("globalX", "globalY"), sliding_window_key: str = "sliding_window_assignment", spatial_key: str = "spatial", - drop_partial_windows: bool = False, + partial_windows: Literal["adaptive", "drop", "split"] | None = None, + max_nr_cells: int | None = None, copy: bool = False, ) -> pd.DataFrame | None: """ @@ -42,8 +44,14 @@ def sliding_window( overlap: int Overlap size between consecutive windows. (0 = no overlap) %(spatial_key)s - drop_partial_windows: bool - If True, drop windows that are smaller than the window size at the borders. + partial_windows: Literal["adaptive", "drop", "split"] | None + If None, possibly small windows at the edges are kept. + If 'adaptive', all windows might be shrunken a bit to avoid small windows at the edges. + If 'drop', possibly small windows at the edges are removed. + If 'split', windows are split into subwindows until not exceeding `max_nr_cells` + max_nr_cells: int | None + The maximum number of cells allowed after merging two windows. + Required if `partial_windows = split` copy: bool If True, return the result, otherwise save it to the adata object. @@ -54,6 +62,17 @@ def sliding_window( """ if overlap < 0: raise ValueError("Overlap must be non-negative.") + if overlap >= window_size: + raise ValueError("Overlap must be less than the window size.") + if overlap >= window_size // 2 and window_size == "adaptive": + raise ValueError("Overlap must be less than window_size // 2 when using 'adaptive'") + + if partial_windows == "split" and max_nr_cells is None: + raise ValueError("max_nr_cells must be set when partial_windows is 'split'.") + if partial_windows != "split" and max_nr_cells is not None: + logg.warning("Ignoring max_nr_cells as partial_windows is not 'split'.") + if partial_windows == "split" and overlap != 0: + logg.warning("Ignoring overlap as it cannot be used with 'split'.") if isinstance(adata, SpatialData): adata = adata.table @@ -119,7 +138,11 @@ def sliding_window( max_y=max_y, window_size=window_size, overlap=overlap, - drop_partial_windows=drop_partial_windows, + partial_windows=partial_windows, + lib_coords=lib_coords, + x_col=x_col, + y_col=y_col, + max_nr_cells=max_nr_cells, ) lib_key = f"{lib}_" if lib is not None else "" @@ -131,22 +154,18 @@ def sliding_window( y_start = window["y_start"] y_end = window["y_end"] - mask = ( - (lib_coords[x_col] >= x_start) - & (lib_coords[x_col] <= x_end) - & (lib_coords[y_col] >= y_start) - & (lib_coords[y_col] <= y_end) + mask = _get_window_mask( + x_col=x_col, + y_col=y_col, + lib_coords=lib_coords, + x_start=x_start, + x_end=x_end, + y_start=y_start, + y_end=y_end, ) obs_indices = lib_coords.index[mask] if overlap == 0: - mask = ( - (lib_coords[x_col] >= x_start) - & (lib_coords[x_col] <= x_end) - & (lib_coords[y_col] >= y_start) - & (lib_coords[y_col] <= y_end) - ) - obs_indices = lib_coords.index[mask] sliding_window_df.loc[obs_indices, sliding_window_key] = f"{lib_key}window_{idx}" else: @@ -174,6 +193,51 @@ def sliding_window( _save_data(adata, attr="obs", key=col_name, data=col_data) +def _get_window_mask( + x_col: str, + y_col: str, + lib_coords: pd.DataFrame, + x_start: int, + x_end: int, + y_start: int, + y_end: int, +) -> pd.Series: + """ + Compute a boolean mask selecting coordinates that fall within a given window. + + Parameters + ---------- + x_col: str + Column name in `lib_coords` containing x-coordinates. + y_col: str + Column name in `lib_coords` containing y-coordinates. + lib_coords: pd.DataFrame + DataFrame containing spatial coordinates (e.g. `adata.obs` subset for one library). + Coordinate values are expected to be integers. + x_start: int + Lower bound of the window in x-direction (inclusive). + x_end: int + Upper bound of the window in x-direction (inclusive). + y_start: int + Lower bound of the window in y-direction (inclusive). + y_end: int + Upper bound of the window in y-direction (inclusive). + + Returns + ------- + pd.Series + Boolean mask indicating which rows in `lib_coords` fall inside the specified window. + """ + mask = ( + (lib_coords[x_col] >= x_start) + & (lib_coords[x_col] <= x_end) + & (lib_coords[y_col] >= y_start) + & (lib_coords[y_col] <= y_end) + ) + + return mask + + def _calculate_window_corners( min_x: int, max_x: int, @@ -181,7 +245,11 @@ def _calculate_window_corners( max_y: int, window_size: int, overlap: int = 0, - drop_partial_windows: bool = False, + partial_windows: Literal["adaptive", "drop", "split"] | None = None, + lib_coords: pd.DataFrame | None = None, + x_col: str | None = None, + y_col: str | None = None, + max_nr_cells: int | None = None, ) -> pd.DataFrame: """ Calculate the corner points of all windows covering the area from min_x to max_x and min_y to max_y, @@ -199,23 +267,38 @@ def _calculate_window_corners( maximum Y coordinate window_size: float size of each window + lib_coords: pd.DataFrame | None + coordinates of all samples for one library + x_col: str | None + the column in `lib_coords` corresponding to the x coordinates + y_col: str | None + the column in `lib_coords` corresponding to the y coordinates overlap: float overlap between consecutive windows (must be less than window_size) - drop_partial_windows: bool - if True, drop border windows that are smaller than window_size; - if False, create smaller windows at the borders to cover the remaining space. + partial_windows: Literal["adaptive", "drop", "split"] | None + If None, possibly small windows at the edges are kept. + If 'adaptive', all windows might be shrunken a bit to avoid small windows at the edges. + If 'drop', possibly small windows at the edges are removed. + If 'split', windows are split into subwindows until not exceeding `max_nr_cells` Returns ------- windows: pandas DataFrame with columns ['x_start', 'x_end', 'y_start', 'y_end'] """ - if overlap < 0: - raise ValueError("Overlap must be non-negative.") - if overlap >= window_size: - raise ValueError("Overlap must be less than the window size.") + # adjust x and y window size if 'adaptive' + if partial_windows == "adaptive": + number_x_windows = np.ceil((max_x - min_x) / window_size) + number_y_windows = np.ceil((max_y - min_y) / window_size) - x_step = window_size - overlap - y_step = window_size - overlap + x_window_size = (max_x - min_x) / number_x_windows + y_window_size = (max_y - min_y) / number_y_windows + else: + x_window_size = window_size + y_window_size = window_size + + # create the step sizes for each window + x_step = x_window_size - overlap + y_step = y_window_size - overlap # Generate starting points x_starts = np.arange(min_x, max_x, x_step) @@ -224,16 +307,110 @@ def _calculate_window_corners( # Create all combinations of x and y starting points starts = list(product(x_starts, y_starts)) windows = pd.DataFrame(starts, columns=["x_start", "y_start"]) - windows["x_end"] = windows["x_start"] + window_size - windows["y_end"] = windows["y_start"] + window_size + windows["x_end"] = windows["x_start"] + x_window_size + windows["y_end"] = windows["y_start"] + y_window_size # Adjust windows that extend beyond the bounds - if not drop_partial_windows: + if partial_windows is None: windows["x_end"] = windows["x_end"].clip(upper=max_x) windows["y_end"] = windows["y_end"].clip(upper=max_y) - else: + elif partial_windows == "adaptive": + pass + elif partial_windows == "drop": valid_windows = (windows["x_end"] <= max_x) & (windows["y_end"] <= max_y) windows = windows[valid_windows] + elif partial_windows == "split": + # split the slide recursively into windows with at most max_nr_cells + coord_x_sorted = lib_coords.sort_values(by=[x_col]) + coord_y_sorted = lib_coords.sort_values(by=[y_col]) + + windows = _split_window( + max_nr_cells, x_col, y_col, coord_x_sorted, coord_y_sorted, min_x, max_x, min_y, max_y + ).sort_values(["x_start", "x_end", "y_start", "y_end"]) + else: + raise ValueError(f"{partial_windows} is not a valid partial_windows argument.") windows = windows.reset_index(drop=True) return windows[["x_start", "x_end", "y_start", "y_end"]] + + +def _split_window( + max_cells: int, + x_col: str, + y_col: str, + coord_x_sorted: pd.DataFrame, + coord_y_sorted: pd.DataFrame, + x_start: int, + x_end: int, + y_start: int, + y_end: int, +) -> pd.DataFrame: + """ + Recursively split a rectangular window into subwindows such that each subwindow + contains at most `max_cells` cells and at least `max_cells` // 2 cells. + + Parameters + ---------- + max_cells : int + Maximum number of cells allowed per window. + x_col : str + Name of the column in `coord_x_sorted` and `coord_y_sorted` corresponding to + x coordinates. + y_col : str + Name of the column in `coord_x_sorted` and `coord_y_sorted` corresponding to + y coordinates. + coord_x_sorted : pandas.DataFrame + DataFrame containing cell coordinates, sorted by `x_col`. + coord_y_sorted : pandas.DataFrame + DataFrame containing cell coordinates, sorted by `y_col`. + x_start : int + Left (minimum) x coordinate of the current window. + x_end : int + Right (maximum) x coordinate of the current window. + y_start : int + Bottom (minimum) y coordinate of the current window. + y_end : int + Top (maximum) y coordinate of the current window. + + Returns + ------- + windows: pandas DataFrame with columns ['x_start', 'x_end', 'y_start', 'y_end'] + """ + # return current window if it contains less cells than max_cells + n_cells = _get_window_mask(x_col, y_col, coord_x_sorted, x_start, x_end, y_start, y_end).sum() + + if n_cells <= max_cells: + return pd.DataFrame({"x_start": [x_start], "x_end": [x_end], "y_start": [y_start], "y_end": [y_end]}) + + # define start and stop indices of subsetted windows + sub_coord_x_sorted = coord_x_sorted[ + _get_window_mask(x_col, y_col, coord_x_sorted, x_start, x_end, y_start, y_end) + ].reset_index(drop=True) + + sub_coord_y_sorted = coord_y_sorted[ + _get_window_mask(x_col, y_col, coord_y_sorted, x_start, x_end, y_start, y_end) + ].reset_index(drop=True) + + middle_pos = len(sub_coord_x_sorted) // 2 + + if (x_end - x_start) > (y_end - y_start): + # vertical split + x_middle = sub_coord_x_sorted[x_col].iloc[middle_pos] + + indices = ((x_start, x_middle, y_start, y_end), (x_middle, x_end, y_start, y_end)) + else: + # horizontal split + y_middle = sub_coord_y_sorted.loc[middle_pos, y_col] + + indices = ((x_start, x_end, y_start, y_middle), (x_start, x_end, y_middle, y_end)) + + # recursively continue with either left&right or upper&lower windows pairs + windows = [] + for x_start, x_end, y_start, y_end in indices: + windows.append( + _split_window( + max_cells, x_col, y_col, sub_coord_x_sorted, sub_coord_y_sorted, x_start, x_end, y_start, y_end + ) + ) + + return pd.concat(windows) From f86074e8f3381395f5498bae3aaf747dbef784b3 Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Thu, 5 Feb 2026 14:25:28 +0100 Subject: [PATCH 2/9] bugfix partial_windows --- src/squidpy/tl/_sliding_window.py | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index a01cdd9d1..bd7333a0c 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -46,9 +46,9 @@ def sliding_window( %(spatial_key)s partial_windows: Literal["adaptive", "drop", "split"] | None If None, possibly small windows at the edges are kept. - If 'adaptive', all windows might be shrunken a bit to avoid small windows at the edges. - If 'drop', possibly small windows at the edges are removed. - If 'split', windows are split into subwindows until not exceeding `max_nr_cells` + If `adaptive`, all windows might be shrunken a bit to avoid small windows at the edges. + If `drop`, possibly small windows at the edges are removed. + If `split`, windows are split into subwindows until not exceeding `max_nr_cells` max_nr_cells: int | None The maximum number of cells allowed after merging two windows. Required if `partial_windows = split` @@ -60,19 +60,18 @@ def sliding_window( If ``copy = True``, returns the sliding window annotation(s) as pandas dataframe Otherwise, stores the sliding window annotation(s) in .obs. """ - if overlap < 0: - raise ValueError("Overlap must be non-negative.") - if overlap >= window_size: - raise ValueError("Overlap must be less than the window size.") - if overlap >= window_size // 2 and window_size == "adaptive": - raise ValueError("Overlap must be less than window_size // 2 when using 'adaptive'") - - if partial_windows == "split" and max_nr_cells is None: - raise ValueError("max_nr_cells must be set when partial_windows is 'split'.") - if partial_windows != "split" and max_nr_cells is not None: - logg.warning("Ignoring max_nr_cells as partial_windows is not 'split'.") - if partial_windows == "split" and overlap != 0: - logg.warning("Ignoring overlap as it cannot be used with 'split'.") + if partial_windows == "split": + if max_nr_cells is None: + raise ValueError("`max_nr_cells` must be set when `partial_windows == split`.") + if window_size is not None: + logg.warning(f"Ingoring `window_size` when using `{partial_windows}`.") + if overlap != 0: + logg.warning("Ignoring `overlap` as it cannot be used with `split`.") + else: + if max_nr_cells is not None: + logg.warning("Ignoring `max_nr_cells` as `partial_windows != split`.") + if overlap < 0: + raise ValueError("Overlap must be non-negative.") if isinstance(adata, SpatialData): adata = adata.table @@ -105,8 +104,13 @@ def sliding_window( # mostly arbitrary choice, except that full integers usually generate windows with 1-2 cells at the borders window_size = max(int(np.floor(coord_range // 3.95)), 1) - if window_size <= 0: - raise ValueError("Window size must be larger than 0.") + if partial_windows != "split": + if window_size <= 0: + raise ValueError("Window size must be larger than 0.") + if overlap >= window_size: + raise ValueError("Overlap must be less than the window size.") + if overlap >= window_size // 2 and window_size == "adaptive": + raise ValueError("Overlap must be less than `window_size` // 2 when using `adaptive`.") if library_key is not None and library_key not in adata.obs: raise ValueError(f"Library key '{library_key}' not found in adata.obs") @@ -265,7 +269,7 @@ def _calculate_window_corners( minimum Y coordinate max_y: float maximum Y coordinate - window_size: float + window_size: int size of each window lib_coords: pd.DataFrame | None coordinates of all samples for one library @@ -290,8 +294,8 @@ def _calculate_window_corners( number_x_windows = np.ceil((max_x - min_x) / window_size) number_y_windows = np.ceil((max_y - min_y) / window_size) - x_window_size = (max_x - min_x) / number_x_windows - y_window_size = (max_y - min_y) / number_y_windows + x_window_size = np.ceil((max_x - min_x) / number_x_windows) + y_window_size = np.ceil((max_y - min_y) / number_y_windows) else: x_window_size = window_size y_window_size = window_size From 248bae76f7f4dff25a8c5cc683ff2d65a5cd038b Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Thu, 5 Feb 2026 17:55:54 +0100 Subject: [PATCH 3/9] style improvement --- src/squidpy/tl/_sliding_window.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index bd7333a0c..c0ecb9e6f 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -64,12 +64,12 @@ def sliding_window( if max_nr_cells is None: raise ValueError("`max_nr_cells` must be set when `partial_windows == split`.") if window_size is not None: - logg.warning(f"Ingoring `window_size` when using `{partial_windows}`.") + logg.warning(f"Ingoring `window_size` when using `{partial_windows}`") if overlap != 0: - logg.warning("Ignoring `overlap` as it cannot be used with `split`.") + logg.warning("Ignoring `overlap` as it cannot be used with `split`") else: if max_nr_cells is not None: - logg.warning("Ignoring `max_nr_cells` as `partial_windows != split`.") + logg.warning("Ignoring `max_nr_cells` as `partial_windows != split`") if overlap < 0: raise ValueError("Overlap must be non-negative.") @@ -121,7 +121,7 @@ def sliding_window( sliding_window_df = pd.DataFrame(index=adata.obs.index) if sliding_window_key in adata.obs: - logg.warning(f"Overwriting existing column '{sliding_window_key}' in adata.obs.") + logg.warning(f"Overwriting existing column '{sliding_window_key}' in adata.obs") for lib in libraries: if lib is not None: From 80b464f37a02da01513e4862a4ead9b0f0992157 Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Thu, 5 Feb 2026 17:57:59 +0100 Subject: [PATCH 4/9] updated tests --- tests/tools/test_sliding_window.py | 225 +++++++++++++++++------------ 1 file changed, 136 insertions(+), 89 deletions(-) diff --git a/tests/tools/test_sliding_window.py b/tests/tools/test_sliding_window.py index 3dd670b00..57528752c 100644 --- a/tests/tools/test_sliding_window.py +++ b/tests/tools/test_sliding_window.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pandas as pd import pytest from anndata import AnnData @@ -8,29 +9,30 @@ class TestSlidingWindow: @pytest.mark.parametrize( - "windowsize_overlap_drop", + "window_size, overlap, partial_windows", [ - (300, 0, False), - (300, 50, False), - (300, 50, True), + (300, 0, None), + (300, 50, None), + (300, 50, "drop"), ], ) def test_sliding_window_several_slices( self, adata_mibitof: AnnData, - windowsize_overlap_drop: tuple[int, int, bool], + window_size: int, + overlap: int, + partial_windows: str | None, sliding_window_key: str = "sliding_window_key", library_key: str = "library_id", ): - def _count_total_assignments(): - total_cells = 0 + def count_total_assignments(df: pd.DataFrame) -> int: + total = 0 for lib_key in ["point8", "point16", "point23"]: - cols_in_lib = df.columns[df.columns.str.contains(lib_key)] - for col in cols_in_lib: - total_cells += df[col].sum() - return total_cells + cols = df.columns[df.columns.str.contains(lib_key)] + for col in cols: + total += df[col].sum() + return total - window_size, overlap, drop_partial_windows = windowsize_overlap_drop df = sliding_window( adata_mibitof, library_key=library_key, @@ -38,24 +40,25 @@ def _count_total_assignments(): overlap=overlap, coord_columns=("globalX", "globalY"), sliding_window_key=sliding_window_key, + partial_windows=partial_windows, copy=True, - drop_partial_windows=drop_partial_windows, ) + assert len(df) == adata_mibitof.n_obs + if overlap == 0: - sliding_window_columns = [col for col in df.columns if sliding_window_key in col] - assert len(sliding_window_columns) == 1 # only one sliding window - assert df[sliding_window_key].isnull().sum() == 0 # no unassigned cells - assert len(df) == adata_mibitof.n_obs # correct amount of rows + # single categorical assignment + assert sliding_window_key in df.columns + assert df[sliding_window_key].notnull().all() else: - sliding_window_cols = df.columns[df.columns.str.contains("sliding_window")] + sliding_window_cols = df.columns[df.columns.str.contains(sliding_window_key)] - if drop_partial_windows: + if partial_windows == "drop": assert len(sliding_window_cols) == 27 - assert _count_total_assignments() == 2536 + assert count_total_assignments(df) == 2536 else: assert len(sliding_window_cols) == 70 - assert _count_total_assignments() == 4569 + assert count_total_assignments(df) == 4569 @pytest.mark.parametrize("overlap", [0, 2]) def test_sliding_window_square_grid( @@ -71,107 +74,151 @@ def test_sliding_window_square_grid( overlap=overlap, coord_columns=("globalX", "globalY"), sliding_window_key=sliding_window_key, + partial_windows=None, copy=True, ) - assert len(df) == adata_squaregrid.n_obs # correct amount of rows + assert len(df) == adata_squaregrid.n_obs if overlap == 0: - sliding_window_columns = [col for col in df.columns if sliding_window_key in col] - assert len(sliding_window_columns) == 1 # only one sliding window - assert df[sliding_window_key].isnull().sum() == 0 # no unassigned cells + assert sliding_window_key in df.columns + assert df[sliding_window_key].notnull().all() else: - for i in range(9): # we expect 9 windows - assert ( - f"{sliding_window_key}_window_{i}" in df.columns - ) # correct number of columns; multiple sliding windows + for i in range(9): # 3x3 grid + assert f"{sliding_window_key}_window_{i}" in df.columns - def test_sliding_window_invalid_window_size( - self, - adata_squaregrid: AnnData, - ): - with pytest.raises(ValueError, match="Window size must be larger than 0."): + def test_sliding_window_invalid_arguments(self, adata_squaregrid: AnnData): + with pytest.raises(ValueError, match="Window size must be larger than 0"): sliding_window( adata_squaregrid, - window_size=-10, + window_size=-1, overlap=0, coord_columns=("globalX", "globalY"), - sliding_window_key="sliding_window", copy=True, ) - with pytest.raises(ValueError, match="Overlap must be non-negative."): + with pytest.raises(ValueError, match="Overlap must be non-negative"): sliding_window( adata_squaregrid, window_size=10, - overlap=-10, + overlap=-1, coord_columns=("globalX", "globalY"), - sliding_window_key="sliding_window", copy=True, ) - def test_calculate_window_corners_overlap(self): - min_x = 0 - max_x = 200 - min_y = 0 - max_y = 200 - window_size = 100 - overlap = 20 + with pytest.raises(ValueError, match="max_nr_cells"): + sliding_window( + adata_squaregrid, + window_size=None, + overlap=0, + partial_windows="split", + coord_columns=("globalX", "globalY"), + copy=True, + ) + + def test_sliding_window_adaptive_assigns_all_cells( + self, + adata_squaregrid: AnnData, + sliding_window_key: str = "sliding_window_key", + ): + df = sliding_window( + adata_squaregrid, + window_size=5, + overlap=0, + coord_columns=("globalX", "globalY"), + sliding_window_key=sliding_window_key, + partial_windows="adaptive", + copy=True, + ) + + assert sliding_window_key in df.columns + assert df[sliding_window_key].notnull().all() + assert len(df) == adata_squaregrid.n_obs + + def test_sliding_window_split_respects_max_nr_cells( + self, + adata_mibitof: AnnData, + sliding_window_key: str = "sliding_window_key", + library_key: str = "library_id", + ): + max_nr_cells = 100 + + df = sliding_window( + adata_mibitof, + library_key=library_key, + window_size=None, + overlap=0, + coord_columns=("globalX", "globalY"), + sliding_window_key=sliding_window_key, + partial_windows="split", + max_nr_cells=max_nr_cells, + copy=True, + ) + + assert sliding_window_key in df.columns + assert df[sliding_window_key].notnull().all() + + counts = df[sliding_window_key].value_counts() + assert counts.max() <= max_nr_cells + assert counts.shape[0] > 1 # more than one window + +class TestCalculateWindowCorners: + def test_overlap(self): windows = _calculate_window_corners( - min_x=min_x, - max_x=max_x, - min_y=min_y, - max_y=max_y, - window_size=window_size, - overlap=overlap, - drop_partial_windows=False, + min_x=0, + max_x=200, + min_y=0, + max_y=200, + window_size=100, + overlap=20, + partial_windows=None, ) assert windows.shape == (9, 4) - assert windows.iloc[0].values.tolist() == [0, 100, 0, 100] - assert windows.iloc[-1].values.tolist() == [160, 200, 160, 200] - - def test_calculate_window_corners_no_overlap(self): - min_x = 0 - max_x = 200 - min_y = 0 - max_y = 200 - window_size = 100 - overlap = 0 + assert windows.iloc[0].tolist() == [0, 100, 0, 100] + assert windows.iloc[-1].tolist() == [160, 200, 160, 200] + def test_no_overlap(self): windows = _calculate_window_corners( - min_x=min_x, - max_x=max_x, - min_y=min_y, - max_y=max_y, - window_size=window_size, - overlap=overlap, - drop_partial_windows=False, + min_x=0, + max_x=200, + min_y=0, + max_y=200, + window_size=100, + overlap=0, + partial_windows=None, ) assert windows.shape == (4, 4) - assert windows.iloc[0].values.tolist() == [0, 100, 0, 100] - assert windows.iloc[-1].values.tolist() == [100, 200, 100, 200] - - def test_calculate_window_corners_drop_partial_windows(self): - min_x = 0 - max_x = 200 - min_y = 0 - max_y = 200 - window_size = 100 - overlap = 20 + assert windows.iloc[-1].tolist() == [100, 200, 100, 200] + def test_drop_partial_windows(self): windows = _calculate_window_corners( - min_x=min_x, - max_x=max_x, - min_y=min_y, - max_y=max_y, - window_size=window_size, - overlap=overlap, - drop_partial_windows=True, + min_x=0, + max_x=200, + min_y=0, + max_y=200, + window_size=100, + overlap=20, + partial_windows="drop", ) assert windows.shape == (4, 4) - assert windows.iloc[0].values.tolist() == [0, 100, 0, 100] - assert windows.iloc[-1].values.tolist() == [80, 180, 80, 180] + assert windows.iloc[-1].tolist() == [80, 180, 80, 180] + + def test_adaptive_windows_cover_extent(self): + windows = _calculate_window_corners( + min_x=0, + max_x=200, + min_y=0, + max_y=200, + window_size=90, + overlap=0, + partial_windows="adaptive", + ) + + assert windows["x_start"].min() == 0 + assert windows["y_start"].min() == 0 + assert windows["x_end"].max() >= 200 + assert windows["y_end"].max() >= 200 From 82407bb093b755ed158bc87abebcc5dc7c4eb8fc Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Fri, 6 Feb 2026 16:24:26 +0100 Subject: [PATCH 5/9] improved partial_window split --- src/squidpy/tl/_sliding_window.py | 5 ++++- tests/tools/test_sliding_window.py | 29 +++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index c0ecb9e6f..4f259adff 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -294,6 +294,7 @@ def _calculate_window_corners( number_x_windows = np.ceil((max_x - min_x) / window_size) number_y_windows = np.ceil((max_y - min_y) / window_size) + # use np.ceil to avoid float errors x_window_size = np.ceil((max_x - min_x) / number_x_windows) y_window_size = np.ceil((max_y - min_y) / number_y_windows) else: @@ -319,7 +320,9 @@ def _calculate_window_corners( windows["x_end"] = windows["x_end"].clip(upper=max_x) windows["y_end"] = windows["y_end"].clip(upper=max_y) elif partial_windows == "adaptive": - pass + # as window_size is an integer to avoid float errors, it can exceed max_x and max_y -> clip + windows["x_end"] = windows["x_end"].clip(upper=max_x) + windows["y_end"] = windows["y_end"].clip(upper=max_y) elif partial_windows == "drop": valid_windows = (windows["x_end"] <= max_x) & (windows["y_end"] <= max_y) windows = windows[valid_windows] diff --git a/tests/tools/test_sliding_window.py b/tests/tools/test_sliding_window.py index 57528752c..25004ac96 100644 --- a/tests/tools/test_sliding_window.py +++ b/tests/tools/test_sliding_window.py @@ -135,18 +135,24 @@ def test_sliding_window_adaptive_assigns_all_cells( assert df[sliding_window_key].notnull().all() assert len(df) == adata_squaregrid.n_obs - def test_sliding_window_split_respects_max_nr_cells( + def test_sliding_window_split_nr_cells( self, adata_mibitof: AnnData, sliding_window_key: str = "sliding_window_key", library_key: str = "library_id", ): + """ + Test that when using 'split', each window contains at most max_nr_cells + and at least max_nr_cells // 2 cells, + unless the total number of cells is smaller than max_nr_cells // 2. + """ max_nr_cells = 100 + total_cells = adata_mibitof.n_obs df = sliding_window( adata_mibitof, library_key=library_key, - window_size=None, + window_size=None, # ignored in split mode overlap=0, coord_columns=("globalX", "globalY"), sliding_window_key=sliding_window_key, @@ -155,12 +161,19 @@ def test_sliding_window_split_respects_max_nr_cells( copy=True, ) - assert sliding_window_key in df.columns - assert df[sliding_window_key].notnull().all() - counts = df[sliding_window_key].value_counts() + + # all windows respect the upper bound assert counts.max() <= max_nr_cells - assert counts.shape[0] > 1 # more than one window + + # determine strict lower bound + lower_bound = max_nr_cells // 2 + if total_cells < lower_bound: + # if total cells are too few, just one window is allowed smaller + assert counts.max() == total_cells + else: + # otherwise, every window must satisfy the lower bound + assert (counts >= lower_bound).all() class TestCalculateWindowCorners: @@ -220,5 +233,5 @@ def test_adaptive_windows_cover_extent(self): assert windows["x_start"].min() == 0 assert windows["y_start"].min() == 0 - assert windows["x_end"].max() >= 200 - assert windows["y_end"].max() >= 200 + assert windows["x_end"].max() == 200 + assert windows["y_end"].max() == 200 From 66c566a259a97ec395d1f1157e0042ca16485a0d Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Thu, 12 Feb 2026 18:39:13 +0100 Subject: [PATCH 6/9] bugfix partial_windows == "split" when overlap is set --- src/squidpy/tl/_sliding_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index 98718f8ac..65c68aec7 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -169,7 +169,7 @@ def sliding_window( ) obs_indices = lib_coords.index[mask] - if overlap == 0: + if overlap == 0 or partial_windows == "split": sliding_window_df.loc[obs_indices, sliding_window_key] = f"{lib_key}window_{idx}" else: From f515f93c50679e0a76a635eb40acb3005a1be500 Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Thu, 12 Feb 2026 19:04:36 +0100 Subject: [PATCH 7/9] simplify code --- src/squidpy/tl/_sliding_window.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index 65c68aec7..bc08f613b 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -128,7 +128,6 @@ def sliding_window( lib_mask = adata.obs[library_key] == lib lib_coords = coords.loc[lib_mask] else: - lib_mask = np.ones(len(adata), dtype=bool) lib_coords = coords min_x, max_x = lib_coords[x_col].min(), lib_coords[x_col].max() @@ -174,9 +173,7 @@ def sliding_window( else: col_name = f"{sliding_window_key}_{lib_key}window_{idx}" - sliding_window_df.loc[obs_indices, col_name] = True - # Avoid chained assignment for pandas CoW compatibility - sliding_window_df[col_name] = sliding_window_df[col_name].fillna(False) + sliding_window_df[col_name] = mask if overlap == 0: # create categorical variable for ordered windows From 6c7e7f59b91652c0c78aa5c7d39c8852718dd88e Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Fri, 13 Feb 2026 10:58:36 +0100 Subject: [PATCH 8/9] bugfix adaptive with overlap --- src/squidpy/tl/_sliding_window.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/squidpy/tl/_sliding_window.py b/src/squidpy/tl/_sliding_window.py index bc08f613b..6f0bccbb9 100644 --- a/src/squidpy/tl/_sliding_window.py +++ b/src/squidpy/tl/_sliding_window.py @@ -109,7 +109,7 @@ def sliding_window( raise ValueError("Window size must be larger than 0.") if overlap >= window_size: raise ValueError("Overlap must be less than the window size.") - if overlap >= window_size // 2 and window_size == "adaptive": + if overlap >= window_size // 2 and partial_windows == "adaptive": raise ValueError("Overlap must be less than `window_size` // 2 when using `adaptive`.") if library_key is not None and library_key not in adata.obs: @@ -289,12 +289,20 @@ def _calculate_window_corners( """ # adjust x and y window size if 'adaptive' if partial_windows == "adaptive": - number_x_windows = np.ceil((max_x - min_x) / window_size) - number_y_windows = np.ceil((max_y - min_y) / window_size) + total_width = max_x - min_x + total_height = max_y - min_y - # use np.ceil to avoid float errors - x_window_size = np.ceil((max_x - min_x) / number_x_windows) - y_window_size = np.ceil((max_y - min_y) / number_y_windows) + # number of windows in x and y direction + number_x_windows = np.ceil((total_width - overlap) / (window_size - overlap)) + number_y_windows = np.ceil((total_height - overlap) / (window_size - overlap)) + + # window size in x and y direction + x_window_size = (total_width + (number_x_windows - 1) * overlap) / number_x_windows + y_window_size = (total_height + (number_y_windows - 1) * overlap) / number_y_windows + + # avoid float errors + x_window_size = np.ceil(x_window_size) + y_window_size = np.ceil(y_window_size) else: x_window_size = window_size y_window_size = window_size @@ -321,6 +329,12 @@ def _calculate_window_corners( # as window_size is an integer to avoid float errors, it can exceed max_x and max_y -> clip windows["x_end"] = windows["x_end"].clip(upper=max_x) windows["y_end"] = windows["y_end"].clip(upper=max_y) + + # remove redundant windows in the corners + redundant_windows = ((windows["x_end"] - windows["x_start"]) <= overlap) | ( + (windows["y_end"] - windows["y_start"]) <= overlap + ) + windows = windows[~redundant_windows] elif partial_windows == "drop": valid_windows = (windows["x_end"] <= max_x) & (windows["y_end"] <= max_y) windows = windows[valid_windows] From c1fa430b0c88e9fbb5a56080122a3c262cfcd211 Mon Sep 17 00:00:00 2001 From: Felix Schroeder Date: Fri, 13 Feb 2026 10:59:34 +0100 Subject: [PATCH 9/9] Update notebooks submodule pointer --- docs/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 1cbaf62a3..17d368281 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 1cbaf62a32f65b950552229d210b9884757ce116 +Subproject commit 17d368281ea7b11f7e1174436bbc2429191a0245