Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,43 @@ jobs:
fi
fi
uv sync --group=test
# Start storage emulators (S3, Azure, GCS) only on Linux; service containers are not available on Windows/macOS
- name: Build and start storage emulators
if: matrix.os == 'ubuntu-latest'
run: |
docker build -f tests/io/remote_storage/Dockerfile.emulators -t spatialdata-emulators .
docker run --rm -d --name spatialdata-emulators \
-p 5000:5000 -p 10000:10000 -p 4443:4443 \
spatialdata-emulators
- name: Wait for emulator ports
if: matrix.os == 'ubuntu-latest'
run: |
echo "Waiting for S3 (5000), Azure (10000), GCS (4443)..."
python3 -c "
import socket, time
for _ in range(45):
try:
for p in (5000, 10000, 4443):
socket.create_connection(('127.0.0.1', p), timeout=2)
print('Emulators ready.')
break
except (socket.error, OSError):
time.sleep(2)
else:
raise SystemExit('Emulators did not become ready.')
"
# On Linux, emulators run above so full suite (incl. tests/io/remote_storage/) runs. On Windows/macOS, skip remote_storage.
- name: Test
env:
MPLBACKEND: agg
PLATFORM: ${{ matrix.os }}
DISPLAY: :42
run: |
uv run pytest --cov --color=yes --cov-report=xml
if [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
uv run pytest --cov --color=yes --cov-report=xml
else
uv run pytest --cov --color=yes --cov-report=xml --ignore=tests/io/remote_storage/
fi
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,13 @@ dev = [
"bump2version",
]
test = [
"adlfs",
"gcsfs",
"moto[server]",
"pytest",
"pytest-cov",
"pytest-mock",
"pytest-timeout",
"torch",
]
docs = [
Expand Down
71 changes: 48 additions & 23 deletions src/spatialdata/_core/spatialdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def __init__(
tables: dict[str, AnnData] | Tables | None = None,
attrs: Mapping[Any, Any] | None = None,
) -> None:
self._path: Path | None = None
self._path: Path | UPath | None = None

self._shared_keys: set[str | None] = set()
self._images: Images = Images(shared_keys=self._shared_keys)
Expand Down Expand Up @@ -548,16 +548,16 @@ def is_backed(self) -> bool:
return self.path is not None

@property
def path(self) -> Path | None:
def path(self) -> Path | UPath | None:
"""Path to the Zarr storage."""
return self._path

@path.setter
def path(self, value: Path | None) -> None:
if value is None or isinstance(value, str | Path):
def path(self, value: Path | UPath | None) -> None:
if value is None or isinstance(value, (str, Path, UPath)):
self._path = value
else:
raise TypeError("Path must be `None`, a `str` or a `Path` object.")
raise TypeError("Path must be `None`, a `str`, a `Path` or a `UPath` object.")

def locate_element(self, element: SpatialElement) -> list[str]:
"""
Expand Down Expand Up @@ -1032,18 +1032,34 @@ def _symmetric_difference_with_zarr_store(self) -> tuple[list[str], list[str]]:

def _validate_can_safely_write_to_path(
self,
file_path: str | Path,
file_path: str | Path | UPath,
overwrite: bool = False,
saving_an_element: bool = False,
) -> None:
from spatialdata._io._utils import _backed_elements_contained_in_path, _is_subfolder, _resolve_zarr_store
from spatialdata._io._utils import (
_backed_elements_contained_in_path,
_is_subfolder,
_remote_zarr_store_exists,
_resolve_zarr_store,
)

if isinstance(file_path, str):
file_path = Path(file_path)

if not isinstance(file_path, Path):
raise ValueError(f"file_path must be a string or a Path object, type(file_path) = {type(file_path)}.")
if not isinstance(file_path, (Path, UPath)):
raise ValueError(f"file_path must be a string, Path or UPath object, type(file_path) = {type(file_path)}.")

if isinstance(file_path, UPath):
store = _resolve_zarr_store(file_path)
if _remote_zarr_store_exists(store) and not overwrite:
raise ValueError(
"The Zarr store already exists. Use `overwrite=True` to try overwriting the store. "
"Please note that only Zarr stores not currently in use by the current SpatialData object can be "
"overwritten."
)
return

# Local Path: existing logic
# TODO: add test for this
if os.path.exists(file_path):
store = _resolve_zarr_store(file_path)
Expand Down Expand Up @@ -1072,8 +1088,13 @@ def _validate_can_safely_write_to_path(
ERROR_MSG + "\nDetails: the target path contains one or more files that Dask use for "
"backing elements in the SpatialData object." + WORKAROUND
)
if self.path is not None and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
# Subfolder checks only for local paths (Path); skip when self.path is UPath
if (
self.path is not None
and isinstance(self.path, Path)
and (
_is_subfolder(parent=self.path, child=file_path) or _is_subfolder(parent=file_path, child=self.path)
)
):
if saving_an_element and _is_subfolder(parent=self.path, child=file_path):
raise ValueError(
Expand Down Expand Up @@ -1102,7 +1123,7 @@ def _validate_all_elements(self) -> None:
@_deprecation_alias(format="sdata_formats", version="0.7.0")
def write(
self,
file_path: str | Path,
file_path: str | Path | UPath | None = None,
overwrite: bool = False,
consolidate_metadata: bool = True,
update_sdata_path: bool = True,
Expand All @@ -1115,7 +1136,7 @@ def write(
Parameters
----------
file_path
The path to the Zarr store to write to.
The path to the Zarr store to write to. If ``None``, uses :attr:`path` (must be set).
overwrite
If `True`, overwrite the Zarr store if it already exists. If `False`, `write()` will fail if the Zarr store
already exists.
Expand Down Expand Up @@ -1161,8 +1182,13 @@ def write(

parsed = _parse_formats(sdata_formats)

if file_path is None:
if self.path is None:
raise ValueError("file_path must be provided when SpatialData.path is not set.")
file_path = self.path
if isinstance(file_path, str):
file_path = Path(file_path)
# Keep UPath as-is; do not convert to Path
self._validate_can_safely_write_to_path(file_path, overwrite=overwrite)
self._validate_all_elements()

Expand Down Expand Up @@ -1192,7 +1218,7 @@ def write(
def _write_element(
self,
element: SpatialElement | AnnData,
zarr_container_path: Path,
zarr_container_path: Path | UPath,
element_type: str,
element_name: str,
overwrite: bool,
Expand All @@ -1201,10 +1227,8 @@ def _write_element(
) -> None:
from spatialdata._io.io_zarr import _get_groups_for_element

if not isinstance(zarr_container_path, Path):
raise ValueError(
f"zarr_container_path must be a Path object, type(zarr_container_path) = {type(zarr_container_path)}."
)
if not isinstance(zarr_container_path, (Path, UPath)):
raise ValueError(f"zarr_container_path must be a Path or UPath, got {type(zarr_container_path).__name__}.")
file_path_of_element = zarr_container_path / element_type / element_name
self._validate_can_safely_write_to_path(
file_path=file_path_of_element, overwrite=overwrite, saving_an_element=True
Expand Down Expand Up @@ -1489,7 +1513,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# check if the element exists in the Zarr storage
if not _group_for_element_exists(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
):
Expand All @@ -1503,7 +1527,7 @@ def _validate_can_write_metadata_on_element(self, element_name: str) -> tuple[st

# warn the users if the element is not self-contained, that is, it is Dask-backed by files outside the Zarr
# group for the element
element_zarr_path = Path(self.path) / element_type / element_name
element_zarr_path = self.path / element_type / element_name
if not _is_element_self_contained(element=element, element_path=element_zarr_path):
logger.info(
f"Element {element_type}/{element_name} is not self-contained. The metadata will be"
Expand Down Expand Up @@ -1544,7 +1568,7 @@ def write_channel_names(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have the check in the conditional
if element_type == "images" and self.path is not None:
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path), element_type=element_type, element_name=element_name, use_consolidated=False
zarr_path=self.path, element_type=element_type, element_name=element_name, use_consolidated=False
)

from spatialdata._io._utils import overwrite_channel_names
Expand Down Expand Up @@ -1588,7 +1612,7 @@ def write_transformations(self, element_name: str | None = None) -> None:
# Mypy does not understand that path is not None so we have a conditional
assert self.path is not None
_, _, element_group = _get_groups_for_element(
zarr_path=Path(self.path),
zarr_path=self.path,
element_type=element_type,
element_name=element_name,
use_consolidated=False,
Expand Down Expand Up @@ -1956,7 +1980,8 @@ def h(s: str) -> str:

descr = "SpatialData object"
if self.path is not None:
descr += f", with associated Zarr store: {self.path.resolve()}"
path_descr = str(self.path) if isinstance(self.path, UPath) else self.path.resolve()
descr += f", with associated Zarr store: {path_descr}"

non_empty_elements = self._non_empty_elements()
last_element_index = len(non_empty_elements) - 1
Expand Down
2 changes: 2 additions & 0 deletions src/spatialdata/_io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

# Patch da.to_zarr so ome_zarr's **kwargs are passed as zarr_array_kwargs (avoids FutureWarning)
import spatialdata._io._dask_zarr_compat # noqa: F401
from spatialdata._io._utils import get_dask_backing_files
from spatialdata._io.format import SpatialDataFormatType
from spatialdata._io.io_points import write_points
Expand Down
55 changes: 55 additions & 0 deletions src/spatialdata/_io/_dask_zarr_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Compatibility layer for dask.array.to_zarr when callers pass array options via **kwargs.

ome_zarr.writer calls da.to_zarr(..., **options) with array options (compressor, dimension_names,
etc.). Dask deprecated **kwargs in favor of zarr_array_kwargs. This module patches da.to_zarr to
forward such kwargs into zarr_array_kwargs (excluding dask-internal keys like zarr_format that
zarr.Group.create_array() does not accept), avoiding the FutureWarning and keeping behavior correct.
"""

from __future__ import annotations

from typing import Any

import dask.array as _da

_orig_to_zarr = _da.to_zarr

# Keys from ome_zarr/dask **kwargs that must not be passed to zarr.Group.create_array()
# dimension_separator: not accepted by all zarr versions in the create_array() path.
_DASK_INTERNAL_KEYS = frozenset({"zarr_format", "dimension_separator"})


def _to_zarr(
    arr: Any,
    url: Any,
    component: Any = None,
    storage_options: Any = None,
    region: Any = None,
    compute: bool = True,
    return_stored: bool = False,
    zarr_array_kwargs: Any = None,
    zarr_read_kwargs: Any = None,
    **kwargs: Any,
) -> Any:
    """Forward deprecated **kwargs into zarr_array_kwargs, excluding _DASK_INTERNAL_KEYS."""
    if kwargs:
        # Merge any explicit zarr_array_kwargs with the deprecated **kwargs,
        # dropping keys that zarr.Group.create_array() would reject.
        merged = dict(zarr_array_kwargs) if zarr_array_kwargs else {}
        merged.update((key, value) for key, value in kwargs.items() if key not in _DASK_INTERNAL_KEYS)
        zarr_array_kwargs = merged
        # Nothing is left to pass positionally through **kwargs anymore.
        kwargs = {}
    return _orig_to_zarr(
        arr,
        url,
        component=component,
        storage_options=storage_options,
        region=region,
        compute=compute,
        return_stored=return_stored,
        zarr_array_kwargs=zarr_array_kwargs,
        zarr_read_kwargs=zarr_read_kwargs,
        **kwargs,
    )


# Install the shim so every caller of da.to_zarr (notably ome_zarr.writer) goes through it.
_da.to_zarr = _to_zarr
Loading