Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ _version.py
node_modules/
.code-workspace

# memray report
*.bin

# test datasets (e.g. Xenium ones)
# symlinks
data
Expand Down
6 changes: 3 additions & 3 deletions asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"project": "spatialdata-io",
"project_url": "https://github.com/scverse/spatialdata-io",
"repo": ".",
"branches": ["image-reader-chunkwise"],
"branches": ["faster-imports", "main"],
"dvcs": "git",
"environment_type": "virtualenv",
"pythons": ["3.12"],
"pythons": ["3.13"],
"build_command": [],
"install_command": ["python -m pip install {build_dir}[test]"],
"uninstall_command": ["python -m pip uninstall -y {project}"],
Expand All @@ -17,7 +17,7 @@
"hash_length": 8,
"build_cache_size": 2,
"install_timeout": 600,
"repeat": 3,
"repeat": 5,
"processes": 1,
"attribute_selection": ["time_*", "peakmem_*"]
}
2 changes: 1 addition & 1 deletion benchmarks/benchmark_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from spatialdata._logging import logger
from xarray import DataArray

from spatialdata_io import image # type: ignore[attr-defined]
from spatialdata_io import image

# =============================================================================
# CONFIGURATION - Edit these values to match your setup
Expand Down
103 changes: 103 additions & 0 deletions benchmarks/benchmark_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""ASV benchmarks for spatialdata-io import times.

Measures how long it takes to import the package and individual readers
in a fresh subprocess, isolating import overhead from runtime work.

Running (with the current environment, no virtualenv rebuild):
# Quick sanity check (single iteration):
asv run --python=same --quick --show-stderr -v -b ImportBenchmark

# Full benchmark on current commit:
asv run --python=same --show-stderr -v -b ImportBenchmark

# Compare two branches (using --python=same, one-liner):
git stash && git checkout main && pip install -e . -q \
&& asv run --python=same -v -b ImportBenchmark \
&& git checkout faster-imports && git stash pop && pip install -e . -q \
&& asv run --python=same -v -b ImportBenchmark
# Then view the comparison:
asv compare $(git rev-parse main) $(git rev-parse faster-imports)

# Compare two branches (let ASV build virtualenvs, slower first run):
asv continuous --show-stderr -v -b ImportBenchmark main faster-imports

# Generate an HTML report:
asv publish && asv preview
"""

import subprocess
import sys


def _import_time(statement: str) -> float:
"""Time an import in a fresh subprocess. Returns seconds."""
code = f"import time; t0=time.perf_counter(); {statement}; print(time.perf_counter()-t0)"
result = subprocess.run(
[sys.executable, "-c", code],
capture_output=True,
text=True,
)
if result.returncode != 0:
raise RuntimeError(result.stderr)
return float(result.stdout.strip())


class ImportBenchmark:
    """ASV benchmark suite for the import cost of spatialdata-io.

    ASV treats every ``time_*`` method as an independent benchmark. Each one
    hands a single import statement to ``_import_time``, which executes it in
    a fresh subprocess, so an import performed by one benchmark cannot warm
    the module cache for the next.
    """

    # ---- ASV settings tuned for subprocess-based import timing -----------

    # Seconds before ASV kills a benchmark. Generous on purpose: every call
    # spawns a subprocess (~2s each × 5 repeats = ~10s worst case).
    timeout = 120

    # Number of timing samples ASV collects. Kept high because import times
    # vary with OS caching, disk I/O and background load; ASV summarises the
    # samples (median and IQR).
    repeat = 5

    # Calls per sample. Must stay at 1: each call spawns a fresh subprocess,
    # and running more than one would just re-import in a warm process.
    number = 1

    # Seconds of warm-up before timing. Disabled, since each call is already
    # a cold subprocess and warming up the parent process is meaningless.
    warmup_time = 0

    # Intended to keep parallel subprocesses from competing for CPU / disk and
    # inflating the timings.
    # NOTE(review): confirm this attribute is honoured by the installed ASV version.
    processes = 1

    # ---- top-level package -----------------------------------------------

    def time_import_spatialdata_io(self) -> float:
        """Time ``import spatialdata_io`` (lazy, no readers loaded)."""
        statement = "import spatialdata_io"
        return _import_time(statement)

    # ---- single reader via the public API --------------------------------

    def time_from_spatialdata_io_import_xenium(self) -> float:
        """Time ``from spatialdata_io import xenium``."""
        statement = "from spatialdata_io import xenium"
        return _import_time(statement)

    def time_from_spatialdata_io_import_visium(self) -> float:
        """Time ``from spatialdata_io import visium``."""
        statement = "from spatialdata_io import visium"
        return _import_time(statement)

    def time_from_spatialdata_io_import_visium_hd(self) -> float:
        """Time ``from spatialdata_io import visium_hd``."""
        statement = "from spatialdata_io import visium_hd"
        return _import_time(statement)

    def time_from_spatialdata_io_import_merscope(self) -> float:
        """Time ``from spatialdata_io import merscope``."""
        statement = "from spatialdata_io import merscope"
        return _import_time(statement)

    def time_from_spatialdata_io_import_cosmx(self) -> float:
        """Time ``from spatialdata_io import cosmx``."""
        statement = "from spatialdata_io import cosmx"
        return _import_time(statement)

    # ---- key dependencies (reference points) -----------------------------

    def time_import_spatialdata(self) -> float:
        """Time ``import spatialdata`` (reference)."""
        statement = "import spatialdata"
        return _import_time(statement)

    def time_import_anndata(self) -> float:
        """Time ``import anndata`` (reference)."""
        statement = "import anndata"
        return _import_time(statement)
2 changes: 1 addition & 1 deletion benchmarks/benchmark_xenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

from spatialdata import SpatialData

from spatialdata_io import xenium # type: ignore[attr-defined]
from spatialdata_io import xenium

# =============================================================================
# CONFIGURATION - Edit these paths to match your setup
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ lint.ignore = [
# Unused imports
"F401",
]
[tool.ruff.lint.per-file-ignores]
"src/spatialdata_io/__init__.py" = ["I001"]

[tool.jupytext]
formats = "ipynb,md"
Expand Down
115 changes: 77 additions & 38 deletions src/spatialdata_io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,36 @@
from importlib import import_module
from importlib.metadata import version
from typing import Any, TYPE_CHECKING

from spatialdata_io.converters.generic_to_zarr import generic_to_zarr
from spatialdata_io.readers.codex import codex
from spatialdata_io.readers.cosmx import cosmx
from spatialdata_io.readers.curio import curio
from spatialdata_io.readers.dbit import dbit
from spatialdata_io.readers.generic import generic, geojson, image
from spatialdata_io.readers.macsima import macsima
from spatialdata_io.readers.mcmicro import mcmicro
from spatialdata_io.readers.merscope import merscope
from spatialdata_io.readers.seqfish import seqfish
from spatialdata_io.readers.steinbock import steinbock
from spatialdata_io.readers.stereoseq import stereoseq
from spatialdata_io.readers.visium import visium
from spatialdata_io.readers.visium_hd import visium_hd
from spatialdata_io.readers.xenium import (
xenium,
xenium_aligned_image,
xenium_explorer_selection,
)

_readers_technologies = [
__version__ = version("spatialdata-io")

# Maps each lazily exposed public name to the dotted path of the module that
# defines it. The module-level ``__getattr__`` imports that module on first
# access and fetches the attribute with the same name from it, so every key
# must match an attribute of its target module.
_LAZY_IMPORTS: dict[str, str] = {
    # readers
    "codex": "spatialdata_io.readers.codex",
    "cosmx": "spatialdata_io.readers.cosmx",
    "curio": "spatialdata_io.readers.curio",
    "dbit": "spatialdata_io.readers.dbit",
    "macsima": "spatialdata_io.readers.macsima",
    "mcmicro": "spatialdata_io.readers.mcmicro",
    "merscope": "spatialdata_io.readers.merscope",
    "seqfish": "spatialdata_io.readers.seqfish",
    "steinbock": "spatialdata_io.readers.steinbock",
    "stereoseq": "spatialdata_io.readers.stereoseq",
    "visium": "spatialdata_io.readers.visium",
    "visium_hd": "spatialdata_io.readers.visium_hd",
    "xenium": "spatialdata_io.readers.xenium",
    "xenium_aligned_image": "spatialdata_io.readers.xenium",
    "xenium_explorer_selection": "spatialdata_io.readers.xenium",
    # readers file types
    "generic": "spatialdata_io.readers.generic",
    "geojson": "spatialdata_io.readers.generic",
    "image": "spatialdata_io.readers.generic",
    # converters
    "generic_to_zarr": "spatialdata_io.converters.generic_to_zarr",
}

__all__ = [
# readers
"codex",
"cosmx",
"curio",
Expand All @@ -34,28 +44,57 @@
"visium",
"visium_hd",
"xenium",
]

_readers_file_types = [
"xenium_aligned_image",
"xenium_explorer_selection",
# readers file types
"generic",
"image",
"geojson",
]

_converters = [
"image",
# converters
"generic_to_zarr",
]


__all__ = (
[
"xenium_aligned_image",
"xenium_explorer_selection",
]
+ _readers_technologies
+ _readers_file_types
+ _converters
)
def __getattr__(name: str) -> Any:
    """Resolve a package attribute on demand.

    A name listed in ``_LAZY_IMPORTS`` is imported from its target module,
    stored in this module's globals and returned. Any other name is looked up
    in the module globals.

    Parameters
    ----------
    name
        Attribute requested on the ``spatialdata_io`` package.

    Returns
    -------
    The resolved attribute.

    Raises
    ------
    AttributeError
        If ``name`` is neither a lazy import nor an existing module global.
    """
    if name not in _LAZY_IMPORTS:
        try:
            return globals()[name]
        except KeyError as e:
            raise AttributeError(f"Module 'spatialdata_io' has no attribute '{name}'") from e

    resolved = getattr(import_module(_LAZY_IMPORTS[name]), name)
    # Store the object in the module namespace so that later lookups find it
    # through normal attribute access.
    globals()[name] = resolved
    return resolved


__version__ = version("spatialdata-io")
def __dir__() -> list[str]:
    """Return the package's public names: everything in ``__all__`` plus ``__version__``."""
    return [*__all__, "__version__"]


if TYPE_CHECKING:
# readers
from spatialdata_io.readers.codex import codex
from spatialdata_io.readers.cosmx import cosmx
from spatialdata_io.readers.curio import curio
from spatialdata_io.readers.dbit import dbit
from spatialdata_io.readers.macsima import macsima
from spatialdata_io.readers.mcmicro import mcmicro
from spatialdata_io.readers.merscope import merscope
from spatialdata_io.readers.seqfish import seqfish
from spatialdata_io.readers.steinbock import steinbock
from spatialdata_io.readers.stereoseq import stereoseq
from spatialdata_io.readers.visium import visium
from spatialdata_io.readers.visium_hd import visium_hd
from spatialdata_io.readers.xenium import (
xenium,
xenium_aligned_image,
xenium_explorer_selection,
)

# readers file types
from spatialdata_io.readers.generic import generic, geojson, image

# converters
from spatialdata_io.converters.generic_to_zarr import generic_to_zarr
Loading
Loading