Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,24 @@ jobs:
run: uv sync

- name: Run ruff linter
run: uv run ruff check
run: uv run ruff check

- name: Run ruff formatter check
run: uv run ruff format --check

- name: Restore test hydrology data cache
id: cache-restore
uses: actions/cache/restore@v4
with:
path: .test_cache
key: test-hydrology-data-v2

- name: Run tests
run: uv run pytest
run: uv run pytest -n auto

- name: Save test hydrology data cache
if: steps.cache-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: .test_cache
key: test-hydrology-data-v2
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ output
plots
cache

# Test cache (downloaded hydrology data)
.test_cache

# mise cache
.mise.local.toml

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dev = [
"pytest~=9.0",
"pytest-sugar~=1.0",
"pytest-timeout~=2.3",
"pytest-xdist~=3.5",
"syrupy~=5.0",
]

Expand Down
138 changes: 134 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@

This module sets up the environment variables needed to access remote hydrology data
and provides common fixtures used across test modules.

Test Performance Optimizations:
- Uses a persistent cache directory for downloaded hydrology data (.test_cache/)
that persists across test runs, avoiding re-downloads (~85MB of data)
- The cache directory is preserved in CI using GitHub Actions cache
- Test outputs still use temporary directories that are cleaned up after tests
- Session-scoped fixtures (shared_*_result) are available for sharing expensive
delineation results across multiple tests (for future optimization)

Cache Management:
- To clear the local cache: rm -rf .test_cache/
- CI cache key: test-hydrology-data-v2 (increment version to invalidate)
- Set TEST_CACHE_DIR environment variable to override cache location
"""

import os
Expand Down Expand Up @@ -31,6 +44,36 @@
)


def get_test_cache_dir() -> Path:
    """
    Return the persistent cache directory used for downloaded test data.

    Resolution order:

    1. The ``TEST_CACHE_DIR`` environment variable, when it is set to a
       non-empty value. A leading ``~`` is expanded to the user's home
       directory.
    2. Otherwise ``.test_cache`` in the directory one level above the
       directory containing this file.

    The directory is not created here; callers create it when they need it.
    """
    if override := os.environ.get("TEST_CACHE_DIR"):
        # Environment values are not shell-expanded, so "~/cache" would
        # otherwise be treated as a literal "~" directory relative to the
        # current working directory.
        return Path(override).expanduser()

    # Project-local default. Keep this in sync with the `.test_cache` entry
    # in .gitignore and the cache path used by the CI workflow.
    return Path(__file__).parent.parent / ".test_cache"


@pytest.fixture(scope="session")
def shared_cache_dir():
    """
    Provide the persistent download-cache directory for the test session.

    The location comes from ``get_test_cache_dir()``. Missing directories
    (including parents) are created; nothing is removed when the session
    ends, so files placed there remain available to later runs.
    """
    persistent_dir = get_test_cache_dir()
    # exist_ok=True: the directory is normally already there from a prior run.
    persistent_dir.mkdir(parents=True, exist_ok=True)
    return persistent_dir


@pytest.fixture(scope="session")
def temp_output_dir():
"""Create a temporary directory for test outputs."""
Expand Down Expand Up @@ -105,21 +148,33 @@ def disconnected_basins_csv(tmp_path):


@pytest.fixture
def default_config(shared_cache_dir, temp_output_dir):
    """
    Default configuration for tests - minimal output, no plots.

    Every feature flag is switched off. ``CACHE_DIR`` points at the shared
    persistent cache directory so downloaded data can be reused between
    tests, and ``OUTPUT_DIR`` points at the temporary output directory.
    A new dict is returned for each test that requests this fixture.
    """
    return {
        "VERBOSE": False,
        "WRITE_OUTPUT": False,
        "PLOTS": False,
        "CONSOLIDATE": False,
        "NETWORK_DIAGRAMS": False,
        "SIMPLIFY": False,
        "CACHE_DIR": str(shared_cache_dir),
        "OUTPUT_DIR": str(temp_output_dir),
    }


@pytest.fixture
def consolidate_config():
"""Configuration with consolidation enabled."""
def consolidate_config(shared_cache_dir, temp_output_dir):
"""
Configuration with consolidation enabled.

Uses the shared cache directory for downloaded data to avoid
re-downloading files for each test.
"""
return {
"VERBOSE": False,
"WRITE_OUTPUT": False,
Expand All @@ -128,4 +183,79 @@ def consolidate_config():
"MAX_AREA": 500,
"NETWORK_DIAGRAMS": False,
"SIMPLIFY": False,
"CACHE_DIR": str(shared_cache_dir),
"OUTPUT_DIR": str(temp_output_dir),
}


# Session-scoped CSV files for sharing delineation results
@pytest.fixture(scope="session")
def session_single_outlet_csv(tmp_path_factory):
    """
    Write a one-outlet CSV once per session and return its path as a string.

    The file is created as ``single_outlet.csv`` inside a fresh temporary
    directory obtained from ``tmp_path_factory``.
    """
    header = "id,lng,lat,name,outlet_id,gage_id,priority\n"
    outlet_row = "outlet1,-14.36201,65.50253,Lagarfljot River at Lagarfoss,outlet1,GAGE001,high\n"

    target = tmp_path_factory.mktemp("csv") / "single_outlet.csv"
    target.write_text(header + outlet_row)
    return str(target)


@pytest.fixture(scope="session")
def session_multi_subbasin_csv(tmp_path_factory):
    """
    Write a three-row outlet CSV once per session and return its path as a string.

    The rows are one ``main_outlet`` plus two upstream points whose
    ``outlet_id`` column refers back to ``main_outlet``. The file is created
    as ``multi_subbasin.csv`` inside a fresh temporary directory obtained
    from ``tmp_path_factory``.
    """
    lines = (
        "id,lng,lat,name,outlet_id,gage_id,priority\n",
        "main_outlet,-14.36201,65.50253,Lagarfljot River at Lagarfoss,main_outlet,GAGE001,high\n",
        "upstream1,-15.0883,64.9839,Jokulsa I River at Fljotsdal Holl,main_outlet,GAGE002,medium\n",
        "upstream2,-14.533,65.14,Gringa Dam,main_outlet,GAGE003,low\n",
    )

    target = tmp_path_factory.mktemp("csv") / "multi_subbasin.csv"
    target.write_text("".join(lines))
    return str(target)


@pytest.fixture(scope="session")
def session_default_config(shared_cache_dir, temp_output_dir):
    """
    Session-scoped configuration dict used by the shared delineation fixtures.

    All feature flags are ``False``; ``CACHE_DIR`` and ``OUTPUT_DIR`` are the
    string forms of the shared cache directory and the temporary output
    directory. The same dict object is handed to every consumer in the
    session, so consumers should treat it as read-only.
    """
    disabled_flags = (
        "VERBOSE",
        "WRITE_OUTPUT",
        "PLOTS",
        "CONSOLIDATE",
        "NETWORK_DIAGRAMS",
        "SIMPLIFY",
    )
    settings = dict.fromkeys(disabled_flags, False)
    settings["CACHE_DIR"] = str(shared_cache_dir)
    settings["OUTPUT_DIR"] = str(temp_output_dir)
    return settings


@pytest.fixture(scope="session")
def shared_multi_subbasin_result(session_multi_subbasin_csv, session_default_config):
    """
    Run the multi-subbasin delineation at session scope and share the result.

    Intended as a performance aid: tests that need the delineation of the
    multi-subbasin CSV under the default settings can request this fixture
    instead of each running the delineation themselves.

    The session configuration is first applied through ``config.set`` and
    then passed to ``delineate`` together with the CSV path. The value
    returned by ``delineate`` is returned unchanged; the original author
    describes it as a tuple of (Graph, subbasins_gdf, rivers_gdf).

    NOTE(review): CI invokes ``pytest -n auto``. Under pytest-xdist each
    worker process builds its own session-scoped fixtures, so this runs once
    per worker that requests it rather than once per whole run - confirm
    that is acceptable.
    NOTE(review): ``config.set`` is not undone afterwards - confirm later
    tests do not depend on the previous configuration.
    """
    # Deferred to call time; the original author notes this avoids circular imports.
    from upstream_delineator import config as delineator_config
    from upstream_delineator.delineator_utils.delineate import delineate as run_delineation

    delineator_config.set(session_default_config)
    outcome = run_delineation(
        session_multi_subbasin_csv,
        "shared_multi",
        session_default_config,
    )
    return outcome


@pytest.fixture(scope="session")
def shared_single_outlet_result(session_single_outlet_csv, session_default_config):
    """
    Run the single-outlet delineation at session scope and share the result.

    The session configuration is first applied through ``config.set`` and
    then passed to ``delineate`` together with the CSV path. The value
    returned by ``delineate`` is returned unchanged; the original author
    describes it as a tuple of (Graph, subbasins_gdf, rivers_gdf).

    NOTE(review): CI invokes ``pytest -n auto``. Under pytest-xdist each
    worker process builds its own session-scoped fixtures, so this runs once
    per worker that requests it rather than once per whole run - confirm
    that is acceptable.
    """
    # Deferred to call time, mirroring the other shared-result fixture.
    from upstream_delineator import config as delineator_config
    from upstream_delineator.delineator_utils.delineate import delineate as run_delineation

    delineator_config.set(session_default_config)
    outcome = run_delineation(
        session_single_outlet_csv,
        "shared_single",
        session_default_config,
    )
    return outcome
Loading
Loading