diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..e07eb21 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,44 @@ +name: CI Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + name: Python ${{ matrix.python-version }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install pytest pytest-cov nbconvert ipykernel + + - name: Run tests (offline only) + run: | + pytest tests/ -v --cov=xarray_subset_grid --cov-report=xml --cov-report=term-missing + # Note: online tests (network notebooks + AWS) require --online flag. + # They are skipped here intentionally to keep CI fast and reliable. + - name: Upload coverage report + uses: codecov/codecov-action@v4 + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12' + with: + files: ./coverage.xml + fail_ci_if_error: false \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..d2a0cee --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,34 @@ +API Reference +============= + +Accessor +-------- + +.. automodule:: xarray_subset_grid.accessor + :members: + :undoc-members: + :show-inheritance: + +Grid (Base Class) +----------------- + +.. automodule:: xarray_subset_grid.grid + :members: + :undoc-members: + :show-inheritance: + +Selector +-------- + +.. automodule:: xarray_subset_grid.selector + :members: + :undoc-members: + :show-inheritance: + +Utilities +--------- + +.. automodule:: xarray_subset_grid.utils + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index ab1af31..eb3e394 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,5 +15,6 @@ Contents: installation design grids + api notebooks contributing diff --git a/pixi.toml b/pixi.toml index f885e36..04c60dd 100644 --- a/pixi.toml +++ b/pixi.toml @@ -49,6 +49,8 @@ setuptools_scm = "*" python-build = "*" sphinx-autodoc-typehints = "*" myst-nb = "*" +ipykernel = "*" +nbconvert = "*" [feature.dev.tasks] lint = "ruff check tests xarray_subset_grid" diff --git a/pyproject.toml b/pyproject.toml index 444f409..76484dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,8 @@ dependencies = [ ] optional-dependencies.dev = [ + "ipykernel", + "nbconvert", "pre-commit", "pytest", "pytest-cov", diff --git a/tests/test_accessor.py b/tests/test_accessor.py new file mode 100644 index 0000000..fd161ce --- /dev/null +++ b/tests/test_accessor.py @@ -0,0 +1,158 @@ +""" +Tests for the GridDatasetAccessor (xsg accessor). + +These tests verify that the accessor is correctly exposed on xr.Dataset +objects and that all its methods behave correctly for both recognized +and unrecognized grid datasets. +""" +import numpy as np +import pytest +import xarray as xr +from pathlib import Path + +EXAMPLE_DATA = Path(__file__).parent / "example_data" + + +# ------------------------------------------------------------------ # +# Fixtures +# ------------------------------------------------------------------ # + +@pytest.fixture +def rgrid_ds(): + """Load a real regular-grid NetCDF file for accessor tests.""" + path = EXAMPLE_DATA / "AMSEAS-subset.nc" + return xr.open_dataset(path) + + +@pytest.fixture +def unknown_ds(): + """A minimal dataset that no grid implementation will recognize.""" + return xr.Dataset({"foo": (["x"], [1.0, 2.0, 3.0])}) + + +# ------------------------------------------------------------------ # +# Accessor existence tests +# ------------------------------------------------------------------ # + +class TestAccessorPresence: + def test_xsg_accessor_is_attached(self, rgrid_ds): + """The xsg accessor must be available on any xr.Dataset.""" + assert hasattr(rgrid_ds, "xsg") + + def test_xsg_accessor_on_unknown_ds(self, unknown_ds): + """The accessor should still be attached even if no grid is recognised.""" + assert hasattr(unknown_ds, "xsg") + + +# ------------------------------------------------------------------ # +# Grid recognition tests +# ------------------------------------------------------------------ # + +class TestGridRecognition: + def test_known_grid_is_not_none(self, rgrid_ds): + """A CF-compliant regular-grid dataset should be recognised.""" + assert rgrid_ds.xsg.grid is not None + + def test_unknown_grid_is_none(self, unknown_ds): + """An unstructured / unknown dataset should return None for grid.""" + assert unknown_ds.xsg.grid is None + + +# ------------------------------------------------------------------ # +# Property tests (including the data_vars bug regression) +# ------------------------------------------------------------------ # + +class TestAccessorProperties: + def test_grid_vars_returns_set(self, rgrid_ds): + gvars = rgrid_ds.xsg.grid_vars + assert isinstance(gvars, set) + + def test_data_vars_returns_set(self, rgrid_ds): + dvars = rgrid_ds.xsg.data_vars + assert isinstance(dvars, set) + + def test_data_vars_no_grid_does_not_raise(self, unknown_ds): + """ + Regression test for the data_vars bug. + Before the fix, this raised AttributeError because self._ds was + checked instead of self._grid, causing None.data_vars() to be called. + """ + result = unknown_ds.xsg.data_vars + assert result == set() + + def test_grid_vars_no_grid_returns_empty(self, unknown_ds): + assert unknown_ds.xsg.grid_vars == set() + + def test_extra_vars_no_grid_returns_empty(self, unknown_ds): + assert unknown_ds.xsg.extra_vars == set() + + def test_has_vertical_levels_returns_bool(self, rgrid_ds): + result = rgrid_ds.xsg.has_vertical_levels + assert isinstance(result, bool) + + def test_has_vertical_levels_false_on_unknown(self, unknown_ds): + assert unknown_ds.xsg.has_vertical_levels is False + + +# ------------------------------------------------------------------ # +# Subsetting tests +# ------------------------------------------------------------------ # + +class TestSubsetting: + def test_subset_bbox_returns_dataset(self, rgrid_ds): + """subset_bbox should return an xr.Dataset for a recognised grid.""" + # Build bbox dynamically from actual coordinate range + lats = rgrid_ds.cf["latitude"].values + lons = rgrid_ds.cf["longitude"].values + lat_min, lat_max = float(lats.min()), float(lats.max()) + lon_min, lon_max = float(lons.min()), float(lons.max()) + # Use the centre quarter of the domain + lat_mid = (lat_min + lat_max) / 2 + lon_mid = (lon_min + lon_max) / 2 + bbox = (lon_min, lat_min, lon_mid, lat_mid) + ds_sub = rgrid_ds.xsg.subset_bbox(bbox) + assert ds_sub is not None + assert isinstance(ds_sub, xr.Dataset) + + def test_subset_polygon_returns_dataset(self, rgrid_ds): + """subset_polygon should return an xr.Dataset for a recognised grid.""" + lats = rgrid_ds.cf["latitude"].values + lons = rgrid_ds.cf["longitude"].values + lat_min, lat_max = float(lats.min()), float(lats.max()) + lon_min, lon_max = float(lons.min()), float(lons.max()) + lat_mid = (lat_min + lat_max) / 2 + lon_mid = (lon_min + lon_max) / 2 + poly = np.array([ + [lon_min, lat_min], + [lon_mid, lat_min], + [lon_mid, lat_mid], + [lon_min, lat_mid], + [lon_min, lat_min], + ]) + ds_sub = rgrid_ds.xsg.subset_polygon(poly) + assert ds_sub is not None + assert isinstance(ds_sub, xr.Dataset) + + def test_subset_bbox_no_grid_returns_none(self, unknown_ds): + result = unknown_ds.xsg.subset_bbox((-80, 30, -70, 40)) + assert result is None + + def test_subset_polygon_no_grid_returns_none(self, unknown_ds): + poly = np.array([[-80, 30], [-70, 30], [-70, 40], [-80, 40], [-80, 30]]) + result = unknown_ds.xsg.subset_polygon(poly) + assert result is None + + def test_subset_vars_keeps_grid_vars(self, rgrid_ds): + """Subsetting to a variable should always retain grid variables.""" + data_vars = list(rgrid_ds.xsg.data_vars) + if not data_vars: + pytest.skip("No data vars found in this dataset") + ds_sub = rgrid_ds.xsg.subset_vars([data_vars[0]]) + grid_vars = rgrid_ds.xsg.grid_vars + for gvar in grid_vars: + assert gvar in ds_sub + + def test_subset_surface_level_no_vertical(self, unknown_ds): + """Subsetting surface level on dataset with no verticals returns dataset unchanged.""" + result = unknown_ds.xsg.subset_surface_level(method="nearest") + assert result is unknown_ds \ No newline at end of file diff --git a/tests/test_grids/test_sgrid.py b/tests/test_grids/test_sgrid.py index dce7379..cb969ed 100644 --- a/tests/test_grids/test_sgrid.py +++ b/tests/test_grids/test_sgrid.py @@ -1,5 +1,5 @@ from pathlib import Path - +import zarr import numpy as np import pytest import xarray as xr @@ -52,7 +52,7 @@ def test_grid_topology_location_parse(): @pytest.mark.skipif( - zarr__version__ >= 3, reason="zarr3.0.8 doesn't support FSpec AWS (it might soon)" + int(zarr.__version__.split(".")[0]) >= 3, reason="zarr3.0.8 doesn't support FSpec AWS (it might soon)" ) @pytest.mark.online def test_polygon_subset(): diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py new file mode 100644 index 0000000..ca84f5d --- /dev/null +++ b/tests/test_notebooks.py @@ -0,0 +1,137 @@ +""" +Tests that auto-run the example notebooks using nbconvert. + +This addresses issue #106 — we needed a way to automatically run all +example notebooks and detect breakage in CI. + +Approach (as suggested in the issue): + - A Python script discovers all .ipynb files in docs/examples/ + - Each notebook is executed via nbconvert in a subprocess + - Notebooks requiring network access (OPeNDAP, THREDDS, S3) are marked + with @pytest.mark.online and are skipped unless --online is passed, + matching the existing pattern used in the rest of the test suite. + - Notebooks using only local example_data run in the default offline suite. + +Usage: + # Run offline notebooks only (default): + pytest tests/test_notebooks.py -v + + # Run all notebooks including network ones: + pytest tests/test_notebooks.py -v --online +""" + +import subprocess +import sys +import tempfile +from pathlib import Path + +import pytest + +# Root of the examples directory +EXAMPLES_DIR = Path(__file__).parent.parent / "docs" / "examples" + +# Notebooks that use ONLY local data from docs/examples/example_data/ +# These run in the default (offline) test suite. +OFFLINE_NOTEBOOKS = [ + # No notebooks currently use only local data. + # Add notebook filenames here when local-only examples are created. + # e.g. "my_local_example.ipynb" +] + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _all_notebooks(): + """Return all .ipynb files in docs/examples/, sorted.""" + return sorted(EXAMPLES_DIR.glob("*.ipynb")) + + +def _online_notebooks(): + """Return notebooks that require network access.""" + offline = set(OFFLINE_NOTEBOOKS) + return [nb for nb in _all_notebooks() if nb.name not in offline] + + +def _offline_notebooks(): + """Return notebooks that use only local data.""" + return [ + EXAMPLES_DIR / name + for name in OFFLINE_NOTEBOOKS + if (EXAMPLES_DIR / name).exists() + ] + + +def _run_notebook(notebook_path: Path) -> tuple[bool, str, str]: + """Execute a notebook via nbconvert and return (success, stdout, stderr). + + The notebook is executed in a temporary directory so no output files + are left behind in the repository. + The subprocess runs with the notebook's own directory as cwd so that + relative paths inside the notebook (e.g. to example_data/) resolve + correctly. + """ + with tempfile.TemporaryDirectory() as tmpdir: + result = subprocess.run( + [ + sys.executable, "-m", "nbconvert", + "--to", "notebook", + "--execute", + "--ExecutePreprocessor.timeout=300", + "--output-dir", tmpdir, + str(notebook_path), + ], + capture_output=True, + text=True, + cwd=str(notebook_path.parent), # resolve relative paths correctly + ) + return result.returncode == 0, result.stdout, result.stderr + + +# --------------------------------------------------------------------------- +# Offline notebook tests (no --online flag required) +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "notebook", + _offline_notebooks(), + ids=[nb.name for nb in _offline_notebooks()], +) +def test_notebook_offline(notebook): + """Execute notebooks that use only local data. + + These run as part of the default test suite so CI catches regressions + without needing network access. + """ + success, stdout, stderr = _run_notebook(notebook) + if not success: + pytest.fail( + f"Notebook '{notebook.name}' failed to execute.\n" + f"--- stdout ---\n{stdout}\n" + f"--- stderr ---\n{stderr}" + ) + + +# --------------------------------------------------------------------------- +# Online notebook tests (requires --online flag) +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "notebook", + _online_notebooks(), + ids=[nb.name for nb in _online_notebooks()], +) +@pytest.mark.online +def test_notebook_online(notebook): + """Execute notebooks that require network access (OPeNDAP, THREDDS, S3). + + Skipped by default. Run with: + pytest tests/test_notebooks.py --online + """ + success, stdout, stderr = _run_notebook(notebook) + if not success: + pytest.fail( + f"Notebook '{notebook.name}' failed to execute.\n" + f"--- stdout ---\n{stdout}\n" + f"--- stderr ---\n{stderr}" + ) \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 6b64a2c..b899e19 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,6 @@ import os +import xarray as xr import numpy as np import pytest @@ -125,3 +126,56 @@ def test_ray_tracing_numpy(): result = ray_tracing_numpy(points[:, 0], points[:, 1], poly) assert np.array_equal(result, [False, True, False]) + +# ---------- compute_2d_subset_mask ---------- + +from xarray_subset_grid.utils import compute_2d_subset_mask + + +def test_compute_2d_subset_mask_basic(): + """compute_2d_subset_mask should mark points inside the polygon as True.""" + lat_vals = np.linspace(30, 40, 20) + lon_vals = np.linspace(-80, -70, 20) + lon2d, lat2d = np.meshgrid(lon_vals, lat_vals) + + lat = xr.DataArray(lat2d, dims=["y", "x"]) + lon = xr.DataArray(lon2d, dims=["y", "x"]) + + # Polygon covering the centre of the domain + poly = np.array([ + [-76, 33], + [-74, 33], + [-74, 37], + [-76, 37], + [-76, 33], + ]) + + mask = compute_2d_subset_mask(lat, lon, poly) + + assert mask.shape == lat2d.shape + # At least some points should be inside + assert mask.values.any() + # Not all points should be inside + assert not mask.values.all() + + +def test_compute_2d_subset_mask_empty_poly(): + """A polygon completely outside the domain should mask everything out.""" + lat_vals = np.linspace(30, 40, 10) + lon_vals = np.linspace(-80, -70, 10) + lon2d, lat2d = np.meshgrid(lon_vals, lat_vals) + + lat = xr.DataArray(lat2d, dims=["y", "x"]) + lon = xr.DataArray(lon2d, dims=["y", "x"]) + + # Polygon far away from the data + poly = np.array([ + [0, 0], + [1, 0], + [1, 1], + [0, 1], + [0, 0], + ]) + + mask = compute_2d_subset_mask(lat, lon, poly) + assert not mask.values.any() diff --git a/xarray_subset_grid/accessor.py b/xarray_subset_grid/accessor.py index 7a4ca7a..0405114 100644 --- a/xarray_subset_grid/accessor.py +++ b/xarray_subset_grid/accessor.py @@ -78,7 +78,7 @@ def data_vars(self) -> set[str]: data analysis. These can be discarded when subsetting the dataset when they are not needed. """ - if self._ds: + if self._grid: return self._grid.data_vars(self._ds) return set()