diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 84d0e17..9a1bf0b 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -20,9 +20,10 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v5 - - run: pip install . - - run: pip install pylint - - run: pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') + - run: | + pip install . + pip install pylint + pylint --unsafe-load-any-extension=y --disable=fixme $(git ls-files '*.py') precommit: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ddf124..c49176f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,10 +13,11 @@ repos: - id: end-of-file-fixer - id: debug-statements -# - repo: https://github.com/christopher-hacker/enforce-notebook-run-order -# rev: 2.1.1 -# hooks: -# - id: enforce-notebook-run-order + - repo: https://github.com/christopher-hacker/enforce-notebook-run-order + rev: 2.1.1 + hooks: + - id: enforce-notebook-run-order + exclude: tests/examples/bad.ipynb - repo: local hooks: @@ -29,9 +30,9 @@ repos: types: [jupyter] exclude: tests/examples/bad.ipynb - - id: check-badges - name: check badges - entry: check_badges + - id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: python -m hooks.check_notebook_open_atmos_structure additional_dependencies: - nbformat - pytest diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 29786dc..00b401f 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -6,10 +6,10 @@ stages: [pre-commit] types: [jupyter] -- id: check-badges - name: check badges - description: check badges in Jupyter Notebook - entry: check_badges +- id: check-notebook-open-atmos-structure + name: check notebook has open-atmos structure + entry: check_notebook_open_atmos_structure + description: check notebook has open-atmos structure language: python stages: [pre-commit] types: [jupyter] diff --git a/hooks/check_badges.py b/hooks/check_notebook_open_atmos_structure.py similarity index 72% rename from hooks/check_badges.py rename to hooks/check_notebook_open_atmos_structure.py index 7cc398c..22d2cba 100755 --- a/hooks/check_badges.py +++ b/hooks/check_notebook_open_atmos_structure.py @@ -1,22 +1,6 @@ #!/usr/bin/env python3 -# pylint: disable=missing-function-docstring """ -Checks/repairs notebook badge headers. - -This version optionally uses Git to discover the repository root (to build -repo-relative notebook URLs) and uses Python's logging module instead of print() -for structured messages. - -Behavior: -- By default it will attempt to detect the git repo root (if GitPython is - installed and the file is in a git working tree) and fall back to Path.cwd(). -- Use --no-git to force using Path.cwd(). -- Use --repo-root PATH to explicitly set repository root. -- Use --verbose to enable debug logging. - -Usage: - check_badges --repo-name=devops_tests [--repo-owner=open-atmos] [--fix-header] - [--no-git] [--repo-root PATH] [--verbose] FILES... +Checks notebooks structure required in open-atmos projects. """ from __future__ import annotations @@ -30,6 +14,7 @@ import nbformat from nbformat import NotebookNode +from .open_atmos_colab_header import check_colab_header from .utils import NotebookTestError REPO_OWNER_DEFAULT = "open-atmos" @@ -38,8 +23,38 @@ logger = logging.getLogger(__name__) +def resolve_repo_root( + start_path: Path, + *, + explicit_root: Path | None, + prefer_git: bool, +) -> Path: + """Resolve the repository root for the given path.""" + if explicit_root is not None: + return explicit_root + if prefer_git: + try: + # Import locally so the module doesn't hard-depend on GitPython at import time + from git import Repo # pylint: disable=import-outside-toplevel + + try: + repo = Repo(start_path, search_parent_directories=True) + if repo.working_tree_dir: + root = Path(repo.working_tree_dir) + logger.debug("Discovered git repository root: %s", root) + return root + except Exception as exc: # pylint: disable=broad-exception-caught + logger.debug("Git repo detection failed for %s: %s", start_path, exc) + except ImportError as exc: + logger.debug("GitPython not available or import failed: %s", exc) + + cwd = Path.cwd() + logger.debug("Using current working directory as repo root: %s", cwd) + return cwd + + def relative_path(absolute_path, repo_root): - """returns a path relative to the repo base (converting backslashes to slashes on Windows)""" + """Return the path relative to the repository root.""" absolute_path = Path(absolute_path).resolve() repo_root = Path(repo_root).resolve() @@ -54,6 +69,7 @@ def relative_path(absolute_path, repo_root): def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the GitHub-preview badge.""" svg_badge_url = ( "https://img.shields.io/static/v1?" + "label=render%20on&logo=github&color=87ce3e&message=GitHub" @@ -63,6 +79,7 @@ def preview_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Binder badge.""" svg_badge_url = "https://mybinder.org/badge_logo.svg" link = ( f"https://mybinder.org/v2/gh/{repo_owner}/{repo_name}.git/main?urlpath=lab/tree/" @@ -72,6 +89,7 @@ def mybinder_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> st def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: + """Create markdown for the Colab badge.""" svg_badge_url = "https://colab.research.google.com/assets/colab-badge.svg" link = ( f"https://colab.research.google.com/github/{repo_owner}/{repo_name}/blob/main/" @@ -80,65 +98,34 @@ def colab_badge_markdown(relpath: str, repo_name: str, repo_owner: str) -> str: return f"[![launch on Colab]({svg_badge_url})]({link})" -def find_repo_root(start_path: Path, prefer_git: bool = True) -> Path: - """ - Find repository root for the given start_path. - - If prefer_git is True, attempt to use GitPython to locate the repository root - (searching parent directories). If that fails, fall back to cwd(). - """ - if prefer_git: - try: - # Import locally so the module doesn't hard-depend on GitPython at import time - from git import Repo # pylint: disable=import-outside-toplevel - - try: - repo = Repo(start_path, search_parent_directories=True) - if repo.working_tree_dir: - root = Path(repo.working_tree_dir) - logger.debug("Discovered git repository root: %s", root) - return root - except Exception as exc: # pylint: disable=broad-exception-caught - logger.debug("Git repo detection failed for %s: %s", start_path, exc) - except ImportError as exc: - logger.debug("GitPython not available or import failed: %s", exc) - - cwd = Path.cwd() - logger.debug("Using current working directory as repo root: %s", cwd) - return cwd - - def expected_badges_for( notebook_path: Path, repo_name: str, repo_owner: str, - repo_root: Optional[Path] = None, + repo_root: Optional[Path], ) -> List[str]: """ Return the canonical badge lines expected for notebook_path. If repo_root is provided, attempt to build a relative path from it; otherwise find repository root automatically (using find_repo_root). """ - if repo_root is None: - repo_root = find_repo_root(notebook_path) - - if repo_root is None: - raise ValueError("Could not determine repo root") - relpath = relative_path(notebook_path, repo_root) + args = (relpath, repo_name, repo_owner) return [ - preview_badge_markdown(relpath, repo_name, repo_owner), - mybinder_badge_markdown(relpath, repo_name, repo_owner), - colab_badge_markdown(relpath, repo_name, repo_owner), + preview_badge_markdown(*args), + mybinder_badge_markdown(*args), + colab_badge_markdown(*args), ] def read_notebook(path: Path) -> NotebookNode: + """Read a Jupyter notebook without format conversion.""" with path.open(encoding="utf8") as fp: return nbformat.read(fp, nbformat.NO_CONVERT) def write_notebook(path: Path, nb: NotebookNode) -> None: + """Write a Jupyter notebook to disk.""" with path.open("w", encoding="utf8") as fp: nbformat.write(nb, fp) @@ -209,53 +196,67 @@ def test_second_cell_is_a_markdown_cell(notebook_filename: str) -> None: raise ValueError("Second cell is not a markdown cell") -def main(argv: Sequence[str] | None = None) -> int: - parser = argparse.ArgumentParser() - parser.add_argument("--repo-name", required=True) - parser.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) - parser.add_argument( +def build_parser() -> argparse.ArgumentParser: + """Build parser for command line arguments.""" + p = argparse.ArgumentParser() + p.add_argument("--repo-name", required=True) + p.add_argument("--repo-owner", default=REPO_OWNER_DEFAULT) + p.add_argument( "--fix-header", action="store_true", help="If set, attempt to fix notebooks missing the header.", ) - parser.add_argument( + p.add_argument( "--no-git", action="store_true", help="Do not attempt to detect git repo root; use cwd()", ) - parser.add_argument( + p.add_argument( "--repo-root", help="Explicit repository root to use when building URLs" ) - parser.add_argument("--verbose", action="store_true", help="Enable debug logging") - parser.add_argument("filenames", nargs="*", help="Filenames to check.") - args = parser.parse_args(argv) + p.add_argument("--verbose", action="store_true", help="Enable debug logging") + p.add_argument("filenames", nargs="*", help="Filenames to check.") + return p - # configure logging - level = logging.DEBUG if args.verbose else logging.INFO + +def configure_logging(verbose: bool) -> None: + """Configure logging with --verbose flag""" + level = logging.DEBUG if verbose else logging.INFO logging.basicConfig(level=level, format="%(levelname)s: %(message)s") + +def main(argv: Sequence[str] | None = None) -> int: + """Test notebook structure: + - first cell with 3 correct badges, + - second cell is of type markdown (best if with notebook description), + - third cell is a Colab magick cell. + """ + args = build_parser().parse_args(argv) + configure_logging(args.verbose) + explicit_repo_root = Path(args.repo_root) if args.repo_root else None prefer_git = not args.no_git - repo_root_path: Optional[Path] = Path(args.repo_root) if args.repo_root else None - retval = 0 + + failed = False for filename in args.filenames: path = Path(filename) try: - effective_repo_root = repo_root_path or ( - find_repo_root(path, prefer_git) if prefer_git else Path.cwd() + repo_root = resolve_repo_root( + path, + explicit_root=explicit_repo_root, + prefer_git=prefer_git, ) - test_notebook_has_at_least_three_cells(filename) test_first_cell_contains_three_badges( - filename, args.repo_name, args.repo_owner, effective_repo_root + filename, args.repo_name, args.repo_owner, repo_root ) test_second_cell_is_a_markdown_cell(filename) + check_colab_header(path, args.repo_name, args.fix_header, "") + logger.info("%s: OK", path) - logger.info("%s: OK", filename) - retval = retval or 0 except NotebookTestError as exc: - logger.error("%s: %s", filename, exc) - retval = 1 - return retval + logger.error("%s: %s", path, exc) + failed = True + return int(failed) if __name__ == "__main__": diff --git a/hooks/check_notebooks.py b/hooks/check_notebooks.py index c23648b..22078d1 100755 --- a/hooks/check_notebooks.py +++ b/hooks/check_notebooks.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 -# pylint: disable=missing-module-docstring +""" +Checks notebook execution status for Jupyter notebooks""" + from __future__ import annotations from collections.abc import Sequence diff --git a/hooks/open_atmos_colab_header.py b/hooks/open_atmos_colab_header.py new file mode 100644 index 0000000..c77923f --- /dev/null +++ b/hooks/open_atmos_colab_header.py @@ -0,0 +1,115 @@ +"""Extract version from existing header and check if header is correct""" + +from __future__ import annotations + +import re +import nbformat + +_PIP_INSTALL_RE = re.compile( + r"pip_install_on_colab\(\s*" + r"['\"](?P[^'\"]+)['\"]\s*,\s*" + r"['\"](?P
[^'\"]+)['\"]\s*\)" +) + + +def extract_versions(cell_source: str, repo_name: str): + """ + Extract version info from cell source + Returns: + (examples_version, main_version) or (None, None) if invalid. + """ + text_found = _PIP_INSTALL_RE.search(cell_source) + if not text_found: + return None, None + + examples_pkg = text_found.group("examples") + main_pkg = text_found.group("main") + + if not main_pkg.startswith(repo_name) or not examples_pkg.startswith( + f"{repo_name}-examples" + ): + return None, None + return examples_pkg[len(f"{repo_name}-examples") :], main_pkg[len(repo_name) :] + + +def resolve_version(existing: str | None, hook_version: str | None) -> str: + """ + Precedence: + 1. Version in notebook + 2. Hook version + 3. No version + """ + if existing: + return existing + if hook_version: + return hook_version + return "" + + +def build_header(repo_name: str, version: str) -> str: + """required header pattern in open-atmos notebooks""" + return f"""import os, sys +os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS +if 'google.colab' in sys.modules: + !pip --quiet install open-atmos-jupyter-utils + from open_atmos_jupyter_utils import pip_install_on_colab + pip_install_on_colab('{repo_name}-examples{version}', '{repo_name}{version}')""" + + +HEADER_REQUIRED_PATTERNS = [ + "google.colab", + "open-atmos-jupyter-utils", + "pip_install_on_colab", +] + + +def looks_like_header(cell_source: str) -> bool: + """check if the cell source looks like required header""" + return all(pat in cell_source for pat in HEADER_REQUIRED_PATTERNS) + + +def check_colab_header(notebook_path, repo_name, fix, hook_version): + """check if colab header is correct""" + nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT) + header_index = None + + for idx, cell in enumerate(nb.cells): + if cell.cell_type == "code" and looks_like_header(cell.source): + header_index = idx + break + + if header_index is None: + final_version = resolve_version(None, hook_version) + header_source = build_header(repo_name, final_version) + nb.cells.insert(2, nbformat.v4.new_code_cell(header_source)) + nbformat.write(nb, notebook_path) + return True + + header_cell = nb.cells[header_index] + examples_version, main_version = extract_versions(header_cell.source, repo_name) + + if examples_version is None or main_version is None: + raise ValueError("Colab header is malformed") + + if examples_version != main_version: + raise ValueError( + f"Version mismatch in header: {examples_version!r} != {main_version!r}" + ) + + final_version = resolve_version(main_version, hook_version) + correct_header = build_header(repo_name, final_version) + + modified = False + if header_cell.source != correct_header: + if not fix: + raise ValueError("Colab header is incorrect") + header_cell.source = correct_header + modified = True + + if header_index != 2: + nb.cells.insert(2, nb.cells.pop(header_index)) + modified = True + + if modified: + nbformat.write(nb, notebook_path) + return modified diff --git a/pyproject.toml b/pyproject.toml index 7fea13a..d25de5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,6 @@ dynamic = ['version'] [project.scripts] check_notebooks = "hooks.check_notebooks:main" -check_badges = "hooks.check_badges:main" +check_notebook_open_atmos_structure = "hooks.check_notebook_open_atmos_structure:main" notebooks_output = "hooks.notebooks_output:main" notebooks_using_jupyter_utils = "hooks.notebooks_using_jupyter_utils:main" diff --git a/tests/examples/good.ipynb b/tests/examples/good.ipynb index 329f58c..c43ec47 100644 --- a/tests/examples/good.ipynb +++ b/tests/examples/good.ipynb @@ -1,32 +1,29 @@ { "cells": [ { - "cell_type": "markdown", - "id": "18b1047b", "metadata": {}, + "cell_type": "markdown", "source": [ "[![preview notebook](https://img.shields.io/static/v1?label=render%20on&logo=github&color=87ce3e&message=GitHub)](https://github.com/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)\n", "[![launch on mybinder.org](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/open-atmos/devops_tests.git/main?urlpath=lab/tree/tests/examples/good.ipynb)\n", "[![launch on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/open-atmos/devops_tests/blob/main/tests/examples/good.ipynb)" - ] + ], + "id": "7896d6a8e70874de" }, { - "cell_type": "markdown", - "id": "59bad8ebb84ac4ce", "metadata": {}, - "source": [ - "Notebook description" - ] + "cell_type": "markdown", + "source": "Notebook description", + "id": "dd6f026f26ee6758" }, { - "cell_type": "code", - "id": "cc05fb4d", "metadata": { "ExecuteTime": { - "end_time": "2026-02-06T19:44:32.687512Z", - "start_time": "2026-02-06T19:44:32.681345Z" + "end_time": "2026-02-07T18:01:46.587131Z", + "start_time": "2026-02-07T18:01:46.580954Z" } }, + "cell_type": "code", "source": [ "import os, sys\n", "os.environ['NUMBA_THREADING_LAYER'] = 'workqueue' # PySDM & PyMPDATA don't work with TBB; OpenMP has extra dependencies on macOS\n", @@ -35,40 +32,29 @@ " from open_atmos_jupyter_utils import pip_install_on_colab\n", " pip_install_on_colab('devops_tests-examples', 'devops_tests')" ], + "id": "62a42069512ea38a", "outputs": [], "execution_count": 1 }, { - "cell_type": "code", - "id": "405d8abe602ec659", "metadata": { "ExecuteTime": { - "end_time": "2026-02-06T19:44:32.693544Z", - "start_time": "2026-02-06T19:44:32.692046Z" + "end_time": "2026-02-07T18:01:46.630852Z", + "start_time": "2026-02-07T18:01:46.628713Z" } }, - "source": [], + "cell_type": "code", + "source": "", + "id": "5115215c3656a30b", "outputs": [], "execution_count": null } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "name": "python3", "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" + "display_name": "Python 3 (ipykernel)" } }, "nbformat": 4, diff --git a/tests/test_check_badges_examples.py b/tests/test_check_notebook_open_atmos_structure.py similarity index 90% rename from tests/test_check_badges_examples.py rename to tests/test_check_notebook_open_atmos_structure.py index 494f41a..ef3c8f1 100644 --- a/tests/test_check_badges_examples.py +++ b/tests/test_check_notebook_open_atmos_structure.py @@ -1,17 +1,18 @@ # pylint: disable=missing-function-docstring """ -Unit tests for hooks/check_badges.py: good and bad notebook examples. +Unit tests for hooks/check_notebook_open_atmos_structure.py: good and bad notebook examples. These tests write small notebooks to temporary files and call the badge-check functions that expect filenames. """ +import pathlib import nbformat from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell import pytest -from hooks import check_badges as cb +from hooks import check_notebook_open_atmos_structure as cb def _write_nb_and_return_path(tmp_path, nb, name="nb.ipynb"): @@ -70,7 +71,9 @@ def test_first_cell_bad_badges_raises(tmp_path): ) path = _write_nb_and_return_path(tmp_path, nb, name="badbadges.ipynb") with pytest.raises(ValueError): - cb.test_first_cell_contains_three_badges(path, "devops_tests") + cb.test_first_cell_contains_three_badges( + path, "devops_tests", repo_root=pathlib.Path("") + ) def test_second_cell_not_markdown_raises(tmp_path): diff --git a/tests/test_run_hooks_on_examples.py b/tests/test_run_hooks_on_examples.py index 6372c74..82cefe1 100644 --- a/tests/test_run_hooks_on_examples.py +++ b/tests/test_run_hooks_on_examples.py @@ -1,17 +1,5 @@ # pylint: disable=missing-function-docstring -""" -Run the hooks on good and bad example notebooks and assert behavior + logs. - -This test executes the hook modules the same way pre-commit would: - python -m hooks.check_badges --repo-name=... PATH - python -m hooks.check_notebooks PATH - -It captures stdout/stderr (which is where logging.basicConfig writes), asserts -expected exit codes, and checks stderr for expected failure substrings. - -Adjust the candidate paths in _find_example() if your examples are stored elsewhere. -""" from __future__ import annotations @@ -42,44 +30,6 @@ def _run_module(module: str, args: list[str]) -> subprocess.CompletedProcess: return subprocess.run(cmd, capture_output=True, text=True, check=False) -@pytest.mark.parametrize( - "name, expected_badge_exit, expected_badge_msg", - [ - ("good.ipynb", 0, ""), - ("bad.ipynb", 1, "Missing badges"), - ], -) -def test_check_badges_on_examples( - name: str, expected_badge_exit: int, expected_badge_msg: str -): - nb_path = _find_example(name) - if nb_path is None: - pytest.skip(f"No example notebook found for {name};") - - res = _run_module("hooks.check_badges", ["--repo-name=devops_tests", str(nb_path)]) - combined_msgs = (res.stderr or "") + "\n" + (res.stdout or "") - - # Check exit code first - if expected_badge_exit == 0: - assert ( - res.returncode == 0 - ), f"Expected success; stderr:\n{res.stderr}\nstdout:\n{res.stdout}" - # For success, ensure no obvious error strings are present - assert "Missing badges" not in combined_msgs - assert "ERROR" not in combined_msgs - assert "Traceback" not in combined_msgs - else: - assert ( - res.returncode != 0 - ), f"Expected failure; got exit {res.returncode}\nstdout:\n{res.stdout}" - - assert expected_badge_msg in combined_msgs, ( - f"Expected to find {expected_badge_msg!r} in output" - f"\n---STDERR---\n{res.stderr}" - f"\n---STDOUT---\n{res.stdout}" - ) - - @pytest.mark.parametrize( "name, should_fail, expected_msg_substr", [