diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index c29683ba..a83be02d 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -11,6 +11,7 @@ on: - dev paths: - 'pyproject.toml' + - 'docs/**/*.qmd' - '**.py' - '.github/workflows/*.yml' - '.github/actions/**/*.yml' @@ -72,6 +73,11 @@ jobs: cache-number: ${{ env.CACHE_NUMBER }} prepare-test-data: "true" + - name: generate qmd docs tests + id: generate_qmd_tests + shell: bash -el {0} + run: python scripts/generate_doc_code_tests.py + # Runs test suite and calculates coverage - name: run test suite id: run_test_suite @@ -111,6 +117,6 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} - name: fail job after debug session if tests failed - if: steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' + if: steps.generate_qmd_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' shell: bash run: exit 1 diff --git a/.gitignore b/.gitignore index b963988f..697dbf2c 100644 --- a/.gitignore +++ b/.gitignore @@ -89,6 +89,7 @@ docs/**/*.ipynb scratch/** .ruff_cache uv.lock +tests/test_autogenerated_doccode/ docs/index_files docs/index.quarto_ipynb diff --git a/docs/contributing/testing.qmd b/docs/contributing/testing.qmd index 23a700f9..5fab6207 100644 --- a/docs/contributing/testing.qmd +++ b/docs/contributing/testing.qmd @@ -45,6 +45,20 @@ To run the docstring tests use the following: pytest dascore --doctest-modules ``` +To validate executable examples in the hand-written documentation without building the full site, generate the mirrored tests with: + +```bash +python scripts/generate_doc_code_tests.py +``` + +Then run the generated tests: + +```bash +pytest tests/test_autogenerated_doccode +``` + +The `tests/test_autogenerated_doccode` directory is intentionally gitignored and should be regenerated locally rather than committed. 
+ ## Writing Tests Tests should go into the `tests/` folder, which mirrors the structure of the main package. diff --git a/environment.yml b/environment.yml index 9a876c87..52f85af2 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - pytest + - pytest-timeout - numpy>=1.24 - pydantic>=2.1 - pip diff --git a/pyproject.toml b/pyproject.toml index ec42c644..006f4b7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -226,6 +226,7 @@ markers = [ "network: tests that require network-style filesystem access", "slow: tests skipped by default unless explicitly selected with -m slow", "benchmark: performance benchmark tests", + "docs_examples: autogenerated tests for executable qmd examples", ] [tool.ruff.format] diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py new file mode 100644 index 00000000..665b8e67 --- /dev/null +++ b/scripts/generate_doc_code_tests.py @@ -0,0 +1,477 @@ +"""Generate inline pytest modules from executable Python snippets in qmd files. + +This script mirrors hand-written documentation examples into +``tests/test_autogenerated_doccode`` so they run as part of the normal pytest +suite. + +How it works: + +1. Discover every source ``.qmd`` file under ``docs/``, excluding generated API + docs under ``docs/api/``. +2. Parse each qmd file and keep only executable Quarto ``{python ...}`` fences. +3. Respect document-level ``execute.eval: false`` and chunk-level + ``#| eval: false`` / ``#| execute: false`` switches. +4. Generate one pytest module per qmd file with a single ``test_main`` function. +5. Hoist any ``from __future__ import ...`` lines to module scope, then inline + the remaining Python blocks into the generated test in original source + order. +6. Insert a comment before each inlined block that points back to the qmd file + and the first executable line of that block. +7. 
Write one generated ``conftest.py`` that provides the common execution + context: run from the qmd directory, keep matplotlib non-interactive, and + close figures after the test. + +The generated tests are intentionally treated as build artifacts rather than +committed sources. Re-running this script rewrites the entire generated tree so +stale files disappear automatically. +""" + +from __future__ import annotations + +import argparse +import shlex +import shutil +import textwrap +from dataclasses import dataclass +from pathlib import Path +from textwrap import indent + +# The generator always runs from the repository checkout. +REPO_ROOT = Path(__file__).resolve().parent.parent +# Source qmd files live under docs/. +DOCS_PATH = REPO_ROOT / "docs" +# Generated pytest files are mirrored into a dedicated test tree. +TESTS_PATH = REPO_ROOT / "tests" / "test_autogenerated_doccode" +# API docs are generated elsewhere and should not be mirrored again here. +API_DOCS_PATH = DOCS_PATH / "api" +# Force stable cross-platform text IO for both reads and writes. +TEXT_ENCODING = "utf-8" + +# This template becomes one standalone pytest module per source qmd file. +AUTOGEN_HEADER = '''"""Autogenerated from {source_path}.""" + +{future_imports}import pytest +import builtins + +from tests.test_autogenerated_doccode.conftest import qmd_test_context + + +@pytest.mark.docs_examples +def test_main(): + """Execute the python chunks from the source QMD.""" + # Keep `print` in module-global scope so one chunk can rebind it and later + # chunks see the same shared runtime name before the per-chunk reset below. + global print + source_qmd = {source_path_literal} + with qmd_test_context(source_qmd): +{body} +''' + +# The generated conftest keeps reusable setup out of every generated module +# while still avoiding a bespoke runtime executor. 
+CONFTEST_MODULE = '''"""Shared helpers for autogenerated qmd tests.""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from pathlib import Path +import sys + +import matplotlib + + +@contextmanager +def qmd_test_context(source_qmd: str): + """Run one autogenerated qmd test in the source document directory.""" + root = Path(__file__).resolve().parents[2] + source_path = root / source_qmd + old = Path.cwd() + original_backend = matplotlib.get_backend() + # Configure plotting lazily so importing the generated conftest does not + # affect unrelated tests in the same process. + matplotlib.use("Agg", force=True) + import matplotlib.pyplot as plt + from matplotlib.figure import Figure + + original_show = plt.show + original_figure_show = Figure.show + was_interactive = plt.isinteractive() + try: + # Windows CI often defaults to cp1252, which cannot print some of the + # unicode characters used in DASCore's rich/text output. `reconfigure` + # mutates the existing stream in place, so we intentionally do not try + # to restore the previous encoding on exit. + if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(encoding="utf-8", errors="replace") + if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(encoding="utf-8", errors="replace") + plt.ioff() + plt.show = lambda *args, **kwargs: None + Figure.show = lambda self, *args, **kwargs: None + os.chdir(source_path.parent) + yield + finally: + os.chdir(old) + plt.show = original_show + Figure.show = original_figure_show + if was_interactive: + plt.ion() + else: + plt.ioff() + # Always close figures so one doc example cannot leak state to another. + plt.close("all") + if matplotlib.get_backend() != original_backend: + matplotlib.use(original_backend, force=True) +''' + + +@dataclass(frozen=True) +class Chunk: + """A chunk extracted from a qmd file.""" + + # First executable line inside the qmd code fence. 
+ start_line: int + # The literal Python source that will be compiled and executed. + source: str + + +@dataclass(frozen=True) +class QmdFile: + """The executable content extracted from a qmd file.""" + + # Source qmd path on disk. + path: Path + # All executable python chunks found in the document. + chunks: tuple[Chunk, ...] + + +def _parse_bool(value: str) -> bool | None: + """Parse a yaml-like bool.""" + # Front matter and chunk options use yaml-like booleans. + cleaned = value.strip().strip("'\"").lower() + if cleaned in {"true", "yes", "1"}: + return True + if cleaned in {"false", "no", "0"}: + return False + return None + + +def _split_front_matter(lines: list[str]) -> tuple[list[str], list[str]]: + """Split a qmd file into front matter and body lines.""" + # Only treat a leading --- block as front matter. + if not lines or lines[0].strip() != "---": + return [], lines + for index in range(1, len(lines)): + if lines[index].strip() == "---": + return lines[1:index], lines[index + 1 :] + return [], lines + + +def _parse_doc_eval(front_matter: list[str]) -> bool: + """Return the document-level eval flag.""" + # Quarto can disable execution for the entire document with: + # execute: + # eval: false + in_execute = False + execute_indent: int | None = None + for line in front_matter: + stripped = line.strip() + # Ignore blank lines and front-matter comments. + if not stripped or stripped.startswith("#"): + continue + indent_size = len(line) - len(line.lstrip(" ")) + if stripped.startswith("execute:"): + # Track when we are inside the execute: subsection. 
+ in_execute = True + execute_indent = indent_size + remainder = stripped.partition(":")[2].strip() + if remainder: + parsed = _parse_bool(remainder) + if parsed is not None: + return parsed + continue + if in_execute and execute_indent is not None and indent_size <= execute_indent: + in_execute = False + if in_execute and ":" in stripped: + key, _, value = stripped.partition(":") + if key.strip() == "eval": + parsed = _parse_bool(value) + if parsed is not None: + return parsed + return True + + +def _parse_fence_header(spec: str) -> tuple[str | None, dict[str, str]]: + """Parse a Quarto fence header into language and key/value options.""" + # The raw fence spec arrives like "{python}" or + # "{python filename="example.py"}". + cleaned = spec.strip() + if not cleaned.startswith("{") or not cleaned.endswith("}"): + return None, {} + inner = cleaned[1:-1].strip() + if not inner: + return None, {} + # Accept either comma-delimited or space-delimited Quarto options while + # preserving quoted values like filename="example file.py". + tokens = shlex.split(inner.replace(",", " ")) + if not tokens: + return None, {} + language = tokens[0].strip() + options: dict[str, str] = {} + for token in tokens[1:]: + if "=" not in token: + continue + key, _, value = token.partition("=") + options[key.strip()] = value.strip() + return language, options + + +def _is_python_fence(spec: str) -> bool: + """Return True if the fence should execute as python.""" + language, options = _parse_fence_header(spec) + # Plain markdown-style {.python} fences are not Quarto executable cells. 
+ if language is None or language.startswith("."): + return False + for key in ("eval", "execute"): + parsed = _parse_bool(options.get(key, "")) + if parsed is False: + return False + return language == "python" + + +def _chunk_is_executable(chunk_lines: list[str]) -> bool: + """Return True if a chunk should be executed.""" + # Chunk-local `#| eval: false` or `#| execute: false` overrides the + # document-level default. + for line in chunk_lines: + stripped = line.strip() + if not stripped.startswith("#|") or ":" not in stripped: + continue + option = stripped[2:].strip() + key, _, value = option.partition(":") + parsed = _parse_bool(value) + if parsed is None: + continue + if key.strip() in {"eval", "execute"} and parsed is False: + return False + return True + + +def extract_qmd_file(path: Path) -> QmdFile: + """Extract executable python chunks from a qmd file.""" + # Read with explicit UTF-8 so Windows does not fall back to cp1252. + lines = path.read_text(encoding=TEXT_ENCODING).splitlines() + front_matter, body = _split_front_matter(lines) + # Skip the whole document if execution is disabled in front matter. + if not _parse_doc_eval(front_matter): + return QmdFile(path=path, chunks=()) + + chunks: list[Chunk] = [] + in_chunk = False + chunk_lines: list[str] = [] + chunk_start = 0 + should_capture = False + + # `body` excludes front matter, but we still want source comments to point + # back to original qmd line numbers. + body_start_line = len(front_matter) + 1 if front_matter else 0 + for offset, line in enumerate(body, start=body_start_line + 1): + stripped = line.strip() + # Opening fence: decide whether this cell is executable python. + if not in_chunk and stripped.startswith("```{"): + in_chunk = True + should_capture = _is_python_fence(stripped[3:]) + chunk_lines = [] + # Record the first executable line inside the fence, not the fence + # header itself, so the generated source comment points at code. 
+ chunk_start = offset + 1 + continue + # Closing fence: finalize the captured chunk if it should execute. + if in_chunk and stripped == "```": + if should_capture and _chunk_is_executable(chunk_lines): + # Preserve the chunk as a single executable unit so variables + # defined earlier in the block remain available later. + source = textwrap.dedent("\n".join(chunk_lines)).strip() + if source: + chunks.append(Chunk(start_line=chunk_start, source=source + "\n")) + in_chunk = False + should_capture = False + chunk_lines = [] + continue + # Any non-fence line inside the current chunk is part of the source. + if in_chunk: + chunk_lines.append(line) + return QmdFile(path=path, chunks=tuple(chunks)) + + +def iter_source_qmd_files(base_path: Path = DOCS_PATH) -> list[Path]: + """Return source qmd files to mirror into tests.""" + # Mirror every qmd under docs/, except generated API docs. + return [ + path + for path in sorted(base_path.rglob("*.qmd")) + if API_DOCS_PATH not in path.parents and path != API_DOCS_PATH + ] + + +def get_output_path( + source_path: Path, + tests_path: Path = TESTS_PATH, + docs_path: Path = DOCS_PATH, +) -> Path: + """Map a qmd file to its generated pytest module.""" + # Keep the generated tree shaped like docs/, but swap the filename to + # `test_.py` so pytest discovers it naturally. 
+ relative = source_path.relative_to(docs_path) + filename = f"test_{source_path.stem}.py" + if len(relative.parts) == 1: + return tests_path / filename + return tests_path.joinpath(*relative.parts[:-1], filename) + + +def _render_chunk_comment(source_path: str, start_line: int) -> str: + """Render the source-location comment for one inlined chunk.""" + return f"### {source_path}:{start_line}" + + +def _split_future_imports(source: str) -> tuple[list[str], list[str]]: + """Split future imports from the rest of one chunk.""" + future_imports: list[str] = [] + body_lines: list[str] = [] + for line in source.splitlines(): + stripped = line.strip() + if stripped.startswith("from __future__ import "): + future_imports.append(stripped) + continue + body_lines.append(line) + return future_imports, body_lines + + +def _collect_future_imports( + chunks: tuple[Chunk, ...], +) -> tuple[tuple[str, ...], tuple[Chunk, ...]]: + """Hoist and deduplicate future imports across all chunks.""" + seen: set[str] = set() + future_imports: list[str] = [] + cleaned_chunks: list[Chunk] = [] + for chunk in chunks: + chunk_future_imports, body_lines = _split_future_imports(chunk.source) + for future_import in chunk_future_imports: + if future_import not in seen: + seen.add(future_import) + future_imports.append(future_import) + body_source = "\n".join(body_lines).strip() + if body_source: + cleaned_chunks.append( + Chunk(start_line=chunk.start_line, source=body_source + "\n") + ) + return tuple(future_imports), tuple(cleaned_chunks) + + +def _render_chunk_source(source_path: str, chunk: Chunk) -> str: + """Render one chunk as plain inline test code.""" + # Keep the qmd source location visible right above the pasted code block. + chunk_block = ( + f"{_render_chunk_comment(source_path, chunk.start_line)}\n" + f"{chunk.source.rstrip()}\n\n" + # Reset `print` after each chunk so one example's reassignment does not + # leak into later chunks in the same generated test module. 
+ "print = builtins.print" + ) + # Indent so the emitted code lives inside `with qmd_test_context(...):`. + return indent(chunk_block.rstrip(), " " * 8) + + +def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: + """Render a generated pytest module.""" + # Normalize to forward slashes so generated modules are stable across + # platforms and Windows paths do not introduce escape sequences. + relative_source_path = source_path.relative_to(REPO_ROOT).as_posix() + # Future imports must appear at module scope, so hoist them before inlining + # the remaining chunk bodies into the test function. + future_imports, cleaned_chunks = _collect_future_imports(chunks) + future_import_block = "" + if future_imports: + future_import_block = "\n".join(future_imports) + "\n\n" + # Emit chunks directly into the test body so the generated file is readable + # without reconstructing execution from a serialized tuple payload. + body = "\n\n".join( + _render_chunk_source(relative_source_path, chunk) for chunk in cleaned_chunks + ) + if not body: + body = " pass" + return AUTOGEN_HEADER.format( + source_path=relative_source_path, + future_imports=future_import_block, + source_path_literal=repr(relative_source_path), + body=body, + ) + + +def write_test_tree( + qmd_files: list[QmdFile], + tests_path: Path = TESTS_PATH, +) -> list[Path]: + """Rewrite the generated qmd test tree.""" + # Regeneration is full-rewrite rather than incremental so stale tests + # disappear automatically when docs are removed or stop containing code. + if tests_path.exists(): + try: + shutil.rmtree(tests_path) + except OSError as e: + print( # noqa: T201 + f"Failed to remove generated test tree at {tests_path}: {e}" + ) + raise + # Seed the package root and generated conftest first. 
+ tests_path.mkdir(parents=True) + (tests_path / "__init__.py").write_text("", encoding=TEXT_ENCODING) + conftest_path = tests_path / "conftest.py" + conftest_path.write_text(CONFTEST_MODULE, encoding=TEXT_ENCODING) + + written = {tests_path / "__init__.py", conftest_path} + for qmd_file in qmd_files: + # Skip documents that contain no executable python cells. + if not qmd_file.chunks: + continue + output_path = get_output_path(qmd_file.path, tests_path=tests_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + current = output_path.parent + # Ensure nested generated tests remain importable as pytest packages. + while current != tests_path: + init_path = current / "__init__.py" + if not init_path.exists(): + init_path.write_text("", encoding=TEXT_ENCODING) + written.add(init_path) + current = current.parent + # Write the generated pytest module for this one source document. + output_path.write_text( + render_test_module(qmd_file.path, qmd_file.chunks), + encoding=TEXT_ENCODING, + ) + written.add(output_path) + return sorted(written) + + +def generate_doc_code_tests() -> list[Path]: + """Generate the qmd-backed test tree.""" + # Extract all source docs first, then mirror only the executable ones. + qmd_files = [extract_qmd_file(path) for path in iter_source_qmd_files()] + return write_test_tree(qmd_files) + + +def main() -> int: + """Generate qmd-backed tests from docs sources.""" + # The CLI is intentionally simple today, but argparse gives us a stable + # entrypoint if we need options later. 
+ parser = argparse.ArgumentParser(description=__doc__) + parser.parse_args() + written = generate_doc_code_tests() + rel_path = TESTS_PATH.relative_to(REPO_ROOT) + print(f"Generated {len(written)} files in {rel_path}") # noqa: T201 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/test_generate_doc_code_tests.py b/scripts/test_generate_doc_code_tests.py new file mode 100644 index 00000000..c7a772d5 --- /dev/null +++ b/scripts/test_generate_doc_code_tests.py @@ -0,0 +1,286 @@ +"""Tests for generating qmd-backed pytest modules.""" + +from __future__ import annotations + +from pathlib import Path + +from generate_doc_code_tests import ( + DOCS_PATH, + REPO_ROOT, + TEXT_ENCODING, + Chunk, + QmdFile, + extract_qmd_file, + get_output_path, + render_test_module, + write_test_tree, +) + + +def _write(path: Path, text: str) -> Path: + # Tests create tiny synthetic qmd files instead of touching real docs. + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding=TEXT_ENCODING) + return path + + +class TestExtractQmdFile: + """Tests for extracting executable qmd chunks.""" + + def test_skips_plain_python_fences(self, tmp_path): + """Only executable quarto python fences should be included.""" + # `{python}` should execute, while `{.python}` is just a syntax fence. + path = _write( + tmp_path / "example.qmd", + """--- +title: Example +--- + +```{python} +import dascore as dc +``` + +```{.python} +print("not executed") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=5, source="import dascore as dc\n"),) + + def test_skips_doc_with_eval_false(self, tmp_path): + """Document-level eval false should skip all chunks.""" + # Front-matter execution settings should disable every chunk below. 
+ path = _write( + tmp_path / "example.qmd", + """--- +execute: + eval: false +--- + +```{python} +print("nope") +``` +""", + ) + assert extract_qmd_file(path).chunks == () + + def test_skips_doc_with_inline_execute_false(self, tmp_path): + """Inline document-level execute false should skip all chunks.""" + path = _write( + tmp_path / "example.qmd", + """--- +execute: false +--- + +```{python} +print("nope") +``` +""", + ) + assert extract_qmd_file(path).chunks == () + + def test_extracts_python_fence_with_space_delimited_options(self, tmp_path): + """Quarto python fences can include filename options after a space.""" + # Quarto allows options after a space, not just after commas. + path = _write( + tmp_path / "example.qmd", + """```{python filename="example.py"} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + + def test_skips_python_fence_with_inline_eval_false(self, tmp_path): + """Inline fence options should be able to disable execution.""" + path = _write( + tmp_path / "example.qmd", + """```{python, eval=false} +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=6, source='print("run")\n'),) + + def test_skips_python_fence_with_inline_execute_zero(self, tmp_path): + """Space-delimited inline execute flags should also be respected.""" + path = _write( + tmp_path / "example.qmd", + """```{python execute=0} +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=6, source='print("run")\n'),) + + def test_reads_qmd_with_utf8_encoding(self, tmp_path, monkeypatch): + """Extraction should force UTF-8 instead of platform default encodings.""" + path = _write(tmp_path / "example.qmd", "placeholder") + called: dict[str, str | None] = {} + original = Path.read_text + + def _read_text(self, *args, **kwargs): + # Intercept the 
read call so we can assert the requested encoding. + if self == path: + called["encoding"] = kwargs.get("encoding") + return """```{python} +print("run") +```""" + return original(self, *args, **kwargs) + + monkeypatch.setattr(Path, "read_text", _read_text) + + out = extract_qmd_file(path) + + assert called["encoding"] == TEXT_ENCODING + assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + + def test_dedents_indented_python_fence(self, tmp_path): + """Indented fenced code should be normalized before storage.""" + path = _write( + tmp_path / "example.qmd", + """```{python} + if True: + print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == ( + Chunk(start_line=2, source='if True:\n print("run")\n'), + ) + + def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): + """Chunk-level execution flags should be honored.""" + # Chunk-local options should override the document default. + path = _write( + tmp_path / "example.qmd", + """```{python} +#| eval: false +print("skip") +``` + +```{python} +#| execute: false +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=12, source='print("run")\n'),) + + +class TestOutputPaths: + """Tests for source-to-test path mapping.""" + + def test_root_doc_maps_to_root_test(self): + """Top-level docs should stay at the top of generated tests.""" + # docs/index.qmd becomes tests/test_autogenerated_doccode/test_index.py + source = DOCS_PATH / "index.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + assert get_output_path(source, tests_path=tests) == tests / "test_index.py" + + def test_nested_doc_maps_to_nested_test(self): + """Nested docs should keep their relative directory structure.""" + # Nested docs should preserve their package-like layout. 
+ source = DOCS_PATH / "tutorial" / "file_io.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + expected = tests / "tutorial" / "test_file_io.py" + assert get_output_path(source, tests_path=tests) == expected + + def test_custom_docs_root_can_be_supplied(self): + """Mapping should support callers with a non-default docs root.""" + docs_root = Path("/repo/custom_docs") + source = docs_root / "guide" / "example.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + expected = tests / "guide" / "test_example.py" + assert ( + get_output_path(source, tests_path=tests, docs_path=docs_root) == expected + ) + + +class TestRenderAndWrite: + """Tests for generated module output.""" + + def test_render_includes_source_and_chunk_payload(self): + """Generated modules should inline source code with source comments.""" + # The generated file should contain enough literal data to run by itself. + source = REPO_ROOT / "docs" / "tutorial" / "example.qmd" + module = render_test_module(source, (Chunk(start_line=12, source="x = 1\n"),)) + assert "Autogenerated from docs/tutorial/example.qmd" in module + assert "@pytest.mark.docs_examples" in module + assert "def test_main()" in module + assert "source_qmd = 'docs/tutorial/example.qmd'" in module + assert "with qmd_test_context(source_qmd):" in module + assert "### docs/tutorial/example.qmd:12" in module + assert "x = 1" in module + assert "CHUNKS =" not in module + assert "SOURCE_QMD =" not in module + assert "_runtime" not in module + + def test_render_hoists_future_imports(self): + """Future imports should move to module scope.""" + source = REPO_ROOT / "docs" / "tutorial" / "example.qmd" + module = render_test_module( + source, + ( + Chunk( + start_line=12, + source="from __future__ import annotations\nx = 1\n", + ), + ), + ) + assert "from __future__ import annotations\n\nimport pytest" in module + assert "# docs/tutorial/example.qmd:12" in module + assert " x = 1" in module + assert " from __future__ import 
annotations" not in module + + def test_write_tree_removes_stale_files(self, tmp_path): + """Regeneration should replace the whole generated test tree.""" + # Full regeneration should remove old outputs before writing new ones. + tests_path = tmp_path / "tests" / "test_autogenerated_doccode" + stale = tests_path / "obsolete.py" + stale.parent.mkdir(parents=True) + stale.write_text("old") + + source = DOCS_PATH / "tutorial" / "file_io.qmd" + chunks = (Chunk(start_line=1, source="print(1)\n"),) + qmd_file = QmdFile(path=source, chunks=chunks) + + written = write_test_tree([qmd_file], tests_path=tests_path) + output = tests_path / "tutorial" / "test_file_io.py" + + assert output in written + assert output.exists() + assert not stale.exists() + assert (tests_path / "conftest.py").exists() + + def test_write_tree_skips_docs_without_executable_chunks(self, tmp_path): + """Docs with no executable chunks should not emit per-doc test modules.""" + tests_path = tmp_path / "tests" / "test_autogenerated_doccode" + source = DOCS_PATH / "tutorial" / "no_code.qmd" + qmd_file = QmdFile(path=source, chunks=()) + + written = write_test_tree([qmd_file], tests_path=tests_path) + output = tests_path / "tutorial" / "test_no_code.py" + + assert (tests_path / "__init__.py") in written + assert (tests_path / "conftest.py") in written + assert output not in written + assert not output.exists() diff --git a/tests/test_io/_common_io_test_utils.py b/tests/test_io/_common_io_test_utils.py index 4e1ee69a..7e512d6f 100644 --- a/tests/test_io/_common_io_test_utils.py +++ b/tests/test_io/_common_io_test_utils.py @@ -1,8 +1,4 @@ -"""Shared helpers for common IO test matrices. - -This module includes timeout guards used by localhost-backed remote tests so -fixture or IO hangs fail quickly instead of stalling the suite. 
-""" +"""Shared helpers for common IO test matrices.""" from __future__ import annotations @@ -72,7 +68,7 @@ def get_representative_io_test( @contextmanager def fail_on_timeout(seconds: float, label: str): - """Fail the current test if the enclosed block exceeds the timeout.""" + """Fail fast around fixture lifecycle work when it exceeds a time budget.""" if ( threading.current_thread() is not threading.main_thread() or not hasattr(signal_mod, "SIGALRM") @@ -84,7 +80,7 @@ def fail_on_timeout(seconds: float, label: str): previous_handler = signal_mod.getsignal(signal_mod.SIGALRM) - def _handle_timeout(signum, frame): + def _handle_timeout(_signum, _frame): raise TimeoutError(f"{label} exceeded {seconds} seconds") try: diff --git a/tests/test_io/conftest.py b/tests/test_io/conftest.py index e9db6ce2..d4e34052 100644 --- a/tests/test_io/conftest.py +++ b/tests/test_io/conftest.py @@ -14,7 +14,8 @@ import time from collections.abc import Callable from functools import partial -from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer +from http import HTTPStatus +from http.server import HTTPServer, SimpleHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path from urllib.error import URLError from urllib.request import urlopen @@ -41,6 +42,127 @@ def copyfile(self, source, outputfile): return None +class _RegressionHTTPRequestHandler(_SilentSimpleHTTPRequestHandler): + """A stricter localhost handler for flaky plain-HTTP regression tests.""" + + protocol_version = "HTTP/1.0" + + def end_headers(self): + """Disable keep-alive so one test request cannot leak into the next.""" + self.send_header("Connection", "close") + super().end_headers() + + +class _RangeHTTPRequestHandler(_SilentSimpleHTTPRequestHandler): + """A simple localhost handler with explicit single-range support.""" + + protocol_version = "HTTP/1.0" + + def handle(self): + """Serve exactly one request per connection, then close cleanly.""" + self.close_connection = True + 
self.handle_one_request() + + def end_headers(self): + """Disable keep-alive so each ranged request stands alone.""" + self.send_header("Connection", "close") + super().end_headers() + + def send_head(self): + """Serve files and honor one RFC 7233 byte range when requested.""" + path = self.translate_path(self.path) + if os.path.isdir(path): + return super().send_head() + if path.endswith("/") or not os.path.isfile(path): + self.send_error(HTTPStatus.NOT_FOUND, "File not found") + return None + + try: + file_handle = open(path, "rb") + except OSError: + self.send_error(HTTPStatus.NOT_FOUND, "File not found") + return None + + try: + stat_result = os.fstat(file_handle.fileno()) + size = stat_result.st_size + range_header = self.headers.get("Range") + start = 0 + end = size - 1 + status = HTTPStatus.OK + + if range_header: + start, end = self._parse_range_header(range_header, size) + if start is None: + self.send_response(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + self.send_header("Content-Range", f"bytes */{size}") + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Length", "0") + self.end_headers() + file_handle.close() + return None + status = HTTPStatus.PARTIAL_CONTENT + + self.send_response(status) + self.send_header("Content-type", self.guess_type(path)) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Length", str(end - start + 1)) + self.send_header( + "Last-Modified", self.date_time_string(stat_result.st_mtime) + ) + if status == HTTPStatus.PARTIAL_CONTENT: + self.send_header("Content-Range", f"bytes {start}-{end}/{size}") + self.end_headers() + self._range = (start, end) + file_handle.seek(start) + return file_handle + except Exception: + file_handle.close() + raise + + def copyfile(self, source, outputfile): + """Copy only the selected range when one was requested.""" + byte_range = getattr(self, "_range", None) + if byte_range is None: + return super().copyfile(source, outputfile) + start, end = byte_range 
+ remaining = end - start + 1 + try: + while remaining > 0: + chunk = source.read(min(64 * 1024, remaining)) + if not chunk: + break + outputfile.write(chunk) + remaining -= len(chunk) + except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError): + return None + finally: + self._range = None + + @staticmethod + def _parse_range_header(header: str, size: int) -> tuple[int | None, int | None]: + """Parse a single bytes range, returning `(None, None)` when invalid.""" + if not header.startswith("bytes="): + return (None, None) + spec = header[len("bytes=") :].strip() + if "," in spec or "-" not in spec: + return (None, None) + start_str, end_str = spec.split("-", maxsplit=1) + if not start_str: + if not end_str: + return (None, None) + length = int(end_str) + if length <= 0: + return (None, None) + start = max(size - length, 0) + return (start, size - 1) + start = int(start_str) + end = size - 1 if not end_str else int(end_str) + if start < 0 or end < start or start >= size: + return (None, None) + return (start, min(end, size - 1)) + + def _link_or_copy(source: Path, dest: Path) -> None: """Populate one served file path using the cheapest available local copy.""" dest.parent.mkdir(parents=True, exist_ok=True) @@ -175,11 +297,10 @@ def _ensure(fetch_name: str, relative_path: str | Path | None = None) -> Path: def http_regression_das_path(http_regression_data_root, ensure_http_regression_file): """Return an isolated HTTP tree containing only the regression fixtures.""" handler = partial( - _SilentSimpleHTTPRequestHandler, + _RegressionHTTPRequestHandler, directory=str(http_regression_data_root), ) - server = ThreadingHTTPServer(("127.0.0.1", 0), handler) - server.daemon_threads = True + server = HTTPServer(("127.0.0.1", 0), handler) thread = threading.Thread(target=server.serve_forever, daemon=True) thread.start() probe_path = "example_dasdae_event_1.h5" @@ -208,46 +329,12 @@ def http_regression_das_path(http_regression_data_root, ensure_http_regression_f 
@pytest.fixture(scope="session") def http_range_das_path(http_test_data_root, ensure_http_fetch_file): """Return a UPath pointing at a localhost HTTP server with range support.""" - uvicorn = pytest.importorskip("uvicorn") - starlette_cls = pytest.importorskip("starlette.applications").Starlette - responses = pytest.importorskip("starlette.responses") - file_response_cls = responses.FileResponse - response_cls = responses.Response - route_cls = pytest.importorskip("starlette.routing").Route - served_root = Path(http_test_data_root) - - async def _serve_file(request): - rel_path = Path(request.path_params["path"]) - file_path = served_root / rel_path - root_path = os.path.abspath(served_root) - candidate_path = os.path.abspath(file_path) - if os.path.commonpath([root_path, candidate_path]) != root_path: - return response_cls(status_code=404) - if not file_path.exists() or not file_path.is_file(): - return response_cls(status_code=404) - return file_response_cls(file_path) - - app = starlette_cls(routes=[route_cls("/{path:path}", _serve_file)]) - config = uvicorn.Config( - app, - host="127.0.0.1", - port=0, - log_level="warning", - ws="none", - # Avoid indefinite teardown hangs if a client leaves a keep-alive - # connection open when the fixture shuts the server down. 
def test_expected_version(self, remote_get_format_case):
    """Each IO should identify its own remote test fixture."""
    fiber_io, path = remote_get_format_case
    with skip_missing():
        detected = dc.get_format(path)
    assert detected == (fiber_io.name, fiber_io.version)

def test_read_returns_spools(self, remote_read_case):
    """Each remotely supported file should read into a spool."""
    _io, path = remote_read_case
    with skip_missing():
        spool = dc.read(path)
    assert isinstance(spool, dc.BaseSpool)
    assert len(spool) > 0
    for patch in spool:
        assert isinstance(patch, dc.Patch)

def test_scan_has_source_metadata(self, remote_scan_case):
    """Public scans of remote files should retain source metadata."""
    _io, path = remote_scan_case
    with skip_missing():
        summaries = dc.scan(path)
    assert len(summaries) > 0
    assert all(str(item.source_path) == str(path) for item in summaries)
len(cached_files) == 1 + assert cached_files[0].exists() + + spool = dc.read(path) + assert spool cached_files_2 = list(get_remote_cache_path().rglob(fname)) assert len(cached_files_2) == 1 assert cached_files_2[0].exists() - assert dc.read(path) + assert cached_files_2 == cached_files + + spool_2 = dc.read(path) + assert spool_2 cached_files_3 = list(get_remote_cache_path().rglob(fname)) assert cached_files_3 == cached_files_2