From d96d3c38e971766981accc1d56446770060f2459 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 07:15:43 +0200 Subject: [PATCH 01/13] add qmd doctester --- .github/workflows/runtests.yml | 19 +- .gitignore | 1 + docs/contributing/testing.qmd | 3 + pyproject.toml | 1 + .../generate_autogenerated_doccode_tests.py | 280 ++++++++++++++++++ ...st_generate_autogenerated_doccode_tests.py | 132 +++++++++ 6 files changed, 433 insertions(+), 3 deletions(-) create mode 100644 scripts/generate_autogenerated_doccode_tests.py create mode 100644 scripts/test_generate_autogenerated_doccode_tests.py diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index c29683ba..4894b66d 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -11,6 +11,7 @@ on: - dev paths: - 'pyproject.toml' + - 'docs/**/*.qmd' - '**.py' - '.github/workflows/*.yml' - '.github/actions/**/*.yml' @@ -72,6 +73,18 @@ jobs: cache-number: ${{ env.CACHE_NUMBER }} prepare-test-data: "true" + - name: regenerate qmd-backed docs tests + id: generate_qmd_tests + continue-on-error: ${{ env.debug_enabled == 'true' }} + shell: bash -el {0} + run: python scripts/generate_autogenerated_doccode_tests.py + + - name: run qmd docs tests + id: run_qmd_docs_tests + continue-on-error: ${{ env.debug_enabled == 'true' }} + shell: bash -el {0} + run: pytest tests/test_autogenerated_doccode + # Runs test suite and calculates coverage - name: run test suite id: run_test_suite @@ -87,7 +100,7 @@ jobs: run: ./.github/test_code.sh doctest - name: note SSH key requirement for debug access - if: env.debug_enabled == 'true' && (steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') shell: bash run: | message="Debug access 
is limited to the workflow actor's GitHub SSH keys. Connect from a machine that has the matching private key loaded." @@ -95,7 +108,7 @@ jobs: echo "$message" >> "$GITHUB_STEP_SUMMARY" - name: Setup tmate session - if: env.debug_enabled == 'true' && (steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') uses: mxschmitt/action-tmate@v3 timeout-minutes: 30 with: @@ -111,6 +124,6 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} - name: fail job after debug session if tests failed - if: steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' + if: steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' shell: bash run: exit 1 diff --git a/.gitignore b/.gitignore index b963988f..697dbf2c 100644 --- a/.gitignore +++ b/.gitignore @@ -89,6 +89,7 @@ docs/**/*.ipynb scratch/** .ruff_cache uv.lock +tests/test_autogenerated_doccode/ docs/index_files docs/index.quarto_ipynb diff --git a/docs/contributing/testing.qmd b/docs/contributing/testing.qmd index 23a700f9..c9fb5c99 100644 --- a/docs/contributing/testing.qmd +++ b/docs/contributing/testing.qmd @@ -45,6 +45,9 @@ To run the docstring tests use the following: pytest dascore --doctest-modules ``` +To validate executable examples in the hand-written documentation without building the full site, generate the mirrored tests with `python scripts/generate_autogenerated_doccode_tests.py` and then run `pytest tests/test_autogenerated_doccode`. +The `tests/test_autogenerated_doccode` directory is intentionally gitignored and should be regenerated locally rather than committed. 
+ ## Writing Tests Tests should go into the `tests/` folder, which mirrors the structure of the main package. diff --git a/pyproject.toml b/pyproject.toml index d9afe2a2..de5571bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -225,6 +225,7 @@ markers = [ "network: tests that require network-style filesystem access", "slow: tests skipped by default unless explicitly selected with -m slow", "benchmark: performance benchmark tests", + "docs_examples: autogenerated tests for executable qmd examples", ] [tool.ruff.format] diff --git a/scripts/generate_autogenerated_doccode_tests.py b/scripts/generate_autogenerated_doccode_tests.py new file mode 100644 index 00000000..4db58dc9 --- /dev/null +++ b/scripts/generate_autogenerated_doccode_tests.py @@ -0,0 +1,280 @@ +"""Generate pytest modules from executable python snippets in qmd files.""" + +from __future__ import annotations + +import argparse +import shutil +from dataclasses import dataclass +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS_PATH = REPO_ROOT / "docs" +TESTS_PATH = REPO_ROOT / "tests" / "test_autogenerated_doccode" +API_DOCS_PATH = DOCS_PATH / "api" + +AUTOGEN_HEADER = """\"\"\"Autogenerated from {source_path}.\"\"\" + +from __future__ import annotations + +import pytest + +from tests.test_autogenerated_doccode._runtime import run_qmd_chunks + +SOURCE_QMD = {source_path_literal} +CHUNKS = {chunks_literal} + + +@pytest.mark.docs_examples +def test_qmd_examples(): + \"\"\"Execute the python chunks from the source QMD.\"\"\" + run_qmd_chunks(SOURCE_QMD, CHUNKS) +""" + +RUNTIME_MODULE = '''"""Helpers for generated qmd tests.""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from pathlib import Path + +import matplotlib + +matplotlib.use("Agg", force=True) +import matplotlib.pyplot as plt +from matplotlib.figure import Figure + +plt.ioff() +plt.show = lambda *args, **kwargs: None +Figure.show = lambda self, *args, **kwargs: 
None + + +@contextmanager +def _pushd(path: Path): + """Temporarily change the working directory.""" + old = Path.cwd() + os.chdir(path) + try: + yield + finally: + os.chdir(old) + + +def run_qmd_chunks(source_qmd: str, chunks: tuple[tuple[int, str], ...]) -> None: + """Execute the chunks for a qmd-backed generated test.""" + root = Path(__file__).resolve().parents[2] + source_path = root / source_qmd + namespace = {"__name__": "__qmd_test__"} + try: + with _pushd(source_path.parent): + for start_line, source in chunks: + code = compile(source, f"{source_qmd}:{start_line}", "exec") + exec(code, namespace, namespace) + finally: + plt.close("all") +''' + + +@dataclass(frozen=True) +class Chunk: + """A chunk extracted from a qmd file.""" + + start_line: int + source: str + + +@dataclass(frozen=True) +class QmdFile: + """The executable content extracted from a qmd file.""" + + path: Path + chunks: tuple[Chunk, ...] + + +def _parse_bool(value: str) -> bool | None: + """Parse a yaml-like bool.""" + cleaned = value.strip().strip("'\"").lower() + if cleaned == "true": + return True + if cleaned == "false": + return False + return None + + +def _split_front_matter(lines: list[str]) -> tuple[list[str], list[str]]: + """Split a qmd file into front matter and body lines.""" + if not lines or lines[0].strip() != "---": + return [], lines + for index in range(1, len(lines)): + if lines[index].strip() == "---": + return lines[1:index], lines[index + 1 :] + return [], lines + + +def _parse_doc_eval(front_matter: list[str]) -> bool: + """Return the document-level eval flag.""" + in_execute = False + execute_indent = 0 + for line in front_matter: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + indent = len(line) - len(line.lstrip(" ")) + if stripped.startswith("execute:"): + in_execute = True + execute_indent = indent + remainder = stripped.partition(":")[2].strip() + if remainder: + parsed = _parse_bool(remainder) + if parsed is not None: + 
return parsed + continue + if in_execute and indent <= execute_indent: + in_execute = False + if in_execute and ":" in stripped: + key, _, value = stripped.partition(":") + if key.strip() == "eval": + parsed = _parse_bool(value) + if parsed is not None: + return parsed + return True + + +def _is_python_fence(spec: str) -> bool: + """Return True if the fence should execute as python.""" + cleaned = spec.strip() + if not cleaned.startswith("{") or cleaned.startswith("{.python"): + return False + inner = cleaned[1:-1].strip() + if not inner: + return False + language = inner.split(",", 1)[0].strip() + return language == "python" + + +def _chunk_is_executable(chunk_lines: list[str]) -> bool: + """Return True if a chunk should be executed.""" + for line in chunk_lines: + stripped = line.strip() + if not stripped.startswith("#|") or ":" not in stripped: + continue + option = stripped[2:].strip() + key, _, value = option.partition(":") + parsed = _parse_bool(value) + if parsed is None: + continue + if key.strip() in {"eval", "execute"} and parsed is False: + return False + return True + + +def extract_qmd_file(path: Path) -> QmdFile: + """Extract executable python chunks from a qmd file.""" + lines = path.read_text().splitlines() + front_matter, body = _split_front_matter(lines) + if not _parse_doc_eval(front_matter): + return QmdFile(path=path, chunks=()) + + chunks: list[Chunk] = [] + in_chunk = False + chunk_lines: list[str] = [] + chunk_start = 0 + should_capture = False + + body_start_line = len(front_matter) + 1 if front_matter else 0 + for offset, line in enumerate(body, start=body_start_line + 1): + stripped = line.strip() + if not in_chunk and stripped.startswith("```{"): + in_chunk = True + should_capture = _is_python_fence(stripped[3:]) + chunk_lines = [] + chunk_start = offset + 1 + continue + if in_chunk and stripped == "```": + if should_capture and _chunk_is_executable(chunk_lines): + source = "\n".join(chunk_lines).strip() + if source: + 
chunks.append(Chunk(start_line=chunk_start, source=source + "\n")) + in_chunk = False + should_capture = False + chunk_lines = [] + continue + if in_chunk: + chunk_lines.append(line) + return QmdFile(path=path, chunks=tuple(chunks)) + + +def iter_source_qmd_files(base_path: Path = DOCS_PATH) -> list[Path]: + """Return source qmd files to mirror into tests.""" + return [ + path + for path in sorted(base_path.rglob("*.qmd")) + if API_DOCS_PATH not in path.parents and path != API_DOCS_PATH + ] + + +def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: + """Map a qmd file to its generated pytest module.""" + relative = source_path.relative_to(DOCS_PATH) + filename = f"test_{source_path.stem}.py" + if len(relative.parts) == 1: + return tests_path / filename + return tests_path.joinpath(*relative.parts[:-1], filename) + + +def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: + """Render a generated pytest module.""" + chunk_payload = tuple((chunk.start_line, chunk.source) for chunk in chunks) + return AUTOGEN_HEADER.format( + source_path=source_path.relative_to(REPO_ROOT), + source_path_literal=repr(source_path.relative_to(REPO_ROOT).as_posix()), + chunks_literal=repr(chunk_payload), + ) + + +def write_test_tree(qmd_files: list[QmdFile], tests_path: Path = TESTS_PATH) -> list[Path]: + """Rewrite the generated qmd test tree.""" + if tests_path.exists(): + shutil.rmtree(tests_path) + tests_path.mkdir(parents=True) + (tests_path / "__init__.py").write_text("") + runtime_path = tests_path / "_runtime.py" + runtime_path.write_text(RUNTIME_MODULE) + + written = {tests_path / "__init__.py", runtime_path} + for qmd_file in qmd_files: + if not qmd_file.chunks: + continue + output_path = get_output_path(qmd_file.path, tests_path=tests_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + current = output_path.parent + while current != tests_path: + init_path = current / "__init__.py" + if not init_path.exists(): + 
init_path.write_text("") + written.add(init_path) + current = current.parent + output_path.write_text(render_test_module(qmd_file.path, qmd_file.chunks)) + written.add(output_path) + return sorted(written) + + +def generate_autogenerated_doccode_tests() -> list[Path]: + """Generate the qmd-backed test tree.""" + qmd_files = [extract_qmd_file(path) for path in iter_source_qmd_files()] + return write_test_tree(qmd_files) + + +def main() -> int: + """Generate qmd-backed tests from docs sources.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.parse_args() + written = generate_autogenerated_doccode_tests() + rel_path = TESTS_PATH.relative_to(REPO_ROOT) + print(f"Generated {len(written)} files in {rel_path}") # noqa: T201 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/test_generate_autogenerated_doccode_tests.py b/scripts/test_generate_autogenerated_doccode_tests.py new file mode 100644 index 00000000..a77ea2bc --- /dev/null +++ b/scripts/test_generate_autogenerated_doccode_tests.py @@ -0,0 +1,132 @@ +"""Tests for generating autogenerated doccode pytest modules.""" + +from __future__ import annotations + +from pathlib import Path + +from generate_autogenerated_doccode_tests import ( + Chunk, + DOCS_PATH, + QmdFile, + REPO_ROOT, + extract_qmd_file, + get_output_path, + render_test_module, + write_test_tree, +) + + +def _write(path: Path, text: str) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text) + return path + + +class TestExtractQmdFile: + """Tests for extracting executable qmd chunks.""" + + def test_skips_plain_python_fences(self, tmp_path): + """Only executable quarto python fences should be included.""" + path = _write( + tmp_path / "example.qmd", + """--- +title: Example +--- + +```{python} +import dascore as dc +``` + +```{.python} +print("not executed") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=5, source="import dascore as 
dc\n"),) + + def test_skips_doc_with_eval_false(self, tmp_path): + """Document-level eval false should skip all chunks.""" + path = _write( + tmp_path / "example.qmd", + """--- +execute: + eval: false +--- + +```{python} +print("nope") +``` +""", + ) + assert extract_qmd_file(path).chunks == () + + def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): + """Chunk-level execution flags should be honored.""" + path = _write( + tmp_path / "example.qmd", + """```{python} +#| eval: false +print("skip") +``` + +```{python} +#| execute: false +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=12, source='print("run")\n'),) + + +class TestOutputPaths: + """Tests for source-to-test path mapping.""" + + def test_root_doc_maps_to_root_test(self): + """Top-level docs should stay at the top of autogenerated tests.""" + source = DOCS_PATH / "index.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + assert get_output_path(source, tests_path=tests) == tests / "test_index.py" + + def test_nested_doc_maps_to_nested_test(self): + """Nested docs should keep their relative directory structure.""" + source = DOCS_PATH / "tutorial" / "file_io.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + expected = tests / "tutorial" / "test_file_io.py" + assert get_output_path(source, tests_path=tests) == expected + + +class TestRenderAndWrite: + """Tests for generated module output.""" + + def test_render_includes_source_and_chunk_payload(self): + """Generated modules should point back to the source qmd.""" + source = REPO_ROOT / "docs" / "tutorial" / "example.qmd" + module = render_test_module(source, (Chunk(start_line=12, source="x = 1\n"),)) + assert "Autogenerated from docs/tutorial/example.qmd" in module + assert "@pytest.mark.docs_examples" in module + assert "SOURCE_QMD = 'docs/tutorial/example.qmd'" in module + assert "CHUNKS = ((12, 'x = 1\\n'),)" in module + + def 
test_write_tree_removes_stale_files(self, tmp_path): + """Regeneration should replace the whole generated test tree.""" + tests_path = tmp_path / "tests" / "test_autogenerated_doccode" + stale = tests_path / "obsolete.py" + stale.parent.mkdir(parents=True) + stale.write_text("old") + + source = DOCS_PATH / "tutorial" / "file_io.qmd" + qmd_file = QmdFile(path=source, chunks=(Chunk(start_line=1, source="print(1)\n"),)) + + written = write_test_tree([qmd_file], tests_path=tests_path) + output = tests_path / "tutorial" / "test_file_io.py" + + assert output in written + assert output.exists() + assert not stale.exists() + assert (tests_path / "_runtime.py").exists() From 688354b9992892d52489d81b3d0eb3d33930396c Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 08:28:06 +0200 Subject: [PATCH 02/13] address review --- docs/contributing/testing.qmd | 13 ++++++++++++- .../generate_autogenerated_doccode_tests.py | 16 ++++++++++++---- ...st_generate_autogenerated_doccode_tests.py | 19 ++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/docs/contributing/testing.qmd b/docs/contributing/testing.qmd index c9fb5c99..3f9c5a0d 100644 --- a/docs/contributing/testing.qmd +++ b/docs/contributing/testing.qmd @@ -45,7 +45,18 @@ To run the docstring tests use the following: pytest dascore --doctest-modules ``` -To validate executable examples in the hand-written documentation without building the full site, generate the mirrored tests with `python scripts/generate_autogenerated_doccode_tests.py` and then run `pytest tests/test_autogenerated_doccode`. 
+To validate executable examples in the hand-written documentation without building the full site, generate the mirrored tests with: + +```bash +python scripts/generate_autogenerated_doccode_tests.py +``` + +Then run the generated tests: + +```bash +pytest tests/test_autogenerated_doccode +``` + The `tests/test_autogenerated_doccode` directory is intentionally gitignored and should be regenerated locally rather than committed. ## Writing Tests diff --git a/scripts/generate_autogenerated_doccode_tests.py b/scripts/generate_autogenerated_doccode_tests.py index 4db58dc9..4369e57f 100644 --- a/scripts/generate_autogenerated_doccode_tests.py +++ b/scripts/generate_autogenerated_doccode_tests.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from pathlib import Path - REPO_ROOT = Path(__file__).resolve().parent.parent DOCS_PATH = REPO_ROOT / "docs" TESTS_PATH = REPO_ROOT / "tests" / "test_autogenerated_doccode" @@ -149,7 +148,7 @@ def _is_python_fence(spec: str) -> bool: inner = cleaned[1:-1].strip() if not inner: return False - language = inner.split(",", 1)[0].strip() + language = inner.split(",", 1)[0].split(None, 1)[0].strip() return language == "python" @@ -233,10 +232,19 @@ def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: ) -def write_test_tree(qmd_files: list[QmdFile], tests_path: Path = TESTS_PATH) -> list[Path]: +def write_test_tree( + qmd_files: list[QmdFile], + tests_path: Path = TESTS_PATH, +) -> list[Path]: """Rewrite the generated qmd test tree.""" if tests_path.exists(): - shutil.rmtree(tests_path) + try: + shutil.rmtree(tests_path) + except Exception as e: + print( # noqa: T201 + f"Failed to remove generated test tree at {tests_path}: {e}" + ) + raise tests_path.mkdir(parents=True) (tests_path / "__init__.py").write_text("") runtime_path = tests_path / "_runtime.py" diff --git a/scripts/test_generate_autogenerated_doccode_tests.py b/scripts/test_generate_autogenerated_doccode_tests.py index a77ea2bc..75dbeff8 100644 --- 
a/scripts/test_generate_autogenerated_doccode_tests.py +++ b/scripts/test_generate_autogenerated_doccode_tests.py @@ -5,10 +5,10 @@ from pathlib import Path from generate_autogenerated_doccode_tests import ( - Chunk, DOCS_PATH, - QmdFile, REPO_ROOT, + Chunk, + QmdFile, extract_qmd_file, get_output_path, render_test_module, @@ -61,6 +61,18 @@ def test_skips_doc_with_eval_false(self, tmp_path): ) assert extract_qmd_file(path).chunks == () + def test_extracts_python_fence_with_space_delimited_options(self, tmp_path): + """Quarto python fences can include filename options after a space.""" + path = _write( + tmp_path / "example.qmd", + """```{python filename="example.py"} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): """Chunk-level execution flags should be honored.""" path = _write( @@ -121,7 +133,8 @@ def test_write_tree_removes_stale_files(self, tmp_path): stale.write_text("old") source = DOCS_PATH / "tutorial" / "file_io.qmd" - qmd_file = QmdFile(path=source, chunks=(Chunk(start_line=1, source="print(1)\n"),)) + chunks = (Chunk(start_line=1, source="print(1)\n"),) + qmd_file = QmdFile(path=source, chunks=chunks) written = write_test_tree([qmd_file], tests_path=tests_path) output = tests_path / "tutorial" / "test_file_io.py" From 2fd877995976ceacc991f29346bcebaf1a233862 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 09:04:03 +0200 Subject: [PATCH 03/13] fix winddows bug --- .../generate_autogenerated_doccode_tests.py | 14 +++++++---- ...st_generate_autogenerated_doccode_tests.py | 24 ++++++++++++++++++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/scripts/generate_autogenerated_doccode_tests.py b/scripts/generate_autogenerated_doccode_tests.py index 4369e57f..738fdfe0 100644 --- a/scripts/generate_autogenerated_doccode_tests.py +++ 
b/scripts/generate_autogenerated_doccode_tests.py @@ -11,6 +11,7 @@ DOCS_PATH = REPO_ROOT / "docs" TESTS_PATH = REPO_ROOT / "tests" / "test_autogenerated_doccode" API_DOCS_PATH = DOCS_PATH / "api" +TEXT_ENCODING = "utf-8" AUTOGEN_HEADER = """\"\"\"Autogenerated from {source_path}.\"\"\" @@ -170,7 +171,7 @@ def _chunk_is_executable(chunk_lines: list[str]) -> bool: def extract_qmd_file(path: Path) -> QmdFile: """Extract executable python chunks from a qmd file.""" - lines = path.read_text().splitlines() + lines = path.read_text(encoding=TEXT_ENCODING).splitlines() front_matter, body = _split_front_matter(lines) if not _parse_doc_eval(front_matter): return QmdFile(path=path, chunks=()) @@ -246,9 +247,9 @@ def write_test_tree( ) raise tests_path.mkdir(parents=True) - (tests_path / "__init__.py").write_text("") + (tests_path / "__init__.py").write_text("", encoding=TEXT_ENCODING) runtime_path = tests_path / "_runtime.py" - runtime_path.write_text(RUNTIME_MODULE) + runtime_path.write_text(RUNTIME_MODULE, encoding=TEXT_ENCODING) written = {tests_path / "__init__.py", runtime_path} for qmd_file in qmd_files: @@ -260,10 +261,13 @@ def write_test_tree( while current != tests_path: init_path = current / "__init__.py" if not init_path.exists(): - init_path.write_text("") + init_path.write_text("", encoding=TEXT_ENCODING) written.add(init_path) current = current.parent - output_path.write_text(render_test_module(qmd_file.path, qmd_file.chunks)) + output_path.write_text( + render_test_module(qmd_file.path, qmd_file.chunks), + encoding=TEXT_ENCODING, + ) written.add(output_path) return sorted(written) diff --git a/scripts/test_generate_autogenerated_doccode_tests.py b/scripts/test_generate_autogenerated_doccode_tests.py index 75dbeff8..41c08da7 100644 --- a/scripts/test_generate_autogenerated_doccode_tests.py +++ b/scripts/test_generate_autogenerated_doccode_tests.py @@ -7,6 +7,7 @@ from generate_autogenerated_doccode_tests import ( DOCS_PATH, REPO_ROOT, + TEXT_ENCODING, Chunk, 
QmdFile, extract_qmd_file, @@ -18,7 +19,7 @@ def _write(path: Path, text: str) -> Path: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(text) + path.write_text(text, encoding=TEXT_ENCODING) return path @@ -73,6 +74,27 @@ def test_extracts_python_fence_with_space_delimited_options(self, tmp_path): out = extract_qmd_file(path) assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + def test_reads_qmd_with_utf8_encoding(self, tmp_path, monkeypatch): + """Extraction should force UTF-8 instead of platform default encodings.""" + path = _write(tmp_path / "example.qmd", "placeholder") + called: dict[str, str | None] = {} + original = Path.read_text + + def _read_text(self, *args, **kwargs): + if self == path: + called["encoding"] = kwargs.get("encoding") + return """```{python} +print("run") +```""" + return original(self, *args, **kwargs) + + monkeypatch.setattr(Path, "read_text", _read_text) + + out = extract_qmd_file(path) + + assert called["encoding"] == TEXT_ENCODING + assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): """Chunk-level execution flags should be honored.""" path = _write( From a39add14ebc52d8d3ca70f985b1275ffc93d4d5f Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 09:40:50 +0200 Subject: [PATCH 04/13] combine test job --- .github/workflows/runtests.yml | 20 ++++++++----------- .../generate_autogenerated_doccode_tests.py | 5 +++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index 4894b66d..b0bfec6d 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -73,24 +73,20 @@ jobs: cache-number: ${{ env.CACHE_NUMBER }} prepare-test-data: "true" - - name: regenerate qmd-backed docs tests - id: generate_qmd_tests - continue-on-error: ${{ env.debug_enabled == 'true' }} - shell: bash -el {0} - run: python 
scripts/generate_autogenerated_doccode_tests.py - - - name: run qmd docs tests + - name: generate and run qmd docs tests id: run_qmd_docs_tests continue-on-error: ${{ env.debug_enabled == 'true' }} shell: bash -el {0} - run: pytest tests/test_autogenerated_doccode + run: | + python scripts/generate_autogenerated_doccode_tests.py + pytest tests/test_autogenerated_doccode # Runs test suite and calculates coverage - name: run test suite id: run_test_suite continue-on-error: ${{ env.debug_enabled == 'true' }} shell: bash -el {0} - run: ./.github/test_code.sh + run: python -m pytest tests -s --cov dascore --cov-append --cov-report=xml --ignore=tests/test_autogenerated_doccode # Runs examples in docstrings - name: test docstrings @@ -100,7 +96,7 @@ jobs: run: ./.github/test_code.sh doctest - name: note SSH key requirement for debug access - if: env.debug_enabled == 'true' && (steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') shell: bash run: | message="Debug access is limited to the workflow actor's GitHub SSH keys. Connect from a machine that has the matching private key loaded." 
@@ -108,7 +104,7 @@ jobs: echo "$message" >> "$GITHUB_STEP_SUMMARY" - name: Setup tmate session - if: env.debug_enabled == 'true' && (steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') uses: mxschmitt/action-tmate@v3 timeout-minutes: 30 with: @@ -124,6 +120,6 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} - name: fail job after debug session if tests failed - if: steps.generate_qmd_tests.outcome == 'failure' || steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' + if: steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' shell: bash run: exit 1 diff --git a/scripts/generate_autogenerated_doccode_tests.py b/scripts/generate_autogenerated_doccode_tests.py index 738fdfe0..d37493ab 100644 --- a/scripts/generate_autogenerated_doccode_tests.py +++ b/scripts/generate_autogenerated_doccode_tests.py @@ -226,9 +226,10 @@ def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: """Render a generated pytest module.""" chunk_payload = tuple((chunk.start_line, chunk.source) for chunk in chunks) + relative_source_path = source_path.relative_to(REPO_ROOT).as_posix() return AUTOGEN_HEADER.format( - source_path=source_path.relative_to(REPO_ROOT), - source_path_literal=repr(source_path.relative_to(REPO_ROOT).as_posix()), + source_path=relative_source_path, + source_path_literal=repr(relative_source_path), chunks_literal=repr(chunk_payload), ) From f5a2ffcc74212f2a8dfb91ac014ee40b93468c3c Mon Sep 17 00:00:00 2001 From: 
Derrick Chambers Date: Sat, 4 Apr 2026 11:02:37 +0200 Subject: [PATCH 05/13] simplify script generation --- .github/workflows/runtests.yml | 17 +- docs/contributing/testing.qmd | 2 +- ...de_tests.py => generate_doc_code_tests.py} | 171 ++++++++++++++---- ...sts.py => test_generate_doc_code_tests.py} | 33 +++- 4 files changed, 168 insertions(+), 55 deletions(-) rename scripts/{generate_autogenerated_doccode_tests.py => generate_doc_code_tests.py} (53%) rename scripts/{test_generate_autogenerated_doccode_tests.py => test_generate_doc_code_tests.py} (74%) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index b0bfec6d..a83be02d 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -73,20 +73,17 @@ jobs: cache-number: ${{ env.CACHE_NUMBER }} prepare-test-data: "true" - - name: generate and run qmd docs tests - id: run_qmd_docs_tests - continue-on-error: ${{ env.debug_enabled == 'true' }} + - name: generate qmd docs tests + id: generate_qmd_tests shell: bash -el {0} - run: | - python scripts/generate_autogenerated_doccode_tests.py - pytest tests/test_autogenerated_doccode + run: python scripts/generate_doc_code_tests.py # Runs test suite and calculates coverage - name: run test suite id: run_test_suite continue-on-error: ${{ env.debug_enabled == 'true' }} shell: bash -el {0} - run: python -m pytest tests -s --cov dascore --cov-append --cov-report=xml --ignore=tests/test_autogenerated_doccode + run: ./.github/test_code.sh # Runs examples in docstrings - name: test docstrings @@ -96,7 +93,7 @@ jobs: run: ./.github/test_code.sh doctest - name: note SSH key requirement for debug access - if: env.debug_enabled == 'true' && (steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') shell: bash run: | message="Debug 
access is limited to the workflow actor's GitHub SSH keys. Connect from a machine that has the matching private key loaded." @@ -104,7 +101,7 @@ jobs: echo "$message" >> "$GITHUB_STEP_SUMMARY" - name: Setup tmate session - if: env.debug_enabled == 'true' && (steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') + if: env.debug_enabled == 'true' && (steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure') uses: mxschmitt/action-tmate@v3 timeout-minutes: 30 with: @@ -120,6 +117,6 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} - name: fail job after debug session if tests failed - if: steps.run_qmd_docs_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' + if: steps.generate_qmd_tests.outcome == 'failure' || steps.run_test_suite.outcome == 'failure' || steps.run_docstrings.outcome == 'failure' shell: bash run: exit 1 diff --git a/docs/contributing/testing.qmd b/docs/contributing/testing.qmd index 3f9c5a0d..5fab6207 100644 --- a/docs/contributing/testing.qmd +++ b/docs/contributing/testing.qmd @@ -48,7 +48,7 @@ pytest dascore --doctest-modules To validate executable examples in the hand-written documentation without building the full site, generate the mirrored tests with: ```bash -python scripts/generate_autogenerated_doccode_tests.py +python scripts/generate_doc_code_tests.py ``` Then run the generated tests: diff --git a/scripts/generate_autogenerated_doccode_tests.py b/scripts/generate_doc_code_tests.py similarity index 53% rename from scripts/generate_autogenerated_doccode_tests.py rename to scripts/generate_doc_code_tests.py index d37493ab..906e4847 100644 --- a/scripts/generate_autogenerated_doccode_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -1,4 +1,30 @@ -"""Generate pytest modules from executable python snippets in qmd files.""" +"""Generate inline pytest modules from 
executable Python snippets in qmd files. + +This script mirrors hand-written documentation examples into +``tests/test_autogenerated_doccode`` so they run as part of the normal pytest +suite. + +How it works: + +1. Discover every source ``.qmd`` file under ``docs/``, excluding generated API + docs under ``docs/api/``. +2. Parse each qmd file and keep only executable Quarto ``{python ...}`` fences. +3. Respect document-level ``execute.eval: false`` and chunk-level + ``#| eval: false`` / ``#| execute: false`` switches. +4. Generate one pytest module per qmd file with a single ``test_main`` function. +5. Emit the Python blocks in original source order as inline + ``exec(compile(...))`` calls so block scope is shared and ``__future__`` + imports remain legal. +6. Insert a comment before each inlined block that points back to the qmd file + and the first executable line of that block. +7. Write one generated ``conftest.py`` that provides the common execution + context: run from the qmd directory, keep matplotlib non-interactive, and + close figures after the test. + +The generated tests are intentionally treated as build artifacts rather than +committed sources. Re-running this script rewrites the entire generated tree so +stale files disappear automatically. +""" from __future__ import annotations @@ -6,32 +32,41 @@ import shutil from dataclasses import dataclass from pathlib import Path +from textwrap import indent +# The generator always runs from the repository checkout. REPO_ROOT = Path(__file__).resolve().parent.parent +# Source qmd files live under docs/. DOCS_PATH = REPO_ROOT / "docs" +# Generated pytest files are mirrored into a dedicated test tree. TESTS_PATH = REPO_ROOT / "tests" / "test_autogenerated_doccode" +# API docs are generated elsewhere and should not be mirrored again here. API_DOCS_PATH = DOCS_PATH / "api" +# Force stable cross-platform text IO for both reads and writes. 
TEXT_ENCODING = "utf-8" -AUTOGEN_HEADER = """\"\"\"Autogenerated from {source_path}.\"\"\" +# This template becomes one standalone pytest module per source qmd file. +AUTOGEN_HEADER = '''"""Autogenerated from {source_path}.""" from __future__ import annotations import pytest -from tests.test_autogenerated_doccode._runtime import run_qmd_chunks - -SOURCE_QMD = {source_path_literal} -CHUNKS = {chunks_literal} +from tests.test_autogenerated_doccode.conftest import qmd_test_context @pytest.mark.docs_examples -def test_qmd_examples(): - \"\"\"Execute the python chunks from the source QMD.\"\"\" - run_qmd_chunks(SOURCE_QMD, CHUNKS) -""" +def test_main(): + """Execute the python chunks from the source QMD.""" + source_qmd = {source_path_literal} + namespace = {{"__name__": "__qmd_test__"}} + with qmd_test_context(source_qmd): +{body} +''' -RUNTIME_MODULE = '''"""Helpers for generated qmd tests.""" +# The generated conftest keeps reusable setup out of every generated module +# while still avoiding a bespoke runtime executor. +CONFTEST_MODULE = '''"""Shared helpers for autogenerated qmd tests.""" from __future__ import annotations @@ -41,37 +76,29 @@ def test_qmd_examples(): import matplotlib +# Use a non-interactive backend because generated examples may plot figures. matplotlib.use("Agg", force=True) import matplotlib.pyplot as plt from matplotlib.figure import Figure +# Prevent example code from trying to open GUI windows during tests. 
plt.ioff() plt.show = lambda *args, **kwargs: None Figure.show = lambda self, *args, **kwargs: None @contextmanager -def _pushd(path: Path): - """Temporarily change the working directory.""" +def qmd_test_context(source_qmd: str): + """Run one autogenerated qmd test in the source document directory.""" + root = Path(__file__).resolve().parents[2] + source_path = root / source_qmd old = Path.cwd() - os.chdir(path) try: + os.chdir(source_path.parent) yield finally: os.chdir(old) - - -def run_qmd_chunks(source_qmd: str, chunks: tuple[tuple[int, str], ...]) -> None: - """Execute the chunks for a qmd-backed generated test.""" - root = Path(__file__).resolve().parents[2] - source_path = root / source_qmd - namespace = {"__name__": "__qmd_test__"} - try: - with _pushd(source_path.parent): - for start_line, source in chunks: - code = compile(source, f"{source_qmd}:{start_line}", "exec") - exec(code, namespace, namespace) - finally: + # Always close figures so one doc example cannot leak state to another. plt.close("all") ''' @@ -80,7 +107,9 @@ def run_qmd_chunks(source_qmd: str, chunks: tuple[tuple[int, str], ...]) -> None class Chunk: """A chunk extracted from a qmd file.""" + # First executable line inside the qmd code fence. start_line: int + # The literal Python source that will be compiled and executed. source: str @@ -88,12 +117,15 @@ class Chunk: class QmdFile: """The executable content extracted from a qmd file.""" + # Source qmd path on disk. path: Path + # All executable python chunks found in the document. chunks: tuple[Chunk, ...] def _parse_bool(value: str) -> bool | None: """Parse a yaml-like bool.""" + # Front matter and chunk options use yaml-like booleans. 
cleaned = value.strip().strip("'\"").lower() if cleaned == "true": return True @@ -104,6 +136,7 @@ def _parse_bool(value: str) -> bool | None: def _split_front_matter(lines: list[str]) -> tuple[list[str], list[str]]: """Split a qmd file into front matter and body lines.""" + # Only treat a leading --- block as front matter. if not lines or lines[0].strip() != "---": return [], lines for index in range(1, len(lines)): @@ -114,23 +147,28 @@ def _split_front_matter(lines: list[str]) -> tuple[list[str], list[str]]: def _parse_doc_eval(front_matter: list[str]) -> bool: """Return the document-level eval flag.""" + # Quarto can disable execution for the entire document with: + # execute: + # eval: false in_execute = False execute_indent = 0 for line in front_matter: stripped = line.strip() + # Ignore blank lines and front-matter comments. if not stripped or stripped.startswith("#"): continue - indent = len(line) - len(line.lstrip(" ")) + indent_size = len(line) - len(line.lstrip(" ")) if stripped.startswith("execute:"): + # Track when we are inside the execute: subsection. in_execute = True - execute_indent = indent + execute_indent = indent_size remainder = stripped.partition(":")[2].strip() if remainder: parsed = _parse_bool(remainder) if parsed is not None: return parsed continue - if in_execute and indent <= execute_indent: + if in_execute and indent_size <= execute_indent: in_execute = False if in_execute and ":" in stripped: key, _, value = stripped.partition(":") @@ -143,18 +181,24 @@ def _parse_doc_eval(front_matter: list[str]) -> bool: def _is_python_fence(spec: str) -> bool: """Return True if the fence should execute as python.""" + # The raw fence spec arrives like "{python}" or + # "{python filename="example.py"}". cleaned = spec.strip() + # Plain markdown-style {.python} fences are not Quarto executable cells. 
if not cleaned.startswith("{") or cleaned.startswith("{.python"): return False inner = cleaned[1:-1].strip() if not inner: return False + # Accept either comma-delimited or space-delimited Quarto options. language = inner.split(",", 1)[0].split(None, 1)[0].strip() return language == "python" def _chunk_is_executable(chunk_lines: list[str]) -> bool: """Return True if a chunk should be executed.""" + # Chunk-local `#| eval: false` or `#| execute: false` overrides the + # document-level default. for line in chunk_lines: stripped = line.strip() if not stripped.startswith("#|") or ":" not in stripped: @@ -171,8 +215,10 @@ def _chunk_is_executable(chunk_lines: list[str]) -> bool: def extract_qmd_file(path: Path) -> QmdFile: """Extract executable python chunks from a qmd file.""" + # Read with explicit UTF-8 so Windows does not fall back to cp1252. lines = path.read_text(encoding=TEXT_ENCODING).splitlines() front_matter, body = _split_front_matter(lines) + # Skip the whole document if execution is disabled in front matter. if not _parse_doc_eval(front_matter): return QmdFile(path=path, chunks=()) @@ -182,17 +228,25 @@ def extract_qmd_file(path: Path) -> QmdFile: chunk_start = 0 should_capture = False + # `body` excludes front matter, but we still want source comments to point + # back to original qmd line numbers. body_start_line = len(front_matter) + 1 if front_matter else 0 for offset, line in enumerate(body, start=body_start_line + 1): stripped = line.strip() + # Opening fence: decide whether this cell is executable python. if not in_chunk and stripped.startswith("```{"): in_chunk = True should_capture = _is_python_fence(stripped[3:]) chunk_lines = [] + # Record the first executable line inside the fence, not the fence + # header itself, so the generated source comment points at code. chunk_start = offset + 1 continue + # Closing fence: finalize the captured chunk if it should execute. 
if in_chunk and stripped == "```": if should_capture and _chunk_is_executable(chunk_lines): + # Preserve the chunk as a single executable unit so variables + # defined earlier in the block remain available later. source = "\n".join(chunk_lines).strip() if source: chunks.append(Chunk(start_line=chunk_start, source=source + "\n")) @@ -200,6 +254,7 @@ def extract_qmd_file(path: Path) -> QmdFile: should_capture = False chunk_lines = [] continue + # Any non-fence line inside the current chunk is part of the source. if in_chunk: chunk_lines.append(line) return QmdFile(path=path, chunks=tuple(chunks)) @@ -207,6 +262,7 @@ def extract_qmd_file(path: Path) -> QmdFile: def iter_source_qmd_files(base_path: Path = DOCS_PATH) -> list[Path]: """Return source qmd files to mirror into tests.""" + # Mirror every qmd under docs/, except generated API docs. return [ path for path in sorted(base_path.rglob("*.qmd")) @@ -216,6 +272,8 @@ def iter_source_qmd_files(base_path: Path = DOCS_PATH) -> list[Path]: def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: """Map a qmd file to its generated pytest module.""" + # Keep the generated tree shaped like docs/, but swap the filename to + # `test_.py` so pytest discovers it naturally. relative = source_path.relative_to(DOCS_PATH) filename = f"test_{source_path.stem}.py" if len(relative.parts) == 1: @@ -223,14 +281,44 @@ def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: return tests_path.joinpath(*relative.parts[:-1], filename) +def _render_chunk_comment(source_path: str, start_line: int) -> str: + """Render the source-location comment for one inlined chunk.""" + return f"# {source_path}:{start_line}" + + +def _render_chunk_source(source_path: str, chunk: Chunk) -> str: + """Render one chunk as inline test code.""" + # Keep the qmd source location visible right above the pasted code block. 
+ chunk_block = f"""{_render_chunk_comment(source_path, chunk.start_line)} +exec( + compile( + {chunk.source!r}, + "{source_path}:{chunk.start_line}", + "exec", + ), + namespace, + namespace, +)""" + # Indent so the emitted code lives inside `with qmd_test_context(...):`. + return indent(chunk_block.rstrip(), " " * 8) + + def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: """Render a generated pytest module.""" - chunk_payload = tuple((chunk.start_line, chunk.source) for chunk in chunks) + # Normalize to forward slashes so generated modules are stable across + # platforms and Windows paths do not introduce escape sequences. relative_source_path = source_path.relative_to(REPO_ROOT).as_posix() + # Emit chunks directly into the test body so the generated file is readable + # without reconstructing execution from a serialized tuple payload. + body = "\n\n".join( + _render_chunk_source(relative_source_path, chunk) for chunk in chunks + ) + if not body: + body = " pass" return AUTOGEN_HEADER.format( source_path=relative_source_path, source_path_literal=repr(relative_source_path), - chunks_literal=repr(chunk_payload), + body=body, ) @@ -239,6 +327,8 @@ def write_test_tree( tests_path: Path = TESTS_PATH, ) -> list[Path]: """Rewrite the generated qmd test tree.""" + # Regeneration is full-rewrite rather than incremental so stale tests + # disappear automatically when docs are removed or stop containing code. if tests_path.exists(): try: shutil.rmtree(tests_path) @@ -247,24 +337,28 @@ def write_test_tree( f"Failed to remove generated test tree at {tests_path}: {e}" ) raise + # Seed the package root and generated conftest first. 
tests_path.mkdir(parents=True) (tests_path / "__init__.py").write_text("", encoding=TEXT_ENCODING) - runtime_path = tests_path / "_runtime.py" - runtime_path.write_text(RUNTIME_MODULE, encoding=TEXT_ENCODING) + conftest_path = tests_path / "conftest.py" + conftest_path.write_text(CONFTEST_MODULE, encoding=TEXT_ENCODING) - written = {tests_path / "__init__.py", runtime_path} + written = {tests_path / "__init__.py", conftest_path} for qmd_file in qmd_files: + # Skip documents that contain no executable python cells. if not qmd_file.chunks: continue output_path = get_output_path(qmd_file.path, tests_path=tests_path) output_path.parent.mkdir(parents=True, exist_ok=True) current = output_path.parent + # Ensure nested generated tests remain importable as pytest packages. while current != tests_path: init_path = current / "__init__.py" if not init_path.exists(): init_path.write_text("", encoding=TEXT_ENCODING) written.add(init_path) current = current.parent + # Write the generated pytest module for this one source document. output_path.write_text( render_test_module(qmd_file.path, qmd_file.chunks), encoding=TEXT_ENCODING, @@ -273,17 +367,20 @@ def write_test_tree( return sorted(written) -def generate_autogenerated_doccode_tests() -> list[Path]: +def generate_doc_code_tests() -> list[Path]: """Generate the qmd-backed test tree.""" + # Extract all source docs first, then mirror only the executable ones. qmd_files = [extract_qmd_file(path) for path in iter_source_qmd_files()] return write_test_tree(qmd_files) def main() -> int: """Generate qmd-backed tests from docs sources.""" + # The CLI is intentionally simple today, but argparse gives us a stable + # entrypoint if we need options later. 
parser = argparse.ArgumentParser(description=__doc__) parser.parse_args() - written = generate_autogenerated_doccode_tests() + written = generate_doc_code_tests() rel_path = TESTS_PATH.relative_to(REPO_ROOT) print(f"Generated {len(written)} files in {rel_path}") # noqa: T201 return 0 diff --git a/scripts/test_generate_autogenerated_doccode_tests.py b/scripts/test_generate_doc_code_tests.py similarity index 74% rename from scripts/test_generate_autogenerated_doccode_tests.py rename to scripts/test_generate_doc_code_tests.py index 41c08da7..a5f82038 100644 --- a/scripts/test_generate_autogenerated_doccode_tests.py +++ b/scripts/test_generate_doc_code_tests.py @@ -1,10 +1,10 @@ -"""Tests for generating autogenerated doccode pytest modules.""" +"""Tests for generating qmd-backed pytest modules.""" from __future__ import annotations from pathlib import Path -from generate_autogenerated_doccode_tests import ( +from generate_doc_code_tests import ( DOCS_PATH, REPO_ROOT, TEXT_ENCODING, @@ -18,6 +18,7 @@ def _write(path: Path, text: str) -> Path: + # Tests create tiny synthetic qmd files instead of touching real docs. path.parent.mkdir(parents=True, exist_ok=True) path.write_text(text, encoding=TEXT_ENCODING) return path @@ -28,6 +29,7 @@ class TestExtractQmdFile: def test_skips_plain_python_fences(self, tmp_path): """Only executable quarto python fences should be included.""" + # `{python}` should execute, while `{.python}` is just a syntax fence. path = _write( tmp_path / "example.qmd", """--- @@ -48,6 +50,7 @@ def test_skips_plain_python_fences(self, tmp_path): def test_skips_doc_with_eval_false(self, tmp_path): """Document-level eval false should skip all chunks.""" + # Front-matter execution settings should disable every chunk below. 
path = _write( tmp_path / "example.qmd", """--- @@ -64,6 +67,7 @@ def test_skips_doc_with_eval_false(self, tmp_path): def test_extracts_python_fence_with_space_delimited_options(self, tmp_path): """Quarto python fences can include filename options after a space.""" + # Quarto allows options after a space, not just after commas. path = _write( tmp_path / "example.qmd", """```{python filename="example.py"} @@ -81,6 +85,7 @@ def test_reads_qmd_with_utf8_encoding(self, tmp_path, monkeypatch): original = Path.read_text def _read_text(self, *args, **kwargs): + # Intercept the read call so we can assert the requested encoding. if self == path: called["encoding"] = kwargs.get("encoding") return """```{python} @@ -97,6 +102,7 @@ def _read_text(self, *args, **kwargs): def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): """Chunk-level execution flags should be honored.""" + # Chunk-local options should override the document default. path = _write( tmp_path / "example.qmd", """```{python} @@ -122,13 +128,15 @@ class TestOutputPaths: """Tests for source-to-test path mapping.""" def test_root_doc_maps_to_root_test(self): - """Top-level docs should stay at the top of autogenerated tests.""" + """Top-level docs should stay at the top of generated tests.""" + # docs/index.qmd becomes tests/test_autogenerated_doccode/test_index.py source = DOCS_PATH / "index.qmd" tests = Path("/repo/tests/test_autogenerated_doccode") assert get_output_path(source, tests_path=tests) == tests / "test_index.py" def test_nested_doc_maps_to_nested_test(self): """Nested docs should keep their relative directory structure.""" + # Nested docs should preserve their package-like layout. 
source = DOCS_PATH / "tutorial" / "file_io.qmd" tests = Path("/repo/tests/test_autogenerated_doccode") expected = tests / "tutorial" / "test_file_io.py" @@ -139,16 +147,27 @@ class TestRenderAndWrite: """Tests for generated module output.""" def test_render_includes_source_and_chunk_payload(self): - """Generated modules should point back to the source qmd.""" + """Generated modules should inline source code with source comments.""" + # The generated file should contain enough literal data to run by itself. source = REPO_ROOT / "docs" / "tutorial" / "example.qmd" module = render_test_module(source, (Chunk(start_line=12, source="x = 1\n"),)) assert "Autogenerated from docs/tutorial/example.qmd" in module assert "@pytest.mark.docs_examples" in module - assert "SOURCE_QMD = 'docs/tutorial/example.qmd'" in module - assert "CHUNKS = ((12, 'x = 1\\n'),)" in module + assert "def test_main()" in module + assert "source_qmd = 'docs/tutorial/example.qmd'" in module + assert 'namespace = {"__name__": "__qmd_test__"}' in module + assert "with qmd_test_context(source_qmd):" in module + assert "# docs/tutorial/example.qmd:12" in module + assert "compile(" in module + assert "namespace," in module + assert "'x = 1\\n'" in module + assert "CHUNKS =" not in module + assert "SOURCE_QMD =" not in module + assert "_runtime" not in module def test_write_tree_removes_stale_files(self, tmp_path): """Regeneration should replace the whole generated test tree.""" + # Full regeneration should remove old outputs before writing new ones. 
tests_path = tmp_path / "tests" / "test_autogenerated_doccode" stale = tests_path / "obsolete.py" stale.parent.mkdir(parents=True) @@ -164,4 +183,4 @@ def test_write_tree_removes_stale_files(self, tmp_path): assert output in written assert output.exists() assert not stale.exists() - assert (tests_path / "_runtime.py").exists() + assert (tests_path / "conftest.py").exists() From 9e224671a6463b50f261eea9e321563b01c40733 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 11:18:45 +0200 Subject: [PATCH 06/13] simplify tests again --- scripts/generate_doc_code_tests.py | 72 ++++++++++++++++++------- scripts/test_generate_doc_code_tests.py | 22 ++++++-- 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py index 906e4847..a94d14a8 100644 --- a/scripts/generate_doc_code_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -12,9 +12,9 @@ 3. Respect document-level ``execute.eval: false`` and chunk-level ``#| eval: false`` / ``#| execute: false`` switches. 4. Generate one pytest module per qmd file with a single ``test_main`` function. -5. Emit the Python blocks in original source order as inline - ``exec(compile(...))`` calls so block scope is shared and ``__future__`` - imports remain legal. +5. Hoist any ``from __future__ import ...`` lines to module scope, then inline + the remaining Python blocks into the generated test in original source + order. 6. Insert a comment before each inlined block that points back to the qmd file and the first executable line of that block. 7. Write one generated ``conftest.py`` that provides the common execution @@ -48,9 +48,7 @@ # This template becomes one standalone pytest module per source qmd file. 
AUTOGEN_HEADER = '''"""Autogenerated from {source_path}.""" -from __future__ import annotations - -import pytest +{future_imports}import pytest from tests.test_autogenerated_doccode.conftest import qmd_test_context @@ -59,7 +57,6 @@ def test_main(): """Execute the python chunks from the source QMD.""" source_qmd = {source_path_literal} - namespace = {{"__name__": "__qmd_test__"}} with qmd_test_context(source_qmd): {body} ''' @@ -283,22 +280,50 @@ def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: def _render_chunk_comment(source_path: str, start_line: int) -> str: """Render the source-location comment for one inlined chunk.""" - return f"# {source_path}:{start_line}" + return f"### {source_path}:{start_line}" + + +def _split_future_imports(source: str) -> tuple[list[str], list[str]]: + """Split future imports from the rest of one chunk.""" + future_imports: list[str] = [] + body_lines: list[str] = [] + for line in source.splitlines(): + stripped = line.strip() + if stripped.startswith("from __future__ import "): + future_imports.append(stripped) + continue + body_lines.append(line) + return future_imports, body_lines + + +def _collect_future_imports( + chunks: tuple[Chunk, ...], +) -> tuple[tuple[str, ...], tuple[Chunk, ...]]: + """Hoist and deduplicate future imports across all chunks.""" + seen: set[str] = set() + future_imports: list[str] = [] + cleaned_chunks: list[Chunk] = [] + for chunk in chunks: + chunk_future_imports, body_lines = _split_future_imports(chunk.source) + for future_import in chunk_future_imports: + if future_import not in seen: + seen.add(future_import) + future_imports.append(future_import) + body_source = "\n".join(body_lines).strip() + if body_source: + cleaned_chunks.append( + Chunk(start_line=chunk.start_line, source=body_source + "\n") + ) + return tuple(future_imports), tuple(cleaned_chunks) def _render_chunk_source(source_path: str, chunk: Chunk) -> str: - """Render one chunk as inline test code.""" + 
"""Render one chunk as plain inline test code.""" # Keep the qmd source location visible right above the pasted code block. - chunk_block = f"""{_render_chunk_comment(source_path, chunk.start_line)} -exec( - compile( - {chunk.source!r}, - "{source_path}:{chunk.start_line}", - "exec", - ), - namespace, - namespace, -)""" + chunk_block = ( + f"{_render_chunk_comment(source_path, chunk.start_line)}\n" + f"{chunk.source.rstrip()}" + ) # Indent so the emitted code lives inside `with qmd_test_context(...):`. return indent(chunk_block.rstrip(), " " * 8) @@ -308,15 +333,22 @@ def render_test_module(source_path: Path, chunks: tuple[Chunk, ...]) -> str: # Normalize to forward slashes so generated modules are stable across # platforms and Windows paths do not introduce escape sequences. relative_source_path = source_path.relative_to(REPO_ROOT).as_posix() + # Future imports must appear at module scope, so hoist them before inlining + # the remaining chunk bodies into the test function. + future_imports, cleaned_chunks = _collect_future_imports(chunks) + future_import_block = "" + if future_imports: + future_import_block = "\n".join(future_imports) + "\n\n" # Emit chunks directly into the test body so the generated file is readable # without reconstructing execution from a serialized tuple payload. 
body = "\n\n".join( - _render_chunk_source(relative_source_path, chunk) for chunk in chunks + _render_chunk_source(relative_source_path, chunk) for chunk in cleaned_chunks ) if not body: body = " pass" return AUTOGEN_HEADER.format( source_path=relative_source_path, + future_imports=future_import_block, source_path_literal=repr(relative_source_path), body=body, ) diff --git a/scripts/test_generate_doc_code_tests.py b/scripts/test_generate_doc_code_tests.py index a5f82038..7b7af57e 100644 --- a/scripts/test_generate_doc_code_tests.py +++ b/scripts/test_generate_doc_code_tests.py @@ -155,16 +155,30 @@ def test_render_includes_source_and_chunk_payload(self): assert "@pytest.mark.docs_examples" in module assert "def test_main()" in module assert "source_qmd = 'docs/tutorial/example.qmd'" in module - assert 'namespace = {"__name__": "__qmd_test__"}' in module assert "with qmd_test_context(source_qmd):" in module assert "# docs/tutorial/example.qmd:12" in module - assert "compile(" in module - assert "namespace," in module - assert "'x = 1\\n'" in module + assert "x = 1" in module assert "CHUNKS =" not in module assert "SOURCE_QMD =" not in module assert "_runtime" not in module + def test_render_hoists_future_imports(self): + """Future imports should move to module scope.""" + source = REPO_ROOT / "docs" / "tutorial" / "example.qmd" + module = render_test_module( + source, + ( + Chunk( + start_line=12, + source="from __future__ import annotations\nx = 1\n", + ), + ), + ) + assert "from __future__ import annotations\n\nimport pytest" in module + assert "# docs/tutorial/example.qmd:12" in module + assert " x = 1" in module + assert " from __future__ import annotations" not in module + def test_write_tree_removes_stale_files(self, tmp_path): """Regeneration should replace the whole generated test tree.""" # Full regeneration should remove old outputs before writing new ones. 
From d0f3138a3ee030ebd166bb11887ca1fecdeae328 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 11:44:28 +0200 Subject: [PATCH 07/13] fix print issue --- scripts/generate_doc_code_tests.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py index a94d14a8..fcdb57d4 100644 --- a/scripts/generate_doc_code_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -49,6 +49,7 @@ AUTOGEN_HEADER = '''"""Autogenerated from {source_path}.""" {future_imports}import pytest +import builtins from tests.test_autogenerated_doccode.conftest import qmd_test_context @@ -56,6 +57,7 @@ @pytest.mark.docs_examples def test_main(): """Execute the python chunks from the source QMD.""" + global print source_qmd = {source_path_literal} with qmd_test_context(source_qmd): {body} @@ -70,6 +72,7 @@ def test_main(): import os from contextlib import contextmanager from pathlib import Path +import sys import matplotlib @@ -90,11 +93,21 @@ def qmd_test_context(source_qmd: str): root = Path(__file__).resolve().parents[2] source_path = root / source_qmd old = Path.cwd() + stdout = sys.stdout + stderr = sys.stderr try: + # Windows CI often defaults to cp1252, which cannot print some of the + # unicode characters used in DASCore's rich/text output. + if hasattr(sys.stdout, "reconfigure"): + sys.stdout.reconfigure(encoding="utf-8", errors="replace") + if hasattr(sys.stderr, "reconfigure"): + sys.stderr.reconfigure(encoding="utf-8", errors="replace") os.chdir(source_path.parent) yield finally: os.chdir(old) + sys.stdout = stdout + sys.stderr = stderr # Always close figures so one doc example cannot leak state to another. plt.close("all") ''' @@ -322,7 +335,8 @@ def _render_chunk_source(source_path: str, chunk: Chunk) -> str: # Keep the qmd source location visible right above the pasted code block. 
chunk_block = ( f"{_render_chunk_comment(source_path, chunk.start_line)}\n" - f"{chunk.source.rstrip()}" + f"{chunk.source.rstrip()}\n\n" + "print = builtins.print" ) # Indent so the emitted code lives inside `with qmd_test_context(...):`. return indent(chunk_block.rstrip(), " " * 8) From 9b55acbed7d1351802520485451a680b439aebac Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Sat, 4 Apr 2026 11:53:02 +0200 Subject: [PATCH 08/13] fix windows doctest --- scripts/generate_doc_code_tests.py | 75 ++++++++++++++++++------- scripts/test_generate_doc_code_tests.py | 47 ++++++++++++++++ 2 files changed, 102 insertions(+), 20 deletions(-) diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py index fcdb57d4..0d3b2d4d 100644 --- a/scripts/generate_doc_code_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -29,7 +29,9 @@ from __future__ import annotations import argparse +import shlex import shutil +import textwrap from dataclasses import dataclass from pathlib import Path from textwrap import indent @@ -76,16 +78,6 @@ def test_main(): import matplotlib -# Use a non-interactive backend because generated examples may plot figures. -matplotlib.use("Agg", force=True) -import matplotlib.pyplot as plt -from matplotlib.figure import Figure - -# Prevent example code from trying to open GUI windows during tests. -plt.ioff() -plt.show = lambda *args, **kwargs: None -Figure.show = lambda self, *args, **kwargs: None - @contextmanager def qmd_test_context(source_qmd: str): @@ -95,6 +87,16 @@ def qmd_test_context(source_qmd: str): old = Path.cwd() stdout = sys.stdout stderr = sys.stderr + original_backend = matplotlib.get_backend() + # Configure plotting lazily so importing the generated conftest does not + # affect unrelated tests in the same process. 
+ matplotlib.use("Agg", force=True) + import matplotlib.pyplot as plt + from matplotlib.figure import Figure + + original_show = plt.show + original_figure_show = Figure.show + was_interactive = plt.isinteractive() try: # Windows CI often defaults to cp1252, which cannot print some of the # unicode characters used in DASCore's rich/text output. @@ -102,14 +104,25 @@ def qmd_test_context(source_qmd: str): sys.stdout.reconfigure(encoding="utf-8", errors="replace") if hasattr(sys.stderr, "reconfigure"): sys.stderr.reconfigure(encoding="utf-8", errors="replace") + plt.ioff() + plt.show = lambda *args, **kwargs: None + Figure.show = lambda self, *args, **kwargs: None os.chdir(source_path.parent) yield finally: os.chdir(old) sys.stdout = stdout sys.stderr = stderr + plt.show = original_show + Figure.show = original_figure_show + if was_interactive: + plt.ion() + else: + plt.ioff() # Always close figures so one doc example cannot leak state to another. plt.close("all") + if matplotlib.get_backend() != original_backend: + matplotlib.use(original_backend, force=True) ''' @@ -137,9 +150,9 @@ def _parse_bool(value: str) -> bool | None: """Parse a yaml-like bool.""" # Front matter and chunk options use yaml-like booleans. cleaned = value.strip().strip("'\"").lower() - if cleaned == "true": + if cleaned in {"true", "yes", "1"}: return True - if cleaned == "false": + if cleaned in {"false", "no", "0"}: return False return None @@ -189,19 +202,41 @@ def _parse_doc_eval(front_matter: list[str]) -> bool: return True -def _is_python_fence(spec: str) -> bool: - """Return True if the fence should execute as python.""" +def _parse_fence_header(spec: str) -> tuple[str | None, dict[str, str]]: + """Parse a Quarto fence header into language and key/value options.""" # The raw fence spec arrives like "{python}" or # "{python filename="example.py"}". cleaned = spec.strip() - # Plain markdown-style {.python} fences are not Quarto executable cells. 
- if not cleaned.startswith("{") or cleaned.startswith("{.python"): - return False + if not cleaned.startswith("{") or not cleaned.endswith("}"): + return None, {} inner = cleaned[1:-1].strip() if not inner: + return None, {} + # Accept either comma-delimited or space-delimited Quarto options while + # preserving quoted values like filename="example file.py". + tokens = shlex.split(inner.replace(",", " ")) + if not tokens: + return None, {} + language = tokens[0].strip() + options: dict[str, str] = {} + for token in tokens[1:]: + if "=" not in token: + continue + key, _, value = token.partition("=") + options[key.strip()] = value.strip() + return language, options + + +def _is_python_fence(spec: str) -> bool: + """Return True if the fence should execute as python.""" + language, options = _parse_fence_header(spec) + # Plain markdown-style {.python} fences are not Quarto executable cells. + if language is None or language.startswith("."): return False - # Accept either comma-delimited or space-delimited Quarto options. - language = inner.split(",", 1)[0].split(None, 1)[0].strip() + for key in ("eval", "execute"): + parsed = _parse_bool(options.get(key, "")) + if parsed is False: + return False return language == "python" @@ -257,7 +292,7 @@ def extract_qmd_file(path: Path) -> QmdFile: if should_capture and _chunk_is_executable(chunk_lines): # Preserve the chunk as a single executable unit so variables # defined earlier in the block remain available later. 
- source = "\n".join(chunk_lines).strip() + source = textwrap.dedent("\n".join(chunk_lines)).strip() if source: chunks.append(Chunk(start_line=chunk_start, source=source + "\n")) in_chunk = False diff --git a/scripts/test_generate_doc_code_tests.py b/scripts/test_generate_doc_code_tests.py index 7b7af57e..90491874 100644 --- a/scripts/test_generate_doc_code_tests.py +++ b/scripts/test_generate_doc_code_tests.py @@ -78,6 +78,38 @@ def test_extracts_python_fence_with_space_delimited_options(self, tmp_path): out = extract_qmd_file(path) assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + def test_skips_python_fence_with_inline_eval_false(self, tmp_path): + """Inline fence options should be able to disable execution.""" + path = _write( + tmp_path / "example.qmd", + """```{python, eval=false} +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=6, source='print("run")\n'),) + + def test_skips_python_fence_with_inline_execute_zero(self, tmp_path): + """Space-delimited inline execute flags should also be respected.""" + path = _write( + tmp_path / "example.qmd", + """```{python execute=0} +print("skip") +``` + +```{python} +print("run") +``` +""", + ) + out = extract_qmd_file(path) + assert out.chunks == (Chunk(start_line=6, source='print("run")\n'),) + def test_reads_qmd_with_utf8_encoding(self, tmp_path, monkeypatch): """Extraction should force UTF-8 instead of platform default encodings.""" path = _write(tmp_path / "example.qmd", "placeholder") @@ -100,6 +132,21 @@ def _read_text(self, *args, **kwargs): assert called["encoding"] == TEXT_ENCODING assert out.chunks == (Chunk(start_line=2, source='print("run")\n'),) + def test_dedents_indented_python_fence(self, tmp_path): + """Indented fenced code should be normalized before storage.""" + path = _write( + tmp_path / "example.qmd", + """```{python} + if True: + print("run") +``` +""", + ) + out = extract_qmd_file(path) + 
assert out.chunks == ( + Chunk(start_line=2, source='if True:\n print("run")\n'), + ) + def test_skips_chunk_with_eval_or_execute_false(self, tmp_path): """Chunk-level execution flags should be honored.""" # Chunk-local options should override the document default. From 39232a37b539b3fcf6220b436d8c40a9b9329fa7 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Mon, 6 Apr 2026 09:42:23 +0200 Subject: [PATCH 09/13] try simplify http tests --- tests/test_io/conftest.py | 18 ++++++++++++++---- tests/test_io/test_remote_http.py | 21 +++++++++++++++------ 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tests/test_io/conftest.py b/tests/test_io/conftest.py index e9db6ce2..03d2702c 100644 --- a/tests/test_io/conftest.py +++ b/tests/test_io/conftest.py @@ -14,7 +14,7 @@ import time from collections.abc import Callable from functools import partial -from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer +from http.server import HTTPServer, SimpleHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path from urllib.error import URLError from urllib.request import urlopen @@ -41,6 +41,17 @@ def copyfile(self, source, outputfile): return None +class _RegressionHTTPRequestHandler(_SilentSimpleHTTPRequestHandler): + """A stricter localhost handler for flaky plain-HTTP regression tests.""" + + protocol_version = "HTTP/1.0" + + def end_headers(self): + """Disable keep-alive so one test request cannot leak into the next.""" + self.send_header("Connection", "close") + super().end_headers() + + def _link_or_copy(source: Path, dest: Path) -> None: """Populate one served file path using the cheapest available local copy.""" dest.parent.mkdir(parents=True, exist_ok=True) @@ -175,11 +186,10 @@ def _ensure(fetch_name: str, relative_path: str | Path | None = None) -> Path: def http_regression_das_path(http_regression_data_root, ensure_http_regression_file): """Return an isolated HTTP tree containing only the regression fixtures.""" handler = partial( 
- _SilentSimpleHTTPRequestHandler, + _RegressionHTTPRequestHandler, directory=str(http_regression_data_root), ) - server = ThreadingHTTPServer(("127.0.0.1", 0), handler) - server.daemon_threads = True + server = HTTPServer(("127.0.0.1", 0), handler) thread = threading.Thread(target=server.serve_forever, daemon=True) thread.start() probe_path = "example_dasdae_event_1.h5" diff --git a/tests/test_io/test_remote_http.py b/tests/test_io/test_remote_http.py index 5ed13d10..3127fffb 100644 --- a/tests/test_io/test_remote_http.py +++ b/tests/test_io/test_remote_http.py @@ -13,6 +13,7 @@ from dascore.exceptions import InvalidSpoolError, RemoteCacheError from dascore.utils.misc import suppress_warnings from dascore.utils.remote_io import clear_remote_file_cache, get_remote_cache_path +from tests.test_io._common_io_test_utils import fail_on_timeout pytestmark = pytest.mark.network @@ -124,7 +125,6 @@ def test_http_hdf5_get_format_requires_metadata_cache_opt_in( sys.platform == "win32", reason="Flaky plain-HTTP fallback on Windows.", ) - @pytest.mark.timeout(30) def test_http_hdf5_fallback_warns_once_and_reuses_cached_local_copy( self, http_regression_das_path, ensure_http_regression_file ): @@ -135,16 +135,25 @@ def test_http_hdf5_fallback_warns_once_and_reuses_cached_local_copy( with set_config( allow_remote_cache_for_metadata=True, warn_on_remote_cache=True ): - with pytest.warns(UserWarning, match="Downloading remote file"): - dc.get_format(path) + with fail_on_timeout(30, f"dc.get_format({path})"): + with pytest.warns(UserWarning, match="Downloading remote file"): + dc.get_format(path) cached_files = list(get_remote_cache_path().rglob(fname)) - assert len(cached_files) <= 1 - assert len(dc.read(path)) + assert len(cached_files) == 1 + assert cached_files[0].exists() + + with fail_on_timeout(30, f"dc.read({path})"): + spool = dc.read(path) + assert spool cached_files_2 = list(get_remote_cache_path().rglob(fname)) assert len(cached_files_2) == 1 assert 
cached_files_2[0].exists() - assert dc.read(path) + assert cached_files_2 == cached_files + + with fail_on_timeout(30, f"dc.read({path}) second reuse"): + spool_2 = dc.read(path) + assert spool_2 cached_files_3 = list(get_remote_cache_path().rglob(fname)) assert cached_files_3 == cached_files_2 From 09fec1619353f34a246a5e783febe03bf6094878 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Mon, 6 Apr 2026 10:05:44 +0200 Subject: [PATCH 10/13] standardize timeout --- environment.yml | 1 + tests/test_io/_common_io_test_utils.py | 36 +------------ tests/test_io/conftest.py | 73 +++++++++++--------------- tests/test_io/test_remote_common_io.py | 12 ++--- tests/test_io/test_remote_http.py | 14 ++--- 5 files changed, 43 insertions(+), 93 deletions(-) diff --git a/environment.yml b/environment.yml index 9a876c87..52f85af2 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - pytest + - pytest-timeout - numpy>=1.24 - pydantic>=2.1 - pip diff --git a/tests/test_io/_common_io_test_utils.py b/tests/test_io/_common_io_test_utils.py index 4e1ee69a..a553342c 100644 --- a/tests/test_io/_common_io_test_utils.py +++ b/tests/test_io/_common_io_test_utils.py @@ -1,14 +1,8 @@ -"""Shared helpers for common IO test matrices. - -This module includes timeout guards used by localhost-backed remote tests so -fixture or IO hangs fail quickly instead of stalling the suite. 
-""" +"""Shared helpers for common IO test matrices.""" from __future__ import annotations -import signal as signal_mod import socket -import threading from contextlib import contextmanager from urllib import error as urllib_error @@ -68,31 +62,3 @@ def get_representative_io_test( for io, fetch_name_list in common_io_read_tests.items(): out.append([io, next(iter(iterate(fetch_name_list)))]) return out - - -@contextmanager -def fail_on_timeout(seconds: float, label: str): - """Fail the current test if the enclosed block exceeds the timeout.""" - if ( - threading.current_thread() is not threading.main_thread() - or not hasattr(signal_mod, "SIGALRM") - or not hasattr(signal_mod, "ITIMER_REAL") - or not hasattr(signal_mod, "setitimer") - ): - yield - return - - previous_handler = signal_mod.getsignal(signal_mod.SIGALRM) - - def _handle_timeout(signum, frame): - raise TimeoutError(f"{label} exceeded {seconds} seconds") - - try: - signal_mod.signal(signal_mod.SIGALRM, _handle_timeout) - signal_mod.setitimer(signal_mod.ITIMER_REAL, seconds) - yield - except TimeoutError as exc: - pytest.fail(str(exc)) - finally: - signal_mod.setitimer(signal_mod.ITIMER_REAL, 0) - signal_mod.signal(signal_mod.SIGALRM, previous_handler) diff --git a/tests/test_io/conftest.py b/tests/test_io/conftest.py index 03d2702c..14372ebd 100644 --- a/tests/test_io/conftest.py +++ b/tests/test_io/conftest.py @@ -23,7 +23,6 @@ from dascore.compat import UPath from dascore.utils.downloader import fetch -from tests.test_io._common_io_test_utils import fail_on_timeout class _SilentSimpleHTTPRequestHandler(SimpleHTTPRequestHandler): @@ -140,21 +139,19 @@ def http_das_path(http_test_data_root, ensure_http_fetch_file): try: host, port = server.server_address probe_url = f"http://{host}:{port}/das/{probe_path}" - with fail_on_timeout(10, "http_das_path readiness probe"): - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - 
pytest.fail("HTTP test server did not become ready in time.") + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail("HTTP test server did not become ready in time.") yield UPath(f"http://{host}:{port}/das") finally: - with fail_on_timeout(10, "http_das_path teardown"): - server.shutdown() - server.server_close() - thread.join(timeout=5) + server.shutdown() + server.server_close() + thread.join(timeout=5) if thread.is_alive(): pytest.fail("HTTP test server thread did not exit cleanly.") @@ -196,21 +193,19 @@ def http_regression_das_path(http_regression_data_root, ensure_http_regression_f try: host, port = server.server_address probe_url = f"http://{host}:{port}/das/{probe_path}" - with fail_on_timeout(10, "http_regression_das_path readiness probe"): - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - pytest.fail("HTTP test server did not become ready in time.") + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail("HTTP test server did not become ready in time.") yield UPath(f"http://{host}:{port}/das") finally: - with fail_on_timeout(10, "http_regression_das_path teardown"): - server.shutdown() - server.server_close() - thread.join(timeout=5) + server.shutdown() + server.server_close() + thread.join(timeout=5) if thread.is_alive(): pytest.fail("HTTP regression server thread did not exit cleanly.") @@ -259,22 +254,18 @@ def _run(): thread.start() try: probe_url = f"http://{host}:{port}/das/example_dasdae_event_1.h5" - with fail_on_timeout(10, "http_range_das_path readiness probe"): - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - pytest.fail( - "Range-capable HTTP test server did not become ready in time." 
- ) + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail("Range-capable HTTP test server did not become ready in time.") yield UPath(f"http://{host}:{port}/das") finally: - with fail_on_timeout(10, "http_range_das_path teardown"): - server.should_exit = True - thread.join(timeout=5) + server.should_exit = True + thread.join(timeout=5) if thread.is_alive(): sock.close() thread.join(timeout=1) diff --git a/tests/test_io/test_remote_common_io.py b/tests/test_io/test_remote_common_io.py index 2d97a1a6..770f303f 100644 --- a/tests/test_io/test_remote_common_io.py +++ b/tests/test_io/test_remote_common_io.py @@ -8,7 +8,6 @@ from dascore.config import set_config from dascore.utils.misc import suppress_warnings from tests.test_io._common_io_test_utils import ( - fail_on_timeout, get_flat_io_test, get_representative_io_test, skip_missing, @@ -16,7 +15,7 @@ ) from tests.test_io.test_common_io import COMMON_IO_READ_TESTS -pytestmark = pytest.mark.network +pytestmark = [pytest.mark.network, pytest.mark.timeout(30)] REMOTE_GET_FORMAT_CASES = get_flat_io_test(COMMON_IO_READ_TESTS) REMOTE_REPRESENTATIVE_CASES = get_representative_io_test(COMMON_IO_READ_TESTS) @@ -78,8 +77,7 @@ def test_expected_version(self, remote_get_format_case): """Each IO should identify its own remote test fixture.""" io, path = remote_get_format_case with skip_missing(): - with fail_on_timeout(30, f"dc.get_format({path})"): - out = dc.get_format(path) + out = dc.get_format(path) assert out == (io.name, io.version) @@ -90,8 +88,7 @@ def test_read_returns_spools(self, remote_read_case): """Each remotely supported file should read into a spool.""" _io, path = remote_read_case with skip_missing(): - with fail_on_timeout(30, f"dc.read({path})"): - out = dc.read(path) + out = dc.read(path) assert isinstance(out, dc.BaseSpool) assert len(out) > 0 assert all(isinstance(x, dc.Patch) for x in out) @@ -104,8 +101,7 @@ def 
test_scan_has_source_metadata(self, remote_scan_case): """Public scans of remote files should retain source metadata.""" io, path = remote_scan_case with skip_missing(): - with fail_on_timeout(30, f"dc.scan({path})"): - summary_list = dc.scan(path) + summary_list = dc.scan(path) assert len(summary_list) > 0 for summary in summary_list: assert str(summary.source_path) == str(path) diff --git a/tests/test_io/test_remote_http.py b/tests/test_io/test_remote_http.py index 3127fffb..6f869ce2 100644 --- a/tests/test_io/test_remote_http.py +++ b/tests/test_io/test_remote_http.py @@ -13,9 +13,8 @@ from dascore.exceptions import InvalidSpoolError, RemoteCacheError from dascore.utils.misc import suppress_warnings from dascore.utils.remote_io import clear_remote_file_cache, get_remote_cache_path -from tests.test_io._common_io_test_utils import fail_on_timeout -pytestmark = pytest.mark.network +pytestmark = [pytest.mark.network, pytest.mark.timeout(30)] @pytest.fixture(autouse=True) @@ -135,15 +134,13 @@ def test_http_hdf5_fallback_warns_once_and_reuses_cached_local_copy( with set_config( allow_remote_cache_for_metadata=True, warn_on_remote_cache=True ): - with fail_on_timeout(30, f"dc.get_format({path})"): - with pytest.warns(UserWarning, match="Downloading remote file"): - dc.get_format(path) + with pytest.warns(UserWarning, match="Downloading remote file"): + dc.get_format(path) cached_files = list(get_remote_cache_path().rglob(fname)) assert len(cached_files) == 1 assert cached_files[0].exists() - with fail_on_timeout(30, f"dc.read({path})"): - spool = dc.read(path) + spool = dc.read(path) assert spool cached_files_2 = list(get_remote_cache_path().rglob(fname)) @@ -151,8 +148,7 @@ def test_http_hdf5_fallback_warns_once_and_reuses_cached_local_copy( assert cached_files_2[0].exists() assert cached_files_2 == cached_files - with fail_on_timeout(30, f"dc.read({path}) second reuse"): - spool_2 = dc.read(path) + spool_2 = dc.read(path) assert spool_2 cached_files_3 = 
list(get_remote_cache_path().rglob(fname)) assert cached_files_3 == cached_files_2 From fb12e957e70bd1ba80184ed209ecd94058e8b48f Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Mon, 6 Apr 2026 10:27:52 +0200 Subject: [PATCH 11/13] review --- scripts/generate_doc_code_tests.py | 6 +- scripts/test_generate_doc_code_tests.py | 14 +++++ tests/test_io/_common_io_test_utils.py | 30 ++++++++++ tests/test_io/conftest.py | 73 ++++++++++++++----------- 4 files changed, 90 insertions(+), 33 deletions(-) diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py index 0d3b2d4d..3fcd89af 100644 --- a/scripts/generate_doc_code_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -59,6 +59,8 @@ @pytest.mark.docs_examples def test_main(): """Execute the python chunks from the source QMD.""" + # Keep `print` in module-global scope so one chunk can rebind it and later + # chunks see the same shared runtime name before the per-chunk reset below. global print source_qmd = {source_path_literal} with qmd_test_context(source_qmd): @@ -371,6 +373,8 @@ def _render_chunk_source(source_path: str, chunk: Chunk) -> str: chunk_block = ( f"{_render_chunk_comment(source_path, chunk.start_line)}\n" f"{chunk.source.rstrip()}\n\n" + # Reset `print` after each chunk so one example's reassignment does not + # leak into later chunks in the same generated test module. "print = builtins.print" ) # Indent so the emitted code lives inside `with qmd_test_context(...):`. 
@@ -413,7 +417,7 @@ def write_test_tree( if tests_path.exists(): try: shutil.rmtree(tests_path) - except Exception as e: + except OSError as e: print( # noqa: T201 f"Failed to remove generated test tree at {tests_path}: {e}" ) diff --git a/scripts/test_generate_doc_code_tests.py b/scripts/test_generate_doc_code_tests.py index 90491874..f537474a 100644 --- a/scripts/test_generate_doc_code_tests.py +++ b/scripts/test_generate_doc_code_tests.py @@ -245,3 +245,17 @@ def test_write_tree_removes_stale_files(self, tmp_path): assert output.exists() assert not stale.exists() assert (tests_path / "conftest.py").exists() + + def test_write_tree_skips_docs_without_executable_chunks(self, tmp_path): + """Docs with no executable chunks should not emit per-doc test modules.""" + tests_path = tmp_path / "tests" / "test_autogenerated_doccode" + source = DOCS_PATH / "tutorial" / "no_code.qmd" + qmd_file = QmdFile(path=source, chunks=()) + + written = write_test_tree([qmd_file], tests_path=tests_path) + output = tests_path / "tutorial" / "test_no_code.py" + + assert (tests_path / "__init__.py") in written + assert (tests_path / "conftest.py") in written + assert output not in written + assert not output.exists() diff --git a/tests/test_io/_common_io_test_utils.py b/tests/test_io/_common_io_test_utils.py index a553342c..7e512d6f 100644 --- a/tests/test_io/_common_io_test_utils.py +++ b/tests/test_io/_common_io_test_utils.py @@ -2,7 +2,9 @@ from __future__ import annotations +import signal as signal_mod import socket +import threading from contextlib import contextmanager from urllib import error as urllib_error @@ -62,3 +64,31 @@ def get_representative_io_test( for io, fetch_name_list in common_io_read_tests.items(): out.append([io, next(iter(iterate(fetch_name_list)))]) return out + + +@contextmanager +def fail_on_timeout(seconds: float, label: str): + """Fail fast around fixture lifecycle work when it exceeds a time budget.""" + if ( + threading.current_thread() is not 
threading.main_thread() + or not hasattr(signal_mod, "SIGALRM") + or not hasattr(signal_mod, "ITIMER_REAL") + or not hasattr(signal_mod, "setitimer") + ): + yield + return + + previous_handler = signal_mod.getsignal(signal_mod.SIGALRM) + + def _handle_timeout(_signum, _frame): + raise TimeoutError(f"{label} exceeded {seconds} seconds") + + try: + signal_mod.signal(signal_mod.SIGALRM, _handle_timeout) + signal_mod.setitimer(signal_mod.ITIMER_REAL, seconds) + yield + except TimeoutError as exc: + pytest.fail(str(exc)) + finally: + signal_mod.setitimer(signal_mod.ITIMER_REAL, 0) + signal_mod.signal(signal_mod.SIGALRM, previous_handler) diff --git a/tests/test_io/conftest.py b/tests/test_io/conftest.py index 14372ebd..03d2702c 100644 --- a/tests/test_io/conftest.py +++ b/tests/test_io/conftest.py @@ -23,6 +23,7 @@ from dascore.compat import UPath from dascore.utils.downloader import fetch +from tests.test_io._common_io_test_utils import fail_on_timeout class _SilentSimpleHTTPRequestHandler(SimpleHTTPRequestHandler): @@ -139,19 +140,21 @@ def http_das_path(http_test_data_root, ensure_http_fetch_file): try: host, port = server.server_address probe_url = f"http://{host}:{port}/das/{probe_path}" - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - pytest.fail("HTTP test server did not become ready in time.") + with fail_on_timeout(10, "http_das_path readiness probe"): + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail("HTTP test server did not become ready in time.") yield UPath(f"http://{host}:{port}/das") finally: - server.shutdown() - server.server_close() - thread.join(timeout=5) + with fail_on_timeout(10, "http_das_path teardown"): + server.shutdown() + server.server_close() + thread.join(timeout=5) if thread.is_alive(): pytest.fail("HTTP test server thread did not exit cleanly.") @@ -193,19 
+196,21 @@ def http_regression_das_path(http_regression_data_root, ensure_http_regression_f try: host, port = server.server_address probe_url = f"http://{host}:{port}/das/{probe_path}" - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - pytest.fail("HTTP test server did not become ready in time.") + with fail_on_timeout(10, "http_regression_das_path readiness probe"): + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail("HTTP test server did not become ready in time.") yield UPath(f"http://{host}:{port}/das") finally: - server.shutdown() - server.server_close() - thread.join(timeout=5) + with fail_on_timeout(10, "http_regression_das_path teardown"): + server.shutdown() + server.server_close() + thread.join(timeout=5) if thread.is_alive(): pytest.fail("HTTP regression server thread did not exit cleanly.") @@ -254,18 +259,22 @@ def _run(): thread.start() try: probe_url = f"http://{host}:{port}/das/example_dasdae_event_1.h5" - for _ in range(50): - try: - with urlopen(probe_url, timeout=5): - break - except (URLError, OSError): - time.sleep(0.1) - else: - pytest.fail("Range-capable HTTP test server did not become ready in time.") + with fail_on_timeout(10, "http_range_das_path readiness probe"): + for _ in range(50): + try: + with urlopen(probe_url, timeout=5): + break + except (URLError, OSError): + time.sleep(0.1) + else: + pytest.fail( + "Range-capable HTTP test server did not become ready in time." 
class _RangeHTTPRequestHandler(_SilentSimpleHTTPRequestHandler):
    """A simple localhost handler with explicit single-range support.

    Behaves like the silent base handler for directories, but serves files
    itself so a single RFC 7233 ``Range: bytes=...`` header is honored with
    a 206 partial response, or 416 when the range cannot be satisfied.
    """

    # HTTP/1.0 avoids keep-alive/chunked complications in tests.
    protocol_version = "HTTP/1.0"

    def handle(self):
        """Serve exactly one request per connection, then close cleanly."""
        self.close_connection = True
        self.handle_one_request()

    def end_headers(self):
        """Disable keep-alive so each ranged request stands alone."""
        self.send_header("Connection", "close")
        super().end_headers()

    def send_head(self):
        """Serve files and honor one RFC 7233 byte range when requested.

        Returns an open binary file object positioned at the range start
        (consumed by ``copyfile``), or ``None`` after an error response has
        already been sent.
        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            # Directory listings need no range handling; defer to the base.
            return super().send_head()
        if path.endswith("/") or not os.path.isfile(path):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            file_handle = open(path, "rb")
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            stat_result = os.fstat(file_handle.fileno())
            size = stat_result.st_size
            range_header = self.headers.get("Range")
            start = 0
            end = size - 1
            status = HTTPStatus.OK

            if range_header:
                start, end = self._parse_range_header(range_header, size)
                if start is None:
                    # Unsatisfiable/invalid range: 416 with the current
                    # representation length, per RFC 7233.
                    self.send_response(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE)
                    self.send_header("Content-Range", f"bytes */{size}")
                    self.send_header("Accept-Ranges", "bytes")
                    self.send_header("Content-Length", "0")
                    self.end_headers()
                    file_handle.close()
                    return None
                status = HTTPStatus.PARTIAL_CONTENT

            self.send_response(status)
            self.send_header("Content-type", self.guess_type(path))
            self.send_header("Accept-Ranges", "bytes")
            self.send_header("Content-Length", str(end - start + 1))
            self.send_header(
                "Last-Modified", self.date_time_string(stat_result.st_mtime)
            )
            if status == HTTPStatus.PARTIAL_CONTENT:
                self.send_header("Content-Range", f"bytes {start}-{end}/{size}")
            self.end_headers()
            # Stash the selected range for copyfile; seek so the stream
            # starts at the right offset either way.
            self._range = (start, end)
            file_handle.seek(start)
            return file_handle
        except Exception:
            file_handle.close()
            raise

    def copyfile(self, source, outputfile):
        """Copy only the selected range when one was requested."""
        byte_range = getattr(self, "_range", None)
        if byte_range is None:
            return super().copyfile(source, outputfile)
        start, end = byte_range
        remaining = end - start + 1
        try:
            while remaining > 0:
                chunk = source.read(min(64 * 1024, remaining))
                if not chunk:
                    break
                outputfile.write(chunk)
                remaining -= len(chunk)
        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
            # Client disappeared mid-transfer; nothing useful left to do.
            return None
        finally:
            # Reset so a reused handler instance never replays a stale range.
            self._range = None

    @staticmethod
    def _parse_range_header(header: str, size: int) -> tuple[int | None, int | None]:
        """Parse a single bytes range, returning `(None, None)` when invalid.

        Parameters
        ----------
        header
            Raw ``Range`` header value, e.g. ``"bytes=0-99"``.
        size
            Total representation size in bytes.
        """
        if not header.startswith("bytes="):
            return (None, None)
        spec = header[len("bytes=") :].strip()
        # Only a single range is supported; multi-range specs are invalid,
        # matching this handler's other deliberate simplifications.
        if "," in spec or "-" not in spec:
            return (None, None)
        start_str, end_str = spec.split("-", maxsplit=1)
        try:
            if not start_str:
                # Suffix form "bytes=-N": serve the final N bytes.
                if not end_str:
                    return (None, None)
                length = int(end_str)
                if length <= 0:
                    return (None, None)
                return (max(size - length, 0), size - 1)
            start = int(start_str)
            end = size - 1 if not end_str else int(end_str)
        except ValueError:
            # Non-numeric values (e.g. "bytes=a-b") previously raised out of
            # the handler and broke the response; treat them as invalid.
            return (None, None)
        if start < 0 or end < start or start >= size:
            return (None, None)
        return (start, min(end, size - 1))
server down. - timeout_graceful_shutdown=1, - ) - server = uvicorn.Server(config) - sock = config.bind_socket() - host, port = sock.getsockname()[:2] - - def _run(): - server.run(sockets=[sock]) - - thread = threading.Thread(target=_run, daemon=True) + handler = partial(_RangeHTTPRequestHandler, directory=str(http_test_data_root)) + server = HTTPServer(("127.0.0.1", 0), handler) + thread = threading.Thread(target=server.serve_forever, daemon=True) thread.start() try: + host, port = server.server_address probe_url = f"http://{host}:{port}/das/example_dasdae_event_1.h5" with fail_on_timeout(10, "http_range_das_path readiness probe"): for _ in range(50): @@ -273,11 +350,10 @@ def _run(): yield UPath(f"http://{host}:{port}/das") finally: with fail_on_timeout(10, "http_range_das_path teardown"): - server.should_exit = True + server.shutdown() + server.server_close() thread.join(timeout=5) if thread.is_alive(): - sock.close() - thread.join(timeout=1) pytest.fail("Range-capable HTTP server thread did not exit cleanly.") From a62a9b12a908408d193e004422f565691e92cecd Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Mon, 6 Apr 2026 11:54:58 +0200 Subject: [PATCH 13/13] another review --- scripts/generate_doc_code_tests.py | 20 +++++++++--------- scripts/test_generate_doc_code_tests.py | 27 ++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/scripts/generate_doc_code_tests.py b/scripts/generate_doc_code_tests.py index 3fcd89af..665b8e67 100644 --- a/scripts/generate_doc_code_tests.py +++ b/scripts/generate_doc_code_tests.py @@ -87,8 +87,6 @@ def qmd_test_context(source_qmd: str): root = Path(__file__).resolve().parents[2] source_path = root / source_qmd old = Path.cwd() - stdout = sys.stdout - stderr = sys.stderr original_backend = matplotlib.get_backend() # Configure plotting lazily so importing the generated conftest does not # affect unrelated tests in the same process. 
@@ -101,7 +99,9 @@ def qmd_test_context(source_qmd: str): was_interactive = plt.isinteractive() try: # Windows CI often defaults to cp1252, which cannot print some of the - # unicode characters used in DASCore's rich/text output. + # unicode characters used in DASCore's rich/text output. `reconfigure` + # mutates the existing stream in place, so we intentionally do not try + # to restore the previous encoding on exit. if hasattr(sys.stdout, "reconfigure"): sys.stdout.reconfigure(encoding="utf-8", errors="replace") if hasattr(sys.stderr, "reconfigure"): @@ -113,8 +113,6 @@ def qmd_test_context(source_qmd: str): yield finally: os.chdir(old) - sys.stdout = stdout - sys.stderr = stderr plt.show = original_show Figure.show = original_figure_show if was_interactive: @@ -176,7 +174,7 @@ def _parse_doc_eval(front_matter: list[str]) -> bool: # execute: # eval: false in_execute = False - execute_indent = 0 + execute_indent: int | None = None for line in front_matter: stripped = line.strip() # Ignore blank lines and front-matter comments. @@ -193,7 +191,7 @@ def _parse_doc_eval(front_matter: list[str]) -> bool: if parsed is not None: return parsed continue - if in_execute and indent_size <= execute_indent: + if in_execute and execute_indent is not None and indent_size <= execute_indent: in_execute = False if in_execute and ":" in stripped: key, _, value = stripped.partition(":") @@ -317,11 +315,15 @@ def iter_source_qmd_files(base_path: Path = DOCS_PATH) -> list[Path]: ] -def get_output_path(source_path: Path, tests_path: Path = TESTS_PATH) -> Path: +def get_output_path( + source_path: Path, + tests_path: Path = TESTS_PATH, + docs_path: Path = DOCS_PATH, +) -> Path: """Map a qmd file to its generated pytest module.""" # Keep the generated tree shaped like docs/, but swap the filename to # `test_.py` so pytest discovers it naturally. 
- relative = source_path.relative_to(DOCS_PATH) + relative = source_path.relative_to(docs_path) filename = f"test_{source_path.stem}.py" if len(relative.parts) == 1: return tests_path / filename diff --git a/scripts/test_generate_doc_code_tests.py b/scripts/test_generate_doc_code_tests.py index f537474a..c7a772d5 100644 --- a/scripts/test_generate_doc_code_tests.py +++ b/scripts/test_generate_doc_code_tests.py @@ -58,6 +58,21 @@ def test_skips_doc_with_eval_false(self, tmp_path): eval: false --- +```{python} +print("nope") +``` +""", + ) + assert extract_qmd_file(path).chunks == () + + def test_skips_doc_with_inline_execute_false(self, tmp_path): + """Inline document-level execute false should skip all chunks.""" + path = _write( + tmp_path / "example.qmd", + """--- +execute: false +--- + ```{python} print("nope") ``` @@ -189,6 +204,16 @@ def test_nested_doc_maps_to_nested_test(self): expected = tests / "tutorial" / "test_file_io.py" assert get_output_path(source, tests_path=tests) == expected + def test_custom_docs_root_can_be_supplied(self): + """Mapping should support callers with a non-default docs root.""" + docs_root = Path("/repo/custom_docs") + source = docs_root / "guide" / "example.qmd" + tests = Path("/repo/tests/test_autogenerated_doccode") + expected = tests / "guide" / "test_example.py" + assert ( + get_output_path(source, tests_path=tests, docs_path=docs_root) == expected + ) + class TestRenderAndWrite: """Tests for generated module output.""" @@ -203,7 +228,7 @@ def test_render_includes_source_and_chunk_payload(self): assert "def test_main()" in module assert "source_qmd = 'docs/tutorial/example.qmd'" in module assert "with qmd_test_context(source_qmd):" in module - assert "# docs/tutorial/example.qmd:12" in module + assert "### docs/tutorial/example.qmd:12" in module assert "x = 1" in module assert "CHUNKS =" not in module assert "SOURCE_QMD =" not in module