diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
deleted file mode 100644
index 5efd5cc8..00000000
--- a/.github/workflows/test.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: test
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    branches:
-      - main
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  default:
-    runs-on: ${{ matrix.os }}-latest
-    strategy:
-      matrix:
-        os: [ubuntu, macos]
-        python-version: ["3.11", "3.12", "3.13"]
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install packages
-        run: |
-          python -m pip install -r test_requirements.txt
-          python -m pip list
-
-      - name: Test
-        run: |
-          # Avoid deprecation error.
-          export JUPYTER_PLATFORM_DIRS=1
-          jupyter --paths
-
-          make test
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 94eb0e59..a2f71fb8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -187,11 +187,12 @@
 when opened as a notebook. Another difference we want to see between the HTML
 and the notebook version is that we want to avoid putting the solutions in the
 notebook version, to allow more space for thought about the exercise. Both to
 modify any ugly formatting, and to remove the exercise solutions, we
-post-process the pages with a script `_scripts/process_notebooks.py` to load
-the pages as text notebooks, and write out `.ipynb` files with modified markup
-that looks better in a Jupyter interface. Some of the authoring advice here is
-to allow that process to work smoothly, because the `process_notebooks.py` file
-reads the input Myst-MD format notebooks using
+post-process the pages with a script `jljb-write-dir` from the [jljb
+package](https://github.com/matthew-brett/jljb) to load the pages as text
+notebooks, and write out `.ipynb` files with modified markup that looks better
+in a Jupyter interface. Some of the authoring advice here is to allow that
+process to work smoothly, because the `jljb-write-dir` script reads the
+input Myst-MD format notebooks using
 [Jupytext](https://jupytext.readthedocs.io) before converting to Jupyter
 `.ipynb` files.
diff --git a/Makefile b/Makefile
index 79b13fcc..8acdb452 100644
--- a/Makefile
+++ b/Makefile
@@ -11,14 +11,7 @@ html:
 
 jl: # Jupyter-lite files for book build.
$(PIP_INSTALL_CMD) -r jl-build-requirements.txt - rm -rf $(JL_DIR) - mkdir $(JL_DIR) - cp -r data images $(JL_DIR) - $(PYTHON) _scripts/process_notebooks.py $(JL_DIR) - $(PYTHON) -m jupyter lite build \ - --contents $(JL_DIR) \ - --output-dir $(BUILD_DIR)/interact \ - --lite-dir $(JL_DIR) + jljb-write-dir $(BUILD_DIR)/interact data images --jl-tmp $(JL_DIR) lint: pre-commit run --all-files --show-diff-on-failure --color always diff --git a/_config.yml b/_config.yml index 8683e013..4b74365b 100644 --- a/_config.yml +++ b/_config.yml @@ -21,7 +21,6 @@ exclude_patterns: - LICENSE.md - CONTRIBUTING.md - todo.md - - _scripts/* - _notes/* - _to_ignore.md - data/LICENSE.txt diff --git a/_scripts/examples2nb.py b/_scripts/examples2nb.py deleted file mode 100755 index 12f708dc..00000000 --- a/_scripts/examples2nb.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python3 -"""Process sphinx-gallery examples in notebook""" - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -import ast -from copy import deepcopy -from functools import reduce -import operator -import re -from pathlib import Path - -import jupytext -import nbformat - - -HEADER = jupytext.reads( - """\ ---- -jupyter: - orphan: true - jupytext: - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.2' - jupytext_version: 1.17.1 - kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- - -""", - fmt="Rmd", -) - -# New Markdown cell function -NMC = nbformat.versions[HEADER["nbformat"]].new_markdown_cell - -# Default encoding for notebooks and examples. -NB_ENCODING = "utf-8" - - -def get_ref_targets(root_path, nb_ext=".Rmd", excludes=()): - refs = [] - for nb_path in root_path.glob("**/*" + nb_ext): - if nb_path in excludes: - continue - refs += re.findall( - r"^\s*\(\s*([a-zA-Z0-9-_]+)\s*\)=\s*$", - nb_path.read_text(NB_ENCODING), - flags=re.MULTILINE, - ) - return refs - - -FIG_EG_RE = re.compile( - r""" -^(\s*:::+|```)\s*\{(?:figure|image)\}\s* -auto_examples/.*?images/sphx_glr_(?P\w+?)_\d{3}\.png -.*? -\s*\1""", - flags=re.MULTILINE | re.VERBOSE | re.DOTALL, -) - - -def get_eg_stems(nb_path): - """Analyze notebook for references to example output""" - refs = [] - nb = jupytext.read(nb_path) - for cell in nb.cells: - if cell["cell_type"] != "markdown": - continue - for ref in [m.groupdict()["stem"] for m in FIG_EG_RE.finditer(cell["source"])]: - if ref not in refs: - refs.append(ref) - return refs - - -def proc_str(s): - s = s.strip() - lines = s.splitlines() - title = None - if len(lines) > 2 and re.match(r"^[=-]{2,}\s*$", lines[1]): - title = lines[0].strip() - lines = lines[2:] - if len(lines) and lines[0].strip() == "": - lines = lines[1:] - return "\n".join(lines), title - - -def process_example(eg_path, import_lines=None): - import_lines = [] if import_lines is None else import_lines - txt = eg_path.read_text(NB_ENCODING) - nb = jupytext.reads(txt, "py:nomarker") - title = None - # Convert standalone multiline strings to Markdown cells. - out_cells = [] - for cell in nb.cells: - if cell["cell_type"] != "code": - out_cells.append(cell) - continue - body = ast.parse(cell.source).body - # Multiline string. 
- if ( - len(body) == 1 - and isinstance(body[0], ast.Expr) - and isinstance(body[0].value, ast.Constant) - and isinstance(body[0].value.value, str) - ): - src, cell_title = proc_str(body[0].value.value) - cell["cell_type"] = "markdown" - cell["source"] = src - title = cell_title if title is None else title - out_cells.append(cell) - continue - out_lines = [] - show_cell = False - for L in cell["source"].splitlines(): - sL = L.strip() - if sL.startswith("plt.show"): - show_cell = True - continue - if sL.startswith("import "): - if sL in import_lines: - continue - import_lines.append(sL) - out_lines.append(L) - if out_lines: - cell["source"] = "\n".join(out_lines) - if show_cell: - cell["metadata"] = cell.get("metadata", {}) - cell["metadata"]["tags"] = list( - set(cell["metadata"].get("tags", [])).union(["hide-input"]) - ) - out_cells.append(cell) - nb.cells = out_cells - # Get title from filename if not already found. - if title is None and (m := re.match(r"plot_(.+)\.py", eg_path.name)): - title = m.groups()[0] - return nb, title - - -def get_example_paths(eg_dirs): - return reduce(operator.add, [sorted(Path(d).glob("**/plot_*.py")) for d in eg_dirs]) - - -def process_nb_examples(root_path, nb_path, eg_paths, check_refs=True): - # Get all references (something)= - ref_defs = get_ref_targets(root_path) - # Get all examples. - examples = {} - nb_imp_lines = [] - # Analyze notebook for references to examples - eg_stems = get_eg_stems(nb_path) - - def eg_sorter(pth): - return [eg_stems.index(pth.stem) if pth.stem in eg_stems else len(eg_stems)] - - # Sort examples in notebook order. - eg_paths = sorted(eg_paths, key=eg_sorter) # Relies on stable sort. - - for eg_path in eg_paths: - nb, title = process_example(eg_path, nb_imp_lines) - eg_stem = eg_path.stem - ref = ( - eg_stem - if title is None - else re.sub(r"[^a-zA-Z0-9]+", "-", title).lower().strip("-") - ) - if check_refs and ref in ref_defs: - raise ValueError(f"Reference {ref} already used in project") - examples[eg_stem] = nb, title, ref - # Try to detect possible titles for each reference. - # Run through examples in notebook order - nb_out = deepcopy(HEADER) - cells = nb_out.cells - cells.append(NMC(f"# Examples for {nb_path}")) - for eg_stem in eg_stems: - cells += output_example(eg_stem, examples, header_level=2) - remaining = [s for s in examples if s not in eg_stems] - if remaining: - cells.append(NMC("## Other examples")) - for eg_stem in remaining: - cells += output_example(eg_stem, examples, header_level=3) - return nb_out - - -def output_example(eg_stem, examples, header_level=2): - nb, title, ref = examples[eg_stem] - title = ref.replace("-", " ").title() if title is None else title - return [ - NMC(f"({ref})=\n\n{'#' * header_level} {title}\n\n") - ] + nb.cells - - -def get_parser(): - parser = ArgumentParser( - description=__doc__, # Usage from docstring - formatter_class=RawDescriptionHelpFormatter, - ) - parser.add_argument("nb_file", help="notebook file") - parser.add_argument("--eg-dir", help="path to examples", nargs="*") - parser.add_argument("--root-dir", help="root path to book", default=".") - parser.add_argument("--eg-nb", help="Output notebook filename") - parser.add_argument( - "--no-check-refs", - action="store_true", - help="Do not check if example refs are unique", - ) - return parser - - -def main(): - args = get_parser().parse_args() - # Process inputs and set defaults. 
- nb_pth = Path(args.nb_file) - if not nb_pth.is_file(): - raise RuntimeError(f"Notebook {nb_pth} is not a file") - if args.eg_dir: - eg_dirs = [Path(f) for f in args.eg_dir] - elif (eg_dir := nb_pth.parent / "examples").is_dir() or ( - eg_dir := nb_pth.parent.parent / "examples" - ).is_dir(): - eg_dirs = [eg_dir] - else: - raise RuntimeError("Cannot find examples directory") - if not (eg_pths := get_example_paths(eg_dirs)): - raise RuntimeError(f"No examples in {eg_dirs}") - eg_nb = ( - Path(args.eg_nb) - if args.eg_nb is not None - else (nb_pth.parent / (nb_pth.stem + "_examples" + nb_pth.suffix)) - ) - # Generate, write examples notebook. - out_nb = process_nb_examples( - Path(args.root_dir), nb_pth, eg_pths, not args.no_check_refs - ) - jupytext.write(out_nb, eg_nb, fmt="rmarkdown") - - -if __name__ == "__main__": - main() diff --git a/_scripts/post_parser.py b/_scripts/post_parser.py deleted file mode 100755 index 7e6d1ac2..00000000 --- a/_scripts/post_parser.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python3 -"""Post-ReST to Myst parser""" - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -from pathlib import Path -import re -import textwrap - - -RMD_HEADER = """\ ---- -jupyter: - jupytext: - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.2' - jupytext_version: 1.17.1 - kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- -""" - - -def process_python_block(lines, tags=()): - if [L.strip().startswith(">>> ") for L in lines if L.strip()][0]: - return process_doctest_block(lines) - return [get_hdr(tags)] + lines[:] + ["```"] - - -_PY_BLOCK = """\ ->>> 7 * 3. -21.0 ->>> 2**10 -1024 ->>> 8 % 3 -2 -""".splitlines() - - -_EXP_PY_BLOCK = [ - "```{python}", - "7 * 3.", - "```", - "", - "```{python}", - "2**10", - "```", - "", - "```{python}", - "8 % 3", - "```", -] - - -def test_process_python_block(): - assert process_python_block(_PY_BLOCK) == _EXP_PY_BLOCK - assert process_doctest_block(_PY_BLOCK) == _EXP_PY_BLOCK - - -IPY_IN = re.compile(r"In \[\d+\]: (.*)$") -IPY_OUT = re.compile(r"Out \[\d+\]: (.*)$") - - -def process_verbatim_block(lines): - out_lines = [] - for line in lines: - if line.strip() in ("@verbatim", ":verbatim:"): - continue - line = IPY_IN.sub(r"\1", line) - line = IPY_OUT.sub(r"\1", line) - out_lines.append(line) - return ["```python", ""] + out_lines + ["```"] - - -_IPY_BLOCK = """\ - In [53]: a = "hello, world!" - In [54]: a[2] = 'z' - --------------------------------------------------------------------------- - Traceback (most recent call last): - File "", line 1, in - TypeError: 'str' object does not support item assignment - - In [55]: a.replace('l', 'z', 1) - Out[55]: 'hezlo, world!' - In [56]: a.replace('l', 'z') - Out[56]: 'hezzo, worzd!' 
-""".splitlines() - - -_IPY_CONT_RE = re.compile(r"\s*\.{3,}: (.*)$") - - -def process_ipython_block(lines): - text = textwrap.dedent("\n".join(lines)) - if "@verbatim" in text or ":verbatim:" in text: - return process_verbatim_block(text.splitlines()) - out_lines = ["```{python}"] - state = "start" - last_i = len(lines) - 1 - for i, line in enumerate(text.splitlines()): - if state == "start" and line.strip() == "": - continue - if m := IPY_IN.match(line): - if state == "output" and i != last_i: - out_lines += ["```", "", "```{python}"] - state = "code" - out_lines.append(m.groups()[0]) - continue - if state == "code" and (m := _IPY_CONT_RE.match(line)): - out_lines.append(m.groups()[0]) - continue - # In code, but no code input line. - if line.strip(): - state = "output" - return out_lines + ["```"] - - -def test_ipython_block(): - assert process_ipython_block(_IPY_BLOCK) == [ - "```{python}", - 'a = "hello, world!"', - "a[2] = 'z'", - "```", - "", - "```{python}", - "a.replace('l', 'z', 1)", - "```", - "", - "```{python}", - "a.replace('l', 'z')", - "```", - ] - - -_DOCTEST_BLOCK = r""" ->>> a = "hello, world!" ->>> a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5 -'lo,' ->>> a[2:10:2] # Syntax: a[start:stop:step] -'lo o' ->>> a[::3] # every three characters, from beginning to end -'hl r!' -""".splitlines() - - -def get_hdr(tags): - if not tags: - return "```{python}" - joined_tags = ", ".join(f'"{t}"' for t in tags) - return f"```{{python tags=c({joined_tags})}}" - - -def process_doctest_block(lines, tags=()): - if not any(L.strip().startswith(">>> ") for L in lines): - return process_python_block(lines, tags) - lines = textwrap.dedent("\n".join(lines)).splitlines() - cell_hdr = get_hdr(tags) - out_lines = [cell_hdr] - state = "start" - last_i = len(lines) - 1 - for i, line in enumerate(lines): - if state == "start" and line.strip() == "": - continue - if line.startswith(">>> "): - if state == "output" and i != last_i: - out_lines += ["```", "", cell_hdr] - state = "code" - out_lines.append(line[4:]) - continue - if state == "code" and line.startswith("... "): - out_lines.append(line[4:]) - continue - state = "output" - return out_lines + ["```"] - - -def test_doctest_block(): - assert process_doctest_block(_DOCTEST_BLOCK) == [ - "```{python}", - 'a = "hello, world!"', - "a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5", - "```", - "", - "```{python}", - "a[2:10:2] # Syntax: a[start:stop:step]", - "```", - "", - "```{python}", - "a[::3] # every three characters, from beginning to end", - "```", - ] - - -def process_eval_rst_block(lines): - return [textwrap.dedent("\n".join(lines))] - - -_EVAL_RST_BLOCK = """\ -```{eval-rst} -.. ipython:: - - In [1]: a = [1, 2, 3] - - In [2]: b = a - - In [3]: a - Out[3]: [1, 2, 3] - - In [4]: b - Out[4]: [1, 2, 3] - - In [5]: a is b - Out[5]: True - - In [6]: b[1] = 'hi!' 
- - In [7]: a - Out[7]: [1, 'hi!', 3] -``` -""".splitlines() - - -def test_ipython_block_in_rst(): - assert parse_lines(_EVAL_RST_BLOCK) == [ - "```{python}", - "a = [1, 2, 3]", - "b = a", - "a", - "```", - "", - "```{python}", - "b", - "```", - "", - "```{python}", - "a is b", - "```", - "", - "```{python}", - "b[1] = 'hi!'", - "a", - "```", - ] - - -STATE_PROCESSOR = { - "python-block": process_python_block, - "ipython-block": process_ipython_block, - "doctest-block": process_doctest_block, - "eval-rst-block": process_eval_rst_block, -} - - -def parse_lines(lines): - parsed_lines = [] - state = "default" - block_lines = [] - for i, line in enumerate(lines): - if state == "default": - if re.match(r"```\s*\{eval-rst\}\s*$", line): - if re.match(r"\.\.\s+ipython::", lines[i + 1]): - state = "ipython-block-header" - else: - state = "eval-rst-block" - # Remove all eval-rst blocks. - continue - LS = line.strip() - if LS == "```": - state = "python-block" - continue - if LS == "```pycon": - state = "doctest-block" - continue - if LS.startswith("```"): - state = "other-block" - directive = line - continue - if state == "ipython-block-header": - # Drop ipython line - state = "ipython-block" - continue - if state.endswith("block"): - if line.strip() != "```": - block_lines.append(line) - continue - parsed_lines += ( - STATE_PROCESSOR[state](block_lines) - if state in STATE_PROCESSOR - else [directive] + block_lines + [line] - ) - block_lines = [] - state = "default" - continue - parsed_lines.append(line) - - return parsed_lines - - -def strip_content(lines): - text = "\n".join(lines) - text = re.sub(r"^\.\.\s+currentmodule:: .*\n", "", text, flags=re.MULTILINE) - text = re.sub(r"\s+#\s*doctest:.*$", "", text, flags=re.MULTILINE) - text = re.sub( - r"^:::\s*\{topic\}\s*\**(.*?)\**$", - r":::{admonition} \1", - text, - flags=re.MULTILINE, - ) - text = re.sub( - r"^:::\s*\{seealso\}$\n*(.*?)^:::\s*$", - ":::{admonition} See also\n\n\\1:::\n", - text, - flags=re.MULTILINE | re.DOTALL, - ) - return re.sub( - r"\`\`\`\s*\{contents\}.*?^\`\`\`\s*\n", - "", - text, - flags=re.MULTILINE | re.DOTALL, - ).splitlines() - - -def process_percent_block(lines): - # The first one or more lines should be considered comments. 
- for i, line in enumerate(lines): - if line.strip().startswith(">>> "): - head_lines = [ - ">>> # " + L - for L in lines[:i] - if (L.strip() and "for doctest" not in L.lower()) - ] - return process_doctest_block(head_lines + lines[i:], tags=("hide-input",)) - return [""] - - -def process_percent(lines): - out_lines = [] - block_lines = [] - state = "default" - for line in lines: - pct_line = line.startswith("% ") - if state == "default": - if not pct_line: - out_lines.append(line) - continue - state = "percent-lines" - if state == "percent-lines": - if line.startswith("%"): - block_lines.append(line[2:]) - else: # End of block - out_lines += process_percent_block(block_lines) - assert not line.strip() - state = "default" - block_lines = [] - return out_lines - - -def process_md(fname): - fpath = Path(fname) - out_lines = fpath.read_text().splitlines()[:] - for parser in [parse_lines, strip_content, process_percent]: - out_lines = parser(out_lines) - content = "\n".join(out_lines) - out_path = fpath - if fpath.suffix == ".md" and "```{python}" in content: - out_path = fpath.with_suffix(".Rmd") - fpath.unlink() - content = f"{RMD_HEADER}\n{content}" - out_path.write_text(content) - - -def get_parser(): - parser = ArgumentParser( - description=__doc__, # Usage from docstring - formatter_class=RawDescriptionHelpFormatter, - ) - parser.add_argument("in_md", nargs="+", help="Input Markdown files") - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - for fname in args.in_md: - process_md(fname) - - -if __name__ == "__main__": - main() diff --git a/_scripts/process_notebooks.py b/_scripts/process_notebooks.py deleted file mode 100755 index 66549a53..00000000 --- a/_scripts/process_notebooks.py +++ /dev/null @@ -1,399 +0,0 @@ -#!/usr/bin/env python3 -"""Process notebooks - -* Replace local kernel with Pyodide kernel in metadata. -* Filter: - * Note and admonition markers. - * Exercise markers. - * Solution blocks. -* Write notebooks to output directory. -* Write JSON jupyterlite file. -""" - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -from copy import deepcopy -from pathlib import Path -import re -from urllib.parse import quote as urlquote, urlparse - -import docutils.core as duc -import docutils.nodes as dun -from docutils.utils import Reporter -from sphinx.util.matching import get_matching_files -from myst_parser.docutils_ import Parser -import yaml - -_END_DIV_RE = re.compile(r"^\s*(:::+|```+|~~~+)\s*$") -import jupytext - -_JL_JSON_FMT = r"""\ -{{ - "jupyter-lite-schema-version": 0, - "jupyter-config-data": {{ - "contentsStorageName": "rss-{language}" - }} -}} -""" - -_DIV_RE = r"\s*(:::+|```+|~~~+)\s*" - - -_ADM_HEADER = re.compile( - rf""" - ^{_DIV_RE} - \{{\s*(?P\S+)\s*\}}\s* - (?P.*)\s*$ - """, - flags=re.VERBOSE, -) - - -_EX_SOL_MARKER = re.compile( - rf""" - (?P\n*) - {_DIV_RE} - \{{\s* - (?Pexercise|solution)- - (?Pstart|end) - \s*\}} - \s* - (?P\S+)?\s* - \n - (?P\s*:\S+: \s* \S+\s*\n)* - \n* - \s*(\2)\s* - \n - """, - flags=re.VERBOSE, -) - - -_SOL_MARKED = re.compile( - r""" - \n? - \n - .*? - \n? 
- """, - flags=re.VERBOSE | re.MULTILINE | re.DOTALL, -) - - -_END_DIV_RE = re.compile(rf"^{_DIV_RE}$") - - -# https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#syntax-extensions -MYST_EXTENSIONS = [ - "amsmath", - "attrs_inline", - "colon_fence", - "deflist", - "dollarmath", - "fieldlist", - "html_admonition", - "html_image", - "linkify", - "replacements", - "smartquotes", - "strikethrough", - "substitution", - "tasklist", -] - - -DEF_JUPYTERLITE_CONFIG = { - "in_nb_ext": ".md", - "out_nb_ext": ".ipynb", - "in_nb_fmt": "myst", - "remove_remove": True, - "proc_admonitions": True, -} - - -def _replace_markers(m): - st_end = m["st_end"] - if m["ex_sol"] == "exercise": - return f"{m['newlines']}**{st_end.capitalize()} of exercise**\n\n" - return f"\n\n" - - -def get_admonition_lines(nb_text, nb_path): - parser = Parser() - doc = duc.publish_doctree( - source=nb_text, - source_path=str(nb_path), - settings_overrides={ - "myst_enable_extensions": MYST_EXTENSIONS, - "report_level": Reporter.SEVERE_LEVEL, - }, - parser=parser, - ) - lines = nb_text.splitlines() - n_lines = len(lines) - admonition_lines = [] - for admonition in doc.findall(dun.Admonition): - start_line = admonition.line - 1 - # Find first node of subsequent doctree. - node0 = next( - admonition.findall(include_self=False, descend=False, ascend=True), None - ) - # There can be a system_message as next node, in which case the correct - # line is in the 'line' attribute. - last_line = node0.get("line", node0.line) - 2 if node0 else n_lines - 1 - for end_line in range(last_line, start_line + 1, -1): - if _END_DIV_RE.match(lines[end_line]): - break - else: - raise ValueError("Could not find end div") - admonition_lines.append((start_line, end_line)) - return admonition_lines - - -_ADM_HEADER = re.compile( - r""" - ^\s*(:::+|```+|~~~+)\s* - \{\s*(?P\S+)\s*\}\s* - (?P.*)\s*$ - """, - flags=re.VERBOSE, -) - - -_DIR_OPTION = re.compile(r"^\s*:\w+:") - - -def process_admonitions(nb_text, nb_path): - lines = nb_text.splitlines() - out_lines = [] - start_i = last = 0 - for first, last in get_admonition_lines(nb_text, nb_path): - m = _ADM_HEADER.match(lines[first]) - if not m: - raise ValueError(f"Cannot get match from {lines[first]}") - out_lines += lines[start_i:first] - start_i = last + 1 - ad_type, ad_title = m["ad_type"], m["ad_title"] - suffix = f": {ad_title}" if ad_title else "" - in_i = first + 1 - while _DIR_OPTION.match(lines[in_i]): - in_i += 1 - adm_txt = "\n".join(lines[in_i:last]).strip("\n") - out_lines.append( - f"**Start of {ad_type}{suffix}**\n\n{adm_txt}\n\n**End of {ad_type}**" - ) - return "\n".join(out_lines + lines[start_i:]) - - -def process_cells(nb, processors): - """Process cells in notebooks. - - Parameters - ---------- - nb : dict - processors : sequence - Sequences of callables, taking a cell as input, and returning a cell as - output. If None returned, delete this cell. 
- - Returns - ------- - out_nb : dict - """ - out_nb = deepcopy(nb) - out_cells = [] - for cell in out_nb["cells"]: - for processor in processors: - cell = processor(cell) - if cell is None: - break - if cell: - out_cells.append(cell) - out_nb["cells"] = out_cells - return out_nb - - -_LABEL = re.compile(r"^\s*\(\s*\S+\s*\)\=\s*\n", flags=re.MULTILINE) - -_GLUE_DIR = re.compile( - r""" - (:::+|```+)\s* - \{\s*glue:*\s*\}\s+ - (\w+)\n - (?:\s*:doc: .*?)* - \n\s*\1\s*\n - """, - flags=re.MULTILINE | re.DOTALL | re.VERBOSE, -) - - -_GLUE_ROLE = re.compile( - r""" - \{\s*glue:{0,1}\s*\}\s*`(.*)?` - """, - flags=re.MULTILINE | re.DOTALL | re.VERBOSE, -) - - -def label_processor(cell): - if cell["cell_type"] == "markdown": - cell["source"] = _LABEL.sub("", cell["source"]) - return cell - - -def remove_processor(cell): - tags = cell.get("metadata", {}).get("tags", {}) - if "remove-cell" in tags: - return None - return cell - - -_GLUE_DIR = re.compile( - r""" - (:::+|```+)\s* - \{\s*glue:*\s*\}\s+ - (?P\w+)\n - (\s*:doc:\s*(?P.*?)$){0,1} - \n\s*\1\s*\n - """, - flags=re.MULTILINE | re.DOTALL | re.VERBOSE, -) - - -_GLUE_ROLE = re.compile( - r""" - \{\s*glue:{0,1}\s*\}\s*`(.*?)` - """, - flags=re.MULTILINE | re.DOTALL | re.VERBOSE, -) - - -def _glue_replacer(m): - d = m.groupdict() - ref, doc = d["ref"], d["doc"] - doc_msg = f' in "{doc}"' if doc else "" - return f"(Ref to `{ref}`{doc_msg})\n" - - -def glue_processor(cell): - if cell["cell_type"] != "markdown": - return cell - cell["source"] = _GLUE_DIR.sub(_glue_replacer, cell["source"]) - cell["source"] = _GLUE_ROLE.sub(r"(Ref to `\1`)", cell["source"]) - return cell - - -def load_process_nb(nb_path, fmt="myst", url=None, proc_admonitions=True): - """Load and process notebook - - Deal with: - - * Note and admonition markers. - * Exercise markers. - * Solution blocks. - - Parameters - ---------- - nb_path : file-like - Path to notebook - fmt : str, optional - Format of notebook (for Jupytext) - url : str, optional - URL for output page. - proc_admonitions : {True, False}, optional - If True, process admonition blocks to plain paragraphs. - - Returns - ------- - nb : dict - Notebook as loaded and parsed. - """ - link_txt = "corresponding page" - page_link = f"[{link_txt}]({url})" if url else link_txt - nb_path = Path(nb_path) - nb_text = nb_path.read_text() - nbt1 = _EX_SOL_MARKER.sub(_replace_markers, nb_text) - nbt2 = _SOL_MARKED.sub(f"\n**See the {page_link} for solution**\n\n", nbt1) - if proc_admonitions: - nbt2 = process_admonitions(nbt2, nb_path) - nb = jupytext.reads(nbt2, fmt={"format_name": fmt, "extension": nb_path.suffix}) - return process_cells(nb, [label_processor, glue_processor]) - - -def process_notebooks( - config, output_dir, kernel_name="python", kernel_dname="Python (Pyodide)" -): - # Get processing params from jupyterlite config section. - jl_config = config["jupyterlite"] - input_dir = Path(config["input_dir"]) - # Use sphinx utility to find not-excluded files. 
- for fn in get_matching_files( - input_dir, exclude_patterns=config["exclude_patterns"] - ): - rel_path = Path(fn) - if rel_path.suffix != jl_config["in_nb_ext"]: - continue - print(f"Processing {rel_path}") - nb_url = ( - config["base_path"] - + "/" - + urlquote(rel_path.with_suffix(".html").as_posix()) - ) - nb = load_process_nb( - input_dir / rel_path, - jl_config["in_nb_fmt"], - nb_url, - jl_config["proc_admonitions"], - ) - if jl_config["remove_remove"]: - nb = process_cells(nb, [remove_processor]) - nb["metadata"]["kernelspec"] = { - "name": kernel_name, - "display_name": kernel_dname, - } - out_path = (output_dir / rel_path).with_suffix(jl_config["out_nb_ext"]) - out_path.parent.mkdir(exist_ok=True, parents=True) - jupytext.write(nb, out_path) - - -def get_parser(): - parser = ArgumentParser( - description=__doc__, # Usage from docstring - formatter_class=RawDescriptionHelpFormatter, - ) - parser.add_argument( - "output_dir", help="Directory to which we will output notebooks" - ) - parser.add_argument( - "--config-dir", default=".", help="Directory containing `_config.yml` file" - ) - return parser - - -def load_config(config_path): - config_path = Path(config_path).resolve() - with (config_path / "_config.yml").open("rt") as fobj: - config = yaml.safe_load(fobj) - # Post-processing. - config["input_dir"] = Path( - config.get("repository", {}).get("path_to_book", config_path) - ) - config["base_path"] = urlparse(config.get("html", {}).get("baseurl", "")).path - config["exclude_patterns"] = config.get("exclude_patterns", []) - config["exclude_patterns"].append("_build") - config["jupyterlite"] = dict( - DEF_JUPYTERLITE_CONFIG, **config.get("jupyterlite", {}) - ) - return config - - -def main(): - parser = get_parser() - args = parser.parse_args() - config = load_config(Path(args.config_dir)) - out_path = Path(args.output_dir) - out_path.mkdir(parents=True, exist_ok=True) - process_notebooks(config, out_path) - (out_path / "jupyter-lite.json").write_text(_JL_JSON_FMT.format(language="python")) - - -if __name__ == "__main__": - main() diff --git a/_scripts/run_regex.py b/_scripts/run_regex.py deleted file mode 100755 index a6a59428..00000000 --- a/_scripts/run_regex.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -"""Run a regex over a file""" - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -from pathlib import Path -import re - -IMAGE_NOT_EXAMPLE = re.compile( - r""" -^```{image} -\s+(?!auto_examples) -(?P\S+)$ -.*? 
-```""", - flags=re.DOTALL | re.MULTILINE | re.VERBOSE, -) - - -REPLACER = r"![](\1)" - - -def run_regexp(fname, regex, replacer): - pth = Path(fname) - in_contents = pth.read_text() - out_contents = regex.sub(replacer, in_contents) - pth.write_text(out_contents) - - -def get_parser(): - parser = ArgumentParser( - description=__doc__, # Usage from docstring - formatter_class=RawDescriptionHelpFormatter, - ) - parser.add_argument("fname", nargs="+", help="Files on which to run regexp") - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - for fname in args.fname: - run_regexp(fname, IMAGE_NOT_EXAMPLE, REPLACER) - - -if __name__ == "__main__": - main() diff --git a/_scripts/tests/eg.Rmd b/_scripts/tests/eg.Rmd deleted file mode 100644 index 68f59f5b..00000000 --- a/_scripts/tests/eg.Rmd +++ /dev/null @@ -1,188 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,Rmd - notebook_metadata_filter: all,-language_info - split_at_heading: true - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.2' - jupytext_version: 1.17.1 - kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- - -# Pandas from Numpy - -## What is Pandas? - -Pandas is an open-source python library for data manipulation and analysis. - - -``` {note} - -**Why is Pandas called Pandas?** - -The “Pandas” name is short for “panel data”. The library was named after the -type of econometrics panel data that it was designed to analyse. [Panel -data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where -the same observational units (e.g. countries) are observed over multiple -instances across time. - -``` - - -The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). - -The standard way to make a new Data Frame is to ask Pandas to read a data file -(like a `.csv` file) into a Data Frame. Before we do that however, we will -build our own Data Frame from scratch, beginning with the fundamental building -block for Data Frames: Numpy arrays. - -```{python} -# import the libraries needed for this page -import numpy as np -import pandas as pd -``` - -## Numpy arrays - -Let's say we have some data that applies to a set of countries, and we have some countries in mind: - -```{python} -country_names_array = np.array(['Australia', 'Brazil', 'Canada', - 'China', 'Germany', 'Spain', - 'France', 'United Kingdom', 'India', - 'Italy', 'Japan', 'South Korea', - 'Mexico', 'Russia', 'United States']) -country_names_array -``` - -For compactness, we'll also want to use the corresponding [standard -three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each -country, like so: - -Both Data Frames contain the same data, and the same labels. In fact, we can -use the `.equals` method of Data Frames to ask Pandas whether it agrees the -Data Frames are equivalent: - -```{python} -df.equals(loaded_labeled_df) -``` - -They are equivalent. - - -```{exercise-start} -:label: index-in-display -:class: dropdown -``` - - -In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. -If you look very carefully at the notebook output for the two data frames, you -may be able to spot the difference. 
Pandas `.equals` does not care about this -difference, but let's imagine we did. Try to work out how to change the `df` -Data Frame to give *exactly* the same display as we see for -`loaded_labeled_df`. - - -```{exercise-end} -``` - - - -```{solution-start} index-in-display -:class: dropdown -``` - - -You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: - -```{python} -loaded_labeled_df.index -``` - -compared to: - -```{python} -df.index -``` - -We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. - -The simplest way to do that is: - -```{python} -# Make a copy of the `df` Data Frame. This step is unnecessary to solving -# the problem, it is just to be neat. -df_copy = df.copy() -``` - -```{python} -# Set the Index name. -df_copy.index.name = 'Code' -df_copy -``` - - -```{solution-end} -``` - - - -``` {admonition} My title - -Some interesting information. - -``` - - -Some more text. - - -``` {exercise-start} -:label: differing-indices -:class: dropdown -``` - - -```{python} -# df5 -``` - -After these examples, what is your final working theory about the algorithm -Pandas uses to match the Indices of Series, when creating Data Frames? - - -``` {exercise-end} -``` - - - -``` {solution-start} differing-indices -:class: dropdown -``` - - -Here's our hypothesis of the algorithm: - -* First check if the Series Indices are the same. If so, use the Index of any - Series. -* If they are not the same, first sort all Series by their Index values, and - use the resulting sorted Index. - -What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? - - -``` {solution-end} -``` - - -(plot-frames)= -## Convenient Plotting with Data Frames - -Remember earlier we imported Matplotlib to plot some of our data? diff --git a/_scripts/tests/eg.md b/_scripts/tests/eg.md deleted file mode 100644 index 43234c66..00000000 --- a/_scripts/tests/eg.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -jupytext: - notebook_metadata_filter: all,-language_info - split_at_heading: true - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.18.0-dev -kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 ---- - -# Pandas from Numpy - -+++ - -## What is Pandas? - -Pandas is an open-source python library for data manipulation and analysis. - -+++ - -```{note} - -**Why is Pandas called Pandas?** - -The “Pandas” name is short for “panel data”. The library was named after the -type of econometrics panel data that it was designed to analyse. [Panel -data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where -the same observational units (e.g. countries) are observed over multiple -instances across time. - -``` - -+++ - -The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). - -The standard way to make a new Data Frame is to ask Pandas to read a data file -(like a `.csv` file) into a Data Frame. 
Before we do that however, we will -build our own Data Frame from scratch, beginning with the fundamental building -block for Data Frames: Numpy arrays. - -```{code-cell} -# import the libraries needed for this page -import numpy as np -import pandas as pd -``` - -## Numpy arrays - -Let's say we have some data that applies to a set of countries, and we have some countries in mind: - -```{code-cell} -country_names_array = np.array(['Australia', 'Brazil', 'Canada', - 'China', 'Germany', 'Spain', - 'France', 'United Kingdom', 'India', - 'Italy', 'Japan', 'South Korea', - 'Mexico', 'Russia', 'United States']) -country_names_array -``` - -For compactness, we'll also want to use the corresponding [standard -three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each -country, like so: - -Both Data Frames contain the same data, and the same labels. In fact, we can -use the `.equals` method of Data Frames to ask Pandas whether it agrees the -Data Frames are equivalent: - -```{code-cell} -df.equals(loaded_labeled_df) -``` - -They are equivalent. - -+++ - -```{exercise-start} -:label: index-in-display -:class: dropdown -``` - -+++ - -In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. -If you look very carefully at the notebook output for the two data frames, you -may be able to spot the difference. Pandas `.equals` does not care about this -difference, but let's imagine we did. Try to work out how to change the `df` -Data Frame to give _exactly_ the same display as we see for -`loaded_labeled_df`. - -+++ - -```{exercise-end} - -``` - -+++ - -```{solution-start} index-in-display -:class: dropdown -``` - -+++ - -You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: - -```{code-cell} -loaded_labeled_df.index -``` - -compared to: - -```{code-cell} -df.index -``` - -We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. - -The simplest way to do that is: - -```{code-cell} -# Make a copy of the `df` Data Frame. This step is unnecessary to solving -# the problem, it is just to be neat. -df_copy = df.copy() -``` - -```{code-cell} -# Set the Index name. -df_copy.index.name = 'Code' -df_copy -``` - -```{solution-end} - -``` - -+++ - -```{admonition} My title - -Some interesting information. - -``` - -+++ - -Some more text. - -+++ - -```{exercise-start} -:label: differing-indices -:class: dropdown -``` - -```{code-cell} -# df5 -``` - -After these examples, what is your final working theory about the algorithm -Pandas uses to match the Indices of Series, when creating Data Frames? - -+++ - -```{exercise-end} - -``` - -+++ - -```{solution-start} differing-indices -:class: dropdown -``` - -+++ - -Here's our hypothesis of the algorithm: - -- First check if the Series Indices are the same. If so, use the Index of any - Series. -- If they are not the same, first sort all Series by their Index values, and - use the resulting sorted Index. - -What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? - -+++ - -```{solution-end} - -``` - -+++ - -(plot-frames)= - -## Convenient Plotting with Data Frames - -Remember earlier we imported Matplotlib to plot some of our data? 
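The `eg.Rmd` and `eg.md` fixtures above record the same notebook in Jupytext's Rmd and MyST Markdown text formats; the deleted tests fed these through the notebook processing described in CONTRIBUTING.md. The core of that round trip is plain Jupytext. A minimal sketch follows, assuming illustrative file names; it shows the underlying mechanism, not the `jljb` implementation.

```python
# Minimal sketch of the text-notebook round trip. File names are
# illustrative; the kernelspec swap mirrors the defaults in the deleted
# _scripts/process_notebooks.py.
import jupytext

# Parse the MyST Markdown source into a notebook object.
nb = jupytext.read("eg.md", fmt="md:myst")

# Retarget the notebook at the Pyodide kernel used by the JupyterLite build.
nb["metadata"]["kernelspec"] = {
    "name": "python",
    "display_name": "Python (Pyodide)",
}

# Write out a standard Jupyter notebook file.
jupytext.write(nb, "eg.ipynb")
```
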
diff --git a/_scripts/tests/eg2.Rmd b/_scripts/tests/eg2.Rmd deleted file mode 100644 index c2896b3d..00000000 --- a/_scripts/tests/eg2.Rmd +++ /dev/null @@ -1,169 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,Rmd - notebook_metadata_filter: all,-language_info - split_at_heading: true - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.2' - jupytext_version: 1.17.1 - kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 - orphan: true ---- - -# Pandas from Numpy - -## What is Pandas? - -Pandas is an open-source python library for data manipulation and analysis. - -::: {note} - -**Why is Pandas called Pandas?** - -The “Pandas” name is short for “panel data”. The library was named after the -type of econometrics panel data that it was designed to analyse. [Panel -data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where -the same observational units (e.g. countries) are observed over multiple -instances across time. - -::: - -The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). - -The standard way to make a new Data Frame is to ask Pandas to read a data file -(like a `.csv` file) into a Data Frame. Before we do that however, we will -build our own Data Frame from scratch, beginning with the fundamental building -block for Data Frames: Numpy arrays. - -```{python} -# import the libraries needed for this page -import numpy as np -import pandas as pd -``` - -## Numpy arrays - -Let's say we have some data that applies to a set of countries, and we have some countries in mind: - -```{python} -country_names_array = np.array(['Australia', 'Brazil', 'Canada', - 'China', 'Germany', 'Spain', - 'France', 'United Kingdom', 'India', - 'Italy', 'Japan', 'South Korea', - 'Mexico', 'Russia', 'United States']) -country_names_array -``` - -For compactness, we'll also want to use the corresponding [standard -three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each -country, like so: - -Both Data Frames contain the same data, and the same labels. In fact, we can -use the `.equals` method of Data Frames to ask Pandas whether it agrees the -Data Frames are equivalent: - -```{python} -A = 2 -B = 3 -C = A + B -C -``` - -They are equivalent. - -::: {exercise-start} -:label: a-first-exercise -:class: dropdown -::: - -In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. -If you look very carefully at the notebook output for the two data frames, you -may be able to spot the difference. Pandas `.equals` does not care about this -difference, but let's imagine we did. Try to work out how to change the `df` -Data Frame to give *exactly* the same display as we see for -`loaded_labeled_df`. - -::: {exercise-end} -::: - -::: {solution-start} a-first-exercise -:class: dropdown -::: - -You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: - -```{python} -B -``` - -compared to: - -```{python} -C -``` - -We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. 
- -The simplest way to do that is: - -```{python} -D = C * 4 -``` - -```{python} -E = D + 10 -``` - -::: {solution-end} -::: - -::: {admonition} My title - -Some interesting information. - -::: - -Some more text. - -::: {exercise-start} -:label: differing-indices -:class: dropdown -::: - - -```{python} -# df5 -``` - -After these examples, what is your final working theory about the algorithm -Pandas uses to match the Indices of Series, when creating Data Frames? - -::: {exercise-end} -::: - -::: {solution-start} differing-indices -:class: dropdown -::: - -Here's our hypothesis of the algorithm: - -* First check if the Series Indices are the same. If so, use the Index of any - Series. -* If they are not the same, first sort all Series by their Index values, and - use the resulting sorted Index. - -What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? - -::: {solution-end} -::: - -(plot-frames)= -## Convenient Plotting with Data Frames - -Remember earlier we imported Matplotlib to plot some of our data? diff --git a/_scripts/tests/eg2.md b/_scripts/tests/eg2.md deleted file mode 100644 index 4d0136bc..00000000 --- a/_scripts/tests/eg2.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -jupytext: - notebook_metadata_filter: all,-language_info - split_at_heading: true - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.18.0-dev -kernelspec: - display_name: Python 3 (ipykernel) - language: python - name: python3 -orphan: true ---- - -# Pandas from Numpy - -+++ - -## What is Pandas? - -Pandas is an open-source python library for data manipulation and analysis. - -::: {note} - -**Why is Pandas called Pandas?** - -The “Pandas” name is short for “panel data”. The library was named after the -type of econometrics panel data that it was designed to analyse. [Panel -data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where -the same observational units (e.g. countries) are observed over multiple -instances across time. - -::: - -The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). - -The standard way to make a new Data Frame is to ask Pandas to read a data file -(like a `.csv` file) into a Data Frame. Before we do that however, we will -build our own Data Frame from scratch, beginning with the fundamental building -block for Data Frames: Numpy arrays. - -```{code-cell} -# import the libraries needed for this page -import numpy as np -import pandas as pd -``` - -## Numpy arrays - -Let's say we have some data that applies to a set of countries, and we have some countries in mind: - -```{code-cell} -country_names_array = np.array(['Australia', 'Brazil', 'Canada', - 'China', 'Germany', 'Spain', - 'France', 'United Kingdom', 'India', - 'Italy', 'Japan', 'South Korea', - 'Mexico', 'Russia', 'United States']) -country_names_array -``` - -For compactness, we'll also want to use the corresponding [standard -three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each -country, like so: - -Both Data Frames contain the same data, and the same labels. 
In fact, we can -use the `.equals` method of Data Frames to ask Pandas whether it agrees the -Data Frames are equivalent: - -```{code-cell} -A = 2 -B = 3 -C = A + B -C -``` - -They are equivalent. - -::: {exercise-start} -:label: a-first-exercise -:class: dropdown -::: - -In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. -If you look very carefully at the notebook output for the two data frames, you -may be able to spot the difference. Pandas `.equals` does not care about this -difference, but let's imagine we did. Try to work out how to change the `df` -Data Frame to give _exactly_ the same display as we see for -`loaded_labeled_df`. - -::: {exercise-end} -::: - -::: {solution-start} a-first-exercise -:class: dropdown -::: - -You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: - -```{code-cell} -B -``` - -compared to: - -```{code-cell} -C -``` - -We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. - -The simplest way to do that is: - -```{code-cell} -D = C * 4 -``` - -```{code-cell} -E = D + 10 -``` - -::: {solution-end} -::: - -::: {admonition} My title - -Some interesting information. - -::: - -Some more text. - -::: {exercise-start} -:label: differing-indices -:class: dropdown -::: - -```{code-cell} -# df5 -``` - -After these examples, what is your final working theory about the algorithm -Pandas uses to match the Indices of Series, when creating Data Frames? - -::: {exercise-end} -::: - -::: {solution-start} differing-indices -:class: dropdown -::: - -Here's our hypothesis of the algorithm: - -- First check if the Series Indices are the same. If so, use the Index of any - Series. -- If they are not the same, first sort all Series by their Index values, and - use the resulting sorted Index. - -What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? - -::: {solution-end} -::: - -(plot-frames)= - -## Convenient Plotting with Data Frames - -Remember earlier we imported Matplotlib to plot some of our data? diff --git a/_scripts/tests/test_process.py b/_scripts/tests/test_process.py deleted file mode 100644 index 92b729fb..00000000 --- a/_scripts/tests/test_process.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Test notebook parsing""" - -from copy import deepcopy -import re -import sys -from pathlib import Path - -import jupytext - -import pytest - -HERE = Path(__file__).parent -THERE = HERE.parent -EG1_NB_PATH = HERE / "eg.Rmd" -EG2_NB_PATH = HERE / "eg2.Rmd" - -sys.path.append(str(THERE)) - -import process_notebooks as pn - - -def nb2rmd(nb, fmt="myst", ext=".Rmd"): - return jupytext.writes(nb, fmt) - - -@pytest.mark.parametrize("nb_path", (EG1_NB_PATH, EG2_NB_PATH)) -def test_process_nbs(nb_path): - url = f"foo/{nb_path.stem}.html" - out_nb = pn.load_process_nb(nb_path, fmt="myst", url=url) - out_txt = nb2rmd(out_nb) - out_lines = out_txt.splitlines() - assert out_lines.count("**Start of exercise**") == 2 - assert out_lines.count("**End of exercise**") == 2 - assert out_lines.count(f"**See the [corresponding page]({url}) for solution**") == 2 - # A bit of solution text, should not be there after processing. 
- assert "You probably spotted that" not in out_txt - assert "Here's our hypothesis of the algorithm:" not in out_txt - # Admonitions - assert out_lines.count("**Start of note**") == 1 - assert out_lines.count("**End of note**") == 1 - assert out_lines.count("**Start of admonition: My title**") == 1 - assert out_lines.count("**End of admonition**") == 1 - # Labels - assert "plot-frames" not in out_txt - - -@pytest.mark.parametrize("nb_path", (EG1_NB_PATH, EG2_NB_PATH)) -def test_admonition_finding(nb_path): - nb_text = nb_path.read_text() - nb_lines = nb_text.splitlines() - ad_lines = pn.get_admonition_lines(nb_text, nb_path) - for first, last in ad_lines: - assert pn._ADM_HEADER.match(nb_lines[first]) - assert pn._END_DIV_RE.match(nb_lines[last]) - - -def test_cell_processors(): - nb = jupytext.read(EG1_NB_PATH) - # Code cell at index 6, Markdown at index 7. - nb_cp = deepcopy(nb) - - def null_processor(cell): - return cell - - out = pn.process_cells(nb_cp, [null_processor]) - assert out["cells"] is not nb_cp["cells"] - assert out["cells"] == nb_cp["cells"] - - # Label processor. - # There is a label in the example notebook. - labeled_indices = [i for i, c in enumerate(nb["cells"]) if ")=\n" in c["source"]] - assert len(labeled_indices) == 1 - out = pn.process_cells(nb_cp, [pn.label_processor]) - other_in_cell = nb_cp["cells"].pop(labeled_indices[0]) - other_out_cell = out["cells"].pop(labeled_indices[0]) - # With these cells removed, the other cells compare equal. - assert out["cells"] == nb_cp["cells"] - # Label removed. - assert pn._LABEL.match(other_in_cell["source"]) - assert not pn._LABEL.match(other_out_cell["source"]) - - # remove-cell processor. - nb_cp = deepcopy(nb) - # No tagged cells in original notebook. - out = pn.process_cells(nb_cp, [pn.remove_processor]) - assert out["cells"] == nb_cp["cells"] - # An example code and Markdown cel. - eg_cells = [6, 7] - for eg_i in eg_cells: - nb_cp["cells"][eg_i]["metadata"]["tags"] = ["remove-cell"] - out = pn.process_cells(nb_cp, [pn.remove_processor]) - assert out["cells"] != nb_cp["cells"] - assert len(out["cells"]) == len(nb_cp["cells"]) - len(eg_cells) - # The two cells have been dropped. - assert out["cells"][eg_cells[0]] == nb_cp["cells"][eg_cells[-1] + 1] - - -def test_admonition_processing(): - src = """ -## Signal processing: {mod}`scipy.signal` - -::: {note} -:class: dropdown - -{mod}`scipy.signal` is for typical signal processing: 1D, -regularly-sampled signals. -::: - -**Resampling** {func}`scipy.signal.resample`: resample a signal to `n` -points using FFT. - -::: {admonition} Another thought - -Some text. - - -::: - -More text. -""" - out = pn.process_admonitions(src, EG1_NB_PATH) - exp = """ -## Signal processing: {mod}`scipy.signal` - -**Start of note** - -{mod}`scipy.signal` is for typical signal processing: 1D, -regularly-sampled signals. - -**End of note** - -**Resampling** {func}`scipy.signal.resample`: resample a signal to `n` -points using FFT. - -**Start of admonition: Another thought** - -Some text. 
- -**End of admonition** - -More text.""" - assert exp == out diff --git a/jl-build-requirements.txt b/jl-build-requirements.txt index 78ba71c9..88583a50 100644 --- a/jl-build-requirements.txt +++ b/jl-build-requirements.txt @@ -3,3 +3,4 @@ jupyterlite-core jupyterlite-pyodide-kernel jupyterlab_server +jljb diff --git a/pyproject.toml b/pyproject.toml index 353fe8ee..d1c086fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,6 @@ exclude = ''' | .*/setup.*\.py$ | .*/demo.py$ | .*/auto_examples/ - | _scripts/examples2nb.py$ - | _scripts/post_parser.py$ | advanced/mathematical_optimization/examples/plot_gradient_descent\.py$ | advanced/mathematical_optimization/examples/helper/compare_optimizers\.py$ | advanced/advanced_numpy/examples/view-colors\.py$
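
As a closing reference for what moved into `jljb`: besides converting notebooks, the deleted `_scripts/process_notebooks.py` finished by writing a `jupyter-lite.json` into the output directory, giving the JupyterLite build a book-specific contents store. The sketch below is condensed from that deleted script; the output path is illustrative, and whether `jljb-write-dir` writes an identical file is an assumption this diff does not confirm.

```python
# Condensed from the tail of the deleted _scripts/process_notebooks.py:
# write the JupyterLite config into the (illustrative) output directory.
from pathlib import Path

_JL_JSON_FMT = """\
{{
  "jupyter-lite-schema-version": 0,
  "jupyter-config-data": {{
    "contentsStorageName": "rss-{language}"
  }}
}}
"""

out_path = Path("_build/interact")  # illustrative; the Makefile used $(BUILD_DIR)/interact
out_path.mkdir(parents=True, exist_ok=True)
(out_path / "jupyter-lite.json").write_text(_JL_JSON_FMT.format(language="python"))
```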