From 3e6e20f9c440847f428dc28d49e1cbe1ade779cc Mon Sep 17 00:00:00 2001 From: Iahn Cajigas Date: Fri, 6 Mar 2026 07:42:00 -0500 Subject: [PATCH] Make Python README standalone and auto-download example data --- README.md | 332 +++++++----------------- docs/data_installation.rst | 10 +- nstat/data_manager.py | 40 +-- nstat/datasets.py | 8 +- nstat/install.py | 35 ++- pyproject.toml | 1 + tests/test_datasets.py | 47 +++- tests/test_install_and_compat.py | 6 + tests/test_readme_examples_catalog.py | 83 ++---- tests/test_readme_nstatpaperexamples.py | 12 +- 10 files changed, 211 insertions(+), 363 deletions(-) diff --git a/README.md b/README.md index 41aec2a2..f03a0b0b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,45 @@ -# nSTAT-python +nSTAT-python +============ -`nSTAT-python` is a Python toolbox for neural spike-train analysis, modeling, and decoding. +Neural Spike Train Analysis Toolbox for Python [![test-and-build](https://github.com/cajigaslab/nSTAT-python/actions/workflows/ci.yml/badge.svg)](https://github.com/cajigaslab/nSTAT-python/actions/workflows/ci.yml) [![pages](https://github.com/cajigaslab/nSTAT-python/actions/workflows/pages.yml/badge.svg)](https://github.com/cajigaslab/nSTAT-python/actions/workflows/pages.yml) -## Installation +nSTAT-python is an open-source, object-oriented Python toolbox that implements a range of models and algorithms for neural spike-train analysis, modeling, and decoding. The toolbox is designed for quick, consistent neural data analysis in native Python while keeping the paper-example dataset outside the Git repository. -```bash -python -m pip install nstat -``` +Like the MATLAB toolbox paper, the Python port centers point-process generalized linear models for spike trains, while also supporting Gaussian-signal workflows, simulation, fitting diagnostics, and decoding. Although created with neural signal processing in mind, nSTAT-python can be used more generally for discrete and continuous time-series analysis. + +Like all open-source projects, nSTAT-python benefits from issues, suggestions, and code contributions. The current source repository is: + +- https://github.com/cajigaslab/nSTAT-python -From source: +Lab websites: + +- Neuroscience Statistics Research Laboratory: https://www.neurostat.mit.edu +- RESToRe Lab: https://www.med.upenn.edu/cajigaslab/ + +How to install nSTAT-python +--------------------------- + +1. Clone this repository and create or activate a Python 3.10+ environment. +2. Install the package from source: ```bash -git clone git@github.com:cajigaslab/nSTAT-python.git +git clone https://github.com/cajigaslab/nSTAT-python.git cd nSTAT-python python -m pip install -e .[dev] ``` -## Example data +3. Optional post-install helper: -`nSTAT-python` does not commit raw example data to the repository. +```bash +nstat-install +``` -Install the example dataset with: +When a paper example or dataset helper needs the canonical example dataset, nSTAT-python downloads the figshare dataset automatically into a local cache. The raw dataset is not stored in this Git repository. 
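+
+A minimal sketch of this on-demand path, using the package-level helpers exercised by the test suite in this patch (the `mepcs_epsc2` name is taken from those tests; substitute any entry returned by `nstat.list_datasets()`):
+
+```python
+import nstat
+
+# Reading the dataset manifest does not touch the network.
+print(nstat.list_datasets())
+
+# Resolving a dataset path downloads the figshare archive on first use
+# and returns the cached on-disk location on later calls.
+path = nstat.get_dataset_path("mepcs_epsc2")  # illustrative manifest entry
+print(path)
+```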
+ +To prefetch the dataset ahead of time: ```bash nstat-install --download-example-data always @@ -38,270 +54,100 @@ data_dir = ensure_example_data(download=True) print(data_dir) ``` -## How to install nSTAT (post-install setup) - -Run the setup helper: +Quickstart (Python 3.10+) +------------------------- ```bash -nstat-install -``` - -Equivalent Python API: - -```python -from nstat.install import nstat_install - -report = nstat_install() +git clone https://github.com/cajigaslab/nSTAT-python.git +cd nSTAT-python +python -m pip install -e .[dev] +python examples/nSTATPaperExamples.py --repo-root . ``` -## Examples +The first paper-example or dataset call downloads the figshare dataset automatically. Repository checkouts cache it under `data_cache/nstat_data/` by default. Set `NSTAT_DATA_DIR` to use another cache location. -> These examples generate figures with `matplotlib` and save PNGs under `examples/readme_examples/images/`. -> The images below show the expected output. +Paper Examples (Self-Contained) +------------------------------- -Examples below require `matplotlib`: +Canonical source files: -```bash -python -m pip install matplotlib -``` +- `examples/nSTATPaperExamples.py` (full command-line runner) +- `nstat/paper_examples_full.py` (paper-aligned experiment implementations) +- `examples/nstat_paper_examples.py` and `nstat/paper_examples.py` (lighter-weight summary runner) +- `notebooks/nSTATPaperExamples.ipynb` (notebook narrative) -### Example 1 — Single sinusoid: signal + multitaper spectrum + spectrogram -Run: +Single command to run the full paper-aligned example suite: ```bash -python examples/readme_examples/example1_multitaper_and_spectrogram.py +python examples/nSTATPaperExamples.py --repo-root . ``` -```python -import matplotlib -matplotlib.use("Agg") - -from pathlib import Path +This command downloads the figshare dataset automatically when needed and prints JSON summaries for the experiment blocks. The Python package does not require a MATLAB checkout. 
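+
+As noted above, `NSTAT_DATA_DIR` selects the cache location; a minimal prefetch sketch assuming a shared, writable cache directory (the `/scratch/nstat_data` path is purely illustrative):
+
+```bash
+# Illustrative writable cache outside the repository checkout.
+export NSTAT_DATA_DIR=/scratch/nstat_data
+# Prefetch once; later runs, including the paper-example suite, reuse this cache.
+nstat-install --download-example-data always
+python examples/nSTATPaperExamples.py --repo-root .
+```
+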
-import matplotlib.pyplot as plt -import numpy as np -from scipy.signal import spectrogram - -from nstat.compat.matlab import SignalObj - -fs_hz = 1000.0 -dt = 1.0 / fs_hz -duration_s = 2.0 -f0_hz = 10.0 -time = np.arange(0.0, duration_s, dt, dtype=float) - -signal = np.sin(2.0 * np.pi * f0_hz * time) -sig_obj = SignalObj(time=time, data=signal, name="sine_signal", units="a.u.") -freq_hz, psd = sig_obj.MTMspectrum() -f_spec, t_spec, sxx = spectrogram(signal, fs=fs_hz, nperseg=256, noverlap=224, scaling="density", mode="psd") - -fig, axes = plt.subplots(3, 1, figsize=(7.5, 7.5)) -preview_mask = time <= 1.0 -axes[0].plot(time[preview_mask], signal[preview_mask], color="tab:blue", linewidth=1.4) -axes[0].set_title("Signal (10 Hz sinusoid)") -axes[0].set_xlabel("time (s)") -axes[0].set_ylabel("amplitude") -axes[1].plot(freq_hz, psd, color="tab:orange", linewidth=1.2) -axes[1].set_xlim(0.0, 100.0) -axes[1].set_title("Multi-taper spectrum") -axes[1].set_xlabel("frequency (Hz)") -axes[1].set_ylabel("PSD") -im = axes[2].pcolormesh(t_spec, f_spec, sxx, shading="auto", cmap="magma") -axes[2].set_ylim(0.0, 100.0) -axes[2].set_title("Spectrogram") -axes[2].set_xlabel("time (s)") -axes[2].set_ylabel("frequency (Hz)") -fig.colorbar(im, ax=axes[2], pad=0.01, label="PSD") -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example1_multitaper_and_spectrogram.png", dpi=180) -``` +| Example | What question it answers | Python entrypoint | +|---|---|---| +| Example 01 | Do mEPSCs follow constant vs piecewise Poisson firing under Mg2+ washout? | `nstat.paper_examples_full.run_experiment1` | +| Example 02 | How do explicit whisker stimulus and spike history improve thalamic GLM fits? | `nstat.paper_examples_full.run_experiment2` | +| Example 03 | How do PSTH and SSGLM capture within-trial and across-trial dynamics? | `nstat.paper_examples_full.run_experiment3` and `run_experiment3b` | +| Example 04 | Which receptive-field basis (Gaussian vs Zernike-like) better fits place cells? | `nstat.paper_examples_full.run_experiment4` | +| Example 05 | How well do point-process-inspired decoders recover latent stimulus and state? 
| `nstat.paper_examples_full.run_experiment5`, `run_experiment5b`, and `run_experiment6` | -**Expected output** -![Multitaper and spectrogram](examples/readme_examples/images/readme_example1_multitaper_and_spectrogram.png) - -### Example 2 — Time-varying CIF over 10 seconds (single-frequency sinusoid) -Run: - -```bash -python examples/readme_examples/example2_simulate_cif_spiketrain_10s.py -``` +For a lighter-weight paper overview with plot payloads: ```python -import matplotlib -matplotlib.use("Agg") - from pathlib import Path -import matplotlib.pyplot as plt -import numpy as np - -from nstat.compat.matlab import CIF, Covariate - -np.random.seed(0) -dt = 0.001 -duration_s = 10.0 -t = np.arange(0.0, duration_s + dt, dt, dtype=float) - -f_hz = 0.5 -baseline_hz = 15.0 -amp_hz = 10.0 -lam = np.clip(baseline_hz + amp_hz * np.sin(2.0 * np.pi * f_hz * t), 0.2, None) - -lambda_cov = Covariate(time=t, data=lam, name="Lambda", units="spikes/s", labels=["lambda"]) -spikes = CIF.simulateCIFByThinningFromLambda(lambda_cov, 1, dt) -spike_times = spikes.getNST(0).spike_times - -fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8.0, 4.8), sharex=True, gridspec_kw={"height_ratios": [2.0, 1.0]}) -ax1.plot(t, lam, color="tab:blue", linewidth=1.3) -ax1.set_ylabel("rate (spikes/s)") -ax1.set_title("Time-varying CIF over 10 s") -ax2.vlines(spike_times, 0.0, 1.0, color="black", linewidth=0.8) -ax2.set_ylim(0.0, 1.0) -ax2.set_yticks([]) -ax2.set_xlabel("time (s)") -ax2.set_title("Simulated spike train") -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example2_simulate_cif_spiketrain_10s.png", dpi=180) -``` - -**Expected output** -![CIF spike train simulation](examples/readme_examples/images/readme_example2_simulate_cif_spiketrain_10s.png) - -### Example 3 — Spike train collection raster from Example 2 -Run: +from nstat.paper_examples import run_paper_examples -```bash -python examples/readme_examples/example3_nstcoll_raster_from_example2.py +results = run_paper_examples(Path.cwd()) +print(results["experiment2"]) +print(results["experiment3"]) +print(results["experiment4"]) +print(results["experiment5"]) ``` -```python -import matplotlib -matplotlib.use("Agg") - -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np - -from nstat.compat.matlab import CIF, Covariate - -np.random.seed(0) -dt = 0.001 -duration_s = 10.0 -n_units = 20 -t = np.arange(0.0, duration_s + dt, dt, dtype=float) - -f_hz = 0.5 -baseline_hz = 15.0 -amp_hz = 10.0 -lam = np.clip(baseline_hz + amp_hz * np.sin(2.0 * np.pi * f_hz * t), 0.2, None) - -lambda_cov = Covariate(time=t, data=lam, name="Lambda", units="spikes/s", labels=["lambda"]) -coll = CIF.simulateCIFByThinningFromLambda(lambda_cov, n_units, dt) - -fig, ax = plt.subplots(figsize=(8.0, 4.8)) -plt.sca(ax) -coll.plot() -ax.set_xlabel("time (s)") -ax.set_ylabel("unit index") -ax.set_title("Spike-train collection raster (nstColl.plot)") -ax.set_ylim(0.5, n_units + 0.5) -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example3_nstcoll_raster.png", dpi=180) -``` +Documentation +------------- -**Expected output** -![Spike train raster](examples/readme_examples/images/readme_example3_nstcoll_raster.png) +Minimal package docs live under [`docs/`](docs/). 
-### nSTATPaperExamples +- API reference: [`docs/api.rst`](docs/api.rst) +- Data installation: [`docs/data_installation.rst`](docs/data_installation.rst) -Run: +For mathematical and programmatic details of the toolbox, see: -```bash -python examples/readme_examples/example4_nstatpaperexamples_overview.py -``` +Cajigas I, Malik WQ, Brown EN. nSTAT: Open-source neural spike train analysis toolbox for Matlab. Journal of Neuroscience Methods 211: 245-264, Nov. 2012 +http://doi.org/10.1016/j.jneumeth.2012.08.009 +PMID: 22981419 -```python -import matplotlib -matplotlib.use("Agg") +Paper-aligned toolbox map +------------------------- -from pathlib import Path +To keep terminology and workflows aligned with the 2012 toolbox paper, the Python package groups core functionality along the same analysis paths: -from nstat.paper_examples import run_paper_examples +- Class hierarchy and object model (`SignalObj`, `Covariate`, `Trial`, `Analysis`, `FitResult`, `DecodingAlgorithms`) +- Fitting and assessment workflow (GLM fitting, diagnostics, summaries) +- Simulation workflow (conditional intensity and thinning examples) +- Decoding workflow (stimulus and state reconstruction) +- Example-to-paper section mapping via `nstat.paper_examples_full` -repo_root = Path(".").resolve() -results, payloads = run_paper_examples(repo_root, return_plot_data=True) -print(results["experiment2"]) -print(results["experiment3"]) -print(results["experiment4"]) -print(results["experiment5"]) -``` +If you use nSTAT-python in your work, please cite the paper above. +nSTAT is protected by the GPL v2 Open Source License. -**Expected output** -![nSTATPaperExamples overview](examples/readme_examples/images/readme_example4_nstatpaperexamples_overview.png) - -Complete catalog of nSTATPaperExamples notebooks: - -- [AnalysisExamples](notebooks/AnalysisExamples.ipynb) — Notebook example for AnalysisExamples. -- [ConfigCollExamples](notebooks/ConfigCollExamples.ipynb) — Notebook example for ConfigCollExamples. -- [CovCollExamples](notebooks/CovCollExamples.ipynb) — Notebook example for CovCollExamples. -- [CovariateExamples](notebooks/CovariateExamples.ipynb) — Notebook example for CovariateExamples. -- [DecodingExample](notebooks/DecodingExample.ipynb) — Notebook example for DecodingExample. -- [DecodingExampleWithHist](notebooks/DecodingExampleWithHist.ipynb) — Notebook example for DecodingExampleWithHist. -- [EventsExamples](notebooks/EventsExamples.ipynb) — Notebook example for EventsExamples. -- [ExplicitStimulusWhiskerData](notebooks/ExplicitStimulusWhiskerData.ipynb) — Notebook example for ExplicitStimulusWhiskerData. -- [FitResSummaryExamples](notebooks/FitResSummaryExamples.ipynb) — Notebook example for FitResSummaryExamples. -- [FitResultExamples](notebooks/FitResultExamples.ipynb) — Notebook example for FitResultExamples. -- [HippocampalPlaceCellExample](notebooks/HippocampalPlaceCellExample.ipynb) — Notebook example for HippocampalPlaceCellExample. -- [HistoryExamples](notebooks/HistoryExamples.ipynb) — Notebook example for HistoryExamples. -- [NetworkTutorial](notebooks/NetworkTutorial.ipynb) — Notebook example for NetworkTutorial. -- [PPSimExample](notebooks/PPSimExample.ipynb) — Notebook example for PPSimExample. -- [PPThinning](notebooks/PPThinning.ipynb) — Notebook example for PPThinning. -- [PSTHEstimation](notebooks/PSTHEstimation.ipynb) — Notebook example for PSTHEstimation. -- [SignalObjExamples](notebooks/SignalObjExamples.ipynb) — Notebook example for SignalObjExamples. 
-- [StimulusDecode2D](notebooks/StimulusDecode2D.ipynb) — Notebook example for StimulusDecode2D. -- [TrialConfigExamples](notebooks/TrialConfigExamples.ipynb) — Notebook example for TrialConfigExamples. -- [TrialExamples](notebooks/TrialExamples.ipynb) — Notebook example for TrialExamples. -- [ValidationDataSet](notebooks/ValidationDataSet.ipynb) — Notebook example for ValidationDataSet. -- [mEPSCAnalysis](notebooks/mEPSCAnalysis.ipynb) — Notebook example for mEPSCAnalysis. -- [nSTATPaperExamples](notebooks/nSTATPaperExamples.ipynb) — Notebook example for nSTATPaperExamples. -- [nSpikeTrainExamples](notebooks/nSpikeTrainExamples.ipynb) — Notebook example for nSpikeTrainExamples. -- [nstCollExamples](notebooks/nstCollExamples.ipynb) — Notebook example for nstCollExamples. -- [AnalysisExamples2](notebooks/AnalysisExamples2.ipynb) — Notebook example for AnalysisExamples2. -- [FitResultReference](notebooks/FitResultReference.ipynb) — Notebook example for FitResultReference. -- [HybridFilterExample](notebooks/HybridFilterExample.ipynb) — Notebook example for HybridFilterExample. - -## Documentation - -- Docs home: [cajigaslab.github.io/nSTAT-python](https://cajigaslab.github.io/nSTAT-python/) -- Help index: [cajigaslab.github.io/nSTAT-python/help](https://cajigaslab.github.io/nSTAT-python/help/) - -## Developer notes - -- Run tests: +The code repository for nSTAT-python is hosted on GitHub at https://github.com/cajigaslab/nSTAT-python . +The paper-example dataset is distributed separately from the Git repository: -```bash -pytest -q -``` +- Figshare dataset DOI: https://doi.org/10.6084/m9.figshare.4834640.v3 +- Paper DOI: https://doi.org/10.1016/j.jneumeth.2012.08.009 -- Build docs: +Standalone Python repository +---------------------------- -```bash -sphinx-build -b html docs docs/_build -``` +`nSTAT-python` is maintained as a separate repository from the MATLAB toolbox and does not require files from `cajigaslab/nSTAT`. -## Cite +This repository provides: -Cajigas, I., Malika, W. Q., & Brown, E. N. (2012). -nSTAT: Open-source neural spike train analysis toolbox for Matlab. -Journal of Neuroscience Methods, 211, 245–264. -https://doi.org/10.1016/j.jneumeth.2012.08.009 +- Native Python implementations of core spike-train analysis and decoding workflows +- On-demand dataset download directly from figshare +- Notebook and script examples that run without a MATLAB install +- A `nstat.compat.matlab` namespace for familiar class names where API continuity is useful diff --git a/docs/data_installation.rst b/docs/data_installation.rst index 91d633c2..e63527e9 100644 --- a/docs/data_installation.rst +++ b/docs/data_installation.rst @@ -2,8 +2,11 @@ Data Installation ================= ``nSTAT-python`` does not bundle raw example data in the Git tree. +The canonical paper-example dataset is downloaded automatically the first +time a paper example or dataset helper requires it. -Use one of the supported Python-native installation paths instead: +Use one of the supported Python-native prefetch paths if you want the cache +materialized ahead of time: Command line ------------ @@ -25,6 +28,7 @@ Python API Notes ----- -- Example data is cached under ``data_cache/`` by default. +- The dataset source is figshare DOI ``10.6084/m9.figshare.4834640.v3``. +- Source checkouts cache data under ``data_cache/nstat_data`` by default. - Set ``NSTAT_DATA_DIR`` to point at an existing dataset cache if needed. -- The repository intentionally ignores ``data/`` so local example-data installs are not committed. 
+- The repository intentionally ignores ``data/`` and ``data_cache/`` so local downloads are not committed. diff --git a/nstat/data_manager.py b/nstat/data_manager.py index 80df75c8..6e3655e2 100644 --- a/nstat/data_manager.py +++ b/nstat/data_manager.py @@ -1,9 +1,4 @@ -"""Resolve and materialize the external nSTAT example-data package. - -This mirrors the MATLAB-side `nSTAT_ExampleDataInfo` / `nSTAT_Install` -workflow added in the upstream toolbox while keeping raw example data out of -the Python Git tree. -""" +"""Resolve and materialize the standalone nSTAT-python example dataset.""" from __future__ import annotations @@ -11,6 +6,7 @@ import os import re import shutil +import ssl import tempfile import time import urllib.request @@ -19,11 +15,15 @@ from pathlib import Path from typing import Final +import certifi + FIGSHARE_API_URL: Final[str] = "https://api.figshare.com/v2/articles/4834640" FIGSHARE_DOI_URL: Final[str] = "https://doi.org/10.6084/m9.figshare.4834640.v3" PAPER_DOI_URL: Final[str] = "https://doi.org/10.1016/j.jneumeth.2012.08.009" SENTINEL_NAME: Final[str] = ".nstat_data_ok.json" +USER_AGENT: Final[str] = "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" +SSL_CONTEXT: Final[ssl.SSLContext] = ssl.create_default_context(cafile=certifi.where()) DOWNLOAD_URL_RE: Final[re.Pattern[str]] = re.compile( r"https?://(?:www\.)?(?:ndownloader|figshare\.com/ndownloader)/files/\d+" ) @@ -31,7 +31,7 @@ @dataclass(frozen=True) class ExampleDataInfo: - """Python analogue of MATLAB `nSTAT_ExampleDataInfo`.""" + """Resolved on-disk metadata for the canonical example dataset.""" root_dir: Path data_dir: Path @@ -56,11 +56,15 @@ def _default_cache_dir() -> Path: return (_repo_root() / "data_cache" / "nstat_data").resolve() -def get_example_data_info(root_dir: str | Path | None = None) -> ExampleDataInfo: - """Return dataset metadata using MATLAB-compatible file requirements.""" +def get_example_data_info( + root_dir: str | Path | None = None, + *, + treat_as_data_dir: bool = False, +) -> ExampleDataInfo: + """Return dataset metadata for a repo root or explicit dataset cache path.""" raw_root = _repo_root() if root_dir is None else Path(root_dir).expanduser().resolve() - if (raw_root / "mEPSCs").exists() or raw_root.name == "data": + if treat_as_data_dir or (raw_root / "mEPSCs").exists() or raw_root.name == "data": data_dir = raw_root root = raw_root.parent if raw_root.name == "data" else raw_root else: @@ -97,11 +101,9 @@ def _write_sentinel(data_dir: Path, *, source_url: str) -> None: def _http_get(url: str, *, timeout: float = 60.0) -> tuple[str, bytes]: req = urllib.request.Request( url, - headers={ - "User-Agent": "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" - }, + headers={"User-Agent": USER_AGENT}, ) - with urllib.request.urlopen(req, timeout=timeout) as resp: + with urllib.request.urlopen(req, timeout=timeout, context=SSL_CONTEXT) as resp: final_url = str(resp.geturl()) body = resp.read() return final_url, body @@ -145,11 +147,11 @@ def _stream_download(url: str, destination: Path, *, retries: int = 3) -> None: try: req = urllib.request.Request( url, - headers={ - "User-Agent": "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" - }, + headers={"User-Agent": USER_AGENT}, ) - with urllib.request.urlopen(req, timeout=180.0) as resp, destination.open("wb") as out: + with urllib.request.urlopen(req, timeout=180.0, context=SSL_CONTEXT) as resp, destination.open( + "wb" + ) as out: shutil.copyfileobj(resp, 
out, length=1024 * 1024) return except Exception as exc: # pragma: no cover - network timing dependent @@ -216,7 +218,7 @@ def get_data_dir() -> Path: def data_is_present(data_dir: Path) -> bool: """Return True when the required MATLAB-mirrored example files exist.""" - return get_example_data_info(data_dir).is_installed + return get_example_data_info(data_dir, treat_as_data_dir=True).is_installed def ensure_example_data(download: bool = True) -> Path: diff --git a/nstat/datasets.py b/nstat/datasets.py index 78d22f1f..3cc066a4 100644 --- a/nstat/datasets.py +++ b/nstat/datasets.py @@ -41,14 +41,14 @@ def list_datasets() -> list[str]: return sorted(_load_manifest().keys()) -def _resolve_dataset_target(rel_path: str) -> Path: +def _resolve_dataset_target(rel_path: str, *, download: bool) -> Path: repo_root = _repo_root() rel = Path(rel_path) if not rel.parts: return repo_root / rel if rel.parts[0] == "data": try: - data_dir = ensure_example_data(download=False) + data_dir = ensure_example_data(download=download) except FileNotFoundError as exc: raise DataNotFoundError(str(exc)) from exc return data_dir.joinpath(*rel.parts[1:]) @@ -60,7 +60,7 @@ def get_dataset_path(name: str) -> Path: if name not in entries: raise DataNotFoundError(f"Unknown dataset '{name}'. Available: {', '.join(sorted(entries))}") - path = _resolve_dataset_target(entries[name]["path"]) + path = _resolve_dataset_target(entries[name]["path"], download=True) if not path.exists(): raise DataNotFoundError(f"Dataset '{name}' not found at expected path: {path}") return path @@ -71,7 +71,7 @@ def verify_checksums() -> dict[str, bool]: result: dict[str, bool] = {} for name, item in entries.items(): try: - path = _resolve_dataset_target(item["path"]) + path = _resolve_dataset_target(item["path"], download=True) except DataNotFoundError: result[name] = False continue diff --git a/nstat/install.py b/nstat/install.py index 191995ec..df7f716c 100644 --- a/nstat/install.py +++ b/nstat/install.py @@ -42,6 +42,12 @@ def _should_prompt_for_example_data(info: dict[str, Any]) -> bool: return answer.strip().lower() in {"y", "yes"} +def _apply_example_data_info(report: dict[str, Any], info: Any) -> None: + report["example_data"]["data_dir"] = str(info.data_dir) + report["example_data"]["is_installed"] = bool(info.is_installed) + report["example_data"]["required_files"] = [str(path) for path in info.required_files] + + def nstat_install( *, rebuild_doc_search: bool = True, @@ -52,8 +58,9 @@ def nstat_install( mode = _normalize_download_mode(download_example_data) repo_root = Path(__file__).resolve().parents[1] - info = get_example_data_info(repo_root) + repo_info = get_example_data_info(repo_root) data_dir = get_data_dir() + data_info = get_example_data_info(data_dir, treat_as_data_dir=True) report: dict[str, Any] = { "repo_root": str(repo_root), @@ -63,34 +70,38 @@ def nstat_install( "download_example_data": mode, "example_data": { "data_dir": str(data_dir), - "is_installed": bool(info.is_installed or get_example_data_info(data_dir).is_installed), + "is_installed": bool(repo_info.is_installed or data_info.is_installed), "figshare_doi": FIGSHARE_DOI_URL, "paper_doi": PAPER_DOI_URL, - "required_files": [str(path) for path in info.required_files], + "required_files": [str(path) for path in data_info.required_files], }, "notes": [], } try: - if info.is_installed: - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(info.data_dir) + if repo_info.is_installed: + _apply_example_data_info(report, repo_info) + 
report["notes"].append("Example data already present.") + elif data_info.is_installed: + _apply_example_data_info(report, data_info) report["notes"].append("Example data already present.") elif mode == "always": path = ensure_example_data(download=True) - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(path) + _apply_example_data_info(report, get_example_data_info(path, treat_as_data_dir=True)) report["notes"].append("Downloaded example data.") elif mode == "prompt": if _should_prompt_for_example_data(report["example_data"]): path = ensure_example_data(download=True) - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(path) + _apply_example_data_info(report, get_example_data_info(path, treat_as_data_dir=True)) report["notes"].append("Downloaded example data after prompt.") else: - report["notes"].append("Example data not installed; run with download_example_data=True to install.") + report["notes"].append( + "Example data was not preinstalled; paper-example and dataset APIs will download it on first use." + ) else: - report["notes"].append("Example data not installed; run with download_example_data=True to install.") + report["notes"].append( + "Example data was not preinstalled; paper-example and dataset APIs will download it on first use." + ) except Exception as exc: # noqa: BLE001 report["example_data"]["error"] = str(exc) report["notes"].append("Example data installation failed.") diff --git a/pyproject.toml b/pyproject.toml index 3f48d3d9..eaba6ecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numpy>=1.24", "scipy>=1.10", "matplotlib>=3.7", + "certifi>=2024.0.0", "PyYAML>=6.0", "nbformat>=5.10", "nbclient>=0.10" diff --git a/tests/test_datasets.py b/tests/test_datasets.py index f897924f..2aba7070 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,23 +1,46 @@ from __future__ import annotations +from pathlib import Path + import nstat -from nstat.errors import DataNotFoundError +import nstat.datasets def test_dataset_manifest_and_checksums() -> None: names = nstat.list_datasets() assert names + assert names == sorted(names) + + +def test_get_dataset_path_triggers_download_when_data_is_external(monkeypatch, tmp_path: Path) -> None: + data_root = tmp_path / "nstat_data" + dataset_path = data_root / "mEPSCs" / "epsc2.txt" + dataset_path.parent.mkdir(parents=True, exist_ok=True) + dataset_path.write_text("header\n0 0\n", encoding="utf-8") + + calls: list[bool] = [] + + def fake_ensure_example_data(*, download: bool = True) -> Path: + calls.append(download) + return data_root + + monkeypatch.setattr(nstat.datasets, "ensure_example_data", fake_ensure_example_data) + + resolved = nstat.get_dataset_path("mepcs_epsc2") + assert resolved == dataset_path + assert calls == [True] + + +def test_verify_checksums_triggers_download_when_data_is_external(monkeypatch, tmp_path: Path) -> None: + calls: list[bool] = [] - check = nstat.verify_checksums() - assert set(check.keys()) == set(names) - assert all(isinstance(v, bool) for v in check.values()) + def fake_ensure_example_data(*, download: bool = True) -> Path: + calls.append(download) + return tmp_path / "nstat_data" + monkeypatch.setattr(nstat.datasets, "ensure_example_data", fake_ensure_example_data) -def test_get_dataset_path() -> None: - name = nstat.list_datasets()[0] - try: - path = nstat.get_dataset_path(name) - except DataNotFoundError: - # Standalone checkouts may intentionally omit large datasets. 
- return - assert path.exists() + result = nstat.verify_checksums() + assert result + assert all(isinstance(value, bool) for value in result.values()) + assert calls and all(call is True for call in calls) diff --git a/tests/test_install_and_compat.py b/tests/test_install_and_compat.py index bc22037f..b797d191 100644 --- a/tests/test_install_and_compat.py +++ b/tests/test_install_and_compat.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pathlib import Path + from nstat.compat.matlab import CIF, Covariate, SignalObj, nspikeTrain, nstColl from nstat.install import nstat_install @@ -18,3 +20,7 @@ def test_nstat_install_report_without_download() -> None: assert "example_data" in report assert report["download_example_data"] == "never" assert "required_files" in report["example_data"] + data_dir = Path(report["example_data"]["data_dir"]) + required = [Path(path) for path in report["example_data"]["required_files"]] + assert required + assert all(data_dir in path.parents or path == data_dir for path in required) diff --git a/tests/test_readme_examples_catalog.py b/tests/test_readme_examples_catalog.py index 432e837c..f82c2e80 100644 --- a/tests/test_readme_examples_catalog.py +++ b/tests/test_readme_examples_catalog.py @@ -1,75 +1,34 @@ from __future__ import annotations -import re from pathlib import Path -import yaml - REPO_ROOT = Path(__file__).resolve().parents[1] README_PATH = REPO_ROOT / "README.md" -CATALOG_PATH = REPO_ROOT / "examples" / "nSTATPaperExamples" / "manifest.yml" - - -FEATURED_HEADINGS = [ - "### Example 1 — Single sinusoid: signal + multitaper spectrum + spectrogram", - "### Example 2 — Time-varying CIF over 10 seconds (single-frequency sinusoid)", - "### Example 3 — Spike train collection raster from Example 2", -] - -FEATURED_RUN_COMMANDS = [ - "python examples/readme_examples/example1_multitaper_and_spectrogram.py", - "python examples/readme_examples/example2_simulate_cif_spiketrain_10s.py", - "python examples/readme_examples/example3_nstcoll_raster_from_example2.py", -] - - -def _extract_examples_block(text: str) -> str: - match = re.search(r"## Examples\n(.*?)\n## Documentation\n", text, flags=re.S) - if not match: - raise AssertionError("README is missing an Examples block bounded by '## Examples' and '## Documentation'.") - return match.group(1) - - -def test_readme_featured_examples_are_preserved_in_order() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - - heading_positions = [] - for heading in FEATURED_HEADINGS: - pos = block.find(heading) - assert pos >= 0, f"Missing featured heading: {heading}" - heading_positions.append(pos) - assert heading_positions == sorted(heading_positions), "Featured examples must remain in the original order." - - for cmd in FEATURED_RUN_COMMANDS: - assert cmd in block, f"Missing featured run command: {cmd}" - -def test_readme_includes_complete_nstatpaperexamples_catalog_once() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - assert "### nSTATPaperExamples" in block, "README Examples section is missing the nSTATPaperExamples catalog header." - manifest = yaml.safe_load(CATALOG_PATH.read_text(encoding="utf-8")) or {} - entries = manifest.get("examples", []) - assert entries, "nSTATPaperExamples manifest has no entries." 
+def test_readme_tracks_python_port_top_level_sections() -> None: + text = README_PATH.read_text(encoding="utf-8") + for heading in ( + "How to install nSTAT-python", + "Quickstart (Python 3.10+)", + "Paper Examples (Self-Contained)", + "Paper-aligned toolbox map", + "Standalone Python repository", + ): + assert heading in text - for row in entries: - name = str(row["name"]) - rel_path = str(row["relative_path"]) - link = f"[{name}]({rel_path})" - count = block.count(link) - assert count == 1, f"Catalog entry must appear exactly once in README: {link} (found {count})." +def test_readme_documents_automatic_dataset_download() -> None: + text = README_PATH.read_text(encoding="utf-8") + lowered = text.lower() + assert "downloads it automatically" in lowered or "downloads the figshare dataset automatically" in lowered + assert "10.6084/m9.figshare.4834640.v3" in text + assert "NSTAT_DATA_DIR" in text -def test_readme_examples_section_has_no_other_example_groups() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - headings = re.findall(r"^###\s+.+$", block, flags=re.M) - expected = FEATURED_HEADINGS + ["### nSTATPaperExamples"] - assert headings == expected, ( - "README Examples section must contain only the three featured examples " - "followed by the nSTATPaperExamples catalog header." - ) +def test_readme_lists_core_paper_examples_and_runner() -> None: + text = README_PATH.read_text(encoding="utf-8") + for label in ("Example 01", "Example 02", "Example 03", "Example 04", "Example 05"): + assert label in text + assert "python examples/nSTATPaperExamples.py --repo-root ." in text diff --git a/tests/test_readme_nstatpaperexamples.py b/tests/test_readme_nstatpaperexamples.py index 9e2cb916..e4ff0ce2 100644 --- a/tests/test_readme_nstatpaperexamples.py +++ b/tests/test_readme_nstatpaperexamples.py @@ -1,18 +1,14 @@ from __future__ import annotations from pathlib import Path -import re REPO_ROOT = Path(__file__).resolve().parents[1] README_PATH = REPO_ROOT / "README.md" -def test_readme_includes_nstatpaperexamples_code_and_figure() -> None: +def test_readme_states_python_repo_is_standalone_from_matlab_repo() -> None: text = README_PATH.read_text(encoding="utf-8") - match = re.search(r"### nSTATPaperExamples\n(.*?)\n## Documentation\n", text, flags=re.S) - assert match, "README is missing the nSTATPaperExamples block." - block = match.group(1) - assert "examples/readme_examples/example4_nstatpaperexamples_overview.py" in block - assert "from nstat.paper_examples import run_paper_examples" in block - assert "readme_example4_nstatpaperexamples_overview.png" in block + assert "does not require a MATLAB checkout" in text + assert "cajigaslab/nSTAT" in text + assert "nstat.compat.matlab" in text