From 3e6e20f9c440847f428dc28d49e1cbe1ade779cc Mon Sep 17 00:00:00 2001 From: Iahn Cajigas Date: Fri, 6 Mar 2026 07:42:00 -0500 Subject: [PATCH] Make Python README standalone and auto-download example data --- README.md | 332 +++++++----------------- docs/data_installation.rst | 10 +- nstat/data_manager.py | 40 +-- nstat/datasets.py | 8 +- nstat/install.py | 35 ++- pyproject.toml | 1 + tests/test_datasets.py | 47 +++- tests/test_install_and_compat.py | 6 + tests/test_readme_examples_catalog.py | 83 ++---- tests/test_readme_nstatpaperexamples.py | 12 +- 10 files changed, 211 insertions(+), 363 deletions(-) diff --git a/README.md b/README.md index 41aec2a2..f03a0b0b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,45 @@ -# nSTAT-python +nSTAT-python +============ -`nSTAT-python` is a Python toolbox for neural spike-train analysis, modeling, and decoding. +Neural Spike Train Analysis Toolbox for Python [![test-and-build](https://github.com/cajigaslab/nSTAT-python/actions/workflows/ci.yml/badge.svg)](https://github.com/cajigaslab/nSTAT-python/actions/workflows/ci.yml) [![pages](https://github.com/cajigaslab/nSTAT-python/actions/workflows/pages.yml/badge.svg)](https://github.com/cajigaslab/nSTAT-python/actions/workflows/pages.yml) -## Installation +nSTAT-python is an open-source, object-oriented Python toolbox that implements a range of models and algorithms for neural spike-train analysis, modeling, and decoding. The toolbox is designed for quick, consistent neural data analysis in native Python while keeping the paper-example dataset outside the Git repository. -```bash -python -m pip install nstat -``` +Like the MATLAB toolbox paper, the Python port centers point-process generalized linear models for spike trains, while also supporting Gaussian-signal workflows, simulation, fitting diagnostics, and decoding. Although created with neural signal processing in mind, nSTAT-python can be used more generally for discrete and continuous time-series analysis. + +Like all open-source projects, nSTAT-python benefits from issues, suggestions, and code contributions. The current source repository is: + +- https://github.com/cajigaslab/nSTAT-python -From source: +Lab websites: + +- Neuroscience Statistics Research Laboratory: https://www.neurostat.mit.edu +- RESToRe Lab: https://www.med.upenn.edu/cajigaslab/ + +How to install nSTAT-python +--------------------------- + +1. Clone this repository and create or activate a Python 3.10+ environment. +2. Install the package from source: ```bash -git clone git@github.com:cajigaslab/nSTAT-python.git +git clone https://github.com/cajigaslab/nSTAT-python.git cd nSTAT-python python -m pip install -e .[dev] ``` -## Example data +3. Optional post-install helper: -`nSTAT-python` does not commit raw example data to the repository. +```bash +nstat-install +``` -Install the example dataset with: +When a paper example or dataset helper needs the canonical example dataset, nSTAT-python downloads the figshare dataset automatically into a local cache. The raw dataset is not stored in this Git repository. 
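+
+A minimal sketch of this on-demand path, using the package-level helpers exercised by the test suite in this patch (the `mepcs_epsc2` name is taken from those tests; substitute any entry returned by `nstat.list_datasets()`):
+
+```python
+import nstat
+
+# Reading the dataset manifest does not touch the network.
+print(nstat.list_datasets())
+
+# Resolving a dataset path downloads the figshare archive on first use
+# and returns the cached on-disk location on later calls.
+path = nstat.get_dataset_path("mepcs_epsc2")  # illustrative manifest entry
+print(path)
+```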
+ +To prefetch the dataset ahead of time: ```bash nstat-install --download-example-data always @@ -38,270 +54,100 @@ data_dir = ensure_example_data(download=True) print(data_dir) ``` -## How to install nSTAT (post-install setup) - -Run the setup helper: +Quickstart (Python 3.10+) +------------------------- ```bash -nstat-install -``` - -Equivalent Python API: - -```python -from nstat.install import nstat_install - -report = nstat_install() +git clone https://github.com/cajigaslab/nSTAT-python.git +cd nSTAT-python +python -m pip install -e .[dev] +python examples/nSTATPaperExamples.py --repo-root . ``` -## Examples +The first paper-example or dataset call downloads the figshare dataset automatically. Repository checkouts cache it under `data_cache/nstat_data/` by default. Set `NSTAT_DATA_DIR` to use another cache location. -> These examples generate figures with `matplotlib` and save PNGs under `examples/readme_examples/images/`. -> The images below show the expected output. +Paper Examples (Self-Contained) +------------------------------- -Examples below require `matplotlib`: +Canonical source files: -```bash -python -m pip install matplotlib -``` +- `examples/nSTATPaperExamples.py` (full command-line runner) +- `nstat/paper_examples_full.py` (paper-aligned experiment implementations) +- `examples/nstat_paper_examples.py` and `nstat/paper_examples.py` (lighter-weight summary runner) +- `notebooks/nSTATPaperExamples.ipynb` (notebook narrative) -### Example 1 — Single sinusoid: signal + multitaper spectrum + spectrogram -Run: +Single command to run the full paper-aligned example suite: ```bash -python examples/readme_examples/example1_multitaper_and_spectrogram.py +python examples/nSTATPaperExamples.py --repo-root . ``` -```python -import matplotlib -matplotlib.use("Agg") - -from pathlib import Path +This command downloads the figshare dataset automatically when needed and prints JSON summaries for the experiment blocks. The Python package does not require a MATLAB checkout. 
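+
+As noted above, `NSTAT_DATA_DIR` selects the cache location; a minimal prefetch sketch assuming a shared, writable cache directory (the `/scratch/nstat_data` path is purely illustrative):
+
+```bash
+# Illustrative writable cache outside the repository checkout.
+export NSTAT_DATA_DIR=/scratch/nstat_data
+# Prefetch once; later runs, including the paper-example suite, reuse this cache.
+nstat-install --download-example-data always
+python examples/nSTATPaperExamples.py --repo-root .
+```
+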
-import matplotlib.pyplot as plt -import numpy as np -from scipy.signal import spectrogram - -from nstat.compat.matlab import SignalObj - -fs_hz = 1000.0 -dt = 1.0 / fs_hz -duration_s = 2.0 -f0_hz = 10.0 -time = np.arange(0.0, duration_s, dt, dtype=float) - -signal = np.sin(2.0 * np.pi * f0_hz * time) -sig_obj = SignalObj(time=time, data=signal, name="sine_signal", units="a.u.") -freq_hz, psd = sig_obj.MTMspectrum() -f_spec, t_spec, sxx = spectrogram(signal, fs=fs_hz, nperseg=256, noverlap=224, scaling="density", mode="psd") - -fig, axes = plt.subplots(3, 1, figsize=(7.5, 7.5)) -preview_mask = time <= 1.0 -axes[0].plot(time[preview_mask], signal[preview_mask], color="tab:blue", linewidth=1.4) -axes[0].set_title("Signal (10 Hz sinusoid)") -axes[0].set_xlabel("time (s)") -axes[0].set_ylabel("amplitude") -axes[1].plot(freq_hz, psd, color="tab:orange", linewidth=1.2) -axes[1].set_xlim(0.0, 100.0) -axes[1].set_title("Multi-taper spectrum") -axes[1].set_xlabel("frequency (Hz)") -axes[1].set_ylabel("PSD") -im = axes[2].pcolormesh(t_spec, f_spec, sxx, shading="auto", cmap="magma") -axes[2].set_ylim(0.0, 100.0) -axes[2].set_title("Spectrogram") -axes[2].set_xlabel("time (s)") -axes[2].set_ylabel("frequency (Hz)") -fig.colorbar(im, ax=axes[2], pad=0.01, label="PSD") -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example1_multitaper_and_spectrogram.png", dpi=180) -``` +| Example | What question it answers | Python entrypoint | +|---|---|---| +| Example 01 | Do mEPSCs follow constant vs piecewise Poisson firing under Mg2+ washout? | `nstat.paper_examples_full.run_experiment1` | +| Example 02 | How do explicit whisker stimulus and spike history improve thalamic GLM fits? | `nstat.paper_examples_full.run_experiment2` | +| Example 03 | How do PSTH and SSGLM capture within-trial and across-trial dynamics? | `nstat.paper_examples_full.run_experiment3` and `run_experiment3b` | +| Example 04 | Which receptive-field basis (Gaussian vs Zernike-like) better fits place cells? | `nstat.paper_examples_full.run_experiment4` | +| Example 05 | How well do point-process-inspired decoders recover latent stimulus and state? 
| `nstat.paper_examples_full.run_experiment5`, `run_experiment5b`, and `run_experiment6` | -**Expected output** -![Multitaper and spectrogram](examples/readme_examples/images/readme_example1_multitaper_and_spectrogram.png) - -### Example 2 — Time-varying CIF over 10 seconds (single-frequency sinusoid) -Run: - -```bash -python examples/readme_examples/example2_simulate_cif_spiketrain_10s.py -``` +For a lighter-weight paper overview with plot payloads: ```python -import matplotlib -matplotlib.use("Agg") - from pathlib import Path -import matplotlib.pyplot as plt -import numpy as np - -from nstat.compat.matlab import CIF, Covariate - -np.random.seed(0) -dt = 0.001 -duration_s = 10.0 -t = np.arange(0.0, duration_s + dt, dt, dtype=float) - -f_hz = 0.5 -baseline_hz = 15.0 -amp_hz = 10.0 -lam = np.clip(baseline_hz + amp_hz * np.sin(2.0 * np.pi * f_hz * t), 0.2, None) - -lambda_cov = Covariate(time=t, data=lam, name="Lambda", units="spikes/s", labels=["lambda"]) -spikes = CIF.simulateCIFByThinningFromLambda(lambda_cov, 1, dt) -spike_times = spikes.getNST(0).spike_times - -fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8.0, 4.8), sharex=True, gridspec_kw={"height_ratios": [2.0, 1.0]}) -ax1.plot(t, lam, color="tab:blue", linewidth=1.3) -ax1.set_ylabel("rate (spikes/s)") -ax1.set_title("Time-varying CIF over 10 s") -ax2.vlines(spike_times, 0.0, 1.0, color="black", linewidth=0.8) -ax2.set_ylim(0.0, 1.0) -ax2.set_yticks([]) -ax2.set_xlabel("time (s)") -ax2.set_title("Simulated spike train") -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example2_simulate_cif_spiketrain_10s.png", dpi=180) -``` - -**Expected output** -![CIF spike train simulation](examples/readme_examples/images/readme_example2_simulate_cif_spiketrain_10s.png) - -### Example 3 — Spike train collection raster from Example 2 -Run: +from nstat.paper_examples import run_paper_examples -```bash -python examples/readme_examples/example3_nstcoll_raster_from_example2.py +results = run_paper_examples(Path.cwd()) +print(results["experiment2"]) +print(results["experiment3"]) +print(results["experiment4"]) +print(results["experiment5"]) ``` -```python -import matplotlib -matplotlib.use("Agg") - -from pathlib import Path - -import matplotlib.pyplot as plt -import numpy as np - -from nstat.compat.matlab import CIF, Covariate - -np.random.seed(0) -dt = 0.001 -duration_s = 10.0 -n_units = 20 -t = np.arange(0.0, duration_s + dt, dt, dtype=float) - -f_hz = 0.5 -baseline_hz = 15.0 -amp_hz = 10.0 -lam = np.clip(baseline_hz + amp_hz * np.sin(2.0 * np.pi * f_hz * t), 0.2, None) - -lambda_cov = Covariate(time=t, data=lam, name="Lambda", units="spikes/s", labels=["lambda"]) -coll = CIF.simulateCIFByThinningFromLambda(lambda_cov, n_units, dt) - -fig, ax = plt.subplots(figsize=(8.0, 4.8)) -plt.sca(ax) -coll.plot() -ax.set_xlabel("time (s)") -ax.set_ylabel("unit index") -ax.set_title("Spike-train collection raster (nstColl.plot)") -ax.set_ylim(0.5, n_units + 0.5) -fig.tight_layout() - -out_dir = Path("examples/readme_examples/images") -out_dir.mkdir(parents=True, exist_ok=True) -fig.savefig(out_dir / "readme_example3_nstcoll_raster.png", dpi=180) -``` +Documentation +------------- -**Expected output** -![Spike train raster](examples/readme_examples/images/readme_example3_nstcoll_raster.png) +Minimal package docs live under [`docs/`](docs/). 
-### nSTATPaperExamples +- API reference: [`docs/api.rst`](docs/api.rst) +- Data installation: [`docs/data_installation.rst`](docs/data_installation.rst) -Run: +For mathematical and programmatic details of the toolbox, see: -```bash -python examples/readme_examples/example4_nstatpaperexamples_overview.py -``` +Cajigas I, Malik WQ, Brown EN. nSTAT: Open-source neural spike train analysis toolbox for Matlab. Journal of Neuroscience Methods 211: 245-264, Nov. 2012 +http://doi.org/10.1016/j.jneumeth.2012.08.009 +PMID: 22981419 -```python -import matplotlib -matplotlib.use("Agg") +Paper-aligned toolbox map +------------------------- -from pathlib import Path +To keep terminology and workflows aligned with the 2012 toolbox paper, the Python package groups core functionality along the same analysis paths: -from nstat.paper_examples import run_paper_examples +- Class hierarchy and object model (`SignalObj`, `Covariate`, `Trial`, `Analysis`, `FitResult`, `DecodingAlgorithms`) +- Fitting and assessment workflow (GLM fitting, diagnostics, summaries) +- Simulation workflow (conditional intensity and thinning examples) +- Decoding workflow (stimulus and state reconstruction) +- Example-to-paper section mapping via `nstat.paper_examples_full` -repo_root = Path(".").resolve() -results, payloads = run_paper_examples(repo_root, return_plot_data=True) -print(results["experiment2"]) -print(results["experiment3"]) -print(results["experiment4"]) -print(results["experiment5"]) -``` +If you use nSTAT-python in your work, please cite the paper above. +nSTAT is protected by the GPL v2 Open Source License. -**Expected output** -![nSTATPaperExamples overview](examples/readme_examples/images/readme_example4_nstatpaperexamples_overview.png) - -Complete catalog of nSTATPaperExamples notebooks: - -- [AnalysisExamples](notebooks/AnalysisExamples.ipynb) — Notebook example for AnalysisExamples. -- [ConfigCollExamples](notebooks/ConfigCollExamples.ipynb) — Notebook example for ConfigCollExamples. -- [CovCollExamples](notebooks/CovCollExamples.ipynb) — Notebook example for CovCollExamples. -- [CovariateExamples](notebooks/CovariateExamples.ipynb) — Notebook example for CovariateExamples. -- [DecodingExample](notebooks/DecodingExample.ipynb) — Notebook example for DecodingExample. -- [DecodingExampleWithHist](notebooks/DecodingExampleWithHist.ipynb) — Notebook example for DecodingExampleWithHist. -- [EventsExamples](notebooks/EventsExamples.ipynb) — Notebook example for EventsExamples. -- [ExplicitStimulusWhiskerData](notebooks/ExplicitStimulusWhiskerData.ipynb) — Notebook example for ExplicitStimulusWhiskerData. -- [FitResSummaryExamples](notebooks/FitResSummaryExamples.ipynb) — Notebook example for FitResSummaryExamples. -- [FitResultExamples](notebooks/FitResultExamples.ipynb) — Notebook example for FitResultExamples. -- [HippocampalPlaceCellExample](notebooks/HippocampalPlaceCellExample.ipynb) — Notebook example for HippocampalPlaceCellExample. -- [HistoryExamples](notebooks/HistoryExamples.ipynb) — Notebook example for HistoryExamples. -- [NetworkTutorial](notebooks/NetworkTutorial.ipynb) — Notebook example for NetworkTutorial. -- [PPSimExample](notebooks/PPSimExample.ipynb) — Notebook example for PPSimExample. -- [PPThinning](notebooks/PPThinning.ipynb) — Notebook example for PPThinning. -- [PSTHEstimation](notebooks/PSTHEstimation.ipynb) — Notebook example for PSTHEstimation. -- [SignalObjExamples](notebooks/SignalObjExamples.ipynb) — Notebook example for SignalObjExamples. 
-- [StimulusDecode2D](notebooks/StimulusDecode2D.ipynb) — Notebook example for StimulusDecode2D. -- [TrialConfigExamples](notebooks/TrialConfigExamples.ipynb) — Notebook example for TrialConfigExamples. -- [TrialExamples](notebooks/TrialExamples.ipynb) — Notebook example for TrialExamples. -- [ValidationDataSet](notebooks/ValidationDataSet.ipynb) — Notebook example for ValidationDataSet. -- [mEPSCAnalysis](notebooks/mEPSCAnalysis.ipynb) — Notebook example for mEPSCAnalysis. -- [nSTATPaperExamples](notebooks/nSTATPaperExamples.ipynb) — Notebook example for nSTATPaperExamples. -- [nSpikeTrainExamples](notebooks/nSpikeTrainExamples.ipynb) — Notebook example for nSpikeTrainExamples. -- [nstCollExamples](notebooks/nstCollExamples.ipynb) — Notebook example for nstCollExamples. -- [AnalysisExamples2](notebooks/AnalysisExamples2.ipynb) — Notebook example for AnalysisExamples2. -- [FitResultReference](notebooks/FitResultReference.ipynb) — Notebook example for FitResultReference. -- [HybridFilterExample](notebooks/HybridFilterExample.ipynb) — Notebook example for HybridFilterExample. - -## Documentation - -- Docs home: [cajigaslab.github.io/nSTAT-python](https://cajigaslab.github.io/nSTAT-python/) -- Help index: [cajigaslab.github.io/nSTAT-python/help](https://cajigaslab.github.io/nSTAT-python/help/) - -## Developer notes - -- Run tests: +The code repository for nSTAT-python is hosted on GitHub at https://github.com/cajigaslab/nSTAT-python . +The paper-example dataset is distributed separately from the Git repository: -```bash -pytest -q -``` +- Figshare dataset DOI: https://doi.org/10.6084/m9.figshare.4834640.v3 +- Paper DOI: https://doi.org/10.1016/j.jneumeth.2012.08.009 -- Build docs: +Standalone Python repository +---------------------------- -```bash -sphinx-build -b html docs docs/_build -``` +`nSTAT-python` is maintained as a separate repository from the MATLAB toolbox and does not require files from `cajigaslab/nSTAT`. -## Cite +This repository provides: -Cajigas, I., Malika, W. Q., & Brown, E. N. (2012). -nSTAT: Open-source neural spike train analysis toolbox for Matlab. -Journal of Neuroscience Methods, 211, 245–264. -https://doi.org/10.1016/j.jneumeth.2012.08.009 +- Native Python implementations of core spike-train analysis and decoding workflows +- On-demand dataset download directly from figshare +- Notebook and script examples that run without a MATLAB install +- A `nstat.compat.matlab` namespace for familiar class names where API continuity is useful diff --git a/docs/data_installation.rst b/docs/data_installation.rst index 91d633c2..e63527e9 100644 --- a/docs/data_installation.rst +++ b/docs/data_installation.rst @@ -2,8 +2,11 @@ Data Installation ================= ``nSTAT-python`` does not bundle raw example data in the Git tree. +The canonical paper-example dataset is downloaded automatically the first +time a paper example or dataset helper requires it. -Use one of the supported Python-native installation paths instead: +Use one of the supported Python-native prefetch paths if you want the cache +materialized ahead of time: Command line ------------ @@ -25,6 +28,7 @@ Python API Notes ----- -- Example data is cached under ``data_cache/`` by default. +- The dataset source is figshare DOI ``10.6084/m9.figshare.4834640.v3``. +- Source checkouts cache data under ``data_cache/nstat_data`` by default. - Set ``NSTAT_DATA_DIR`` to point at an existing dataset cache if needed. -- The repository intentionally ignores ``data/`` so local example-data installs are not committed. 
+- The repository intentionally ignores ``data/`` and ``data_cache/`` so local downloads are not committed. diff --git a/nstat/data_manager.py b/nstat/data_manager.py index 80df75c8..6e3655e2 100644 --- a/nstat/data_manager.py +++ b/nstat/data_manager.py @@ -1,9 +1,4 @@ -"""Resolve and materialize the external nSTAT example-data package. - -This mirrors the MATLAB-side `nSTAT_ExampleDataInfo` / `nSTAT_Install` -workflow added in the upstream toolbox while keeping raw example data out of -the Python Git tree. -""" +"""Resolve and materialize the standalone nSTAT-python example dataset.""" from __future__ import annotations @@ -11,6 +6,7 @@ import os import re import shutil +import ssl import tempfile import time import urllib.request @@ -19,11 +15,15 @@ from pathlib import Path from typing import Final +import certifi + FIGSHARE_API_URL: Final[str] = "https://api.figshare.com/v2/articles/4834640" FIGSHARE_DOI_URL: Final[str] = "https://doi.org/10.6084/m9.figshare.4834640.v3" PAPER_DOI_URL: Final[str] = "https://doi.org/10.1016/j.jneumeth.2012.08.009" SENTINEL_NAME: Final[str] = ".nstat_data_ok.json" +USER_AGENT: Final[str] = "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" +SSL_CONTEXT: Final[ssl.SSLContext] = ssl.create_default_context(cafile=certifi.where()) DOWNLOAD_URL_RE: Final[re.Pattern[str]] = re.compile( r"https?://(?:www\.)?(?:ndownloader|figshare\.com/ndownloader)/files/\d+" ) @@ -31,7 +31,7 @@ @dataclass(frozen=True) class ExampleDataInfo: - """Python analogue of MATLAB `nSTAT_ExampleDataInfo`.""" + """Resolved on-disk metadata for the canonical example dataset.""" root_dir: Path data_dir: Path @@ -56,11 +56,15 @@ def _default_cache_dir() -> Path: return (_repo_root() / "data_cache" / "nstat_data").resolve() -def get_example_data_info(root_dir: str | Path | None = None) -> ExampleDataInfo: - """Return dataset metadata using MATLAB-compatible file requirements.""" +def get_example_data_info( + root_dir: str | Path | None = None, + *, + treat_as_data_dir: bool = False, +) -> ExampleDataInfo: + """Return dataset metadata for a repo root or explicit dataset cache path.""" raw_root = _repo_root() if root_dir is None else Path(root_dir).expanduser().resolve() - if (raw_root / "mEPSCs").exists() or raw_root.name == "data": + if treat_as_data_dir or (raw_root / "mEPSCs").exists() or raw_root.name == "data": data_dir = raw_root root = raw_root.parent if raw_root.name == "data" else raw_root else: @@ -97,11 +101,9 @@ def _write_sentinel(data_dir: Path, *, source_url: str) -> None: def _http_get(url: str, *, timeout: float = 60.0) -> tuple[str, bytes]: req = urllib.request.Request( url, - headers={ - "User-Agent": "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" - }, + headers={"User-Agent": USER_AGENT}, ) - with urllib.request.urlopen(req, timeout=timeout) as resp: + with urllib.request.urlopen(req, timeout=timeout, context=SSL_CONTEXT) as resp: final_url = str(resp.geturl()) body = resp.read() return final_url, body @@ -145,11 +147,11 @@ def _stream_download(url: str, destination: Path, *, retries: int = 3) -> None: try: req = urllib.request.Request( url, - headers={ - "User-Agent": "nSTAT-python-data-manager/1.0 (+https://github.com/cajigaslab/nSTAT-python)" - }, + headers={"User-Agent": USER_AGENT}, ) - with urllib.request.urlopen(req, timeout=180.0) as resp, destination.open("wb") as out: + with urllib.request.urlopen(req, timeout=180.0, context=SSL_CONTEXT) as resp, destination.open( + "wb" + ) as out: shutil.copyfileobj(resp, 
out, length=1024 * 1024) return except Exception as exc: # pragma: no cover - network timing dependent @@ -216,7 +218,7 @@ def get_data_dir() -> Path: def data_is_present(data_dir: Path) -> bool: """Return True when the required MATLAB-mirrored example files exist.""" - return get_example_data_info(data_dir).is_installed + return get_example_data_info(data_dir, treat_as_data_dir=True).is_installed def ensure_example_data(download: bool = True) -> Path: diff --git a/nstat/datasets.py b/nstat/datasets.py index 78d22f1f..3cc066a4 100644 --- a/nstat/datasets.py +++ b/nstat/datasets.py @@ -41,14 +41,14 @@ def list_datasets() -> list[str]: return sorted(_load_manifest().keys()) -def _resolve_dataset_target(rel_path: str) -> Path: +def _resolve_dataset_target(rel_path: str, *, download: bool) -> Path: repo_root = _repo_root() rel = Path(rel_path) if not rel.parts: return repo_root / rel if rel.parts[0] == "data": try: - data_dir = ensure_example_data(download=False) + data_dir = ensure_example_data(download=download) except FileNotFoundError as exc: raise DataNotFoundError(str(exc)) from exc return data_dir.joinpath(*rel.parts[1:]) @@ -60,7 +60,7 @@ def get_dataset_path(name: str) -> Path: if name not in entries: raise DataNotFoundError(f"Unknown dataset '{name}'. Available: {', '.join(sorted(entries))}") - path = _resolve_dataset_target(entries[name]["path"]) + path = _resolve_dataset_target(entries[name]["path"], download=True) if not path.exists(): raise DataNotFoundError(f"Dataset '{name}' not found at expected path: {path}") return path @@ -71,7 +71,7 @@ def verify_checksums() -> dict[str, bool]: result: dict[str, bool] = {} for name, item in entries.items(): try: - path = _resolve_dataset_target(item["path"]) + path = _resolve_dataset_target(item["path"], download=True) except DataNotFoundError: result[name] = False continue diff --git a/nstat/install.py b/nstat/install.py index 191995ec..df7f716c 100644 --- a/nstat/install.py +++ b/nstat/install.py @@ -42,6 +42,12 @@ def _should_prompt_for_example_data(info: dict[str, Any]) -> bool: return answer.strip().lower() in {"y", "yes"} +def _apply_example_data_info(report: dict[str, Any], info: Any) -> None: + report["example_data"]["data_dir"] = str(info.data_dir) + report["example_data"]["is_installed"] = bool(info.is_installed) + report["example_data"]["required_files"] = [str(path) for path in info.required_files] + + def nstat_install( *, rebuild_doc_search: bool = True, @@ -52,8 +58,9 @@ def nstat_install( mode = _normalize_download_mode(download_example_data) repo_root = Path(__file__).resolve().parents[1] - info = get_example_data_info(repo_root) + repo_info = get_example_data_info(repo_root) data_dir = get_data_dir() + data_info = get_example_data_info(data_dir, treat_as_data_dir=True) report: dict[str, Any] = { "repo_root": str(repo_root), @@ -63,34 +70,38 @@ def nstat_install( "download_example_data": mode, "example_data": { "data_dir": str(data_dir), - "is_installed": bool(info.is_installed or get_example_data_info(data_dir).is_installed), + "is_installed": bool(repo_info.is_installed or data_info.is_installed), "figshare_doi": FIGSHARE_DOI_URL, "paper_doi": PAPER_DOI_URL, - "required_files": [str(path) for path in info.required_files], + "required_files": [str(path) for path in data_info.required_files], }, "notes": [], } try: - if info.is_installed: - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(info.data_dir) + if repo_info.is_installed: + _apply_example_data_info(report, repo_info) + 
report["notes"].append("Example data already present.") + elif data_info.is_installed: + _apply_example_data_info(report, data_info) report["notes"].append("Example data already present.") elif mode == "always": path = ensure_example_data(download=True) - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(path) + _apply_example_data_info(report, get_example_data_info(path, treat_as_data_dir=True)) report["notes"].append("Downloaded example data.") elif mode == "prompt": if _should_prompt_for_example_data(report["example_data"]): path = ensure_example_data(download=True) - report["example_data"]["is_installed"] = True - report["example_data"]["data_dir"] = str(path) + _apply_example_data_info(report, get_example_data_info(path, treat_as_data_dir=True)) report["notes"].append("Downloaded example data after prompt.") else: - report["notes"].append("Example data not installed; run with download_example_data=True to install.") + report["notes"].append( + "Example data was not preinstalled; paper-example and dataset APIs will download it on first use." + ) else: - report["notes"].append("Example data not installed; run with download_example_data=True to install.") + report["notes"].append( + "Example data was not preinstalled; paper-example and dataset APIs will download it on first use." + ) except Exception as exc: # noqa: BLE001 report["example_data"]["error"] = str(exc) report["notes"].append("Example data installation failed.") diff --git a/pyproject.toml b/pyproject.toml index 3f48d3d9..eaba6ecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numpy>=1.24", "scipy>=1.10", "matplotlib>=3.7", + "certifi>=2024.0.0", "PyYAML>=6.0", "nbformat>=5.10", "nbclient>=0.10" diff --git a/tests/test_datasets.py b/tests/test_datasets.py index f897924f..2aba7070 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,23 +1,46 @@ from __future__ import annotations +from pathlib import Path + import nstat -from nstat.errors import DataNotFoundError +import nstat.datasets def test_dataset_manifest_and_checksums() -> None: names = nstat.list_datasets() assert names + assert names == sorted(names) + + +def test_get_dataset_path_triggers_download_when_data_is_external(monkeypatch, tmp_path: Path) -> None: + data_root = tmp_path / "nstat_data" + dataset_path = data_root / "mEPSCs" / "epsc2.txt" + dataset_path.parent.mkdir(parents=True, exist_ok=True) + dataset_path.write_text("header\n0 0\n", encoding="utf-8") + + calls: list[bool] = [] + + def fake_ensure_example_data(*, download: bool = True) -> Path: + calls.append(download) + return data_root + + monkeypatch.setattr(nstat.datasets, "ensure_example_data", fake_ensure_example_data) + + resolved = nstat.get_dataset_path("mepcs_epsc2") + assert resolved == dataset_path + assert calls == [True] + + +def test_verify_checksums_triggers_download_when_data_is_external(monkeypatch, tmp_path: Path) -> None: + calls: list[bool] = [] - check = nstat.verify_checksums() - assert set(check.keys()) == set(names) - assert all(isinstance(v, bool) for v in check.values()) + def fake_ensure_example_data(*, download: bool = True) -> Path: + calls.append(download) + return tmp_path / "nstat_data" + monkeypatch.setattr(nstat.datasets, "ensure_example_data", fake_ensure_example_data) -def test_get_dataset_path() -> None: - name = nstat.list_datasets()[0] - try: - path = nstat.get_dataset_path(name) - except DataNotFoundError: - # Standalone checkouts may intentionally omit large datasets. 
- return - assert path.exists() + result = nstat.verify_checksums() + assert result + assert all(isinstance(value, bool) for value in result.values()) + assert calls and all(call is True for call in calls) diff --git a/tests/test_install_and_compat.py b/tests/test_install_and_compat.py index bc22037f..b797d191 100644 --- a/tests/test_install_and_compat.py +++ b/tests/test_install_and_compat.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pathlib import Path + from nstat.compat.matlab import CIF, Covariate, SignalObj, nspikeTrain, nstColl from nstat.install import nstat_install @@ -18,3 +20,7 @@ def test_nstat_install_report_without_download() -> None: assert "example_data" in report assert report["download_example_data"] == "never" assert "required_files" in report["example_data"] + data_dir = Path(report["example_data"]["data_dir"]) + required = [Path(path) for path in report["example_data"]["required_files"]] + assert required + assert all(data_dir in path.parents or path == data_dir for path in required) diff --git a/tests/test_readme_examples_catalog.py b/tests/test_readme_examples_catalog.py index 432e837c..f82c2e80 100644 --- a/tests/test_readme_examples_catalog.py +++ b/tests/test_readme_examples_catalog.py @@ -1,75 +1,34 @@ from __future__ import annotations -import re from pathlib import Path -import yaml - REPO_ROOT = Path(__file__).resolve().parents[1] README_PATH = REPO_ROOT / "README.md" -CATALOG_PATH = REPO_ROOT / "examples" / "nSTATPaperExamples" / "manifest.yml" - - -FEATURED_HEADINGS = [ - "### Example 1 — Single sinusoid: signal + multitaper spectrum + spectrogram", - "### Example 2 — Time-varying CIF over 10 seconds (single-frequency sinusoid)", - "### Example 3 — Spike train collection raster from Example 2", -] - -FEATURED_RUN_COMMANDS = [ - "python examples/readme_examples/example1_multitaper_and_spectrogram.py", - "python examples/readme_examples/example2_simulate_cif_spiketrain_10s.py", - "python examples/readme_examples/example3_nstcoll_raster_from_example2.py", -] - - -def _extract_examples_block(text: str) -> str: - match = re.search(r"## Examples\n(.*?)\n## Documentation\n", text, flags=re.S) - if not match: - raise AssertionError("README is missing an Examples block bounded by '## Examples' and '## Documentation'.") - return match.group(1) - - -def test_readme_featured_examples_are_preserved_in_order() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - - heading_positions = [] - for heading in FEATURED_HEADINGS: - pos = block.find(heading) - assert pos >= 0, f"Missing featured heading: {heading}" - heading_positions.append(pos) - assert heading_positions == sorted(heading_positions), "Featured examples must remain in the original order." - - for cmd in FEATURED_RUN_COMMANDS: - assert cmd in block, f"Missing featured run command: {cmd}" - -def test_readme_includes_complete_nstatpaperexamples_catalog_once() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - assert "### nSTATPaperExamples" in block, "README Examples section is missing the nSTATPaperExamples catalog header." - manifest = yaml.safe_load(CATALOG_PATH.read_text(encoding="utf-8")) or {} - entries = manifest.get("examples", []) - assert entries, "nSTATPaperExamples manifest has no entries." 
+def test_readme_tracks_python_port_top_level_sections() -> None: + text = README_PATH.read_text(encoding="utf-8") + for heading in ( + "How to install nSTAT-python", + "Quickstart (Python 3.10+)", + "Paper Examples (Self-Contained)", + "Paper-aligned toolbox map", + "Standalone Python repository", + ): + assert heading in text - for row in entries: - name = str(row["name"]) - rel_path = str(row["relative_path"]) - link = f"[{name}]({rel_path})" - count = block.count(link) - assert count == 1, f"Catalog entry must appear exactly once in README: {link} (found {count})." +def test_readme_documents_automatic_dataset_download() -> None: + text = README_PATH.read_text(encoding="utf-8") + lowered = text.lower() + assert "downloads it automatically" in lowered or "downloads the figshare dataset automatically" in lowered + assert "10.6084/m9.figshare.4834640.v3" in text + assert "NSTAT_DATA_DIR" in text -def test_readme_examples_section_has_no_other_example_groups() -> None: - readme = README_PATH.read_text(encoding="utf-8") - block = _extract_examples_block(readme) - headings = re.findall(r"^###\s+.+$", block, flags=re.M) - expected = FEATURED_HEADINGS + ["### nSTATPaperExamples"] - assert headings == expected, ( - "README Examples section must contain only the three featured examples " - "followed by the nSTATPaperExamples catalog header." - ) +def test_readme_lists_core_paper_examples_and_runner() -> None: + text = README_PATH.read_text(encoding="utf-8") + for label in ("Example 01", "Example 02", "Example 03", "Example 04", "Example 05"): + assert label in text + assert "python examples/nSTATPaperExamples.py --repo-root ." in text diff --git a/tests/test_readme_nstatpaperexamples.py b/tests/test_readme_nstatpaperexamples.py index 9e2cb916..e4ff0ce2 100644 --- a/tests/test_readme_nstatpaperexamples.py +++ b/tests/test_readme_nstatpaperexamples.py @@ -1,18 +1,14 @@ from __future__ import annotations from pathlib import Path -import re REPO_ROOT = Path(__file__).resolve().parents[1] README_PATH = REPO_ROOT / "README.md" -def test_readme_includes_nstatpaperexamples_code_and_figure() -> None: +def test_readme_states_python_repo_is_standalone_from_matlab_repo() -> None: text = README_PATH.read_text(encoding="utf-8") - match = re.search(r"### nSTATPaperExamples\n(.*?)\n## Documentation\n", text, flags=re.S) - assert match, "README is missing the nSTATPaperExamples block." - block = match.group(1) - assert "examples/readme_examples/example4_nstatpaperexamples_overview.py" in block - assert "from nstat.paper_examples import run_paper_examples" in block - assert "readme_example4_nstatpaperexamples_overview.png" in block + assert "does not require a MATLAB checkout" in text + assert "cajigaslab/nSTAT" in text + assert "nstat.compat.matlab" in text