From 90480311c7093c87859a72fd819e3476f9482899 Mon Sep 17 00:00:00 2001 From: Ansumanbhujabal Date: Thu, 2 Apr 2026 10:44:53 +0530 Subject: [PATCH] feat: add NeuroLens interactive neuroscience playground NeuroLens is an interactive module built on top of TRIBE v2 that enables exploration of brain predictions through three modules: - **Predict**: Load and visualize brain surface activations per stimulus at any timepoint, with ROI-level summaries - **Match**: Find neurally similar stimuli ("More Like This") and contrast brain regions to discover what content maximizes/minimizes specific neural responses - **Eval**: Compare AI models (CLIP, Whisper, GPT-2) against brain predictions using RSA (Representational Similarity Analysis) Includes: - 9 Python modules (neurolens/ package) - Interactive Jupyter notebook with widget-based UI - Automated results generator for batch analysis - 31 unit + integration tests (100% pass) - Getting started documentation The module works with pre-computed brain predictions from the TRIBE v2 model and requires no GPU for interactive exploration. 
--- docs/GETTING_STARTED.md | 430 ++++++++++++++++++++++++++++++ neurolens.ipynb | 423 +++++++++++++++++++++++++++++ neurolens/__init__.py | 28 ++ neurolens/cache.py | 66 +++++ neurolens/eval.py | 74 +++++ neurolens/generate_all_results.py | 407 ++++++++++++++++++++++++++++ neurolens/match.py | 97 +++++++ neurolens/predict.py | 60 +++++ neurolens/roi.py | 65 +++++ neurolens/stimulus.py | 56 ++++ neurolens/viz.py | 90 +++++++ tests/__init__.py | 0 tests/test_cache.py | 77 ++++++ tests/test_eval.py | 64 +++++ tests/test_integration.py | 85 ++++++ tests/test_match.py | 72 +++++ tests/test_predict.py | 63 +++++ tests/test_roi.py | 27 ++ tests/test_stimulus.py | 79 ++++++ tests/test_viz.py | 30 +++ 20 files changed, 2293 insertions(+) create mode 100644 docs/GETTING_STARTED.md create mode 100644 neurolens.ipynb create mode 100644 neurolens/__init__.py create mode 100644 neurolens/cache.py create mode 100644 neurolens/eval.py create mode 100644 neurolens/generate_all_results.py create mode 100644 neurolens/match.py create mode 100644 neurolens/predict.py create mode 100644 neurolens/roi.py create mode 100644 neurolens/stimulus.py create mode 100644 neurolens/viz.py create mode 100644 tests/__init__.py create mode 100644 tests/test_cache.py create mode 100644 tests/test_eval.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_match.py create mode 100644 tests/test_predict.py create mode 100644 tests/test_roi.py create mode 100644 tests/test_stimulus.py create mode 100644 tests/test_viz.py diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md new file mode 100644 index 0000000..7f63635 --- /dev/null +++ b/docs/GETTING_STARTED.md @@ -0,0 +1,430 @@ +# NeuroLens — Getting Started Guide + +A step-by-step guide to testing, running, and extending NeuroLens. + +--- + +## Table of Contents + +1. [Prerequisites](#1-prerequisites) +2. [Local Setup](#2-local-setup) +3. [Running Tests](#3-running-tests) +4. 
[Generating the Cache (Precompute)](#4-generating-the-cache-precompute) +5. [Running the Interactive Notebook](#5-running-the-interactive-notebook) +6. [Project Structure](#6-project-structure) +7. [Next Steps & Roadmap](#7-next-steps--roadmap) +8. [Troubleshooting](#8-troubleshooting) + +--- + +## 1. Prerequisites + +| Requirement | Version | Notes | +|-------------|---------|-------| +| Python | 3.10+ | Tested on 3.11 | +| uv | Latest | Package manager (`pip install uv` or `curl -LsSf https://astral.sh/uv/install.sh \| sh`) | +| Git | Any | For version control | +| Google Colab account | Free tier | For GPU-based precompute step | +| HuggingFace account | Free | Needed for LLaMA 3.2 access (gated model) | + +**Hardware:** +- **Precompute notebook:** Requires GPU (Colab free tier T4 is sufficient) +- **Main notebook:** CPU only (all heavy computation is pre-cached) +- **Tests:** CPU only, ~12 seconds for full suite + +--- + +## 2. Local Setup + +### 2.1 Clone and enter the repo + +```bash +cd /opt/CodeRepo/tribev2 # or wherever your clone lives +``` + +### 2.2 Create a virtual environment + +```bash +uv venv .venv --python 3.11 +source .venv/bin/activate +``` + +### 2.3 Install dependencies + +```bash +# Core tribev2 + plotting dependencies +uv pip install -e ".[plotting]" + +# Additional NeuroLens dependencies +uv pip install plotly ipywidgets scipy +``` + +### 2.4 Verify the installation + +```bash +python -c "import neurolens; print('NeuroLens OK')" +python -c "from tribev2 import TribeModel; print('TRIBE v2 OK')" +``` + +Both should print OK without errors. + +--- + +## 3. Running Tests + +### 3.1 Run the full test suite + +```bash +python -m pytest tests/ -v +``` + +Expected output: **31 passed** in ~12 seconds. 
+ +### 3.2 Run tests by module + +```bash +# Individual module tests +python -m pytest tests/test_roi.py -v # ROI utilities (3 tests) +python -m pytest tests/test_stimulus.py -v # Stimulus library (5 tests) +python -m pytest tests/test_cache.py -v # Cache manager (6 tests) +python -m pytest tests/test_viz.py -v # Visualization (3 tests) +python -m pytest tests/test_predict.py -v # Predict module (5 tests) +python -m pytest tests/test_match.py -v # Match module (4 tests) +python -m pytest tests/test_eval.py -v # Eval module (4 tests) +python -m pytest tests/test_integration.py -v # End-to-end pipeline (1 test) +``` + +### 3.3 What the tests cover + +| Test File | Tests | What it verifies | +|-----------|-------|-----------------| +| `test_roi.py` | 3 | ROI groups are defined, names returned correctly, summarization works on fsaverage5 | +| `test_stimulus.py` | 5 | Stimulus dataclass, library loading from JSON, filtering, categories, missing ID handling | +| `test_cache.py` | 6 | Loading `.npz` brain preds, `.json` ROI summaries, `.pt` embeddings, missing file returns None | +| `test_viz.py` | 3 | Brain surface plot returns figure, radar charts work for single and comparison modes | +| `test_predict.py` | 5 | Time-sliced predictions, index clamping, top ROIs, modality contributions | +| `test_match.py` | 4 | Cosine similarity search, self-similarity > 0.99, synthetic target from regions, contrast mode | +| `test_eval.py` | 4 | Pairwise similarity matrix, RSA score computation, model-brain alignment | +| `test_integration.py` | 1 | Full pipeline: cache -> predict -> match -> eval -> visualize | + +### 3.4 Known warnings + +You may see this warning during tests that call `build_target_from_regions`: + +``` +UserWarning: LabelEncoder: event_types has not been set... +``` + +This comes from `neuralset` (TRIBE v2 dependency) and is harmless. It does not affect functionality. 
+ +### 3.5 First run note + +The first run of `test_roi.py` or `test_match.py::test_build_target_from_regions` downloads MNE sample data (~1.65 GB) to `~/mne_data/`. Subsequent runs use the cached data and are fast. + +--- + +## 4. Generating the Cache (Precompute) + +This is where you process your stimulus library through TRIBE v2 and comparison models. **Run this once on a GPU.** + +### 4.1 Prepare your stimuli + +Collect 50-80 short video/audio clips (5-15 seconds each). Good sources for CC-licensed content: + +| Source | Type | License | URL | +|--------|------|---------|-----| +| Pexels | Video | Free (Pexels License) | https://www.pexels.com/videos/ | +| Pixabay | Video/Audio | Pixabay License | https://pixabay.com/ | +| LibriVox | Audiobook | Public Domain | https://librivox.org/ | +| Freesound | Audio | CC | https://freesound.org/ | + +**Recommended category distribution:** + +| Category | Count | Examples | +|----------|-------|---------| +| Speech | 8-10 | TED talks, podcast clips, audiobook excerpts | +| Music | 8-10 | Classical, hip-hop, ambient, vocals-only | +| Silence + Visuals | 8-10 | Nature timelapse, abstract art, faces | +| Emotional | 8-10 | Horror, comedy, heartwarming scenes | +| Multimodal-rich | 8-10 | Movie scenes with dialogue + action + music | +| Text-only | 5-8 | Narrated stories, poetry readings | + +### 4.2 Upload to Colab + +1. Open `neurolens_precompute.ipynb` in Google Colab +2. Set runtime to **GPU** (Runtime > Change runtime type > T4 GPU) +3. 
Upload your stimulus files to a `stimuli/` folder in Colab + +### 4.3 Edit the stimulus list + +In Cell 2, update the `STIMULI` list with your actual files: + +```python +STIMULI = [ + {"id": "clip_001", "name": "Nature timelapse", "category": "Silence + Visuals", + "media_type": "video", "duration_sec": 10.0, "path": "stimuli/nature.mp4"}, + {"id": "clip_002", "name": "Beethoven Moonlight", "category": "Music", + "media_type": "audio", "duration_sec": 15.0, "path": "stimuli/moonlight.wav"}, + # ... add all your stimuli +] +``` + +**ID naming convention:** Use `clip_001`, `clip_002`, etc. for consistency. + +### 4.4 Authenticate with HuggingFace + +TRIBE v2 uses LLaMA 3.2 (gated model). Before running Cell 3: + +1. Go to https://huggingface.co/meta-llama/Llama-3.2-3B and accept the license +2. Create an access token at https://huggingface.co/settings/tokens (read access) +3. In Colab, run: +```python +!huggingface-cli login +# Paste your token when prompted +``` + +### 4.5 Run all cells + +Run cells 1-6 in order. Expected timing on Colab T4: + +| Cell | Operation | Time estimate | +|------|-----------|--------------| +| 1 | Install dependencies | 2-3 minutes | +| 2 | Define stimuli + save metadata | Instant | +| 3 | TRIBE v2 predictions | 1-3 min per stimulus (depends on length) | +| 4 | CLIP embeddings | ~2 sec per video stimulus | +| 5 | Whisper embeddings | ~3 sec per stimulus with audio | +| 6 | GPT-2 embeddings | ~1 sec per stimulus | + +For 50 stimuli, expect ~2-3 hours total for Cell 3 (the bottleneck). 
+ +### 4.6 Download the cache + +After all cells complete, download the `neurolens_cache/` folder: + +```python +# Option A: Zip and download +!zip -r neurolens_cache.zip neurolens_cache/ +# Then use Colab's file browser to download + +# Option B: Upload to Google Drive +from google.colab import drive +drive.mount('/content/drive') +!cp -r neurolens_cache/ /content/drive/MyDrive/neurolens_cache/ + +# Option C: Upload to HuggingFace Hub (recommended for sharing) +!huggingface-cli upload your-username/neurolens-cache neurolens_cache/ +``` + +### 4.7 Cache structure reference + +After precompute, your cache should look like: + +``` +neurolens_cache/ +├── stimuli/ +│ └── metadata.json # Stimulus metadata (auto-generated) +├── brain_preds/ +│ ├── clip_001.npz # Shape: (n_timesteps, 20484) +│ ├── clip_002.npz +│ └── ... +├── roi_summaries/ +│ ├── clip_001.json # {"Visual Cortex": 0.42, ...} +│ ├── clip_002.json +│ └── ... +└── embeddings/ + ├── clip/ + │ ├── clip_001.pt # CLIP ViT-B-32 image embedding + │ └── ... + ├── whisper/ + │ ├── clip_001.pt # Whisper-base encoder embedding + │ └── ... + └── gpt2/ + ├── clip_001.pt # GPT-2 text embedding + └── ... +``` + +--- + +## 5. Running the Interactive Notebook + +### 5.1 Place the cache + +Copy `neurolens_cache/` into the project root (same level as `neurolens.ipynb`): + +```bash +# If downloaded locally +cp -r ~/Downloads/neurolens_cache/ /opt/CodeRepo/tribev2/neurolens_cache/ + +# If on Google Drive (in Colab) +!cp -r /content/drive/MyDrive/neurolens_cache/ neurolens_cache/ +``` + +### 5.2 Launch the notebook + +**Option A: Local Jupyter** +```bash +source .venv/bin/activate +uv pip install jupyterlab +jupyter lab neurolens.ipynb +``` + +**Option B: Google Colab** +1. Upload `neurolens.ipynb` to Colab +2. Upload `neurolens_cache/` folder +3. Upload the `neurolens/` package folder +4. Set runtime to CPU (GPU not needed) +5. 
Run all cells + +### 5.3 Using each module + +**Module 1: PREDICT** +- Select a stimulus from the dropdown +- Drag the timestep slider to see how brain activation changes over time +- Select different views (left, right, medial, dorsal) to see different angles +- The top-5 most activated brain regions are shown below the plot + +**Module 2: MATCH** +- **Region Picker mode:** Set intensity sliders for different brain regions, click "Find Matches" to find stimuli that best activate those regions +- **More Like This mode:** Select a stimulus, find neurally similar content +- **Contrast mode:** Pick a region to maximize and one to minimize (e.g., "maximize Visual Cortex, minimize Auditory Cortex") + +**Module 3: EVAL** +- Click "Run Leaderboard" to see which AI model's representations align most with brain responses +- Select two models from the dropdowns and click "Compare Models" for a head-to-head comparison +- Each model gets a "Brain Report Card" with alignment percentage + +--- + +## 6. Project Structure + +``` +tribev2/ # Root repository +├── tribev2/ # Original TRIBE v2 package (Meta) +│ ├── model.py # FmriEncoder neural network +│ ├── demo_utils.py # TribeModel inference API +│ ├── utils.py # HCP ROI utilities, data loading +│ ├── plotting/ # Brain visualization (nilearn, pyvista) +│ └── ... 
+│ +├── neurolens/ # NeuroLens package (our code) +│ ├── __init__.py # Public API exports +│ ├── roi.py # Human-friendly ROI groups +│ ├── stimulus.py # Stimulus metadata management +│ ├── cache.py # Cache loading (npz, json, pt) +│ ├── viz.py # Brain plots + radar charts +│ ├── predict.py # Time-sliced brain predictions +│ ├── match.py # Neural similarity search +│ └── eval.py # RSA model-brain alignment +│ +├── tests/ # 31 tests across 8 files +│ ├── test_roi.py +│ ├── test_stimulus.py +│ ├── test_cache.py +│ ├── test_viz.py +│ ├── test_predict.py +│ ├── test_match.py +│ ├── test_eval.py +│ └── test_integration.py +│ +├── neurolens_precompute.ipynb # GPU notebook: generate cache +├── neurolens.ipynb # Main interactive notebook (CPU) +│ +├── docs/ +│ ├── GETTING_STARTED.md # This file +│ └── superpowers/ +│ ├── specs/ # Design spec +│ └── plans/ # Implementation plan +│ +└── neurolens_cache/ # Generated cache (not in git) +``` + +--- + +## 7. Next Steps & Roadmap + +### Immediate (to get it running) + +- [ ] **Collect stimuli** — Download 50-80 CC-licensed clips across the 6 categories +- [ ] **Run precompute** — Process clips through TRIBE v2 + comparison models on Colab +- [ ] **Test the experience** — Open the main notebook and interact with all 3 modules +- [ ] **Polish visualizations** — Adjust colormaps, layout, and labels based on real data + +### Short-term improvements + +- [ ] **Add more comparison models** — Add DINO v2 (already in TRIBE v2), BLIP-2, or newer models to the eval module. Just extract embeddings and save as `.pt` in the cache +- [ ] **Per-ROI eval** — Extend `compute_model_brain_alignment` to return per-ROI-group scores (not just overall). 
This enables the radar chart comparison in the Eval module +- [ ] **Cache hosting** — Upload the cache to HuggingFace Hub with auto-download in the notebook setup cell +- [ ] **Modality toggle** — The Predict module supports modality contributions (video-only, audio-only, text-only) but the precompute notebook doesn't generate per-modality caches yet. Add per-modality TRIBE v2 runs + +### Medium-term extensions + +- [ ] **Blog post** — Write up the project as a portfolio piece. Focus on: system design (compute/serving separation), eval methodology (RSA), and the creative "brain-matched content" angle +- [ ] **Streamlit/Gradio web app** — Convert the notebook into a deployed web app for a more polished demo +- [ ] **Subject-specific predictions** — TRIBE v2 supports per-subject predictions. Add a subject selector to the Predict module +- [ ] **Temporal RSA** — Instead of time-averaging brain predictions, compute RSA at each timestep to see how alignment changes over time + +### Portfolio positioning + +When presenting this project in interviews or applications: + +1. **System design** — "I designed a compute/serving split where GPU-heavy inference is pre-computed and the interactive layer runs on CPU. This is the same pattern used in production recommendation systems." +2. **Evaluation methodology** — "I implemented Representational Similarity Analysis (RSA) to benchmark how closely AI models' internal representations match biological neural responses. This is directly transferable to LLM evaluation." +3. **Multimodal AI** — "I worked hands-on with 7 SOTA models (LLaMA 3.2, V-JEPA2, Wav2Vec-BERT, DINOv2, CLIP, Whisper, GPT-2) in a unified pipeline." + +--- + +## 8. Troubleshooting + +### `ModuleNotFoundError: No module named 'neuralset'` + +The `neuralset` and `neuraltrain` packages are custom Meta libraries bundled with TRIBE v2. Install with: + +```bash +uv pip install -e "." 
+``` + +### `ModuleNotFoundError: No module named 'nilearn'` + +Install plotting dependencies: + +```bash +uv pip install -e ".[plotting]" +``` + +### MNE data download hangs or fails + +The first call to `build_target_from_regions` or `summarize_by_roi_group` downloads MNE sample data (~1.65 GB). If it fails: + +```bash +python -c "import mne; mne.datasets.sample.data_path()" +``` + +This downloads to `~/mne_data/`. Ensure you have disk space and internet access. + +### `RuntimeError: TribeModel must be instantiated via .from_pretrained` + +You're trying to use `TribeModel` directly. Always use: + +```python +model = TribeModel.from_pretrained("facebook/tribev2", cache_folder="./cache") +``` + +### Colab runs out of memory during precompute + +TRIBE v2 + LLaMA 3.2 needs ~12 GB VRAM. On Colab free tier: +- Use T4 GPU (16 GB VRAM) +- Process stimuli one at a time (the notebook already does this) +- If still OOM, reduce stimulus duration to < 10 seconds + +### `FileNotFoundError: metadata.json` + +The `neurolens_cache/stimuli/metadata.json` file is missing. Either: +- You haven't run the precompute notebook yet +- The cache directory is in the wrong location (must be at project root) + +### Tests pass locally but fail in CI + +The `test_roi.py` and `test_match.py::test_build_target_from_regions` tests require MNE sample data. 
In CI, either: +- Pre-cache MNE data in the CI image +- Mark these tests with `@pytest.mark.slow` and skip in CI diff --git a/neurolens.ipynb b/neurolens.ipynb new file mode 100644 index 0000000..aae9a43 --- /dev/null +++ b/neurolens.ipynb @@ -0,0 +1,423 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.0" + } + }, + "cells": [ + { + "cell_type": "markdown", + "id": "cell-0-intro", + "metadata": {}, + "source": [ + "# NeuroLens\n", + "\n", + "**An interactive neuroscience playground built on TRIBE v2**\n", + "\n", + "Explore how the brain responds to video, audio, and text \u2014 and discover which AI models think most like humans.\n", + "\n", + "Three modules:\n", + "1. **PREDICT** \u2014 See predicted brain activation for a stimulus\n", + "2. **MATCH** \u2014 Find content that triggers specific brain states\n", + "3. **EVAL** \u2014 Benchmark AI models against biological brain responses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-1-setup", + "metadata": {}, + "outputs": [], + "source": [ + "# NeuroLens v1.0 \u2014 Interactive Neuroscience Playground\nimport sys, os\nfrom pathlib import Path\n\n# Clone repo and set up environment (Colab)\nif not os.path.exists('neurolens'):\n if not os.path.exists('tribev2'):\n !git clone https://github.com/facebookresearch/tribev2.git\n os.chdir('tribev2')\n\nsys.path.insert(0, os.getcwd())\n\n# Install dependencies\n!pip install -q numpy scipy av ffmpeg-python \\\n exca neuralset neuraltrain einops pyyaml huggingface_hub \\\n gtts langdetect spacy soundfile Levenshtein julius transformers x_transformers \\\n nilearn matplotlib pydantic tqdm plotly ipywidgets 2>/dev/null\n!pip install -q -e '.[plotting]' --no-deps 2>/dev/null || true\n!pip install -q 'moviepy>=2.1' --force-reinstall 2>/dev/null\n\n# Patch moviepy v2 import for 
neuralset compatibility\nimport moviepy\ntry:\n from moviepy import VideoFileClip\nexcept ImportError:\n from moviepy.video.io.VideoFileClip import VideoFileClip\n moviepy.VideoFileClip = VideoFileClip\n sys.modules['moviepy'].VideoFileClip = VideoFileClip\n\nimport warnings\nwarnings.filterwarnings('ignore')\n\nimport numpy as np\nimport torch\nimport json as _json\nimport matplotlib\nmatplotlib.use('module://matplotlib_inline.backend_inline')\nimport matplotlib.pyplot as plt\nimport ipywidgets as widgets\nfrom IPython.display import display, clear_output\n\n# Generate demo cache if no real cache exists\nCACHE_DIR = Path('neurolens_cache')\nif not (CACHE_DIR / 'stimuli' / 'metadata.json').exists():\n print('No cache found. Generating demo cache with synthetic data...')\n for d in ['stimuli', 'brain_preds', 'roi_summaries']:\n (CACHE_DIR / d).mkdir(parents=True, exist_ok=True)\n for model in ['clip', 'whisper', 'gpt2']:\n (CACHE_DIR / 'embeddings' / model).mkdir(parents=True, exist_ok=True)\n\n demo_stimuli = [\n {'id': 'demo_001', 'name': 'Nature scene', 'category': 'Silence + Visuals', 'media_type': 'video', 'duration_sec': 10.0},\n {'id': 'demo_002', 'name': 'Speech excerpt', 'category': 'Speech', 'media_type': 'video', 'duration_sec': 12.0},\n {'id': 'demo_003', 'name': 'Classical music', 'category': 'Music', 'media_type': 'audio', 'duration_sec': 15.0},\n {'id': 'demo_004', 'name': 'Horror clip', 'category': 'Emotional', 'media_type': 'video', 'duration_sec': 8.0},\n {'id': 'demo_005', 'name': 'Movie scene', 'category': 'Multimodal-rich', 'media_type': 'video', 'duration_sec': 10.0},\n ]\n (CACHE_DIR / 'stimuli' / 'metadata.json').write_text(_json.dumps(demo_stimuli, indent=2))\n\n np.random.seed(42)\n roi_names = ['Visual Cortex', 'Auditory Cortex', 'Language Areas', 'Motor Cortex',\n 'Prefrontal Cortex', 'Temporal Cortex', 'Parietal Cortex',\n 'Somatosensory Cortex', 'Face-Selective Areas']\n for s in demo_stimuli:\n sid = s['id']\n preds = 
np.random.randn(8, 20484).astype(np.float32)\n np.savez(CACHE_DIR / 'brain_preds' / f'{sid}.npz', preds=preds)\n roi = {name: round(float(np.random.rand()), 3) for name in roi_names}\n (CACHE_DIR / 'roi_summaries' / f'{sid}.json').write_text(_json.dumps(roi))\n for model in ['clip', 'whisper', 'gpt2']:\n torch.save(torch.randn(256), CACHE_DIR / 'embeddings' / model / f'{sid}.pt')\n print(f'Demo cache ready: {len(demo_stimuli)} stimuli, 3 models')\n print('Note: Using synthetic data. Run neurolens_precompute.ipynb for real brain predictions.\\n')\n\nfrom neurolens.cache import CacheManager\nfrom neurolens.stimulus import StimulusLibrary\nfrom neurolens.predict import get_prediction_at_time, get_num_timesteps, get_top_rois\nfrom neurolens.match import find_similar_stimuli, build_target_from_regions, find_contrast_stimuli\nfrom neurolens.eval import compute_all_model_alignments, compute_model_brain_alignment\nfrom neurolens.roi import get_roi_group_names, ROI_GROUPS\nfrom neurolens.viz import plot_brain_surface, make_radar_chart\n\ncache = CacheManager(CACHE_DIR)\nlibrary = StimulusLibrary(CACHE_DIR)\nprint(f'Loaded {len(library)} stimuli, {len(cache.available_models())} models')" + ] + }, + { + "cell_type": "markdown", + "id": "cell-2-predict-header", + "metadata": {}, + "source": [ + "---\n", + "## Module 1: PREDICT\n", + "Select a stimulus and explore how it activates different brain regions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-3-predict", + "metadata": {}, + "outputs": [], + "source": [ + "# Stimulus picker\n", + "stim_dropdown = widgets.Dropdown(\n", + " options=library.dropdown_options(),\n", + " description=\"Stimulus:\",\n", + " style={\"description_width\": \"initial\"},\n", + ")\n", + "\n", + "# Time slider (updated dynamically)\n", + "time_slider = widgets.IntSlider(\n", + " value=0, min=0, max=1, step=1,\n", + " description=\"Timestep:\",\n", + " continuous_update=False,\n", + ")\n", + "\n", + "# View selector\n", + "view_select = widgets.SelectMultiple(\n", + " options=[\"left\", \"right\", \"medial_left\", \"medial_right\", \"dorsal\"],\n", + " value=[\"left\", \"right\"],\n", + " description=\"Views:\",\n", + ")\n", + "\n", + "output_predict = widgets.Output()\n", + "\n", + "def update_predict(*args):\n", + " sid = stim_dropdown.value\n", + " n_steps = get_num_timesteps(cache, sid)\n", + " time_slider.max = n_steps - 1\n", + "\n", + " with output_predict:\n", + " clear_output(wait=True)\n", + " data = get_prediction_at_time(cache, sid, time_slider.value)\n", + " stim = library.get(sid)\n", + " fig = plot_brain_surface(\n", + " data,\n", + " views=list(view_select.value),\n", + " title=f\"{stim.name} (t={time_slider.value})\",\n", + " )\n", + " plt.show()\n", + "\n", + " # Top ROIs\n", + " top = get_top_rois(cache, sid, k=5)\n", + " print(\"\\nTop activated regions:\")\n", + " for name, val in top:\n", + " bar = \"\\u2588\" * int(abs(val) * 20)\n", + " print(f\" {name:.<30s} {val:+.3f} {bar}\")\n", + "\n", + "stim_dropdown.observe(update_predict, names=\"value\")\n", + "time_slider.observe(update_predict, names=\"value\")\n", + "view_select.observe(update_predict, names=\"value\")\n", + "\n", + "display(widgets.VBox([\n", + " widgets.HBox([stim_dropdown, time_slider]),\n", + " view_select,\n", + " output_predict,\n", + "]))\n", + "update_predict()" + ] + }, + { + "cell_type": "markdown", + "id": 
"cell-4-match-header", + "metadata": {}, + "source": [ + "---\n", + "## Module 2: MATCH\n", + "Find content that activates specific brain regions, or discover neurally similar stimuli." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-5-match", + "metadata": {}, + "outputs": [], + "source": [ + "# Tab 1: Region picker\n", + "match_mode = widgets.ToggleButtons(\n", + " options=[\"Region Picker\", \"More Like This\", \"Contrast\"],\n", + " description=\"Mode:\",\n", + ")\n", + "\n", + "# Region picker controls\n", + "region_dropdowns = {}\n", + "for name in get_roi_group_names():\n", + " region_dropdowns[name] = widgets.FloatSlider(\n", + " value=0.0, min=0.0, max=1.0, step=0.1,\n", + " description=name, style={\"description_width\": \"initial\"},\n", + " layout=widgets.Layout(width=\"400px\"),\n", + " )\n", + "region_box = widgets.VBox(list(region_dropdowns.values()))\n", + "\n", + "# More Like This controls\n", + "source_dropdown = widgets.Dropdown(\n", + " options=library.dropdown_options(),\n", + " description=\"Source:\",\n", + " style={\"description_width\": \"initial\"},\n", + ")\n", + "\n", + "# Contrast controls\n", + "max_roi = widgets.Dropdown(options=get_roi_group_names(), description=\"Maximize:\")\n", + "min_roi = widgets.Dropdown(options=get_roi_group_names(), description=\"Minimize:\", value=get_roi_group_names()[1])\n", + "\n", + "output_match = widgets.Output()\n", + "\n", + "def run_match(btn=None):\n", + " with output_match:\n", + " clear_output(wait=True)\n", + " ids = library.ids()\n", + " mode = match_mode.value\n", + "\n", + " if mode == \"Region Picker\":\n", + " intensities = {name: slider.value for name, slider in region_dropdowns.items()}\n", + " target = build_target_from_regions(intensities)\n", + " results = find_similar_stimuli(cache, target, ids, top_k=5)\n", + " elif mode == \"More Like This\":\n", + " source_preds = cache.load_brain_preds(source_dropdown.value)\n", + " target = 
source_preds.mean(axis=0)\n", + " results = find_similar_stimuli(cache, target, ids, top_k=5)\n", + " else: # Contrast\n", + " results = find_contrast_stimuli(\n", + " cache, ids, max_roi.value, min_roi.value, top_k=5\n", + " )\n", + "\n", + " print(f\"Top matches ({mode}):\\n\")\n", + " radar_data = {}\n", + " for rank, (sid, score) in enumerate(results, 1):\n", + " stim = library.get(sid)\n", + " print(f\" {rank}. {stim.name} [{stim.category}] \\u2014 score: {score:.3f}\")\n", + " roi = cache.load_roi_summary(sid)\n", + " if roi and rank <= 3:\n", + " radar_data[stim.name] = roi\n", + "\n", + " if radar_data:\n", + " fig = make_radar_chart(radar_data, title=\"ROI Activation Profiles\")\n", + " plt.show()\n", + "\n", + "match_btn = widgets.Button(description=\"Find Matches\", button_style=\"primary\")\n", + "match_btn.on_click(run_match)\n", + "\n", + "display(widgets.VBox([\n", + " match_mode,\n", + " region_box,\n", + " widgets.HBox([source_dropdown]),\n", + " widgets.HBox([max_roi, min_roi]),\n", + " match_btn,\n", + " output_match,\n", + "]))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-6-eval-header", + "metadata": {}, + "source": [ + "---\n", + "## Module 3: EVAL\n", + "Which AI model thinks most like a human brain?\n", + "Compare model representations against predicted brain responses." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cell-7-eval", + "metadata": {}, + "outputs": [], + "source": [ + "output_eval = widgets.Output()\n", + "\n", + "def run_eval(btn=None):\n", + " with output_eval:\n", + " clear_output(wait=True)\n", + " ids = library.ids()\n", + " print(\"Computing brain alignment scores (RSA)...\\n\")\n", + "\n", + " scores = compute_all_model_alignments(cache, ids)\n", + "\n", + " # Leaderboard\n", + " print(\"=\" * 50)\n", + " print(f\"{'Rank':<6}{'Model':<20}{'Brain Alignment':>15}\")\n", + " print(\"=\" * 50)\n", + " for rank, (model, score) in enumerate(scores.items(), 1):\n", + " bar = \"\\u2588\" * int(max(0, score) * 30)\n", + " print(f\"{rank:<6}{model:<20}{score:>+.4f} {bar}\")\n", + " print(\"=\" * 50)\n", + "\n", + " if len(scores) >= 2:\n", + " print(\"\\nSelect two models to compare:\")\n", + "\n", + "model_a = widgets.Dropdown(\n", + " options=cache.available_models(),\n", + " description=\"Model A:\",\n", + ")\n", + "model_b = widgets.Dropdown(\n", + " options=cache.available_models(),\n", + " description=\"Model B:\",\n", + " value=cache.available_models()[-1] if len(cache.available_models()) > 1 else cache.available_models()[0],\n", + ")\n", + "\n", + "output_compare = widgets.Output()\n", + "\n", + "def compare_models(btn=None):\n", + " with output_compare:\n", + " clear_output(wait=True)\n", + " ids = library.ids()\n", + " print(f\"Comparing {model_a.value} vs {model_b.value}...\\n\")\n", + " score_a = compute_model_brain_alignment(cache, model_a.value, ids)\n", + " score_b = compute_model_brain_alignment(cache, model_b.value, ids)\n", + "\n", + " print(f\" {model_a.value}: RSA = {score_a:+.4f}\")\n", + " print(f\" {model_b.value}: RSA = {score_b:+.4f}\")\n", + "\n", + " for model_name in [model_a.value, model_b.value]:\n", + " print(f\"\\n--- Brain Report Card: {model_name} ---\")\n", + " score = compute_model_brain_alignment(cache, model_name, ids)\n", + " pct = max(0, score) * 100\n", + " 
print(f\" Overall brain alignment: {pct:.1f}%\")\n", + "\n", + "compare_btn = widgets.Button(description=\"Compare Models\", button_style=\"info\")\n", + "compare_btn.on_click(compare_models)\n", + "\n", + "eval_btn = widgets.Button(description=\"Run Leaderboard\", button_style=\"primary\")\n", + "eval_btn.on_click(run_eval)\n", + "\n", + "display(widgets.VBox([\n", + " eval_btn,\n", + " output_eval,\n", + " widgets.HBox([model_a, model_b, compare_btn]),\n", + " output_compare,\n", + "]))" + ] + }, + { + "cell_type": "markdown", + "id": "cell-8-footer", + "metadata": {}, + "source": [ + "---\n## Explore Further\n\n- **TRIBE v2 Paper:** [A Foundation Model of Vision, Audition, and Language for In-Silico Neuroscience](https://ai.meta.com/research/publications/a-foundation-model-of-vision-audition-and-language-for-in-silico-neuroscience/)\n- **TRIBE v2 Demo:** [aidemos.atmeta.com/tribev2](https://aidemos.atmeta.com/tribev2/)\n- **Add more stimuli:** Edit `neurolens_precompute.ipynb` and re-run\n- **Add more models:** Extract embeddings from any HuggingFace model into the cache\n\nBuilt with NeuroLens | TRIBE v2 (Meta AI)" + ] + }, + { + "cell_type": "markdown", + "id": "2zyfc08025d", + "source": "---\n\n# Complete Results Showcase\n\nAll results below were generated automatically using `neurolens/generate_all_results.py` across all 6 stimuli, 3 AI models, and 72 ROI contrast pairs. 
This section provides a comprehensive, non-interactive view of every module's output for analysis and reporting.", + "metadata": {} + }, + { + "cell_type": "code", + "id": "pkj5vzzoaph", + "source": [ + "# === SHOWCASE SETUP ===\nimport json\nfrom pathlib import Path\nfrom IPython.display import display, Image as IPImage, HTML, Markdown\nimport matplotlib.pyplot as plt\nimport matplotlib.image as mpimg\nimport numpy as np\n\nRESULTS_DIR = Path('neurolens_results')\nif not RESULTS_DIR.exists():\n print('Run `python -m neurolens.generate_all_results` first to generate results.')\n print('See docs/GETTING_STARTED.md for instructions.')\nCACHE_DIR = Path('neurolens_cache')\n\n# Load metadata\nstimuli_meta = json.loads((CACHE_DIR / 'stimuli' / 'metadata.json').read_text())\nstim_lookup = {s['id']: s for s in stimuli_meta}\n\ndef show_images_grid(image_paths, titles=None, cols=5, figsize_per=(3.5, 3)):\n \"\"\"Display a grid of images from file paths.\"\"\"\n rows = (len(image_paths) + cols - 1) // cols\n fig, axes = plt.subplots(rows, cols, figsize=(figsize_per[0]*cols, figsize_per[1]*rows))\n if rows == 1 and cols == 1:\n axes = np.array([axes])\n axes = np.array(axes).flatten()\n for i, ax in enumerate(axes):\n if i < len(image_paths) and image_paths[i].exists():\n img = mpimg.imread(str(image_paths[i]))\n ax.imshow(img)\n if titles and i < len(titles):\n ax.set_title(titles[i], fontsize=9, fontweight='bold')\n ax.axis('off')\n plt.tight_layout()\n plt.show()\n\nprint(f\"Results directory: {RESULTS_DIR}\")\nprint(f\"Stimuli: {len(stimuli_meta)} | Models: {', '.join(json.loads((RESULTS_DIR / 'summary.json').read_text())['models'])}\")" + ], + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "yyucdujxtns", + "source": "## 4.1 PREDICT Results \u2014 Brain Activation Across All Stimuli\n\n### Brain Surface Plots: First Timestep (t=0), All 5 Views per Stimulus", + "metadata": {} + }, + { + "cell_type": "code", + "id": 
"k2eeckmk36d", + "source": "# Show t=0 brain plots for ALL stimuli, all 5 views\nviews = ['left', 'right', 'medial_left', 'medial_right', 'dorsal']\n\nfor stim in stimuli_meta:\n sid = stim['id']\n stim_dir = RESULTS_DIR / 'predict' / sid\n \n # Get key frames from available files\n brain_files = sorted(stim_dir.glob('brain_t*_left.png'))\n timesteps = sorted(set(int(f.stem.split('_')[1][1:]) for f in brain_files))\n \n print(f\"\\n{'='*80}\")\n print(f\" {stim['name']} [{stim['category']}] \u2014 {stim['duration_sec']}s, {len(timesteps)} key frames\")\n print(f\"{'='*80}\")\n \n # Show first timestep, all views\n t0 = timesteps[0]\n paths = [stim_dir / f'brain_t{t0:02d}_{v}.png' for v in views]\n titles = [f't={t0} {v}' for v in views]\n show_images_grid(paths, titles, cols=5, figsize_per=(4, 3))\n \n # ROI summary\n roi = json.loads((stim_dir / 'roi_summary.json').read_text())\n sorted_roi = sorted(roi.items(), key=lambda x: x[1], reverse=True)\n print(f\"\\n Top ROIs (time-averaged):\")\n for name, val in sorted_roi[:5]:\n bar = '\u2588' * int(abs(val) * 40)\n sign = '+' if val >= 0 else '-'\n print(f\" {name:<25s} {sign}{abs(val):.3f} {bar}\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "j68s17lfss", + "source": "### Temporal Dynamics \u2014 How Brain Activation Evolves Over Time\n\nKey frames (first, middle, last) for each stimulus, left hemisphere view:", + "metadata": {} + }, + { + "cell_type": "code", + "id": "b5ji6fy183b", + "source": "# Temporal evolution: left view across all key frames for each stimulus\nfor stim in stimuli_meta:\n sid = stim['id']\n stim_dir = RESULTS_DIR / 'predict' / sid\n brain_files = sorted(stim_dir.glob('brain_t*_left.png'))\n timesteps = sorted(set(int(f.stem.split('_')[1][1:]) for f in brain_files))\n \n paths = [stim_dir / f'brain_t{t:02d}_left.png' for t in timesteps]\n titles = [f't={t} ({\"first\" if i==0 else \"middle\" if i==1 else \"last\"})' \n for i, t in 
enumerate(timesteps)]\n \n print(f\"\\n{stim['name']} [{stim['category']}] \u2014 temporal evolution ({len(timesteps)} frames)\")\n show_images_grid(paths, titles, cols=len(timesteps), figsize_per=(5, 3.5))", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "exy9n3020c4", + "source": "### ROI Activation Comparison \u2014 All Stimuli Side by Side", + "metadata": {} + }, + { + "cell_type": "code", + "id": "f55owf052bh", + "source": "# Comparative ROI heatmap across all stimuli\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nroi_names = list(json.loads((RESULTS_DIR / 'predict' / 'clip_001' / 'roi_summary.json').read_text()).keys())\nstim_names = [s['name'] for s in stimuli_meta]\nstim_ids = [s['id'] for s in stimuli_meta]\n\n# Build matrix\nmatrix = np.zeros((len(stim_ids), len(roi_names)))\nfor i, sid in enumerate(stim_ids):\n roi = json.loads((RESULTS_DIR / 'predict' / sid / 'roi_summary.json').read_text())\n for j, rn in enumerate(roi_names):\n matrix[i, j] = roi.get(rn, 0.0)\n\nfig, ax = plt.subplots(figsize=(14, 5))\nim = ax.imshow(matrix, cmap='RdBu_r', aspect='auto', vmin=-0.45, vmax=0.45)\nax.set_xticks(range(len(roi_names)))\nax.set_xticklabels(roi_names, rotation=45, ha='right', fontsize=10)\nax.set_yticks(range(len(stim_names)))\ncategories = [s['category'] for s in stimuli_meta]\nax.set_yticklabels([f\"{n} [{c}]\" for n, c in zip(stim_names, categories)], fontsize=10)\nplt.colorbar(im, ax=ax, label='Mean Activation', shrink=0.8)\nax.set_title('ROI Activation Heatmap \u2014 All Stimuli', fontsize=14, fontweight='bold')\n\n# Annotate cells\nfor i in range(matrix.shape[0]):\n for j in range(matrix.shape[1]):\n val = matrix[i, j]\n color = 'white' if abs(val) > 0.2 else 'black'\n ax.text(j, i, f'{val:.2f}', ha='center', va='center', fontsize=8, color=color)\n\nplt.tight_layout()\nplt.show()\n\n# Category-level summary\nprint(\"\\nCategory-level ROI averages:\")\nfor cat in 
sorted(set(categories)):\n cat_indices = [i for i, c in enumerate(categories) if c == cat]\n cat_avg = matrix[cat_indices].mean(axis=0)\n print(f\"\\n {cat}:\")\n for j, rn in enumerate(roi_names):\n bar = '\u2588' * int(abs(cat_avg[j]) * 30)\n sign = '+' if cat_avg[j] >= 0 else '-'\n print(f\" {rn:<25s} {sign}{abs(cat_avg[j]):.3f} {bar}\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "qpdpjj099n", + "source": "---\n\n## 4.2 MATCH Results \u2014 Neural Similarity and Contrast Analysis\n\n### \"More Like This\" \u2014 Neural Neighbors for Every Stimulus", + "metadata": {} + }, + { + "cell_type": "code", + "id": "c5ph6k1bo9l", + "source": "# More Like This \u2014 show matches + radar charts for all stimuli\nmlt_dir = RESULTS_DIR / 'match' / 'more_like_this'\n\n# Neural similarity matrix\nsim_matrix = np.zeros((len(stim_ids), len(stim_ids)))\nfor i, sid_a in enumerate(stim_ids):\n matches = json.loads((mlt_dir / f'{sid_a}_matches.json').read_text())\n match_dict = {m['stimulus_id']: m['similarity'] for m in matches}\n for j, sid_b in enumerate(stim_ids):\n sim_matrix[i, j] = match_dict.get(sid_b, 0.0)\n\nfig, axes = plt.subplots(1, 2, figsize=(16, 5))\n\n# Similarity matrix\nax = axes[0]\nim = ax.imshow(sim_matrix, cmap='YlOrRd', vmin=0, vmax=1)\nax.set_xticks(range(len(stim_names)))\nax.set_xticklabels(stim_names, rotation=45, ha='right', fontsize=9)\nax.set_yticks(range(len(stim_names)))\nax.set_yticklabels(stim_names, fontsize=9)\nplt.colorbar(im, ax=ax, label='Cosine Similarity', shrink=0.8)\nax.set_title('Neural Similarity Matrix', fontsize=12, fontweight='bold')\nfor i in range(len(stim_ids)):\n for j in range(len(stim_ids)):\n ax.text(j, i, f'{sim_matrix[i,j]:.2f}', ha='center', va='center', fontsize=8,\n color='white' if sim_matrix[i,j] > 0.5 else 'black')\n\n# Top matches summary\nax = axes[1]\nax.axis('off')\ntext_lines = [\"NEURAL NEIGHBORS (Top Match for Each)\\n\"]\nfor sid in stim_ids:\n 
matches = json.loads((mlt_dir / f'{sid}_matches.json').read_text())\n source_name = stim_lookup[sid]['name']\n # Skip self-match (similarity=1.0)\n best = [m for m in matches if m['stimulus_id'] != sid][0]\n text_lines.append(f\"{source_name} \u2192 {best['name']} (sim={best['similarity']:.3f})\")\nax.text(0.05, 0.95, '\\n'.join(text_lines), transform=ax.transAxes, fontsize=11,\n verticalalignment='top', fontfamily='monospace')\n\nplt.tight_layout()\nplt.show()\n\n# Show radar charts\nprint(\"\\nRadar charts \u2014 ROI profiles of top 3 neural neighbors:\")\nradar_paths = sorted(mlt_dir.glob('*_radar.png'))\nradar_titles = [stim_lookup[p.stem.replace('_radar', '')]['name'] for p in radar_paths]\nshow_images_grid(radar_paths, radar_titles, cols=3, figsize_per=(5, 4.5))", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "n3vfn17vnao", + "source": "### Contrast Analysis \u2014 Key ROI Pairs and Directional Asymmetries", + "metadata": {} + }, + { + "cell_type": "code", + "id": "7v2zajdqbf8", + "source": "# Key contrast pairs with directional asymmetry analysis\ncontrast_dir = RESULTS_DIR / 'match' / 'contrast'\n\nkey_contrasts = [\n ('Visual_Cortex', 'Auditory_Cortex'),\n ('Auditory_Cortex', 'Visual_Cortex'),\n ('Language_Areas', 'Visual_Cortex'),\n ('Visual_Cortex', 'Language_Areas'),\n ('Motor_Cortex', 'Language_Areas'),\n ('Auditory_Cortex', 'Language_Areas'),\n ('Prefrontal_Cortex', 'Visual_Cortex'),\n ('Face-Selective_Areas', 'Auditory_Cortex'),\n]\n\nprint(\"KEY CONTRAST RESULTS \u2014 Top stimulus for each directed ROI pair\\n\")\nprint(f\"{'Maximize':<25s} {'Minimize':<25s} {'Winner':<25s} {'Score':>8s}\")\nprint(\"=\" * 88)\n\nfor max_roi, min_roi in key_contrasts:\n fname = f'max_{max_roi}_min_{min_roi}.json'\n fpath = contrast_dir / fname\n if fpath.exists():\n matches = json.loads(fpath.read_text())\n top = matches[0]\n print(f\"{max_roi.replace('_', ' '):<25s} {min_roi.replace('_', ' '):<25s} \"\n 
f\"{top['name']:<25s} {top['contrast_score']:>+.4f}\")\n\n# Directional asymmetry analysis\nprint(f\"\\n\\nDIRECTIONAL ASYMMETRIES \u2014 Same ROI pair, opposite directions\\n\")\nasymmetry_pairs = [\n ('Visual_Cortex', 'Auditory_Cortex'),\n ('Language_Areas', 'Visual_Cortex'),\n ('Motor_Cortex', 'Language_Areas'),\n ('Prefrontal_Cortex', 'Temporal_Cortex'),\n]\n\nfor a, b in asymmetry_pairs:\n fwd = json.loads((contrast_dir / f'max_{a}_min_{b}.json').read_text())\n rev = json.loads((contrast_dir / f'max_{b}_min_{a}.json').read_text())\n \n print(f\"\\n {a.replace('_', ' ')} vs {b.replace('_', ' ')}:\")\n print(f\" Maximize {a.replace('_', ' ')}: #{1} {fwd[0]['name']} ({fwd[0]['contrast_score']:+.4f})\")\n print(f\" Maximize {b.replace('_', ' ')}: #{1} {rev[0]['name']} ({rev[0]['contrast_score']:+.4f})\")\n if fwd[0]['stimulus_id'] != rev[0]['stimulus_id']:\n print(f\" \u2192 ASYMMETRIC: different winners!\")\n else:\n print(f\" \u2192 Symmetric: same stimulus wins both directions\")\n\n# Show radar charts for selected contrasts\nprint(\"\\n\\nRadar charts for key contrasts:\")\nselected_radars = [\n contrast_dir / 'max_Visual_Cortex_min_Auditory_Cortex_radar.png',\n contrast_dir / 'max_Auditory_Cortex_min_Visual_Cortex_radar.png',\n contrast_dir / 'max_Language_Areas_min_Visual_Cortex_radar.png',\n contrast_dir / 'max_Motor_Cortex_min_Language_Areas_radar.png',\n]\ntitles = ['Visual > Auditory', 'Auditory > Visual', 'Language > Visual', 'Motor > Language']\nshow_images_grid(selected_radars, titles, cols=2, figsize_per=(6, 5))", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "3c7pj7c617k", + "source": "---\n\n## 4.3 EVAL Results \u2014 AI Model vs Brain Alignment\n\n### Model Leaderboard and Pairwise Comparisons", + "metadata": {} + }, + { + "cell_type": "code", + "id": "3uwrst5isg6", + "source": "# Leaderboard\neval_dir = RESULTS_DIR / 'eval'\nleaderboard = json.loads((eval_dir / 
'leaderboard.json').read_text())\n\nfig, axes = plt.subplots(1, 2, figsize=(14, 5))\n\n# Bar chart\nax = axes[0]\nmodels = [e['model'].upper() for e in leaderboard]\nscores = [e['rsa_score'] for e in leaderboard]\ncolors = ['#2ecc71' if s > 0.1 else '#f39c12' if s > 0 else '#e74c3c' for s in scores]\nbars = ax.bar(models, scores, color=colors, edgecolor='black', linewidth=0.5, width=0.5)\nax.set_ylabel('RSA Score (Spearman r)', fontsize=12)\nax.set_title('Brain Alignment Leaderboard', fontsize=14, fontweight='bold')\nax.axhline(y=0, color='gray', linestyle='--', linewidth=0.5)\nfor bar, score in zip(bars, scores):\n ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,\n f'{score:+.4f}', ha='center', va='bottom', fontsize=12, fontweight='bold')\nax.set_ylim(-0.25, 0.55)\n\n# Pairwise comparison table\nax = axes[1]\nax.axis('off')\n\ncompare_files = sorted(eval_dir.glob('compare_*.json'))\nlines = [\"PAIRWISE MODEL COMPARISONS\\n\"]\nfor cf in compare_files:\n c = json.loads(cf.read_text())\n winner_marker_a = \" \u25c0 WINNER\" if c['winner'] == c['model_a'] else \"\"\n winner_marker_b = \" \u25c0 WINNER\" if c['winner'] == c['model_b'] else \"\"\n lines.append(f\"{c['model_a'].upper():>8s}: RSA = {c['rsa_a']:+.4f} ({c['alignment_pct_a']:.1f}%){winner_marker_a}\")\n lines.append(f\"{c['model_b'].upper():>8s}: RSA = {c['rsa_b']:+.4f} ({c['alignment_pct_b']:.1f}%){winner_marker_b}\")\n lines.append(f\"{'Gap':>8s}: {abs(c['rsa_a'] - c['rsa_b']):.4f}\")\n lines.append(\"\")\n\nax.text(0.05, 0.95, '\\n'.join(lines), transform=ax.transAxes, fontsize=11,\n verticalalignment='top', fontfamily='monospace')\n\nplt.tight_layout()\nplt.show()\n\n# Summary\nprint(\"\\n\" + \"=\" * 60)\nprint(\"SUMMARY: Which AI model thinks most like a brain?\")\nprint(\"=\" * 60)\nfor e in leaderboard:\n pct = e['brain_alignment_pct']\n bar = '\u2588' * int(pct)\n print(f\" #{e['rank']} {e['model'].upper():<10s} RSA={e['rsa_score']:+.4f} \"\n f\"Alignment={pct:.1f}% 
{bar}\")\nprint(f\"\\nCLIP dominates \u2014 visual representations best match brain patterns for video stimuli.\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "k2ia471o49d", + "source": "---\n\n## 4.4 Complete All-Contrasts Gallery\n\nAll 72 directed ROI contrast radar charts, organized by maximize ROI:", + "metadata": {} + }, + { + "cell_type": "code", + "id": "hcgwds52tnm", + "source": "# Gallery of all contrast radar charts grouped by maximize ROI\ncontrast_dir = RESULTS_DIR / 'match' / 'contrast'\nroi_names_sorted = sorted(set(\n f.stem.split('_min_')[0].replace('max_', '').replace('_', ' ')\n for f in contrast_dir.glob('max_*_radar.png')\n))\n\nfor max_roi in roi_names_sorted:\n safe_roi = max_roi.replace(' ', '_')\n radars = sorted(contrast_dir.glob(f'max_{safe_roi}_min_*_radar.png'))\n if not radars:\n continue\n titles = [f.stem.replace(f'max_{safe_roi}_min_', '').replace('_radar', '').replace('_', ' ')\n for f in radars]\n print(f\"\\n{'\u2500'*60}\")\n print(f\" Maximize: {max_roi} \u2014 contrasted against {len(radars)} other ROIs\")\n print(f\"{'\u2500'*60}\")\n show_images_grid(radars, titles, cols=4, figsize_per=(4.5, 4))", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "jcpkgcd3kv", + "source": [ + "---\n\n# 5. Findings Report\n\n## Key Discovery: Visual Dominance in Brain Responses to Video\n\nAcross all three modules, **visual processing emerges as the dominant factor** in how the brain represents video stimuli:\n\n| Evidence | Module | Finding |\n|----------|--------|---------|\n| Visual Cortex modulation | Predict | Widest activation range across categories (+0.169 to -0.161) |\n| Visual similarity clusters | Match | Stimuli cluster by visual content more than by audio |\n| CLIP wins brain alignment | Eval | RSA = 0.386, far ahead of Whisper (0.029) and GPT-2 (-0.143) |\n\n## Top 7 Findings\n\n### 1. 
Speech Stimuli Form the Tightest Neural Cluster (sim=0.801)\nJack Ma and Ronaldo motivational speeches share 80.1% neural similarity -- the highest pair in the dataset. Both activate Auditory Cortex (+0.21 avg) and Motor Cortex (+0.15 avg), consistent with the motor theory of speech perception. However, they diverge internally: Jack Ma is linguistically dominant (Language Areas +0.129), Ronaldo is motor-dominant (+0.207) -- speech *style* matters as much as speech *presence*.\n\n### 2. Music is the Strongest Language Suppressor (-0.414)\nHigh Impact Music drives Language Areas to **-0.414** -- the single most extreme activation in either direction across all stimuli and ROIs. This suppression *intensifies* over time (colorbar doubles from -1.0 to -2.0 between t=0 and t=8). Music also paradoxically *suppresses* Auditory Cortex (-0.175), suggesting it engages motor/rhythmic circuits rather than classical auditory pathways.\n\n### 3. CLIP Massively Outperforms Audio and Text Models (13x)\nSingle-frame CLIP embeddings align with brain representations 13x better than full-audio Whisper. GPT-2 actually **anti-correlates** (RSA = -0.143), meaning its text-based similarity structure actively contradicts the brain's perceptual organization. An RSA of 0.386 is in the range neuroscience papers report for well-matched IT cortex models (Kriegeskorte et al., 2008).\n\n### 4. Same-Category Stimuli Can Be Neurally Orthogonal (sim=0.001)\nMuay Thai Kick and Romantic Couple are both \"Silence + Visuals\" but have near-zero neural similarity (0.001). Muay Thai is visuospatially rich (Parietal +0.152, Face-Selective +0.131), while Romantic Couple is globally suppressed (max ROI = +0.034). **Content category labels don't map to neural response patterns.**\n\n### 5. 
Motor-Language Is the Most Discriminative Brain Axis (12x asymmetry)\nThe Motor vs Language contrast produces the widest spread (0.731) and most extreme directional asymmetry: maximize Motor/minimize Language yields +0.491 (Music wins), but the reverse yields only +0.041 (Jack Ma wins). A **12x asymmetry** -- the Motor-over-Language pole is far more neurally separable than Language-over-Motor.\n\n### 6. Motor Cortex Is the Most Universal Brain Response\nMotor Cortex is the **only ROI with a positive grand mean** (+0.084) across all 6 stimuli. It appears in the top 3 ROIs for 4/6 stimuli. Embodied motor simulation is the most universal brain response to video, regardless of content type.\n\n### 7. Temporal Dynamics Reveal Progressive Engagement\nBrain activation follows an **inverted-U arc** for speech (peak at mid-clip t=5), but shows **progressive intensification** for music (doubling suppression by t=8). Medial views reveal cingulate cortex engagement (emotion/attention) and precuneus activation during music (default mode network / internal imagery).\n\n## Category Prediction from Neural Patterns\n\n| Category | Prediction Rule | Accuracy |\n|----------|----------------|----------|\n| Speech | Auditory > 0.1 AND Visual < 0 | **100%** |\n| Music | Language < -0.3 | **100%** |\n| Silence + Visuals | Parietal > 0.15 AND Face-Selective > 0.1 | **67%** (fails Romantic Couple) |\n\n## Limitations\n- Only 6 stimuli (minimum for RSA, statistically underpowered; need 20+)\n- CLIP uses single middle frame, not full video\n- GPT-2 uses stimulus name, not actual transcription\n- No permutation-based significance testing or confidence intervals\n- Simulated brain data (TRIBE v2 predictions, not actual fMRI/EEG)\n- Coarse 9-region ROIs average over fine-grained cortical areas\n\n## Recommended Next Steps\n1. Expand to 20+ stimuli for robust RSA statistics\n2. Add permutation p-values (10,000x label shuffles)\n3. Add video-native models (VideoMAE, InternVideo)\n4. 
Compute region-specific RSA (CLIP vs visual cortex only)\n5. Use actual speech transcriptions for GPT-2\n6. Add subcortical ROIs (amygdala, striatum) to explain Romantic Couple's flat cortical profile\n\n*Full report with complete quantitative analysis: `neurolens_results/NEUROLENS_FINDINGS_REPORT.md`*\n\n---\n*NeuroLens | Built on TRIBE v2 (Meta AI)*" + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/neurolens/__init__.py b/neurolens/__init__.py new file mode 100644 index 0000000..bc9e117 --- /dev/null +++ b/neurolens/__init__.py @@ -0,0 +1,28 @@ +"""NeuroLens: Interactive neuroscience playground built on TRIBE v2.""" + +from neurolens.cache import CacheManager +from neurolens.stimulus import Stimulus, StimulusLibrary +from neurolens.predict import get_prediction_at_time, get_num_timesteps, get_top_rois +from neurolens.match import find_similar_stimuli, build_target_from_regions, find_contrast_stimuli +from neurolens.eval import compute_all_model_alignments, compute_model_brain_alignment +from neurolens.roi import ROI_GROUPS, get_roi_group_names, summarize_by_roi_group +from neurolens.viz import plot_brain_surface, make_radar_chart + +__all__ = [ + "CacheManager", + "Stimulus", + "StimulusLibrary", + "get_prediction_at_time", + "get_num_timesteps", + "get_top_rois", + "find_similar_stimuli", + "build_target_from_regions", + "find_contrast_stimuli", + "compute_all_model_alignments", + "compute_model_brain_alignment", + "ROI_GROUPS", + "get_roi_group_names", + "summarize_by_roi_group", + "plot_brain_surface", + "make_radar_chart", +] diff --git a/neurolens/cache.py b/neurolens/cache.py new file mode 100644 index 0000000..efcb27f --- /dev/null +++ b/neurolens/cache.py @@ -0,0 +1,66 @@ +"""CacheManager: load pre-computed brain predictions, ROI summaries, and embeddings.""" + +import json +from pathlib import Path + +import numpy as np +import torch + + +class CacheManager: + """Loads cached data from the NeuroLens cache directory. 
+ + Expected layout:: + + cache_dir/ + ├── brain_preds/{stimulus_id}.npz (key: "preds") + ├── roi_summaries/{stimulus_id}.json + └── embeddings/{model_name}/{stimulus_id}.pt + """ + + def __init__(self, cache_dir: str | Path) -> None: + self.cache_dir = Path(cache_dir) + + def load_brain_preds(self, stimulus_id: str) -> np.ndarray | None: + """Load brain predictions array of shape (n_timesteps, n_vertices). + + Returns None if the file doesn't exist. + """ + path = self.cache_dir / "brain_preds" / f"{stimulus_id}.npz" + if not path.exists(): + return None + return np.load(path)["preds"] + + def load_roi_summary(self, stimulus_id: str) -> dict[str, float] | None: + """Load per-ROI-group mean activations. + + Returns None if the file doesn't exist. + """ + path = self.cache_dir / "roi_summaries" / f"{stimulus_id}.json" + if not path.exists(): + return None + return json.loads(path.read_text()) + + def load_embedding(self, stimulus_id: str, model_name: str) -> torch.Tensor | None: + """Load a model embedding tensor. + + Returns None if the file doesn't exist. 
+ """ + path = self.cache_dir / "embeddings" / model_name / f"{stimulus_id}.pt" + if not path.exists(): + return None + return torch.load(path, map_location="cpu", weights_only=True) + + def available_models(self) -> list[str]: + """Return sorted list of model names that have cached embeddings.""" + emb_dir = self.cache_dir / "embeddings" + if not emb_dir.exists(): + return [] + return sorted(d.name for d in emb_dir.iterdir() if d.is_dir()) + + def all_brain_pred_ids(self) -> list[str]: + """Return stimulus ids that have cached brain predictions.""" + preds_dir = self.cache_dir / "brain_preds" + if not preds_dir.exists(): + return [] + return sorted(p.stem for p in preds_dir.glob("*.npz")) diff --git a/neurolens/eval.py b/neurolens/eval.py new file mode 100644 index 0000000..6ce6566 --- /dev/null +++ b/neurolens/eval.py @@ -0,0 +1,74 @@ +"""Eval module: RSA-based comparison of AI model embeddings to brain predictions.""" + +from __future__ import annotations + +import numpy as np +from scipy.stats import spearmanr + +from neurolens.cache import CacheManager + + +def compute_pairwise_similarity_matrix(vectors: list[np.ndarray]) -> np.ndarray: + """Compute pairwise cosine similarity matrix for a list of vectors. + Returns np.ndarray of shape (n, n). + """ + mat = np.stack(vectors) + norms = np.linalg.norm(mat, axis=1, keepdims=True) + norms = np.where(norms == 0, 1.0, norms) + mat_normed = mat / norms + return mat_normed @ mat_normed.T + + +def compute_rsa_score( + sim_matrix_a: np.ndarray, + sim_matrix_b: np.ndarray, +) -> float: + """Compute RSA score: Spearman correlation between upper triangles. + Returns float: Spearman correlation coefficient. 
+ """ + n = sim_matrix_a.shape[0] + idx = np.triu_indices(n, k=1) + vec_a = sim_matrix_a[idx] + vec_b = sim_matrix_b[idx] + corr, _ = spearmanr(vec_a, vec_b) + return float(corr) + + +def compute_model_brain_alignment( + cache: CacheManager, + model_name: str, + stimulus_ids: list[str], +) -> float: + """Compute overall brain alignment score for a model using RSA. + Returns float: RSA alignment score in [-1, 1]. + """ + embeddings = [] + brain_vecs = [] + for sid in stimulus_ids: + emb = cache.load_embedding(sid, model_name) + preds = cache.load_brain_preds(sid) + if emb is None or preds is None: + continue + embeddings.append(emb.numpy()) + brain_vecs.append(preds.mean(axis=0)) + + if len(embeddings) < 3: + return 0.0 + + emb_sim = compute_pairwise_similarity_matrix(embeddings) + brain_sim = compute_pairwise_similarity_matrix(brain_vecs) + return compute_rsa_score(emb_sim, brain_sim) + + +def compute_all_model_alignments( + cache: CacheManager, + stimulus_ids: list[str], +) -> dict[str, float]: + """Compute brain alignment scores for all available models. + Returns dict mapping model_name to RSA score, sorted descending. + """ + models = cache.available_models() + scores = {} + for model_name in models: + scores[model_name] = compute_model_brain_alignment(cache, model_name, stimulus_ids) + return dict(sorted(scores.items(), key=lambda x: x[1], reverse=True)) diff --git a/neurolens/generate_all_results.py b/neurolens/generate_all_results.py new file mode 100644 index 0000000..974c8b6 --- /dev/null +++ b/neurolens/generate_all_results.py @@ -0,0 +1,407 @@ +"""Automated results generator for all NeuroLens modules. + +Generates brain surface plots, match results, eval leaderboards, and radar +charts for all meaningful parameter combinations. Reads from the pre-computed +neurolens_cache/ and writes structured output to neurolens_results/. 
+ +Usage: + python -m neurolens.generate_all_results [--cache-dir PATH] [--output-dir PATH] +""" + +from __future__ import annotations + +import argparse +import json +import itertools +from datetime import datetime, timezone +from pathlib import Path + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np +from tqdm import tqdm + +from neurolens.cache import CacheManager +from neurolens.stimulus import StimulusLibrary +from neurolens.predict import get_prediction_at_time, get_num_timesteps, get_top_rois +from neurolens.match import find_similar_stimuli, find_contrast_stimuli +from neurolens.eval import ( + compute_all_model_alignments, + compute_model_brain_alignment, +) +from neurolens.roi import get_roi_group_names +from neurolens.viz import plot_brain_surface, make_radar_chart + +VIEWS = ["left", "right", "medial_left", "medial_right", "dorsal"] + + +def _save_fig(fig: matplotlib.figure.Figure, path: Path, dpi: int = 150) -> None: + """Save figure and close it to free memory.""" + fig.savefig(path, dpi=dpi, bbox_inches="tight") + plt.close(fig) + + +def _key_frames(n_timesteps: int) -> list[int]: + """Return first, middle, last timestep indices.""" + if n_timesteps <= 1: + return [0] + if n_timesteps == 2: + return [0, 1] + return [0, n_timesteps // 2, n_timesteps - 1] + + +# ── Module 1: Predict ────────────────────────────────────────────────────── + + +def generate_predict( + cache: CacheManager, + library: StimulusLibrary, + output_dir: Path, +) -> dict: + """Generate brain surface plots and ROI summaries for all stimuli.""" + predict_dir = output_dir / "predict" + results = {"stimuli": {}} + + stimuli = library.all() + total_plots = 0 + for stim in stimuli: + n_ts = get_num_timesteps(cache, stim.id) + total_plots += len(_key_frames(n_ts)) * len(VIEWS) + + pbar = tqdm(total=total_plots, desc="Predict: brain plots") + + for stim in stimuli: + stim_dir = predict_dir / stim.id + stim_dir.mkdir(parents=True, 
exist_ok=True) + + n_ts = get_num_timesteps(cache, stim.id) + key_frames = _key_frames(n_ts) + + stim_result = { + "name": stim.name, + "category": stim.category, + "n_timesteps": n_ts, + "key_frames": key_frames, + "brain_plots": [], + "roi_summary": {}, + } + + # Brain surface plots: each view saved individually + for t in key_frames: + pred = get_prediction_at_time(cache, stim.id, t) + for view in VIEWS: + fig = plot_brain_surface( + pred, + views=[view], + title=f"{stim.name} (t={t}) — {view}", + ) + fname = f"brain_t{t:02d}_{view}.png" + _save_fig(fig, stim_dir / fname) + stim_result["brain_plots"].append(fname) + pbar.update(1) + + # ROI summary (time-averaged) + top_rois = get_top_rois(cache, stim.id, k=9) + roi_data = {name: score for name, score in top_rois} + stim_result["roi_summary"] = roi_data + (stim_dir / "roi_summary.json").write_text(json.dumps(roi_data, indent=2)) + + results["stimuli"][stim.id] = stim_result + + pbar.close() + return results + + +# ── Module 2: Match ───────────────────────────────────────────────────────── + + +def generate_match_more_like_this( + cache: CacheManager, + library: StimulusLibrary, + output_dir: Path, +) -> dict: + """Generate 'More Like This' results for each stimulus.""" + mlt_dir = output_dir / "match" / "more_like_this" + mlt_dir.mkdir(parents=True, exist_ok=True) + results = {} + + stimulus_ids = library.ids() + + for stim in tqdm(library.all(), desc="Match: more like this"): + # Use time-averaged prediction as target + preds = cache.load_brain_preds(stim.id) + target = preds.mean(axis=0) + + matches = find_similar_stimuli( + cache, target, stimulus_ids, top_k=5, time_aggregation="mean" + ) + + # Build match data with names + match_data = [] + for sid, score in matches: + s = library.get(sid) + match_data.append({ + "stimulus_id": sid, + "name": s.name if s else sid, + "category": s.category if s else "unknown", + "similarity": round(score, 4), + }) + + (mlt_dir / f"{stim.id}_matches.json").write_text( + 
json.dumps(match_data, indent=2) + ) + + # Radar chart for top 3 + radar_data = {} + for sid, _ in matches[:3]: + s = library.get(sid) + label = s.name if s else sid + roi_summary = cache.load_roi_summary(sid) + if roi_summary: + radar_data[label] = roi_summary + + if len(radar_data) >= 2: + fig = make_radar_chart(radar_data, title=f"Similar to: {stim.name}") + _save_fig(fig, mlt_dir / f"{stim.id}_radar.png") + + results[stim.id] = { + "source": stim.name, + "matches": match_data, + } + + return results + + +def generate_match_contrast( + cache: CacheManager, + library: StimulusLibrary, + output_dir: Path, +) -> dict: + """Generate contrast results for all directed ROI pairs.""" + contrast_dir = output_dir / "match" / "contrast" + contrast_dir.mkdir(parents=True, exist_ok=True) + results = {} + + roi_names = get_roi_group_names() + stimulus_ids = library.ids() + + pairs = [(a, b) for a, b in itertools.permutations(roi_names, 2)] + + for max_roi, min_roi in tqdm(pairs, desc="Match: contrast pairs"): + matches = find_contrast_stimuli( + cache, stimulus_ids, max_roi, min_roi, top_k=5 + ) + + match_data = [] + for sid, score in matches: + s = library.get(sid) + match_data.append({ + "stimulus_id": sid, + "name": s.name if s else sid, + "category": s.category if s else "unknown", + "contrast_score": round(score, 4), + }) + + safe_max = max_roi.replace(" ", "_") + safe_min = min_roi.replace(" ", "_") + prefix = f"max_{safe_max}_min_{safe_min}" + + (contrast_dir / f"{prefix}.json").write_text( + json.dumps(match_data, indent=2) + ) + + # Radar chart for top 3 + radar_data = {} + for sid, _ in matches[:3]: + s = library.get(sid) + label = s.name if s else sid + roi_summary = cache.load_roi_summary(sid) + if roi_summary: + radar_data[label] = roi_summary + + if len(radar_data) >= 2: + fig = make_radar_chart( + radar_data, + title=f"Contrast: {max_roi} > {min_roi}", + ) + _save_fig(fig, contrast_dir / f"{prefix}_radar.png") + + results[prefix] = { + "maximize": max_roi, + 
"minimize": min_roi, + "matches": match_data, + } + + return results + + +# ── Module 3: Eval ────────────────────────────────────────────────────────── + + +def generate_eval( + cache: CacheManager, + library: StimulusLibrary, + output_dir: Path, +) -> dict: + """Generate leaderboard and pairwise model comparisons.""" + eval_dir = output_dir / "eval" + eval_dir.mkdir(parents=True, exist_ok=True) + + stimulus_ids = library.ids() + models = cache.available_models() + + # Leaderboard + print("Eval: computing leaderboard...") + alignments = compute_all_model_alignments(cache, stimulus_ids) + + leaderboard = [] + for rank, (model, score) in enumerate(alignments.items(), 1): + leaderboard.append({ + "rank": rank, + "model": model, + "rsa_score": round(score, 4), + "brain_alignment_pct": round(max(0, score) * 100, 1), + }) + + (eval_dir / "leaderboard.json").write_text(json.dumps(leaderboard, indent=2)) + + # Leaderboard bar chart + fig, ax = plt.subplots(figsize=(8, 5)) + model_names = [e["model"] for e in leaderboard] + scores = [e["rsa_score"] for e in leaderboard] + colors = plt.cm.Set2(np.linspace(0, 1, len(model_names))) + bars = ax.bar(model_names, scores, color=colors, edgecolor="black", linewidth=0.5) + ax.set_ylabel("RSA Score (Brain Alignment)") + ax.set_title("Model Leaderboard: Brain Alignment via RSA") + ax.axhline(y=0, color="gray", linestyle="--", linewidth=0.5) + for bar, score in zip(bars, scores): + ax.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height() + 0.01, + f"{score:.4f}", + ha="center", + va="bottom", + fontsize=10, + ) + _save_fig(fig, eval_dir / "leaderboard.png") + + # Pairwise comparisons + comparisons = {} + for m_a, m_b in itertools.combinations(models, 2): + score_a = compute_model_brain_alignment(cache, m_a, stimulus_ids) + score_b = compute_model_brain_alignment(cache, m_b, stimulus_ids) + pair_data = { + "model_a": m_a, + "model_b": m_b, + "rsa_a": round(score_a, 4), + "rsa_b": round(score_b, 4), + "alignment_pct_a": 
round(max(0, score_a) * 100, 1), + "alignment_pct_b": round(max(0, score_b) * 100, 1), + "winner": m_a if score_a > score_b else m_b, + } + key = f"{m_a}_vs_{m_b}" + (eval_dir / f"compare_{key}.json").write_text(json.dumps(pair_data, indent=2)) + comparisons[key] = pair_data + print(f" {m_a} ({score_a:.4f}) vs {m_b} ({score_b:.4f})") + + return {"leaderboard": leaderboard, "comparisons": comparisons} + + +# ── Main ──────────────────────────────────────────────────────────────────── + + +def generate_all(cache_dir: str | Path, output_dir: str | Path) -> Path: + """Run all modules and write results to output_dir.""" + cache_dir = Path(cache_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + cache = CacheManager(cache_dir) + library = StimulusLibrary(cache_dir) + + print(f"NeuroLens Results Generator") + print(f" Cache: {cache_dir}") + print(f" Output: {output_dir}") + print(f" Stimuli: {len(library)}") + print(f" Models: {cache.available_models()}") + print() + + summary = { + "generated_at": datetime.now(timezone.utc).isoformat(), + "cache_dir": str(cache_dir), + "stimuli_count": len(library), + "models": cache.available_models(), + "modules": {}, + } + + # Module 1: Predict + print("=" * 60) + print("MODULE 1: PREDICT") + print("=" * 60) + predict_results = generate_predict(cache, library, output_dir) + summary["modules"]["predict"] = { + "stimuli": len(predict_results["stimuli"]), + "total_brain_plots": sum( + len(s["brain_plots"]) + for s in predict_results["stimuli"].values() + ), + } + print() + + # Module 2: Match + print("=" * 60) + print("MODULE 2: MATCH") + print("=" * 60) + mlt_results = generate_match_more_like_this(cache, library, output_dir) + contrast_results = generate_match_contrast(cache, library, output_dir) + summary["modules"]["match"] = { + "more_like_this": len(mlt_results), + "contrast_pairs": len(contrast_results), + } + print() + + # Module 3: Eval + print("=" * 60) + print("MODULE 3: EVAL") + print("=" 
* 60) + eval_results = generate_eval(cache, library, output_dir) + summary["modules"]["eval"] = { + "leaderboard_models": len(eval_results["leaderboard"]), + "comparisons": len(eval_results["comparisons"]), + } + print() + + # Write master summary + (output_dir / "summary.json").write_text(json.dumps(summary, indent=2)) + + # Count output files + all_files = list(output_dir.rglob("*")) + n_png = len([f for f in all_files if f.suffix == ".png"]) + n_json = len([f for f in all_files if f.suffix == ".json"]) + print("=" * 60) + print(f"DONE! Generated {n_png} PNGs + {n_json} JSONs") + print(f"Output: {output_dir}") + print("=" * 60) + + return output_dir + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate all NeuroLens results") + parser.add_argument( + "--cache-dir", + default="neurolens_cache", + help="Path to neurolens_cache/ directory", + ) + parser.add_argument( + "--output-dir", + default="neurolens_results", + help="Path to output directory", + ) + args = parser.parse_args() + generate_all(args.cache_dir, args.output_dir) + + +if __name__ == "__main__": + main() diff --git a/neurolens/match.py b/neurolens/match.py new file mode 100644 index 0000000..56462c0 --- /dev/null +++ b/neurolens/match.py @@ -0,0 +1,97 @@ +"""Match module: find stimuli matching target brain activation patterns.""" + +from __future__ import annotations + +import numpy as np + +from neurolens.cache import CacheManager +from neurolens.roi import ROI_GROUPS + + +def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: + """Compute cosine similarity between two 1D arrays.""" + dot = np.dot(a, b) + norm = np.linalg.norm(a) * np.linalg.norm(b) + if norm == 0: + return 0.0 + return float(dot / norm) + + +def find_similar_stimuli( + cache: CacheManager, + target: np.ndarray, + stimulus_ids: list[str], + top_k: int = 5, + time_aggregation: str = "mean", +) -> list[tuple[str, float]]: + """Find stimuli whose brain predictions are most similar to a target pattern. 
+ + Parameters + ---------- + time_aggregation : str + How to reduce the time dimension of cached predictions before comparing. + ``"first"`` uses the first timestep (default), ``"mean"`` averages all timesteps. + + Returns + ------- + list of (stimulus_id, similarity_score) sorted descending. + """ + scores = [] + for sid in stimulus_ids: + preds = cache.load_brain_preds(sid) + if preds is None: + continue + if time_aggregation == "mean": + avg_pred = preds.mean(axis=0) + else: + avg_pred = preds[0] + sim = _cosine_similarity(target, avg_pred) + scores.append((sid, sim)) + scores.sort(key=lambda x: x[1], reverse=True) + return scores[:top_k] + + +def build_target_from_regions( + region_intensities: dict[str, float], + mesh: str = "fsaverage5", + n_vertices: int = 20484, +) -> np.ndarray: + """Build a synthetic target activation vector from ROI group intensities. + Returns np.ndarray of shape (n_vertices,). + """ + from tribev2.utils import get_hcp_roi_indices + + target = np.zeros(n_vertices) + for group_name, intensity in region_intensities.items(): + if group_name not in ROI_GROUPS: + continue + for region in ROI_GROUPS[group_name]: + try: + indices = get_hcp_roi_indices(region, hemi="both", mesh=mesh) + target[indices] = intensity + except ValueError: + continue + return target + + +def find_contrast_stimuli( + cache: CacheManager, + stimulus_ids: list[str], + maximize_roi: str, + minimize_roi: str, + top_k: int = 5, +) -> list[tuple[str, float]]: + """Find stimuli that maximize one ROI while minimizing another. + Returns list of (stimulus_id, contrast_score) sorted descending. 
+ """ + scores = [] + for sid in stimulus_ids: + roi_summary = cache.load_roi_summary(sid) + if roi_summary is None: + continue + max_val = roi_summary.get(maximize_roi, 0.0) + min_val = roi_summary.get(minimize_roi, 0.0) + contrast = max_val - min_val + scores.append((sid, contrast)) + scores.sort(key=lambda x: x[1], reverse=True) + return scores[:top_k] diff --git a/neurolens/predict.py b/neurolens/predict.py new file mode 100644 index 0000000..bb79ad2 --- /dev/null +++ b/neurolens/predict.py @@ -0,0 +1,60 @@ +"""Predict module: load and slice brain predictions from cache.""" + +from __future__ import annotations + +import numpy as np + +from neurolens.cache import CacheManager + + +def get_prediction_at_time( + cache: CacheManager, + stimulus_id: str, + time_idx: int, +) -> np.ndarray: + """Return brain prediction at a specific timestep. + time_idx is clamped to valid range. + Returns np.ndarray of shape (n_vertices,) + """ + preds = cache.load_brain_preds(stimulus_id) + time_idx = min(time_idx, preds.shape[0] - 1) + time_idx = max(time_idx, 0) + return preds[time_idx] + + +def get_num_timesteps(cache: CacheManager, stimulus_id: str) -> int: + """Return total number of timesteps for a stimulus.""" + preds = cache.load_brain_preds(stimulus_id) + return preds.shape[0] + + +def get_top_rois( + cache: CacheManager, + stimulus_id: str, + k: int = 5, +) -> list[tuple[str, float]]: + """Return top-k ROI groups by mean activation, sorted descending. + Returns list of (roi_name, mean_value) tuples. + """ + roi_summary = cache.load_roi_summary(stimulus_id) + sorted_rois = sorted(roi_summary.items(), key=lambda x: x[1], reverse=True) + return sorted_rois[:k] + + +def get_modality_contribution( + cache: CacheManager, + stimulus_id: str, + modality: str, + time_idx: int, +) -> np.ndarray | None: + """Return brain prediction for a specific modality at a timestep. + Per-modality predictions are stored as {stimulus_id}__{modality}.npz. 
+ Returns None if the modality file doesn't exist. + """ + mod_id = f"{stimulus_id}__{modality}" + preds = cache.load_brain_preds(mod_id) + if preds is None: + return None + time_idx = min(time_idx, preds.shape[0] - 1) + time_idx = max(time_idx, 0) + return preds[time_idx] diff --git a/neurolens/roi.py b/neurolens/roi.py new file mode 100644 index 0000000..e124a14 --- /dev/null +++ b/neurolens/roi.py @@ -0,0 +1,65 @@ +"""Human-friendly ROI groups mapped to HCP atlas regions.""" + +import numpy as np + +# Maps friendly names to lists of HCP MMP1.0 region name prefixes. +ROI_GROUPS: dict[str, list[str]] = { + "Visual Cortex": ["V1", "V2", "V3", "V4"], + "Auditory Cortex": ["A1", "A4", "A5", "RI", "MBelt", "LBelt", "PBelt"], + "Language Areas": ["44", "45", "47l", "IFSa", "IFSp", "IFJa", "IFJp", + "STSda", "STSdp", "STSva", "STSvp", "STV", + "TPOJ1", "TPOJ2", "TPOJ3"], + "Motor Cortex": ["4", "3a", "3b", "1", "2"], + "Prefrontal Cortex": ["8Av", "8Ad", "8BL", "8C", "9a", "9p", "9m", + "10d", "10r", "10v", "46", "p9-46v", "a9-46v"], + "Temporal Cortex": ["TE1a", "TE1m", "TE1p", "TE2a", "TE2p", + "TGd", "TGv", "TF"], + "Parietal Cortex": ["7AL", "7Am", "7PC", "7PL", "7Pm", + "AIP", "IP0", "IP1", "IP2", "LIPd", "LIPv", + "MIP", "VIP"], + "Somatosensory Cortex": ["3a", "3b", "1", "2"], + "Face-Selective Areas": ["FFC", "OFA", "PeEc"], +} + + +def get_roi_group_names() -> list[str]: + """Return sorted list of all ROI group names.""" + return sorted(ROI_GROUPS.keys()) + + +def summarize_by_roi_group( + data: np.ndarray, mesh: str = "fsaverage5" +) -> dict[str, float]: + """Compute mean activation per ROI group. + + Parameters + ---------- + data : np.ndarray + 1D array of shape (n_vertices,) on fsaverage5 (20484 vertices). + mesh : str + Mesh resolution name. + + Returns + ------- + dict mapping ROI group name to mean activation (float). 
+ """ + from tribev2.utils import get_hcp_roi_indices + + result = {} + for group_name, regions in ROI_GROUPS.items(): + all_indices = [] + for region in regions: + try: + indices = get_hcp_roi_indices(region, hemi="both", mesh=mesh) + all_indices.append(indices) + except ValueError: + # Region not found in atlas — skip it + continue + if all_indices: + combined = np.concatenate(all_indices) + # Deduplicate indices + combined = np.unique(combined) + result[group_name] = float(data[combined].mean()) + else: + result[group_name] = 0.0 + return result diff --git a/neurolens/stimulus.py b/neurolens/stimulus.py new file mode 100644 index 0000000..c0f5dae --- /dev/null +++ b/neurolens/stimulus.py @@ -0,0 +1,56 @@ +"""Stimulus library: metadata loading and lookup.""" + +import json +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class Stimulus: + id: str + name: str + category: str + media_type: str # "video", "audio", or "text" + duration_sec: float + + +class StimulusLibrary: + """Loads and queries stimulus metadata from a cache directory. + + Expects ``/stimuli/metadata.json`` — a JSON array of objects + with keys: id, name, category, media_type, duration_sec. 
+ """ + + def __init__(self, cache_dir: str | Path) -> None: + self.cache_dir = Path(cache_dir) + meta_path = self.cache_dir / "stimuli" / "metadata.json" + raw = json.loads(meta_path.read_text()) + self._stimuli = [Stimulus(**item) for item in raw] + self._by_id = {s.id: s for s in self._stimuli} + + def __len__(self) -> int: + return len(self._stimuli) + + def get(self, stimulus_id: str) -> Stimulus | None: + """Return a Stimulus by id, or None if not found.""" + return self._by_id.get(stimulus_id) + + def all(self) -> list[Stimulus]: + """Return all stimuli.""" + return list(self._stimuli) + + def filter_by_category(self, category: str) -> list[Stimulus]: + """Return stimuli matching the given category.""" + return [s for s in self._stimuli if s.category == category] + + def categories(self) -> list[str]: + """Return sorted list of unique categories.""" + return sorted(set(s.category for s in self._stimuli)) + + def ids(self) -> list[str]: + """Return list of all stimulus ids.""" + return [s.id for s in self._stimuli] + + def dropdown_options(self) -> list[tuple[str, str]]: + """Return (display_label, id) pairs for ipywidgets Dropdown.""" + return [(f"{s.name} [{s.category}]", s.id) for s in self._stimuli] diff --git a/neurolens/viz.py b/neurolens/viz.py new file mode 100644 index 0000000..5f9125c --- /dev/null +++ b/neurolens/viz.py @@ -0,0 +1,90 @@ +"""Shared visualization helpers: brain plots and radar charts.""" + +from __future__ import annotations + +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + + +def plot_brain_surface( + data: np.ndarray, + views: list[str] | None = None, + cmap: str = "hot", + title: str | None = None, + colorbar: bool = True, +) -> matplotlib.figure.Figure: + """Plot brain activation on a cortical surface using nilearn. + + Parameters + ---------- + data : np.ndarray + 1D array of shape (n_vertices,) on fsaverage5 (20484). + views : list of str + View angles, e.g. ["left", "right"]. 
Defaults to ["left", "right"]. + cmap : str + Matplotlib colormap name. + title : str or None + Optional figure title. + colorbar : bool + Whether to show a colorbar. + + Returns + ------- + matplotlib.figure.Figure + """ + from tribev2.plotting.cortical import PlotBrainNilearn + + if views is None: + views = ["left", "right"] + + plotter = PlotBrainNilearn(mesh="fsaverage5") + fig, axarr = plotter.get_fig_axes(views) + plotter.plot_surf( + data, + views=views, + axes=axarr, + cmap=cmap, + colorbar=colorbar, + ) + if title: + fig.suptitle(title, fontsize=12, fontweight="bold") + return fig + + +def make_radar_chart( + datasets: dict[str, dict[str, float]], + title: str | None = None, +) -> matplotlib.figure.Figure: + """Create a radar/spider chart comparing ROI activation profiles. + + Parameters + ---------- + datasets : dict + Maps label -> {roi_name: value}. All dicts must have the same keys. + title : str or None + Optional chart title. + + Returns + ------- + matplotlib.figure.Figure + """ + labels = list(next(iter(datasets.values())).keys()) + n = len(labels) + angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist() + angles += angles[:1] # close the polygon + + fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True)) + colors = plt.cm.Set2(np.linspace(0, 1, len(datasets))) + + for (name, values), color in zip(datasets.items(), colors): + vals = [values[label] for label in labels] + vals += vals[:1] + ax.plot(angles, vals, "o-", linewidth=2, label=name, color=color) + ax.fill(angles, vals, alpha=0.15, color=color) + + ax.set_thetagrids(np.degrees(angles[:-1]), labels, fontsize=9) + ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1)) + if title: + ax.set_title(title, fontsize=14, fontweight="bold", pad=20) + return fig diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..8144ac6 --- /dev/null +++ 
import json
import tempfile
from pathlib import Path

import numpy as np
import torch

from neurolens.cache import CacheManager


def _setup_cache(tmp: Path) -> Path:
    """Create a minimal cache: brain preds, an ROI summary, two embedding sets."""
    (tmp / "brain_preds").mkdir()
    np.savez(tmp / "brain_preds" / "clip_001.npz", preds=np.random.randn(5, 20484))

    (tmp / "roi_summaries").mkdir()
    (tmp / "roi_summaries" / "clip_001.json").write_text(
        json.dumps({"Visual Cortex": 0.5, "Auditory Cortex": 0.3})
    )

    for model_name in ["vjepa2", "clip"]:
        model_dir = tmp / "embeddings" / model_name
        model_dir.mkdir(parents=True)
        torch.save(torch.randn(256), model_dir / "clip_001.pt")

    return tmp


def test_load_brain_preds():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert cm.load_brain_preds("clip_001").shape == (5, 20484)


def test_load_brain_preds_missing():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert cm.load_brain_preds("nonexistent") is None


def test_load_roi_summary():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert cm.load_roi_summary("clip_001")["Visual Cortex"] == 0.5


def test_load_embedding():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert cm.load_embedding("clip_001", "vjepa2").shape == (256,)


def test_load_embedding_missing_model():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert cm.load_embedding("clip_001", "nonexistent") is None


def test_available_models():
    with tempfile.TemporaryDirectory() as tmp:
        cm = CacheManager(_setup_cache(Path(tmp)))
        assert set(cm.available_models()) == {"vjepa2", "clip"}
# tests/test_integration.py
"""End-to-end integration test with a mock cache."""

import json
import tempfile
from pathlib import Path

import numpy as np
import torch
import matplotlib
matplotlib.use("Agg")

from neurolens.cache import CacheManager
from neurolens.stimulus import StimulusLibrary
from neurolens.predict import get_prediction_at_time, get_top_rois
from neurolens.match import find_similar_stimuli, find_contrast_stimuli
from neurolens.eval import compute_all_model_alignments
from neurolens.viz import plot_brain_surface, make_radar_chart

_CATEGORIES = ["Speech", "Music", "Silence + Visuals",
               "Emotional", "Multimodal-rich"]


def _build_mock_cache(tmp: Path) -> Path:
    """Build a complete mock cache (metadata, preds, ROI summaries, embeddings)."""
    stimuli = [
        {"id": f"clip_{i:03d}", "name": f"Clip {i}", "category": cat,
         "media_type": "video", "duration_sec": 10.0}
        for i, cat in enumerate(_CATEGORIES)
    ]
    (tmp / "stimuli").mkdir()
    (tmp / "stimuli" / "metadata.json").write_text(json.dumps(stimuli))

    (tmp / "brain_preds").mkdir()
    (tmp / "roi_summaries").mkdir()
    roi_names = ["Visual Cortex", "Auditory Cortex", "Language Areas"]
    for entry in stimuli:
        preds = np.random.randn(5, 20484).astype(np.float32)
        np.savez(tmp / "brain_preds" / f"{entry['id']}.npz", preds=preds)
        roi = {name: float(np.random.rand()) for name in roi_names}
        (tmp / "roi_summaries" / f"{entry['id']}.json").write_text(json.dumps(roi))

    for model in ["vjepa2", "clip", "whisper"]:
        (tmp / "embeddings" / model).mkdir(parents=True)
        for entry in stimuli:
            torch.save(torch.randn(256),
                       tmp / "embeddings" / model / f"{entry['id']}.pt")

    return tmp


def test_full_pipeline():
    with tempfile.TemporaryDirectory() as tmp:
        cache_dir = _build_mock_cache(Path(tmp))
        cache = CacheManager(cache_dir)
        library = StimulusLibrary(cache_dir)

        # 1. Predict
        frame = get_prediction_at_time(cache, "clip_000", time_idx=2)
        assert frame.shape == (20484,)
        assert len(get_top_rois(cache, "clip_000", k=3)) == 3

        # 2. Match
        similar = find_similar_stimuli(cache, frame, library.ids(), top_k=3)
        assert len(similar) == 3
        contrast = find_contrast_stimuli(
            cache, library.ids(), "Visual Cortex", "Auditory Cortex", top_k=3
        )
        assert len(contrast) == 3

        # 3. Eval
        alignment = compute_all_model_alignments(cache, library.ids())
        assert len(alignment) == 3
        assert all(isinstance(v, float) for v in alignment.values())

        # 4. Visualization
        import matplotlib.pyplot as plt

        surf_fig = plot_brain_surface(frame, views=["left"])
        assert surf_fig is not None
        plt.close(surf_fig)

        radar_fig = make_radar_chart({"Clip 0": cache.load_roi_summary("clip_000")})
        assert radar_fig is not None
        plt.close(radar_fig)
import json
import tempfile
from pathlib import Path

import numpy as np

from neurolens.cache import CacheManager
from neurolens.predict import (
    get_prediction_at_time,
    get_top_rois,
    get_modality_contribution,
)


def _setup(tmp: Path) -> CacheManager:
    """Write combined + per-modality predictions and one ROI summary."""
    preds_dir = tmp / "brain_preds"
    preds_dir.mkdir()
    preds = np.random.randn(5, 20484)  # 5 timesteps, 20484 vertices
    np.savez(preds_dir / "clip_001.npz", preds=preds)
    for mod in ["video", "audio", "text", "combined"]:
        np.savez(preds_dir / f"clip_001__{mod}.npz", preds=preds * np.random.rand())

    roi_dir = tmp / "roi_summaries"
    roi_dir.mkdir()
    (roi_dir / "clip_001.json").write_text(
        json.dumps({"Visual Cortex": 0.9, "Auditory Cortex": 0.3, "Language Areas": 0.1})
    )
    return CacheManager(tmp)


def test_get_prediction_at_time():
    with tempfile.TemporaryDirectory() as tmp:
        cm = _setup(Path(tmp))
        assert get_prediction_at_time(cm, "clip_001", time_idx=2).shape == (20484,)


def test_get_prediction_at_time_clamps():
    with tempfile.TemporaryDirectory() as tmp:
        cm = _setup(Path(tmp))
        # An out-of-range index should clamp to the last timestep.
        assert get_prediction_at_time(cm, "clip_001", time_idx=999).shape == (20484,)


def test_get_top_rois():
    with tempfile.TemporaryDirectory() as tmp:
        cm = _setup(Path(tmp))
        top = get_top_rois(cm, "clip_001", k=2)
        assert len(top) == 2
        assert top[0][0] == "Visual Cortex"  # highest activation first


def test_get_modality_contribution():
    with tempfile.TemporaryDirectory() as tmp:
        cm = _setup(Path(tmp))
        data = get_modality_contribution(cm, "clip_001", modality="video", time_idx=0)
        assert data.shape == (20484,)


def test_get_modality_contribution_missing():
    with tempfile.TemporaryDirectory() as tmp:
        cm = _setup(Path(tmp))
        data = get_modality_contribution(cm, "clip_001", modality="nonexistent", time_idx=0)
        assert data is None
import json
import tempfile
from pathlib import Path

from neurolens.stimulus import Stimulus, StimulusLibrary

_FIXTURE = [
    {
        "id": "clip_001",
        "name": "Nature timelapse",
        "category": "Silence + Visuals",
        "media_type": "video",
        "duration_sec": 10.0,
    },
    {
        "id": "clip_002",
        "name": "TED talk excerpt",
        "category": "Speech",
        "media_type": "video",
        "duration_sec": 12.0,
    },
    {
        "id": "clip_003",
        "name": "Classical music",
        "category": "Music",
        "media_type": "audio",
        "duration_sec": 15.0,
    },
]


def _make_metadata(tmp: Path) -> Path:
    """Write the three-stimulus fixture into *tmp* and return it."""
    meta_path = tmp / "stimuli" / "metadata.json"
    meta_path.parent.mkdir(parents=True)
    meta_path.write_text(json.dumps(_FIXTURE))
    return tmp


def test_stimulus_dataclass():
    stim = Stimulus(
        id="clip_001",
        name="Nature timelapse",
        category="Silence + Visuals",
        media_type="video",
        duration_sec=10.0,
    )
    assert stim.id == "clip_001"
    assert stim.category == "Silence + Visuals"


def test_library_load():
    with tempfile.TemporaryDirectory() as tmp:
        lib = StimulusLibrary(_make_metadata(Path(tmp)))
        assert len(lib) == 3
        assert lib.get("clip_001").name == "Nature timelapse"


def test_library_filter_by_category():
    with tempfile.TemporaryDirectory() as tmp:
        lib = StimulusLibrary(_make_metadata(Path(tmp)))
        music = lib.filter_by_category("Music")
        assert len(music) == 1
        assert music[0].id == "clip_003"


def test_library_categories():
    with tempfile.TemporaryDirectory() as tmp:
        lib = StimulusLibrary(_make_metadata(Path(tmp)))
        assert set(lib.categories()) == {"Silence + Visuals", "Speech", "Music"}


def test_library_get_missing_returns_none():
    with tempfile.TemporaryDirectory() as tmp:
        lib = StimulusLibrary(_make_metadata(Path(tmp)))
        assert lib.get("nonexistent") is None