diff --git a/.gitignore b/.gitignore
index ad6cf6670..64704d4e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
spd/scripts/sweep_params.yaml
-spd/scripts/sweep_params.yaml
docs/coverage/**
+artifacts/**
+docs/dep_graph/**
+tests/.temp/**
**/out/
neuronpedia_outputs/
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 75c8edbb2..eb19f182a 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -230,6 +230,38 @@
"--model_path",
"wandb:goodfire/spd/runs/ioprgffh"
]
+ },
+ {
+ "name": "run_clustering example",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/spd/clustering/scripts/run_clustering.py",
+ "args": [
+ "--config",
+ "${workspaceFolder}/spd/clustering/configs/example.yaml",
+ ],
+ "python": "${command:python.interpreterPath}",
+ "console": "integratedTerminal",
+ "justMyCode": true,
+ "env": {
+ "PYDEVD_DISABLE_FILE_VALIDATION": "1"
+ }
+ },
+ {
+ "name": "clustering pipeline",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/spd/clustering/scripts/run_pipeline.py",
+ "args": [
+ "--config",
+ "${workspaceFolder}/spd/clustering/configs/pipeline_config.yaml",
+ ],
+ "python": "${command:python.interpreterPath}",
+ "console": "integratedTerminal",
+ "justMyCode": true,
+ "env": {
+ "PYDEVD_DISABLE_FILE_VALIDATION": "1"
+ }
}
]
}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index ff8ab8955..4cc60fe44 100644
--- a/Makefile
+++ b/Makefile
@@ -76,10 +76,23 @@ coverage:
uv run python -m coverage report -m > $(COVERAGE_DIR)/coverage.txt
uv run python -m coverage html --directory=$(COVERAGE_DIR)/html/
+
+.PHONY: clean
+clean:
+ @echo "Cleaning Python cache and build artifacts..."
+ find . -type d -name "__pycache__" -exec rm -rf {} +
+ find . -type d -name "*.egg-info" -exec rm -rf {} +
+ rm -rf build/ dist/ .ruff_cache/ .pytest_cache/ .coverage
+
+
+.PHONY: clustering-dev
+clustering-dev:
+ uv run spd-cluster --local --config spd/clustering/configs/pipeline-dev-simplestories.yaml
+
.PHONY: app
app:
@uv run python app/run_app.py
.PHONY: install-app
install-app:
- (cd app/frontend && npm install)
\ No newline at end of file
+ (cd app/frontend && npm install)
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 000000000..9e6f14815
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,73 @@
+# TODO: Cluster Coactivation Matrix Implementation
+
+## What Was Changed
+
+### 1. Added `ClusterActivations` dataclass (`spd/clustering/dashboard/compute_max_act.py`)
+- New dataclass to hold vectorized cluster activations for all clusters
+- Contains `activations` tensor [n_samples, n_clusters] and `cluster_indices` list
+
+### 2. Added `compute_all_cluster_activations()` function
+- Vectorized computation of all cluster activations at once
+- Replaces the per-cluster loop for better performance
+- Returns `ClusterActivations` object
+
+### 3. Added `compute_cluster_coactivations()` function
+- Computes coactivation matrix from list of `ClusterActivations` across batches
+- Binarizes activations (acts > 0) and computes matrix multiplication: `activation_mask.T @ activation_mask`
+- Follows the pattern from `spd/clustering/merge.py:69`
+- Returns tuple of (coactivation_matrix, cluster_indices)
+
+### 4. Modified `compute_max_activations()` function
+- Now accumulates `ClusterActivations` from each batch in `all_cluster_activations` list
+- Calls `compute_cluster_coactivations()` to compute the matrix
+- **Changed return type**: now returns `tuple[DashboardData, np.ndarray, list[int]]`
+ - Added coactivation matrix and cluster_indices to return value
+
+### 5. Modified `spd/clustering/dashboard/run.py`
+- Updated to handle new return value from `compute_max_activations()`
+- Saves coactivation matrix as `coactivations.npz` in the dashboard output directory
+- NPZ file contains:
+ - `coactivations`: the [n_clusters, n_clusters] matrix
+ - `cluster_indices`: array mapping matrix positions to cluster IDs
+
+## What Needs to be Checked
+
+### Testing
+- [ ] **Run the dashboard pipeline** on a real clustering run to verify:
+ - Coactivation computation doesn't crash
+ - Coactivations are saved correctly to NPZ file
+ - Matrix dimensions are correct
+ - `cluster_indices` mapping is correct
+
+### Type Checking
+- [ ] Run `make type` to ensure no type errors were introduced
+- [ ] Verify jaxtyping annotations are correct
+
+### Verification
+- [ ] Load a saved `coactivations.npz` file and verify:
+ ```python
+ data = np.load("coactivations.npz")
+ coact = data["coactivations"]
+ cluster_indices = data["cluster_indices"]
+ # Check: coact should be symmetric
+ # Check: diagonal should be >= off-diagonal (clusters coactivate with themselves most)
+ # Check: cluster_indices length should match coact.shape[0]
+ ```
+
+### Performance
+- [ ] Check if vectorization actually improved performance
+- [ ] Monitor memory usage with large numbers of clusters
+
+### Edge Cases
+- [ ] Test with clusters that have zero activations
+- [ ] Test with single-batch runs
+- [ ] Test with very large number of clusters
+
+### Integration
+- [ ] Verify the coactivation matrix can be used in downstream analysis
+- [ ] Consider if visualization of coactivations should be added to dashboard
+
+## Notes
+- The coactivation matrix is computed over all samples processed (n_batches * batch_size * seq_len samples)
+- Binarization threshold is currently hardcoded as `> 0` - may want to make this configurable
+- The computation happens in the dashboard pipeline, NOT during the main clustering pipeline
diff --git a/pyproject.toml b/pyproject.toml
index 62393b9e7..cd8ab45be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,9 @@ dependencies = [
# see: https://github.com/huggingface/datasets/issues/6980 https://github.com/huggingface/datasets/pull/6991 (fixed in https://github.com/huggingface/datasets/releases/tag/2.21.0 )
"datasets>=2.21.0",
"simple_stories_train @ git+https://github.com/goodfire-ai/simple_stories_train.git@dev",
+ "scipy>=1.14.1",
+ "muutils",
+ "scikit-learn",
"fastapi",
"uvicorn",
]
@@ -40,10 +43,12 @@ dev = [
"ruff",
"basedpyright<1.32.0", # pyright and wandb issues, see https://github.com/goodfire-ai/spd/pull/232
"pre-commit",
+ "nbconvert",
]
[project.scripts]
spd-run = "spd.scripts.run:cli"
+spd-cluster = "spd.clustering.scripts.run_pipeline:cli"
[build-system]
requires = ["setuptools", "wheel"]
diff --git a/spd/base_config.py b/spd/base_config.py
index c9b488e19..860898907 100644
--- a/spd/base_config.py
+++ b/spd/base_config.py
@@ -6,6 +6,14 @@
from pydantic import BaseModel, ConfigDict
+class FileTypeError(ValueError):
+ """Error raised when a file has an unsupported type/extension."""
+
+
+class ConfigValidationError(ValueError):
+ """Error raised when a config file fails pydantic validation."""
+
+
class BaseConfig(BaseModel):
"""Pydantic BaseModel suited for configs.
@@ -15,6 +23,8 @@ class BaseConfig(BaseModel):
model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", frozen=True)
+ # TODO: add a "config_type" field, which is set to the class name, so that when loading a config we can check whether the config type matches the expected class
+
@classmethod
def from_file(cls, path: Path | str) -> Self:
"""Load config from path to a JSON or YAML file."""
@@ -27,9 +37,16 @@ def from_file(cls, path: Path | str) -> Self:
case Path() if path.suffix in [".yaml", ".yml"]:
data = yaml.safe_load(path.read_text())
case _:
- raise ValueError(f"Only (.json, .yaml, .yml) files are supported, got {path}")
+ raise FileTypeError(f"Only (.json, .yaml, .yml) files are supported, got {path}")
+
+ try:
+ cfg = cls.model_validate(data)
+ except Exception as e:
+ raise ConfigValidationError(
+ f"Error validating config {cls=} from path `{path.as_posix()}`\n{data = }"
+ ) from e
- return cls.model_validate(data)
+ return cfg
def to_file(self, path: Path | str) -> None:
"""Save config to file (format inferred from extension)."""
@@ -43,4 +60,4 @@ def to_file(self, path: Path | str) -> None:
case ".yaml" | ".yml":
path.write_text(yaml.dump(self.model_dump(mode="json")))
case _:
- raise ValueError(f"Only (.json, .yaml, .yml) files are supported, got {path}")
+ raise FileTypeError(f"Only (.json, .yaml, .yml) files are supported, got {path}")
diff --git a/spd/clustering/__init__.py b/spd/clustering/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spd/clustering/activations.py b/spd/clustering/activations.py
new file mode 100644
index 000000000..cd6a2b742
--- /dev/null
+++ b/spd/clustering/activations.py
@@ -0,0 +1,267 @@
+from dataclasses import dataclass
+from functools import cached_property
+from typing import Literal, NamedTuple
+
+import torch
+from jaxtyping import Bool, Float, Float16, Int
+from torch import Tensor
+
+from spd.clustering.consts import (
+ ActivationsTensor,
+ BoolActivationsTensor,
+ ClusterCoactivationShaped,
+ ComponentLabels,
+)
+from spd.clustering.util import ModuleFilterFunc
+from spd.models.component_model import ComponentModel, OutputWithCache
+
+
+def component_activations(
+ model: ComponentModel,
+ device: torch.device | str,
+ batch: Int[Tensor, "batch_size n_ctx"],
+) -> dict[str, ActivationsTensor]:
+ """Get the component activations over a **single** batch."""
+ causal_importances: dict[str, ActivationsTensor]
+ with torch.no_grad():
+ model_output: OutputWithCache = model(
+ batch.to(device),
+ cache_type="input",
+ )
+
+ # TODO: !!!IMPORTANT!!! unclear what the right thing from CIOutputs is
+ causal_importances = model.calc_causal_importances(
+ pre_weight_acts=model_output.cache,
+ sampling="continuous",
+ detach_inputs=False,
+ ).upper_leaky
+
+ return causal_importances
+
+
+def compute_coactivatons(
+ activations: ActivationsTensor | BoolActivationsTensor,
+) -> ClusterCoactivationShaped:
+ """Compute the coactivations matrix from the activations."""
+ # TODO: this works for both boolean and continuous activations,
+    # but we could do better by using bitwise AND + popcount for boolean activations
+ # and maybe even some bitshift hacks. but for now, we convert to float16
+ activations_f16: Float16[Tensor, "samples C"] = activations.to(torch.float16)
+ return activations_f16.T @ activations_f16
+
+
+class FilteredActivations(NamedTuple):
+ activations: ActivationsTensor
+ "activations after filtering dead components"
+
+ labels: ComponentLabels
+ "list of length c with labels for each preserved component"
+
+ dead_components_labels: ComponentLabels | None
+ "list of labels for dead components, or None if no filtering was applied"
+
+ @property
+ def n_alive(self) -> int:
+ """Number of alive components after filtering."""
+ n_alive: int = len(self.labels)
+ assert n_alive == self.activations.shape[1], (
+ f"{n_alive = } != {self.activations.shape[1] = }"
+ )
+ return n_alive
+
+ @property
+ def n_dead(self) -> int:
+ """Number of dead components after filtering."""
+ return len(self.dead_components_labels) if self.dead_components_labels else 0
+
+
+def filter_dead_components(
+ activations: ActivationsTensor,
+ labels: ComponentLabels,
+ filter_dead_threshold: float = 0.01,
+) -> FilteredActivations:
+ """Filter out dead components based on a threshold
+
+ if `filter_dead_threshold` is 0, no filtering is applied.
+ activations and labels are returned as is, `dead_components_labels` is `None`.
+
+    otherwise, components whose **maximum** activation across all samples is below the threshold
+ are considered dead and filtered out. The labels of these components are returned in `dead_components_labels`.
+ `dead_components_labels` will also be `None` if no components were below the threshold.
+ """
+ dead_components_lst: ComponentLabels | None = None
+ if filter_dead_threshold > 0:
+ dead_components_lst = ComponentLabels(list())
+ max_act: Float[Tensor, " c"] = activations.max(dim=0).values
+ dead_components: Bool[Tensor, " c"] = max_act < filter_dead_threshold
+
+ if dead_components.any():
+ activations = activations[:, ~dead_components]
+ alive_labels: list[tuple[str, bool]] = [
+ (lbl, bool(keep.item()))
+ for lbl, keep in zip(labels, ~dead_components, strict=False)
+ ]
+ # re-assign labels only if we are filtering
+ labels = ComponentLabels([label for label, keep in alive_labels if keep])
+ dead_components_lst = ComponentLabels(
+ [label for label, keep in alive_labels if not keep]
+ )
+
+ return FilteredActivations(
+ activations=activations,
+ labels=labels,
+ dead_components_labels=dead_components_lst if dead_components_lst else None,
+ )
+
+
+@dataclass(frozen=True)
+class ProcessedActivations:
+ """Processed activations after filtering and concatenation"""
+
+ activations_raw: dict[str, ActivationsTensor]
+ "activations after filtering, but prior to concatenation"
+
+ activations: ActivationsTensor
+ "activations after filtering and concatenation"
+
+ labels: ComponentLabels
+ "list of length c with labels for each preserved component, format `{module_name}:{component_index}`"
+
+ dead_components_lst: ComponentLabels | None
+ "list of labels for dead components, or None if no filtering was applied"
+
+ def validate(self) -> None:
+ """Validate the processed activations"""
+ # getting this property will also perform a variety of other checks
+ assert self.n_components_alive > 0
+
+ @property
+ def n_components_original(self) -> int:
+ """Total number of components before filtering. equal to the sum of all components in `activations_raw`, or to `n_components_alive + n_components_dead`"""
+ return sum(act.shape[1] for act in self.activations_raw.values())
+
+ @property
+ def n_components_alive(self) -> int:
+ """Number of alive components after filtering. equal to the length of `labels`"""
+ n_alive: int = len(self.labels)
+ assert n_alive + self.n_components_dead == self.n_components_original, (
+ f"({n_alive = }) + ({self.n_components_dead = }) != ({self.n_components_original = })"
+ )
+ assert n_alive == self.activations.shape[1], (
+ f"{n_alive = } != {self.activations.shape[1] = }"
+ )
+
+ return n_alive
+
+ @property
+ def n_components_dead(self) -> int:
+ """Number of dead components after filtering. equal to the length of `dead_components_lst` if it is not None, or 0 otherwise"""
+ return len(self.dead_components_lst) if self.dead_components_lst else 0
+
+ @cached_property
+ def label_index(self) -> dict[str, int | None]:
+ """Create a mapping from label to alive index (`None` if dead)"""
+ return {
+ **{label: i for i, label in enumerate(self.labels)},
+ **(
+ {label: None for label in self.dead_components_lst}
+ if self.dead_components_lst
+ else {}
+ ),
+ }
+
+ def get_label_index(self, label: str) -> int | None:
+ """Get the index of a label in the activations, or None if it is dead"""
+ return self.label_index[label]
+
+ def get_label_index_alive(self, label: str) -> int:
+ """Get the index of a label in the activations, or raise if it is dead"""
+ idx: int | None = self.get_label_index(label)
+ if idx is None:
+ raise ValueError(f"Label '{label}' is dead and has no index in the activations.")
+ return idx
+
+ @property
+ def module_keys(self) -> list[str]:
+ """Get the module keys from the activations_raw"""
+ return list(self.activations_raw.keys())
+
+ def get_module_indices(self, module_key: str) -> list[int | None]:
+        """given a module key, return a list of length "num components in that module", with int index in alive components, or None if dead"""
+ num_components: int = self.activations_raw[module_key].shape[1]
+ return [self.label_index[f"{module_key}:{i}"] for i in range(num_components)]
+
+
+def process_activations(
+ activations: dict[
+ str, # module name to
+ Float[Tensor, "samples C"] # (sample x component gate activations)
+ | Float[Tensor, " n_sample n_ctx C"], # (sample x seq index x component gate activations)
+ ],
+ filter_dead_threshold: float = 0.01,
+ seq_mode: Literal["concat", "seq_mean", None] = None,
+ filter_modules: ModuleFilterFunc | None = None,
+) -> ProcessedActivations:
+    """Reshape, filter, and concatenate per-module activations into a ProcessedActivations
+
+ Args:
+ activations: Dictionary of activations by module
+ filter_dead_threshold: Threshold for filtering dead components
+ seq_mode: How to handle sequence dimension
+ filter_modules: Function to filter modules
+            (modules for which this returns False are dropped entirely)
+ """
+
+ # reshape -- special cases for llms
+ # ============================================================
+ activations_: dict[str, ActivationsTensor]
+ if seq_mode == "concat":
+ # Concatenate the sequence dimension into the sample dimension
+ activations_ = {
+ key: act.reshape(act.shape[0] * act.shape[1], act.shape[2])
+ for key, act in activations.items()
+ }
+ elif seq_mode == "seq_mean":
+ # Take the mean over the sequence dimension
+ activations_ = {
+ key: act.mean(dim=1) if act.ndim == 3 else act for key, act in activations.items()
+ }
+ else:
+ # Use the activations as they are
+ activations_ = activations
+
+ # put the labelled activations into one big matrix and filter them
+ # ============================================================
+
+ # filter activations for only the modules we want
+ if filter_modules is not None:
+ activations_ = {key: act for key, act in activations_.items() if filter_modules(key)}
+
+ # compute the labels and total component count
+ total_c: int = 0
+ labels: ComponentLabels = ComponentLabels(list())
+ for key, act in activations_.items():
+ c: int = act.shape[-1]
+ labels.extend([f"{key}:{i}" for i in range(c)])
+ total_c += c
+
+ # concat the activations
+ act_concat: ActivationsTensor = torch.cat([activations_[key] for key in activations_], dim=-1)
+
+ # filter dead components
+ filtered_components: FilteredActivations = filter_dead_components(
+ activations=act_concat,
+ labels=labels,
+ filter_dead_threshold=filter_dead_threshold,
+ )
+
+ assert filtered_components.n_alive + filtered_components.n_dead == total_c, (
+ f"({filtered_components.n_alive = }) + ({filtered_components.n_dead = }) != ({total_c = })"
+ )
+
+ return ProcessedActivations(
+ activations_raw=activations_,
+ activations=filtered_components.activations,
+ labels=filtered_components.labels,
+ dead_components_lst=filtered_components.dead_components_labels,
+ )
diff --git a/spd/clustering/ci_dt/VISUALIZATION_PLAN.md b/spd/clustering/ci_dt/VISUALIZATION_PLAN.md
new file mode 100644
index 000000000..9a486e484
--- /dev/null
+++ b/spd/clustering/ci_dt/VISUALIZATION_PLAN.md
@@ -0,0 +1,958 @@
+# CI Decision Tree Visualization Plan
+
+## Overview
+
+This document outlines the complete visualization strategy for causal importance decision trees, including static plots (matplotlib/PDF) and interactive visualizations (HTML/JS).
+
+---
+
+## Part 1: Static Plot Improvements
+
+### 1.1 Layer Metrics - Distribution Plots
+
+**Current:** Bar charts for mean AP, accuracy, balanced accuracy per layer
+
+**New:** Scatter plots with horizontal jitter showing full distribution per layer
+
+**Implementation:**
+- Replace `plot_layer_metrics()` bar charts with jittered scatter plots
+- For each layer, show all target component metrics as points with random horizontal jitter
+- Add mean/median line overlays
+- Better titles explaining metrics in terms of confusion matrix:
+
+```python
+# Accuracy title
+r"Accuracy per Target Component\n" +
+r"$\text{Accuracy} = \frac{TP + TN}{TP + TN + FP + FN}$"
+
+# Balanced Accuracy title
+r"Balanced Accuracy per Target Component\n" +
+r"$\text{Balanced Acc} = \frac{1}{2}\left(\frac{TP}{TP+FN} + \frac{TN}{TN+FP}\right)$"
+
+# Average Precision title
+r"Average Precision per Target Component\n" +
+r"$\text{AP} = \sum_n (R_n - R_{n-1}) P_n$" + "\n" +
+r"where $P_n = \frac{TP}{TP+FP}$ (precision), $R_n = \frac{TP}{TP+FN}$ (recall)"
+```
+
+**Rationale:** Shows full distribution of performance across targets, not just means. More informative about variance in tree quality.
+
+---
+
+### 1.2 AP vs Prevalence Plot
+
+**Current:** Simple scatter plot with alpha=0.6
+
+**New Improvements:**
+1. **Log x-axis** for prevalence (many rare components)
+2. **No marker edges** (set `edgecolors='none'`)
+3. **Color by tree depth** using viridis colormap
+4. **Enhanced title:**
+ ```python
+ r"Average Precision vs Component Prevalence\n" +
+ r"Prevalence = $\frac{n_\text{active samples}}{n_\text{total samples}}$"
+ ```
+
+**Additional:** Add heatmap version (see 1.3 below)
+
+---
+
+### 1.3 Tree Statistics - New Heatmaps
+
+**Current:** Has depth vs accuracy, leaf count vs accuracy, depth vs leaf count heatmaps
+
+**New Addition:** AP vs Prevalence heatmap
+
+**Implementation:**
+- Add new heatmap to `plot_tree_statistics()`:
+ - x-axis: prevalence bins (log scale, e.g. [0.001, 0.01, 0.1, 0.5, 1.0])
+ - y-axis: AP bins (linear, 0 to 1)
+ - color: log10(count + 1) as in existing heatmaps
+ - title:
+ ```python
+ r"Tree Performance vs Component Prevalence\n" +
+ r"AP = Average Precision, Prev = $\frac{n_\text{active}}{n_\text{total}}$"
+ ```
+
+**Rationale:** Complements the scatter plot; easier to see density patterns.
+
+---
+
+### 1.4 Global Title Improvements
+
+**Rules:**
+- Use LaTeX notation via raw strings: `r"$\text{TP}$"` not unicode "TP"
+- Use `\n` for line breaks in long titles
+- Explain abbreviations and formulas
+- Be explicit about what's plotted
+
+**Examples:**
+
+```python
+# Before
+"Covariance of components (all layers)"
+
+# After
+r"Component Coactivation Matrix\n" +
+r"$\text{Cov}(i,j) = \mathbb{E}[(A_i - \mu_i)(A_j - \mu_j)]$\n" +
+r"where $A_i$ is binary activation of component $i$"
+
+# Before
+"Tree depth"
+
+# After
+r"Distribution of Decision Tree Depths\n" +
+r"(Depth = longest path from root to leaf)"
+
+# Before
+"Activations (True)"
+
+# After
+r"True Binary Activations\n" +
+r"$A_{ij} = \mathbb{1}[\text{activation}_{ij} > \theta]$, $\theta = $" + f"{config.activation_threshold}"
+```
+
+---
+
+### 1.5 Activations Plot - Sorting and Diff
+
+**Current:** Two subplots (true, predicted) with no ordering
+
+**New Architecture:**
+
+```
+plot_activations_unsorted(...) # Original style with layer boundaries
+plot_activations_sorted(...) # New sorted version with diff
+```
+
+#### 1.5.1 Unsorted Version (Enhanced)
+
+**Changes:**
+- Add layer boundary lines and labels (borrow from `spd/clustering/plotting/activations.py:add_component_labeling()`)
+- Show module names on y-axis (component dimension)
+- Keep samples unsorted on x-axis
+- Two subplots: true, predicted
+
+**Implementation:**
+```python
+def plot_activations_unsorted(
+ layers_true: list[np.ndarray],
+ layers_pred: list[np.ndarray],
+ module_keys: list[str], # NEW: need module names
+) -> None:
+ """Show true and predicted activations with layer boundaries."""
+ # Concatenate
+ A_true = np.concatenate(layers_true, axis=1)
+ A_pred = np.concatenate(layers_pred, axis=1)
+
+ # Create component labels like "blocks.0.attn:0", "blocks.0.attn:1", ...
+ component_labels = []
+ for module_key, layer in zip(module_keys, layers_true):
+ n_components = layer.shape[1]
+ component_labels.extend([f"{module_key}:{i}" for i in range(n_components)])
+
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
+
+ # Plot
+ ax1.imshow(A_true.T, aspect="auto", interpolation="nearest", cmap="Blues")
+ ax2.imshow(A_pred.T, aspect="auto", interpolation="nearest", cmap="Reds")
+
+ # Add layer boundaries (adapt from spd/clustering/plotting/activations.py)
+ add_component_labeling(ax1, component_labels, axis='y')
+ add_component_labeling(ax2, component_labels, axis='y')
+
+ # Titles
+ ax1.set_title(r"True Binary Activations (Unsorted)\n" +
+ r"$A_{ij} = \mathbb{1}[\text{act}_{ij} > \theta]$")
+ ax2.set_title(r"Predicted Binary Activations (Unsorted)\n" +
+ r"$\hat{A}_{ij} = \mathbb{1}[P(A_{ij}=1) > 0.5]$")
+```
+
+#### 1.5.2 Sorted Version (New)
+
+**Sorting Strategy:**
+
+1. **Sample Sorting (Greedy):**
+ - Compute sample similarity matrix (cosine similarity on true activations)
+ - Greedy ordering: start from most central sample, add nearest neighbor iteratively
+ - Apply **same ordering** to predicted activations (so we can compare)
+ - Reference implementation already exists in `spd/clustering/plotting/activations.py:120-162`
+
+2. **Component Sorting (Greedy):**
+ - Compute component similarity matrix (cosine similarity on true activations)
+ - Same greedy algorithm but on columns instead of rows
+ - Apply same ordering to both true and predicted
+
+**Three Subplots:**
+1. True activations (samples sorted, components sorted)
+2. Predicted activations (same ordering)
+3. **Diff plot:** `predicted - true` with RdBu colormap
+ - Red = False Positive (predicted 1, true 0)
+ - Blue = False Negative (predicted 0, true 1)
+ - White = Correct
+
+**Implementation:**
+```python
+def plot_activations_sorted(
+ layers_true: list[np.ndarray],
+ layers_pred: list[np.ndarray],
+ module_keys: list[str],
+) -> None:
+ """Show sorted activations with diff plot."""
+ A_true = np.concatenate(layers_true, axis=1).astype(float)
+ A_pred = np.concatenate(layers_pred, axis=1).astype(float)
+
+ # Sort samples (greedy on rows)
+ sample_order = greedy_sort(A_true, axis=0) # Returns indices
+ A_true_sorted_samples = A_true[sample_order, :]
+ A_pred_sorted_samples = A_pred[sample_order, :]
+
+ # Sort components (greedy on columns)
+ component_order = greedy_sort(A_true_sorted_samples, axis=1)
+ A_true_sorted = A_true_sorted_samples[:, component_order]
+ A_pred_sorted = A_pred_sorted_samples[:, component_order]
+
+ # Diff
+ A_diff = A_pred_sorted - A_true_sorted # Range: [-1, 0, 1]
+
+ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12))
+
+ ax1.imshow(A_true_sorted.T, aspect="auto", interpolation="nearest", cmap="Blues")
+ ax1.set_title(r"True Activations (Sorted)\n" +
+ r"Samples and components sorted by similarity")
+
+ ax2.imshow(A_pred_sorted.T, aspect="auto", interpolation="nearest", cmap="Reds")
+ ax2.set_title(r"Predicted Activations (Sorted)\n" +
+ r"Same ordering as true activations")
+
+ # Diff plot with centered colormap
+ im3 = ax3.imshow(A_diff.T, aspect="auto", interpolation="nearest",
+ cmap="RdBu_r", vmin=-1, vmax=1)
+ ax3.set_title(r"Prediction Errors (Predicted - True)\n" +
+ r"Red = FP ($\hat{A}=1, A=0$), Blue = FN ($\hat{A}=0, A=1$), White = Correct")
+ plt.colorbar(im3, ax=ax3)
+
+ fig.tight_layout()
+```
+
+**Helper Function:**
+```python
+def greedy_sort(A: np.ndarray, axis: int) -> np.ndarray:
+ """Greedy ordering by similarity.
+
+ Args:
+ A: 2D array
+ axis: 0 for rows, 1 for columns
+
+ Returns:
+ Indices in sorted order
+ """
+ # Transpose if sorting columns
+ if axis == 1:
+ A = A.T
+
+ # Compute cosine similarity
+ norms = np.linalg.norm(A, axis=1, keepdims=True)
+ norms = np.where(norms > 1e-8, norms, 1.0)
+ A_normalized = A / norms
+ similarity = A_normalized @ A_normalized.T
+
+ # Greedy ordering (same as in activations.py)
+ n = similarity.shape[0]
+ avg_sim = similarity.mean(axis=1)
+ start_idx = int(np.argmax(avg_sim))
+
+ ordered = [start_idx]
+ remaining = set(range(n))
+ remaining.remove(start_idx)
+ current = start_idx
+
+ while remaining:
+ sims = [(i, similarity[current, i]) for i in remaining]
+ best_idx = max(sims, key=lambda x: x[1])[0]
+ ordered.append(best_idx)
+ remaining.remove(best_idx)
+ current = best_idx
+
+ return np.array(ordered)
+```
+
+---
+
+### 1.6 Covariance Matrix - Sorted Version
+
+**Current:** Single unsorted covariance plot
+
+**New:** Two versions
+1. **Unsorted** with layer boundaries (like activations unsorted)
+2. **Sorted** using same component ordering from activations
+
+**Implementation:**
+```python
+def plot_covariance_unsorted(
+ layers_true: list[np.ndarray],
+ module_keys: list[str],
+) -> None:
+ """Covariance with layer boundaries."""
+ A = np.concatenate(layers_true, axis=1).astype(float)
+ C = np.cov(A, rowvar=False)
+
+ component_labels = [...] # Same as activations
+
+ fig, ax = plt.subplots(figsize=(8, 8))
+ im = ax.imshow(C, aspect="auto", interpolation="nearest", cmap="RdBu_r")
+
+ # Add layer boundaries on both axes
+ add_component_labeling(ax, component_labels, axis='x')
+ add_component_labeling(ax, component_labels, axis='y')
+
+ ax.set_title(r"Component Covariance Matrix (Unsorted)\n" +
+ r"$\text{Cov}(i,j) = \mathbb{E}[(A_i - \mu_i)(A_j - \mu_j)]$")
+ plt.colorbar(im)
+
+def plot_covariance_sorted(
+ layers_true: list[np.ndarray],
+ component_order: np.ndarray, # Pass in from activations
+) -> None:
+ """Covariance with sorted components."""
+ A = np.concatenate(layers_true, axis=1).astype(float)
+ A_sorted = A[:, component_order]
+ C_sorted = np.cov(A_sorted, rowvar=False)
+
+ fig, ax = plt.subplots(figsize=(8, 8))
+ im = ax.imshow(C_sorted, aspect="auto", interpolation="nearest", cmap="RdBu_r")
+ ax.set_title(r"Component Covariance Matrix (Sorted)\n" +
+ r"Components ordered by similarity")
+ plt.colorbar(im)
+```
+
+---
+
+## Part 2: Interactive Tree Visualization (HTML/JS)
+
+### 2.1 High-Level Architecture
+
+**Export:** Python creates one JSON per tree → **Display:** HTML/JS loads JSON and renders visualizations
+
+### 2.2 Data to Export (per tree)
+
+#### Tree Metadata
+```json
+{
+ "layer_index": 1,
+ "target_component_idx": 5,
+ "module_key": "blocks.0.mlp.W_gate",
+ "metrics": {
+ "ap": 0.85,
+ "accuracy": 0.92,
+ "balanced_accuracy": 0.88,
+ "prevalence": 0.023,
+ "n_samples": 200,
+ "n_positive": 46,
+ "n_negative": 154,
+ "confusion_matrix": {
+ "TP": 40,
+ "TN": 144,
+ "FP": 10,
+ "FN": 6
+ }
+ },
+ "tree_stats": {
+ "max_depth": 5,
+ "n_leaves": 12,
+ "n_nodes": 23
+ }
+}
+```
+
+#### Tree Structure
+```json
+{
+ "structure": {
+ "children_left": [1, -1, 3, 4, -1, ...],
+ "children_right": [2, -1, 5, 6, -1, ...],
+ "feature": [7, -2, 12, 3, -2, ...],
+ "threshold": [0.5, -2, 0.5, 0.5, -2, ...],
+ "value": [[30, 20], [5, 15], ...], // [n_negative, n_positive] per node
+ "n_node_samples": [200, 50, 150, ...]
+ },
+ "feature_names": [
+ "blocks.0.attn.W_Q:3 (prev=0.15, AP=0.82)",
+ "blocks.0.attn.W_Q:17 (prev=0.08, AP=0.91)",
+ "blocks.0.mlp.W_in:5 (prev=0.23, AP=0.76)",
+ ...
+ ]
+}
+```
+
+#### Activation Histograms
+```json
+{
+ "true_activations": {
+ "histogram": {
+ "bins": [0.0, 0.01, 0.02, ...], // Bin edges
+ "counts": [120, 45, 23, ...]
+ }
+ },
+ "predicted_probabilities": {
+ "histogram": {
+ "bins": [0.0, 0.1, 0.2, ...],
+ "counts": [80, 30, 40, ...]
+ }
+ }
+}
+```
+
+#### Token-Level Samples
+
+**Sample Selection Strategies:**
+
+1. **Stratified by confusion matrix** (recommended):
+ - 2 True Positives (high confidence, low confidence)
+ - 2 True Negatives (high confidence, low confidence)
+ - 2 False Positives (worst errors)
+ - 2 False Negatives (worst errors)
+ - Total: 8 samples
+
+2. **Fallback if categories insufficient:**
+ - Random samples from each category
+ - Fill missing categories with "N/A"
+
+**Data Structure:**
+```json
+{
+ "samples": [
+ {
+ "sample_idx": 42,
+ "category": "TP", // TP, TN, FP, or FN
+ "confidence": 0.95, // abs(predicted_prob - 0.5)
+ "tokens": ["The", "cat", "sat", "on", "the", "mat"],
+ "true_activations": [0.0, 0.0, 0.82, 0.91, 0.0, 0.0], // Continuous values
+ "predicted_probabilities": [0.05, 0.1, 0.88, 0.94, 0.02, 0.01],
+ "true_binary": [0, 0, 1, 1, 0, 0],
+ "predicted_binary": [0, 0, 1, 1, 0, 0],
+ "max_true_pos": 2, // Index of max activation in true
+ "max_pred_pos": 3 // Index of max activation in predicted
+ },
+ // ... 7 more samples
+ ]
+}
+```
+
+#### Input Features Summary
+```json
+{
+ "input_features_by_module": {
+ "blocks.0.attn.W_Q": [3, 17, 42], // Component indices used in tree
+ "blocks.0.mlp.W_in": [5, 12]
+ },
+ "n_input_features_total": 5,
+ "n_components_total": 256 // All components in layer 0
+}
+```
+
+### 2.3 Python Export Implementation
+
+**New File:** `spd/clustering/ci_dt/export.py`
+
+```python
+"""Export decision tree data to JSON for interactive visualization."""
+
+from pathlib import Path
+from typing import Any
+import json
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier
+
+from spd.clustering.ci_dt.core import LayerModel
+
+
+def export_tree_json(
+ tree: DecisionTreeClassifier,
+ layer_idx: int,
+ target_idx: int,
+ module_key: str,
+ X: np.ndarray, # Input features (all layer 0 components)
+ Y_true: np.ndarray, # True binary activations for this target
+ Y_prob: np.ndarray, # Predicted probabilities
+ tokens_batch: list[list[str]], # Decoded tokens for all samples
+ feature_names: list[str],
+ output_path: Path,
+) -> None:
+ """Export single tree to JSON."""
+
+ # 1. Compute metrics
+ Y_pred = (Y_prob >= 0.5).astype(int)
+ metrics = compute_tree_metrics(Y_true, Y_pred, Y_prob)
+
+ # 2. Serialize tree structure
+ tree_dict = serialize_tree(tree, feature_names)
+
+ # 3. Create activation histograms
+ histograms = create_histograms(Y_true, Y_prob)
+
+ # 4. Select and export token samples
+ samples = select_token_samples(
+ Y_true, Y_prob, Y_pred, tokens_batch
+ )
+
+ # 5. Identify which input features are used
+ input_features = extract_input_features(tree, module_key)
+
+ # 6. Combine into single JSON
+ data = {
+ "metadata": {
+ "layer_index": layer_idx,
+ "target_component_idx": target_idx,
+ "module_key": module_key,
+ "metrics": metrics,
+ "tree_stats": {
+ "max_depth": int(tree.tree_.max_depth),
+ "n_leaves": int(tree.tree_.n_leaves),
+ "n_nodes": int(tree.tree_.node_count),
+ }
+ },
+ "tree": tree_dict,
+ "histograms": histograms,
+ "samples": samples,
+ "input_features": input_features,
+ }
+
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_path, 'w') as f:
+ json.dump(data, f, indent=2)
+
+
+def export_all_trees(
+ models: list[LayerModel],
+ layers_true: list[np.ndarray],
+ per_layer_stats: list[dict],
+ component_acts: dict[str, Tensor], # Original activations (continuous)
+ batch_data: dict, # From dataloader (has token IDs)
+ tokenizer, # HuggingFace tokenizer
+ feature_names: list[list[str]],
+ output_dir: Path,
+) -> None:
+ """Export all trees and create index."""
+
+ # Decode all tokens once
+ tokens_batch = decode_all_tokens(batch_data, tokenizer)
+
+ # Export each tree
+ tree_index = []
+ for layer_idx, model in enumerate(models):
+ module_key = list(component_acts.keys())[layer_idx]
+ X = layers_true[0] # Always predict from layer 0
+ Y_true = layers_true[layer_idx + 1] # Target layer
+
+ for target_idx, estimator in enumerate(model.model.estimators_):
+ # Get predictions for this target
+ Y_prob = estimator.predict_proba(X)[:, 1]
+
+ # Get feature names for this layer's inputs
+ feat_names = feature_names[layer_idx] if feature_names else None
+
+ # Export
+ tree_path = output_dir / "data" / f"tree_{layer_idx}_{target_idx}.json"
+ export_tree_json(
+ tree=estimator,
+ layer_idx=layer_idx,
+ target_idx=target_idx,
+ module_key=module_key,
+ X=X,
+ Y_true=Y_true[:, target_idx],
+ Y_prob=Y_prob,
+ tokens_batch=tokens_batch,
+ feature_names=feat_names,
+ output_path=tree_path,
+ )
+
+ # Add to index
+ tree_index.append({
+ "layer": layer_idx,
+ "target": target_idx,
+ "module_key": module_key,
+ "ap": per_layer_stats[layer_idx]["ap"][target_idx],
+ "file": f"data/tree_{layer_idx}_{target_idx}.json"
+ })
+
+ # Write index
+ index_path = output_dir / "data" / "index.json"
+ with open(index_path, 'w') as f:
+ json.dump(tree_index, f, indent=2)
+
+
+def select_token_samples(
+ Y_true: np.ndarray,
+ Y_prob: np.ndarray,
+ Y_pred: np.ndarray,
+ tokens_batch: list[list[str]],
+ n_per_category: int = 2,
+) -> list[dict]:
+ """Select stratified samples from confusion matrix categories."""
+
+ # Categorize samples
+ TP_mask = (Y_true == 1) & (Y_pred == 1)
+ TN_mask = (Y_true == 0) & (Y_pred == 0)
+ FP_mask = (Y_true == 0) & (Y_pred == 1)
+ FN_mask = (Y_true == 1) & (Y_pred == 0)
+
+ # Confidence = distance from decision boundary
+ confidence = np.abs(Y_prob - 0.5)
+
+ samples = []
+
+ for category, mask in [("TP", TP_mask), ("TN", TN_mask),
+ ("FP", FP_mask), ("FN", FN_mask)]:
+ indices = np.where(mask)[0]
+ if len(indices) == 0:
+ continue
+
+ # Sort by confidence
+ sorted_indices = indices[np.argsort(confidence[indices])[::-1]]
+
+ # Take high and low confidence
+ n_take = min(n_per_category, len(sorted_indices))
+ if n_take == 2:
+ selected = [sorted_indices[0], sorted_indices[-1]] # High and low
+ else:
+ selected = sorted_indices[:n_take]
+
+ for idx in selected:
+ samples.append({
+ "sample_idx": int(idx),
+ "category": category,
+ "confidence": float(confidence[idx]),
+ "tokens": tokens_batch[idx],
+ "true_activations": Y_true[idx].tolist(), # Would need continuous version
+ "predicted_probabilities": [float(Y_prob[idx])] * len(tokens_batch[idx]),
+ "true_binary": int(Y_true[idx]),
+ "predicted_binary": int(Y_pred[idx]),
+ })
+
+ return samples
+```
+
+**Integration in `run.py`:**
+
+```python
+# After computing metrics (line ~121)
+from spd.clustering.ci_dt.export import export_all_trees
+
+export_output_dir = Path("./ci_dt_vis")
+export_all_trees(
+ models=models,
+ layers_true=layers_true,
+ per_layer_stats=per_layer_stats,
+ component_acts=component_acts_concat,
+ batch_data=next(iter(dataloader)), # Need to save earlier
+ tokenizer=cfg.task_config.tokenizer,
+ feature_names=feature_names,
+ output_dir=export_output_dir,
+)
+print(f"Exported tree visualizations to {export_output_dir}")
+```
+
+### 2.4 HTML/JS Viewer Implementation
+
+**File Structure:**
+```
+ci_dt_vis/
+├── index.html # Main viewer
+├── data/
+│ ├── index.json # Tree index
+│ ├── tree_1_0.json # Individual trees
+│ ├── tree_1_1.json
+│ └── ...
+├── js/
+│ ├── viewer.js # Main app logic
+│ ├── tree-display.js # Tree visualization
+│ ├── token-display.js # Token highlighting
+│ └── sparklines.js # Histograms
+└── css/
+ └── style.css
+```
+
+**`index.html`:**
+```html
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>CI Decision Tree Viewer</title>
+  <link rel="stylesheet" href="css/style.css">
+</head>
+<body>
+  <header>
+    <h2>Select Tree</h2>
+    <select id="layer-select"></select>
+    <select id="target-select"></select>
+    <div id="metrics"></div>
+  </header>
+  <main>
+    <section>
+      <h3>Decision Tree Structure</h3>
+      <div id="tree-svg"></div>
+    </section>
+    <section>
+      <h3>Activation Distributions</h3>
+      <canvas id="hist-canvas" width="600" height="200"></canvas>
+    </section>
+    <section>
+      <div id="samples-container"></div>
+    </section>
+  </main>
+  <script src="js/sparklines.js"></script>
+  <script src="js/tree-display.js"></script>
+  <script src="js/token-display.js"></script>
+  <script src="js/viewer.js"></script>
+</body>
+</html>
+```
+
+**`js/viewer.js`:**
+```javascript
+// Main viewer logic
+let treeIndex = [];
+let currentTree = null;
+
+async function init() {
+ // Load tree index
+ const response = await fetch('data/index.json');
+ treeIndex = await response.json();
+
+ // Populate layer selector
+ const layers = [...new Set(treeIndex.map(t => t.layer))];
+ const layerSelect = document.getElementById('layer-select');
+ layers.forEach(layer => {
+ const option = document.createElement('option');
+ option.value = layer;
+ option.text = `Layer ${layer}`;
+ layerSelect.appendChild(option);
+ });
+
+ // Event listeners
+ layerSelect.addEventListener('change', onLayerChange);
+ document.getElementById('target-select').addEventListener('change', onTargetChange);
+
+ // Load first tree
+ if (treeIndex.length > 0) {
+ await loadTree(treeIndex[0].layer, treeIndex[0].target);
+ }
+}
+
+function onLayerChange() {
+ const layer = parseInt(document.getElementById('layer-select').value);
+ const trees = treeIndex.filter(t => t.layer === layer);
+
+ const targetSelect = document.getElementById('target-select');
+ targetSelect.innerHTML = '';
+ trees.forEach(tree => {
+ const option = document.createElement('option');
+ option.value = tree.target;
+ option.text = `Target ${tree.target} (AP=${tree.ap.toFixed(3)})`;
+ targetSelect.appendChild(option);
+ });
+
+ if (trees.length > 0) {
+ loadTree(layer, trees[0].target);
+ }
+}
+
+async function loadTree(layer, target) {
+ const response = await fetch(`data/tree_${layer}_${target}.json`);
+ currentTree = await response.json();
+
+ displayMetrics(currentTree.metadata);
+ displayHistograms(currentTree.histograms);
+ displayTree(currentTree.tree);
+ displayTokenSamples(currentTree.samples);
+}
+
+function displayMetrics(metadata) {
+ const m = metadata.metrics;
+ const cm = m.confusion_matrix;
+
+  const html = `
+    <table class="metrics-table">
+      <tr><td>AP:</td><td>${m.ap.toFixed(3)}</td></tr>
+      <tr><td>Accuracy:</td><td>${m.accuracy.toFixed(3)}</td></tr>
+      <tr><td>Balanced Acc:</td><td>${m.balanced_accuracy.toFixed(3)}</td></tr>
+      <tr><td>Prevalence:</td><td>${m.prevalence.toFixed(4)}</td></tr>
+      <tr><th colspan="2">Confusion Matrix:</th></tr>
+      <tr><td>TP:</td><td>${cm.TP}</td></tr>
+      <tr><td>TN:</td><td>${cm.TN}</td></tr>
+      <tr><td>FP:</td><td>${cm.FP}</td></tr>
+      <tr><td>FN:</td><td>${cm.FN}</td></tr>
+    </table>
+  `;
+ document.getElementById('metrics').innerHTML = html;
+}
+
+function displayHistograms(histograms) {
+ // Use sparklines.js to render dual histograms
+ const canvas = document.getElementById('hist-canvas');
+ const ctx = canvas.getContext('2d');
+
+ // Draw true activations (blue) and predicted (red) overlaid
+ drawHistogram(ctx, histograms.true_activations, 'blue', 0);
+ drawHistogram(ctx, histograms.predicted_probabilities, 'red', 0);
+}
+
+function displayTree(treeData) {
+ // Use tree-display.js to render D3 tree
+ renderDecisionTree('tree-svg', treeData);
+}
+
+function displayTokenSamples(samples) {
+ const container = document.getElementById('samples-container');
+ container.innerHTML = '';
+
+ samples.forEach(sample => {
+ const div = document.createElement('div');
+ div.className = `sample sample-${sample.category}`;
+    div.innerHTML = `
+      <div class="sample-header">${sample.category} (confidence: ${sample.confidence.toFixed(3)})</div>
+      <div class="sample-tokens">${renderTokens(sample)}</div>
+    `;
+ container.appendChild(div);
+ });
+}
+
+function renderTokens(sample) {
+ // Create dual-color token visualization
+ // Blue background = true activation, Red = predicted
+ return sample.tokens.map((token, i) => {
+ const trueVal = sample.true_activations[i];
+ const predVal = sample.predicted_probabilities[i];
+
+ // Dual gradient or side-by-side bars
+    return `
+      <span class="token"
+            title="true: ${trueVal.toFixed(2)}, pred: ${predVal.toFixed(2)}"
+            style="background: linear-gradient(to top, rgba(0,0,255,${trueVal * 0.5}) 50%, rgba(255,0,0,${predVal * 0.5}) 50%)">${token}</span>
+    `;
+ }).join(' ');
+}
+
+// Initialize on load
+init();
+```
+
+**`js/tree-display.js`:**
+```javascript
+function renderDecisionTree(containerId, treeData) {
+ const container = document.getElementById(containerId);
+ container.innerHTML = '';
+
+ // Simple text-based tree for now
+ // Can upgrade to D3.js interactive tree later
+
+ const textTree = buildTextTree(treeData.structure, treeData.feature_names);
+ const pre = document.createElement('pre');
+ pre.textContent = textTree;
+ container.appendChild(pre);
+}
+
+function buildTextTree(structure, featureNames, nodeIdx = 0, depth = 0) {
+ const indent = ' '.repeat(depth);
+
+ if (structure.children_left[nodeIdx] === -1) {
+ // Leaf node
+ const value = structure.value[nodeIdx];
+ const prediction = value[1] > value[0] ? 'ACTIVE' : 'INACTIVE';
+ return `${indent}→ ${prediction} (${value[0]}/${value[1]})\n`;
+ }
+
+ // Internal node
+ const feature = structure.feature[nodeIdx];
+ const threshold = structure.threshold[nodeIdx];
+ const featureName = featureNames[feature];
+
+ let result = `${indent}${featureName} <= ${threshold}?\n`;
+ result += buildTextTree(structure, featureNames, structure.children_left[nodeIdx], depth + 1);
+ result += `${indent}else:\n`;
+ result += buildTextTree(structure, featureNames, structure.children_right[nodeIdx], depth + 1);
+
+ return result;
+}
+```
+
+---
+
+## Implementation Checklist
+
+### Phase 1: Static Plot Improvements
+- [ ] Update `plot_layer_metrics()`: scatter with jitter instead of bars
+- [ ] Add LaTeX titles to all metrics plots (TP/FP/TN/FN formulas)
+- [ ] Update AP vs prevalence: log scale, no edges, color by depth
+- [ ] Add AP vs prevalence heatmap to `plot_tree_statistics()`
+- [ ] Implement `greedy_sort()` helper function
+- [ ] Create `plot_activations_unsorted()` with layer boundaries
+- [ ] Create `plot_activations_sorted()` with diff plot
+- [ ] Create `plot_covariance_unsorted()` with layer boundaries
+- [ ] Create `plot_covariance_sorted()`
+- [ ] Update all plot titles with LaTeX and newlines
+- [ ] Test with existing `run.py` workflow
+
+### Phase 2: Data Export
+- [ ] Create `spd/clustering/ci_dt/export.py`
+- [ ] Implement `export_tree_json()`
+- [ ] Implement `export_all_trees()`
+- [ ] Implement `select_token_samples()` with stratified sampling
+- [ ] Implement `serialize_tree()`, `compute_tree_metrics()`, etc.
+- [ ] Add export call to `run.py`
+- [ ] Test JSON output schema
+
+### Phase 3: Interactive Viewer
+- [ ] Create `ci_dt_vis/` directory structure
+- [ ] Implement `index.html` layout
+- [ ] Implement `viewer.js` tree selection and loading
+- [ ] Implement `tree-display.js` text rendering (D3 optional)
+- [ ] Implement `token-display.js` dual-color visualization
+- [ ] Implement histogram rendering (reuse or adapt sparklines.js)
+- [ ] Add CSS styling
+- [ ] Test end-to-end workflow
+
+### Phase 4: Documentation
+- [ ] Update `run.py` docstrings
+- [ ] Add README in `ci_dt_vis/` explaining viewer usage
+- [ ] Document JSON schema
+- [ ] Add example screenshots
+
+---
+
+## Open Questions / Design Decisions
+
+1. **Token samples per tree:** 8 total (2 per category) seems reasonable. Too many?
+2. **Histogram bins:** 50 bins for activations, 20 for probabilities?
+3. **D3.js tree or text?** Start with text, add D3 if needed
+4. **Component sorting:** Should we also show a version with components sorted by layer, then by similarity within layer?
+5. **File size:** Each tree JSON might be 50-200KB. With 1000s of trees, total size could be 50-200MB. Acceptable?
+6. **Continuous activations for tokens:** Currently we only have binary. Need to save continuous pre-threshold values?
+
+---
+
+## Success Metrics
+
+**Static Plots:**
+- Plots are immediately interpretable without prior knowledge
+- Titles explain abbreviations and formulas
+- Layer boundaries visible in unsorted plots
+- Sorting reveals structure (coactivation patterns)
+- Diff plot clearly shows FP/FN errors
+
+**Interactive Viewer:**
+- Can load and view any tree in <1 second
+- Token examples clearly show where component activates
+- Confusion matrix category examples are informative
+- Tree structure is readable
+- Histograms show activation distributions clearly
diff --git a/spd/clustering/ci_dt/__init__.py b/spd/clustering/ci_dt/__init__.py
new file mode 100644
index 000000000..3f8e91e98
--- /dev/null
+++ b/spd/clustering/ci_dt/__init__.py
@@ -0,0 +1,31 @@
+"""Causal importance decision tree package."""
+
+from spd.clustering.ci_dt.config import CIDTConfig
+from spd.clustering.ci_dt.core import (
+ LayerModel,
+ build_xy,
+ concat_cols,
+ extract_prob_class_1,
+ get_estimator_for,
+ layer_metrics,
+ predict_all,
+ predict_k,
+ proba_for_layer,
+ train_trees,
+)
+
+__all__ = [
+ # Config
+ "CIDTConfig",
+ # Core
+ "LayerModel",
+ "concat_cols",
+ "build_xy",
+ "train_trees",
+ "extract_prob_class_1",
+ "predict_k",
+ "predict_all",
+ "layer_metrics",
+ "proba_for_layer",
+ "get_estimator_for",
+]
diff --git a/spd/clustering/ci_dt/attn.py b/spd/clustering/ci_dt/attn.py
new file mode 100644
index 000000000..82f1f2736
--- /dev/null
+++ b/spd/clustering/ci_dt/attn.py
@@ -0,0 +1,426 @@
+# %%
+"""Attention pattern visualization for CI decision tree analysis."""
+
+from typing import Any
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from jaxtyping import Float, Int
+from torch import Tensor
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from spd.clustering.ci_dt.config import CIDTConfig
+from spd.configs import Config
+from spd.data import DatasetConfig, create_data_loader
+from spd.experiments.lm.configs import LMTaskConfig
+from spd.models.component_model import ComponentModel, SPDRunInfo
+
+# magic autoreload
+# %load_ext autoreload
+# %autoreload 2
+
+# %%
+# ----------------------- configuration -----------------------
+
# Analysis settings: which SPD run to load and how much data to sample.
config = CIDTConfig(
    wandb_run_path="wandb:goodfire/spd/runs/lxs77xye",
    batch_size=16,
    n_batches=4,  # total samples processed = batch_size * n_batches
    activation_threshold=0.01,
    max_depth=8,
    random_state=42,
)
# Run inference on GPU when available, otherwise fall back to CPU.
device: str = "cuda" if torch.cuda.is_available() else "cpu"
+
+# %%
+# ----------------------- load model -----------------------
+
# Resolve the WandB run path to a checkpoint and load the component model.
spd_run: SPDRunInfo = SPDRunInfo.from_path(config.wandb_run_path)
model: ComponentModel = ComponentModel.from_pretrained(spd_run.checkpoint_path)
model.to(device)
cfg: Config = spd_run.config  # training-time config saved alongside the run

print(f"Loaded model from {config.wandb_run_path}")
+
+# %%
+# ----------------------- load dataset -----------------------
+
# Create LM dataset and dataloader.
# The loaded run must be a language-model task; its settings below are
# reused so the data matches what the model saw in training.
assert isinstance(cfg.task_config, LMTaskConfig)
pretrained_model_name = cfg.pretrained_model_name
assert pretrained_model_name is not None

# Mirror the training-data settings from the run's config.
dataset_config = DatasetConfig(
    name=cfg.task_config.dataset_name,
    hf_tokenizer_path=pretrained_model_name,
    split=cfg.task_config.train_data_split,
    n_ctx=cfg.task_config.max_seq_len,
    column_name=cfg.task_config.column_name,
    is_tokenized=False,  # raw text; tokenization happens inside the loader
    streaming=False,
    seed=0,
)
# Single-process loading: rank 0 of world size 1 (no DDP).
dataloader, _ = create_data_loader(
    dataset_config=dataset_config,
    batch_size=config.batch_size,
    buffer_size=cfg.task_config.buffer_size,
    global_seed=cfg.seed,
    ddp_rank=0,
    ddp_world_size=1,
)
print(f"Created LM dataset with {cfg.task_config.dataset_name}")
+
+# %%
+# ----------------------- extract attention patterns -----------------------
+
+
def extract_attention_patterns_multibatch(
    model: ComponentModel,
    device: torch.device | str,
    dataloader: DataLoader[Any],
    n_batches: int,
) -> dict[str, Float[Tensor, "total_samples n_heads seq_len seq_len"]]:
    """Extract attention patterns over multiple batches.

    Args:
        model: ComponentModel containing the transformer
        device: Device to run inference on
        dataloader: DataLoader to get batches from
        n_batches: Number of batches to process

    Returns:
        Dictionary mapping layer names to attention patterns (on CPU)
        Format: {layer_name: tensor of shape [total_samples, n_heads, seq_len, seq_len]}
    """
    print(f"Extracting attention patterns for {n_batches} batches...")
    all_attention_patterns: list[dict[str, Tensor]] = []

    # BUGFIX: create the iterator ONCE. The previous code called
    # next(iter(dataloader)) inside the loop, which restarts iteration and
    # yields the SAME first batch for every one of the n_batches passes.
    data_iter = iter(dataloader)

    for _batch_idx in tqdm(range(n_batches), desc="Batches", total=n_batches):
        batch_data = next(data_iter)
        input_ids: Int[Tensor, "batch seq_len"] = batch_data["input_ids"].to(device)

        # Forward pass with attention weights requested; no grads needed.
        with torch.no_grad():
            outputs = model.target_model(input_ids, output_attentions=True)

        # outputs.attentions is a tuple of tensors, one per layer,
        # each of shape [batch, n_heads, seq_len, seq_len].
        batch_attention: dict[str, Tensor] = {}
        if hasattr(outputs, "attentions") and outputs.attentions is not None:
            for layer_idx, attn_weights in enumerate(outputs.attentions):
                layer_name = f"layer_{layer_idx}"
                # Move to CPU immediately to keep GPU memory bounded.
                batch_attention[layer_name] = attn_weights.cpu()

        all_attention_patterns.append(batch_attention)

    # Concatenate all batches on CPU along the sample dimension.
    print("Concatenating batches...")
    layer_names: list[str] = list(all_attention_patterns[0].keys())
    attention_patterns_concat: dict[str, Tensor] = {
        layer_name: torch.cat([batch[layer_name] for batch in all_attention_patterns], dim=0)
        for layer_name in layer_names
    }

    print(f"Extracted attention patterns for {len(layer_names)} layers")
    return attention_patterns_concat
+
+
# Run the multi-batch extraction defined above; result lives on CPU.
attention_patterns: dict[str, Float[Tensor, "total_samples n_heads seq_len seq_len"]] = (
    extract_attention_patterns_multibatch(
        model=model,
        device=device,
        dataloader=dataloader,
        n_batches=config.n_batches,
    )
)

# Sanity check: report per-layer tensor shapes
# (expected [total_samples, n_heads, seq_len, seq_len]).
print("\nAttention pattern shapes:")
for layer_name, attn in attention_patterns.items():
    print(f"  {layer_name}: {attn.shape}")
+
+# %%
+# ----------------------- compute attention statistics -----------------------
+
+
def compute_attention_stats(
    attention_patterns: dict[str, Float[Tensor, "samples n_heads seq_len seq_len"]],
) -> dict[str, dict[str, Float[np.ndarray, "..."]]]:
    """Summarize attention patterns with a few per-layer statistics.

    Args:
        attention_patterns: Dictionary of attention patterns per layer

    Returns:
        Per-layer dictionary containing:
        - mean_pattern: pattern averaged over samples [n_heads, seq_len, seq_len]
        - entropy: Shannon entropy of each query's distribution [samples, n_heads, seq_len]
        - max_attention: peak weight per query position [samples, n_heads, seq_len]
        - sparsity: fraction of weights below 0.01 [samples, n_heads]
    """
    epsilon = 1e-10  # keeps log() finite where weights are exactly zero
    stats: dict[str, dict[str, np.ndarray]] = {}

    for layer_name, attn in attention_patterns.items():
        attn_np: np.ndarray = attn.numpy()
        shifted = attn_np + epsilon

        stats[layer_name] = {
            # Average over the sample axis only.
            "mean_pattern": attn_np.mean(axis=0),
            # -sum(p * log p) over the key axis, per query position.
            "entropy": -(shifted * np.log(shifted)).sum(axis=-1),
            # Strongest single attention weight per query position.
            "max_attention": attn_np.max(axis=-1),
            # Fraction of near-zero weights over both sequence axes.
            "sparsity": (attn_np < 0.01).mean(axis=(2, 3)),
        }

    return stats


attention_stats = compute_attention_stats(attention_patterns)
print("Computed attention statistics")
+
+# %%
+# ----------------------- plot: average attention patterns per layer -----------------------
+
+
def plot_average_attention_per_layer(
    attention_patterns: dict[str, Float[Tensor, "samples n_heads seq_len seq_len"]],
    max_layers: int | None = None,
) -> None:
    """Draw one heatmap per layer of attention averaged over samples and heads.

    Args:
        attention_patterns: Dictionary of attention patterns per layer
        max_layers: If given, only the first max_layers layers (sorted by name) are shown
    """
    selected = sorted(attention_patterns.keys())
    if max_layers is not None:
        selected = selected[:max_layers]

    n_layers = len(selected)
    n_cols = min(4, n_layers)
    n_rows = (n_layers + n_cols - 1) // n_cols

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
    if n_layers == 1:
        # subplots() returns a bare Axes for a 1x1 grid; normalize to an array.
        axes = np.array([axes])
    axes = axes.flatten()

    for ax, layer_name in zip(axes[:n_layers], selected):
        # Collapse sample and head axes -> [seq_len, seq_len].
        avg_attn = attention_patterns[layer_name].numpy().mean(axis=(0, 1))
        im = ax.imshow(avg_attn, cmap="viridis", aspect="auto")
        ax.set_title(f"{layer_name}\n(avg over samples & heads)")
        ax.set_xlabel("Key position")
        ax.set_ylabel("Query position")
        plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

    # Blank out any leftover grid cells.
    for spare in axes[n_layers:]:
        spare.axis("off")

    fig.tight_layout()


plot_average_attention_per_layer(attention_patterns, max_layers=None)
print("Average attention per layer plots generated.")
+
+# %%
+# ----------------------- plot: per-head attention for selected layers -----------------------
+
+
def plot_per_head_attention(
    attention_patterns: dict[str, Float[Tensor, "samples n_heads seq_len seq_len"]],
    layer_names: list[str] | None = None,
) -> None:
    """Draw one sample-averaged heatmap per head for each requested layer.

    Args:
        attention_patterns: Dictionary of attention patterns per layer
        layer_names: Layers to visualize; defaults to the first layer only
    """
    if layer_names is None:
        layer_names = [sorted(attention_patterns.keys())[0]]

    for layer_name in layer_names:
        if layer_name not in attention_patterns:
            print(f"Warning: {layer_name} not found in attention patterns")
            continue

        # Average over samples only -> [n_heads, seq_len, seq_len].
        avg_attn = attention_patterns[layer_name].numpy().mean(axis=0)
        n_heads = avg_attn.shape[0]

        n_cols = min(4, n_heads)
        n_rows = (n_heads + n_cols - 1) // n_cols

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
        if n_heads == 1:
            # Normalize the bare Axes returned for a 1x1 grid.
            axes = np.array([axes])
        axes = axes.flatten()

        for head_idx, ax in enumerate(axes[:n_heads]):
            im = ax.imshow(avg_attn[head_idx], cmap="viridis", aspect="auto")
            ax.set_title(f"Head {head_idx}")
            ax.set_xlabel("Key position")
            ax.set_ylabel("Query position")
            plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

        # Blank out any leftover grid cells.
        for spare in axes[n_heads:]:
            spare.axis("off")

        fig.suptitle(f"{layer_name} - Per-Head Attention Patterns", fontsize=14, y=1.00)
        fig.tight_layout()


# Plot first and last layers
all_layer_names = sorted(attention_patterns.keys())
layers_to_plot = [all_layer_names[0], all_layer_names[-1]]
plot_per_head_attention(attention_patterns, layer_names=layers_to_plot)
print(f"Per-head attention plots generated for layers: {layers_to_plot}")
+
+# %%
+# ----------------------- plot: attention entropy across layers -----------------------
+
+
def plot_attention_entropy(
    attention_stats: dict[str, dict[str, np.ndarray]],
) -> None:
    """Line plot of mean attention entropy per layer.

    Higher entropy means attention mass is spread more uniformly over keys.

    Args:
        attention_stats: Per-layer statistics from compute_attention_stats;
            the "entropy" entry has shape [samples, n_heads, seq_len]
    """
    layer_names = sorted(attention_stats.keys())
    # Collapse samples, heads, and query positions to a single scalar per layer.
    mean_entropies = [float(attention_stats[name]["entropy"].mean()) for name in layer_names]

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(range(len(layer_names)), mean_entropies, marker="o")
    ax.set_xlabel("Layer")
    ax.set_ylabel("Mean Attention Entropy")
    ax.set_title("Attention Entropy Across Layers\n(Higher = more uniform attention)")
    ax.set_xticks(range(len(layer_names)))
    ax.set_xticklabels(layer_names, rotation=45, ha="right")
    ax.grid(True, alpha=0.3)
    fig.tight_layout()


plot_attention_entropy(attention_stats)
print("Attention entropy plot generated.")
+
+# %%
+# ----------------------- plot: attention sparsity across layers -----------------------
+
+
def plot_attention_sparsity(
    attention_stats: dict[str, dict[str, np.ndarray]],
) -> None:
    """Line plot of mean attention sparsity per layer.

    Sparsity is the fraction of attention weights below 0.01; higher values
    indicate more focused attention.

    Args:
        attention_stats: Per-layer statistics from compute_attention_stats;
            the "sparsity" entry has shape [samples, n_heads]
    """
    layer_names = sorted(attention_stats.keys())
    # Collapse samples and heads to a single scalar per layer.
    mean_sparsities = [float(attention_stats[name]["sparsity"].mean()) for name in layer_names]

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(range(len(layer_names)), mean_sparsities, marker="o", color="C1")
    ax.set_xlabel("Layer")
    ax.set_ylabel("Mean Sparsity (fraction < 0.01)")
    ax.set_title("Attention Sparsity Across Layers\n(Higher = more sparse/focused attention)")
    ax.set_xticks(range(len(layer_names)))
    ax.set_xticklabels(layer_names, rotation=45, ha="right")
    ax.set_ylim(0, 1)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()


plot_attention_sparsity(attention_stats)
print("Attention sparsity plot generated.")
+
+# %%
+# ----------------------- plot: attention to first/last tokens -----------------------
+
+
def plot_attention_to_special_positions(
    attention_patterns: dict[str, Float[Tensor, "samples n_heads seq_len seq_len"]],
) -> None:
    """Plot the mean attention paid to the first and last key positions per layer.

    NOTE(review): if the model uses causal masking, queries before the final
    position cannot attend to the last token, so the "last token" curve is
    diluted by structural zeros — confirm whether that is intended.

    Args:
        attention_patterns: Dictionary of attention patterns per layer
    """
    layer_names = sorted(attention_patterns.keys())
    attn_to_first: list[float] = []
    attn_to_last: list[float] = []

    for layer_name in layer_names:
        # Collapse samples and heads -> [seq_len, seq_len].
        avg_attn = attention_patterns[layer_name].numpy().mean(axis=(0, 1))
        attn_to_first.append(float(avg_attn[:, 0].mean()))  # column 0 = first key
        attn_to_last.append(float(avg_attn[:, -1].mean()))  # last column = last key

    fig, ax = plt.subplots(figsize=(10, 5))
    x = range(len(layer_names))
    ax.plot(x, attn_to_first, marker="o", label="Attention to first token")
    ax.plot(x, attn_to_last, marker="s", label="Attention to last token")
    ax.set_xlabel("Layer")
    ax.set_ylabel("Mean Attention Weight")
    ax.set_title("Attention to Special Token Positions Across Layers")
    ax.set_xticks(x)
    ax.set_xticklabels(layer_names, rotation=45, ha="right")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()


plot_attention_to_special_positions(attention_patterns)
print("Attention to special positions plot generated.")
+
+# %%
diff --git a/spd/clustering/ci_dt/config.py b/spd/clustering/ci_dt/config.py
new file mode 100644
index 000000000..de0f95cee
--- /dev/null
+++ b/spd/clustering/ci_dt/config.py
@@ -0,0 +1,16 @@
+"""Configuration for causal importance decision tree training."""
+
+from dataclasses import dataclass
+
+
@dataclass
class CIDTConfig:
    """Configuration for causal importance decision tree training.

    Bundles the SPD run to analyze, the amount of data to sample, and the
    decision-tree hyperparameters.
    """

    # WandB run path for the SPD model
    wandb_run_path: str
    # Number of samples per batch for GPU inference
    batch_size: int = 10
    # Number of batches to process (total samples = batch_size * n_batches)
    n_batches: int = 25
    # Context length (sequence length) for tokenization
    n_ctx: int = 64
    # Threshold for boolean conversion
    activation_threshold: float = 0.01
    # Maximum depth for decision trees
    max_depth: int = 8
    # Random state for reproducibility
    random_state: int = 7
diff --git a/spd/clustering/ci_dt/core.py b/spd/clustering/ci_dt/core.py
new file mode 100644
index 000000000..ec9ba585b
--- /dev/null
+++ b/spd/clustering/ci_dt/core.py
@@ -0,0 +1,230 @@
+"""Core library functions for causal importance decision trees."""
+
+import warnings
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Literal
+
+import numpy as np
+from jaxtyping import Bool, Float
+from sklearn.metrics import (
+ accuracy_score,
+ average_precision_score,
+ balanced_accuracy_score,
+)
+from sklearn.multioutput import MultiOutputClassifier
+from sklearn.tree import DecisionTreeClassifier
+from tqdm import tqdm
+
+
@dataclass
class LayerModel:
    """Holds a trained per-layer model."""

    layer_index: int  # Index k of the target layer this model predicts (k >= 1)
    model: MultiOutputClassifier  # One DecisionTreeClassifier per target component
    feature_dim: int  # Number of input features (columns of concat(layers[:k]))
    target_dim: int  # Number of target components (columns of layers[k])
+
+
def concat_cols(
    Xs: "Sequence[Bool[np.ndarray, 'n_samples n_features']]",
) -> "Bool[np.ndarray, 'n_samples n_concat']":
    """Column-concatenate a sequence of boolean matrices.

    Generalized to accept any iterable (not just sequences): the input is
    materialized once before inspection, so generators now work too.
    Annotations are strings so the signature does not require jaxtyping at
    import time.

    Args:
        Xs: Iterable of (n_samples, n_features_i) arrays sharing n_samples.

    Returns:
        (n_samples, sum_i n_features_i) array, or an empty (0, 0) boolean
        array when ``Xs`` is empty (matching the original behavior).
    """
    mats = list(Xs)  # materialize so generators are supported
    if not mats:
        return np.zeros((0, 0), bool)
    return np.concatenate(mats, axis=1)
+
+
def build_xy(
    layers: Sequence[Bool[np.ndarray, "n_samples n_components"]],
) -> list[
    tuple[
        Bool[np.ndarray, "n_samples n_features"],
        Bool[np.ndarray, "n_samples n_targets"],
    ]
]:
    """Return (X_k, Y_k) pairs for k = 1..L-1 with X_k = concat(layers[:k])."""
    # For k >= 1 the prefix is never empty, so concat_cols reduces to a
    # plain column concatenation of the prefix layers.
    return [(concat_cols(layers[:k]), layers[k]) for k in range(1, len(layers))]
+
+
def train_trees(
    layers: Sequence[Bool[np.ndarray, "n_samples n_components"]],
    *,
    max_depth: int | None = None,
    min_samples_leaf: int = 1,
    random_state: int | None = 0,
) -> list[LayerModel]:
    """Fit one multi-output decision tree model per target layer.

    Layer k (k >= 1) is predicted from the column-concatenation of all
    earlier layers; MultiOutputClassifier fits one tree per target component.

    Args:
        layers: Boolean activation matrices, one per layer.
        max_depth: Depth cap passed to each DecisionTreeClassifier.
        min_samples_leaf: Leaf-size floor passed to each tree.
        random_state: Seed for reproducible tree construction.

    Returns:
        One LayerModel per target layer, ordered by layer index.
    """
    pairs = build_xy(layers)
    trained: list[LayerModel] = []
    for k, (X_k, Y_k) in tqdm(enumerate(pairs, start=1), total=len(pairs), desc="Training trees"):
        clf = MultiOutputClassifier(
            DecisionTreeClassifier(
                max_depth=max_depth,
                min_samples_leaf=min_samples_leaf,
                random_state=random_state,
            )
        )
        clf.fit(X_k.astype(np.uint8), Y_k.astype(np.uint8))
        trained.append(LayerModel(k, clf, int(X_k.shape[1]), int(Y_k.shape[1])))
    return trained
+
+
def extract_prob_class_1(
    proba_list: list[np.ndarray],
    model: MultiOutputClassifier,
) -> np.ndarray:
    """Stack P(y=1) columns, one per output.

    Assumes constant components are filtered out, so both classes should always be present.
    """
    columns: list[np.ndarray] = []
    for i, proba in enumerate(proba_list):
        est = model.estimators_[i]  # pyright: ignore[reportIndexIssue]
        assert isinstance(est, DecisionTreeClassifier)
        classes = est.classes_
        assert len(classes) == 2, f"Expected 2 classes but got {len(classes)} for output {i}"
        # Second column of predict_proba output is P(y=1)
        columns.append(proba[:, 1])
    return np.stack(columns, axis=1)
+
+
def predict_k(
    models: Sequence[LayerModel],
    prefix_layers: Sequence[Bool[np.ndarray, "n_samples n_components"]],
    k: int,
    *,
    threshold: float = 0.5,
) -> Bool[np.ndarray, "n_samples n_components_k"]:
    """Predict layer ``k`` activations from ``prefix_layers`` (layers[:k]).

    Args:
        models: Trained per-layer models; the one with layer_index == k is used.
        prefix_layers: Boolean activation matrices for layers 0..k-1.
        k: Index of the target layer.
        threshold: Minimum P(y=1) for a component to be predicted active.

    Returns:
        Boolean matrix of predicted activations for layer k.

    Raises:
        StopIteration: If no model with ``layer_index == k`` exists.
    """
    # Commented-out dbg_auto() debug calls removed.
    lm: LayerModel = next(m for m in models if m.layer_index == k)
    X: np.ndarray = concat_cols(prefix_layers)
    proba = lm.model.predict_proba(X.astype(np.uint8))  # type: ignore
    P: np.ndarray = extract_prob_class_1(proba, lm.model)
    return (threshold <= P).astype(bool)
+
+
def predict_all(
    models: Sequence[LayerModel],
    seed_layers: Sequence[Bool[np.ndarray, "n_samples n_components"]],
    *,
    thresholds: Sequence[float] | None = None,
) -> list[Bool[np.ndarray, "n_samples n_components"]]:
    """Sequentially predict layers 1.. using layer 0 as seed."""
    predicted: list[np.ndarray] = [seed_layers[0].copy()]
    ths = [] if thresholds is None else list(thresholds)
    # Walk models in layer order; missing thresholds default to 0.5.
    for i, lm in enumerate(sorted(models, key=lambda m: m.layer_index)):
        threshold = ths[i] if i < len(ths) else 0.5
        predicted.append(predict_k(models, predicted, lm.layer_index, threshold=threshold))
    return predicted
+
+
MetricKey = Literal["ap", "acc", "bacc", "prev", "tpr", "tnr", "precision", "npv", "f1"]


def layer_metrics(
    Y_true: Bool[np.ndarray, "n t"],
    Y_prob: Float[np.ndarray, "n t"],
    Y_pred: Bool[np.ndarray, "n t"],
) -> dict[MetricKey, np.ndarray]:
    """Return per-target metrics: AP, acc, bacc, prevalence, TPR, TNR, precision, NPV, F1.

    All ratio metrics are guarded against zero denominators (the original
    crashed with ZeroDivisionError when a target column had no positives,
    no negatives, or no negative predictions); undefined entries stay NaN
    and emit a warning, matching the precision handling.

    Returns:
        Dictionary with keys:
        - ap: Average precision
        - acc: Accuracy
        - bacc: Balanced accuracy
        - prev: Prevalence (fraction of positive samples)
        - tpr: True Positive Rate (Recall/Sensitivity)
        - tnr: True Negative Rate (Specificity)
        - precision: Precision (when we predict active, how often are we right?)
        - npv: Negative Predictive Value (when we predict inactive, how often are we right?)
        - f1: F1 score

        Each value is an array of length T (number of target components).
    """
    T: int = Y_true.shape[1]

    ap: Float[np.ndarray, " t"] = np.full(T, np.nan)
    acc: Float[np.ndarray, " t"] = np.full(T, np.nan)
    bacc: Float[np.ndarray, " t"] = np.full(T, np.nan)
    prev: Float[np.ndarray, " t"] = np.full(T, np.nan)
    tpr: Float[np.ndarray, " t"] = np.full(T, np.nan)
    tnr: Float[np.ndarray, " t"] = np.full(T, np.nan)
    precision: Float[np.ndarray, " t"] = np.full(T, np.nan)
    npv: Float[np.ndarray, " t"] = np.full(T, np.nan)
    f1: Float[np.ndarray, " t"] = np.full(T, np.nan)

    for j in range(T):
        y: np.ndarray = Y_true[:, j].astype(int)
        p: np.ndarray = Y_prob[:, j]
        yhat: np.ndarray = Y_pred[:, j].astype(int)
        prev[j] = float(y.mean())

        # Compute confusion matrix elements
        tp: int = int(((y == 1) & (yhat == 1)).sum())
        tn: int = int(((y == 0) & (yhat == 0)).sum())
        fp: int = int(((y == 0) & (yhat == 1)).sum())
        fn: int = int(((y == 1) & (yhat == 0)).sum())

        # TPR (Recall/Sensitivity) = TP / (TP + FN); undefined (NaN) without positives
        if (tp + fn) > 0:
            tpr[j] = tp / (tp + fn)
        else:
            warnings.warn(f"TPR undefined: {tp=}, {fn=}", stacklevel=1)

        # TNR (Specificity) = TN / (TN + FP); undefined (NaN) without negatives
        if (tn + fp) > 0:
            tnr[j] = tn / (tn + fp)
        else:
            warnings.warn(f"TNR undefined: {tn=}, {fp=}", stacklevel=1)

        # Precision (PPV) = TP / (TP + FP) - when we predict active, how often are we right?
        if (tp + fp) > 0:
            precision[j] = tp / (tp + fp)
        else:
            precision[j] = np.nan
            warnings.warn(f"Precision failed: {tp=}, {fp=}, {tp+fp=}", stacklevel=1)

        # Negative Predictive Value = TN / (TN + FN) - when we predict inactive, how often are we right?
        if (tn + fn) > 0:
            npv[j] = tn / (tn + fn)
        else:
            warnings.warn(f"NPV undefined: {tn=}, {fn=}", stacklevel=1)

        # F1 = 2 * (precision * recall) / (precision + recall); stays NaN when
        # either input is NaN or both are zero
        denom = precision[j] + tpr[j]
        if np.isfinite(denom) and denom > 0:
            f1[j] = 2 * (precision[j] * tpr[j]) / denom

        # Sklearn metrics
        ap[j] = average_precision_score(y, p)
        acc[j] = accuracy_score(y, yhat)
        bacc[j] = balanced_accuracy_score(y, yhat)

    return {
        "ap": ap,
        "acc": acc,
        "bacc": bacc,
        "prev": prev,
        "tpr": tpr,
        "tnr": tnr,
        "precision": precision,
        "npv": npv,
        "f1": f1,
    }
+
+
def proba_for_layer(lm: LayerModel, X: np.ndarray) -> np.ndarray:
    """Return P(y=1) per target column for one trained layer model."""
    features = X.astype(np.uint8)
    proba_list = lm.model.predict_proba(features)  # type: ignore
    return extract_prob_class_1(proba_list, lm.model)
+
+
def get_estimator_for(
    models: list[LayerModel], layer_idx: int, target_idx: int
) -> DecisionTreeClassifier:
    """Fetch the per-output estimator for a given layer and column."""
    layer_model = next(m for m in models if m.layer_index == layer_idx)
    est = layer_model.model.estimators_[target_idx]  # pyright: ignore[reportIndexIssue]
    assert isinstance(est, DecisionTreeClassifier)
    return est
diff --git a/spd/clustering/ci_dt/js/cluster-detail.js b/spd/clustering/ci_dt/js/cluster-detail.js
new file mode 100644
index 000000000..83abfb96e
--- /dev/null
+++ b/spd/clustering/ci_dt/js/cluster-detail.js
@@ -0,0 +1,740 @@
// Page-level state, populated by loadData() and read by the display helpers.
let clusterData = null;        // Entry for the current cluster (from clusters JSONL)
let allClusters = null;        // All clusters keyed by cluster_hash
let textSamples = {};          // Text samples keyed by text_hash
let activationsArray = null;   // Packed activations (NDArray loaded from npz)
let activationsMap = {};       // "<clusterHash>:<textHash>" -> index into activationsArray
let currentClusterHash = null; // Cluster id taken from the ?id= URL parameter
let modelInfo = {};            // Model metadata JSON
let explanations = {};         // Optional explanations keyed by cluster_id

// Component-level data
let componentActivations = {}; // Map component labels to their activation data
let enabledComponents = new Set(); // Track which components are enabled
let combinationStrategy = 'max'; // How to combine component activations: 'max', 'sum', 'mean'
+
/**
 * Entry point for the cluster detail page: reads the cluster hash from the
 * ?id= query parameter and kicks off data loading. If no id is present,
 * reports the problem via the #loading placeholder (or NOTIF if that
 * element is missing).
 */
async function init() {
    const params = new URLSearchParams(window.location.search);
    currentClusterHash = params.get('id');

    if (currentClusterHash) {
        await loadData();
        return;
    }

    const loading = document.getElementById('loading');
    if (loading) {
        loading.textContent = 'No cluster ID specified';
    } else {
        const msg = 'Fatal error: loading element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
    }
}
+
/**
 * Load all data files for the cluster page, then render via displayCluster().
 *
 * Fetches in parallel: clusters + text samples (JSONL keyed by hash), the
 * activations index map and model info (JSON), and optional explanations
 * (non-critical). The packed activations array is loaded afterwards.
 * Progress is surfaced through a NOTIF progress bar; every failure path
 * reports via NOTIF.error and, where possible, the #loading placeholder.
 */
async function loadData() {
    const progressBar = NOTIF.pbar('Loading cluster data...');

    try {
        progressBar.progress(0.1);

        // Load data in parallel
        let clusters, samples, activationsMapResponse, modelInfoResponse;

        const clustersPath = CONFIG.getDataPath('clusters');
        const textSamplesPath = CONFIG.getDataPath('textSamples');
        const activationsMapPath = CONFIG.getDataPath('activationsMap');
        const modelInfoPath = CONFIG.getDataPath('modelInfo');
        const explanationsPath = CONFIG.getDataPath('explanations');

        try {
            // Each fetch failure is rewrapped so the message names the file.
            [clusters, samples, activationsMapResponse, modelInfoResponse] = await Promise.all([
                loadJSONL(clustersPath, 'cluster_hash').catch(e => {
                    throw new Error(`Failed to load ${clustersPath}: ${e.message}`);
                }),
                loadJSONL(textSamplesPath, 'text_hash').catch(e => {
                    throw new Error(`Failed to load ${textSamplesPath}: ${e.message}`);
                }),
                fetch(activationsMapPath).catch(e => {
                    throw new Error(`Failed to load ${activationsMapPath}: ${e.message}`);
                }),
                fetch(modelInfoPath).catch(e => {
                    throw new Error(`Failed to load ${modelInfoPath}: ${e.message}`);
                })
            ]);

            // Load explanations (non-critical, don't fail if missing)
            explanations = await loadJSONL(explanationsPath, 'cluster_id').catch(() => ({}));
        } catch (error) {
            progressBar.complete();
            NOTIF.error(error.message, error, null);
            const loading = document.getElementById('loading');
            if (loading) {
                loading.textContent = error.message;
            } else {
                console.error('loading element not found, cannot display error message');
            }
            throw error;
        }

        progressBar.progress(0.4);

        // fetch() resolves on HTTP errors too, so check status explicitly.
        if (!activationsMapResponse.ok) {
            const msg = `Failed to load ${activationsMapPath} (HTTP ${activationsMapResponse.status})`;
            NOTIF.error(msg, null, null);
            throw new Error(msg);
        }
        if (!modelInfoResponse.ok) {
            const msg = `Failed to load ${modelInfoPath} (HTTP ${modelInfoResponse.status})`;
            NOTIF.error(msg, null, null);
            throw new Error(msg);
        }

        allClusters = clusters;
        textSamples = samples;

        try {
            activationsMap = await activationsMapResponse.json();
        } catch (error) {
            const msg = `Failed to parse ${activationsMapPath} (invalid JSON)`;
            NOTIF.error(msg, error, null);
            throw new Error(msg);
        }

        try {
            modelInfo = await modelInfoResponse.json();
        } catch (error) {
            const msg = `Failed to parse ${modelInfoPath} (invalid JSON)`;
            NOTIF.error(msg, error, null);
            throw new Error(msg);
        }

        progressBar.progress(0.6);

        if (!allClusters[currentClusterHash]) {
            const msg = 'Cluster not found';
            NOTIF.error(msg, null, null);
            const loading = document.getElementById('loading');
            if (loading) {
                loading.textContent = msg;
            } else {
                console.error('loading element not found, cannot display error message');
            }
            progressBar.complete();
            return;
        }

        clusterData = allClusters[currentClusterHash];

        // Load activations (float16 compressed npz)
        const activationsPath = CONFIG.getDataPath('activations');
        try {
            activationsArray = await NDArray.load(activationsPath);
        } catch (error) {
            const msg = `Failed to load ${activationsPath}`;
            NOTIF.error(msg, error, null);
            throw new Error(msg);
        }

        progressBar.progress(0.9);

        displayCluster();
        progressBar.complete();
        const loading = document.getElementById('loading');
        if (!loading) {
            const msg = 'Fatal error: loading element not found in HTML';
            NOTIF.error(msg, null, null);
            console.error(msg);
            return;
        }
        loading.style.display = 'none';
    } catch (error) {
        // Errors were already reported to the user above; log for debugging.
        progressBar.complete();
        console.error('Load error:', error);
        console.error('Stack:', error.stack);
    }
}
+
/**
 * Render every section of the cluster page from the loaded globals
 * (clusterData, currentClusterHash, explanations, modelInfo).
 * Bails out with a NOTIF error if a required DOM node is missing.
 */
function displayCluster() {
    // Update title
    const clusterTitle = document.getElementById('clusterTitle');
    if (!clusterTitle) {
        const msg = 'Fatal error: clusterTitle element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    clusterTitle.textContent = `Cluster ${currentClusterHash}`;

    // Display component count
    const componentCount = document.getElementById('componentCount');
    if (!componentCount) {
        const msg = 'Fatal error: componentCount element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    componentCount.textContent = clusterData.components.length;

    // Display explanation and setup copy handler
    displayExplanation();
    setupCopyHandler();

    // Initialize component data (must run before the components table)
    initializeComponentData();

    // Display model visualization
    displayModelVisualization();

    // Setup components table
    setupComponentsTable();

    // Setup hover highlighting between model view and components table
    setupModelViewHighlighting();

    // Display histogram plots
    displayHistograms();

    // Display token activation stats if available
    if (clusterData.stats && clusterData.stats.token_activations) {
        displayTokenActivations();
    }

    // Display samples
    displaySamples();
}
+
/**
 * Show the human-written explanation for the current cluster, or a muted
 * "No explanation" placeholder when none is present.
 */
function displayExplanation() {
    const span = document.getElementById('clusterExplanation');
    if (!span) return;

    const entry = explanations[currentClusterHash];
    const hasExplanation = Boolean(entry && entry.explanation);

    span.textContent = hasExplanation ? entry.explanation : 'No explanation';
    span.style.fontStyle = hasExplanation ? 'normal' : 'italic';
    span.style.color = hasExplanation ? '#000' : '#666';
}
+
/**
 * Wire the "copy template" button: copies a one-line JSONL explanation
 * template ({cluster_id, explanation: ""}) to the clipboard.
 * Uses the async Clipboard API, with a hidden-textarea +
 * document.execCommand('copy') fallback for older browsers (execCommand
 * is deprecated but kept as a best-effort fallback).
 */
function setupCopyHandler() {
    const copyBtn = document.getElementById('copyTemplateBtn');
    if (!copyBtn) return;

    copyBtn.addEventListener('click', async () => {
        const template = JSON.stringify({
            cluster_id: currentClusterHash,
            explanation: ""
        }) + '\n';

        try {
            await navigator.clipboard.writeText(template);
            NOTIF.success('Template copied to clipboard!');
        } catch (err) {
            // Fallback for older browsers
            const textArea = document.createElement('textarea');
            textArea.value = template;
            textArea.style.position = 'fixed';
            textArea.style.left = '-999999px';
            document.body.appendChild(textArea);
            textArea.select();
            try {
                document.execCommand('copy');
                NOTIF.success('Template copied to clipboard!');
            } catch (e) {
                NOTIF.error('Failed to copy template', e, null);
            }
            document.body.removeChild(textArea);
        }
    });
}
+
/**
 * Seed component-level state for the current cluster: pick up optional
 * per-component activation data and enable every component by default.
 */
function initializeComponentData() {
    // Per-component activations are optional in the cluster payload.
    if (clusterData.component_activations) {
        componentActivations = clusterData.component_activations;
    }

    // Reset the selection so every component starts toggled on.
    enabledComponents.clear();
    for (const comp of clusterData.components) {
        enabledComponents.add(comp.label);
    }
}
+
/**
 * Render the model-structure visualization for the current cluster into
 * the #modelView container.
 */
function displayModelVisualization() {
    const target = document.getElementById('modelView');
    if (!target) {
        const msg = 'Fatal error: modelView element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    renderModelView(
        target,
        currentClusterHash,
        allClusters,
        modelInfo,
        CONFIG.visualization.colormap,
        CONFIG.visualization.modelViewCellSize
    );
}
+
/**
 * Render one sparkline histogram per histogram-shaped entry in
 * clusterData.stats (any value with bin_counts + bin_edges).
 * Each plot gets a title-cased label, a stat-specific color, and a
 * tooltip with summary statistics.
 */
function displayHistograms() {
    const stats = clusterData.stats;
    if (!stats) return;

    const histogramPlots = document.getElementById('histogramPlots');
    if (!histogramPlots) {
        const msg = 'Fatal error: histogramPlots element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    histogramPlots.innerHTML = '';

    // Color mapping for different histogram types (unknown keys fall back to gray)
    const statColors = {
        'all_activations': '#4169E1',
        'max_activation-max-16': '#DC143C',
        'max_activation-max-32': '#DC143C',
        'mean_activation-max-16': '#228B22',
        'median_activation-max-16': '#FF8C00',
        'min_activation-max-16': '#9370DB',
        'max_activation_position': '#FF6347'
    };

    // Discover all histogram stats
    const histogramStats = [];
    for (const [key, value] of Object.entries(stats)) {
        if (value && typeof value === 'object' && 'bin_counts' in value && 'bin_edges' in value) {
            histogramStats.push(key);
        }
    }

    // Create a plot for each histogram stat
    histogramStats.forEach(statKey => {
        const histData = stats[statKey];
        const color = statColors[statKey] || '#808080';
        // Title-case the stat key, e.g. "max_activation-max-16" -> "Max Activation Max 16"
        const label = statKey.replace(/-/g, ' ').replace(/_/g, ' ')
            .split(' ')
            .map(word => word.charAt(0).toUpperCase() + word.slice(1))
            .join(' ');

        // Create container for this plot
        const plotContainer = document.createElement('div');
        plotContainer.style.display = 'flex';
        plotContainer.style.flexDirection = 'column';
        plotContainer.style.alignItems = 'center';
        plotContainer.style.minWidth = '250px';

        // Add label
        const plotLabel = document.createElement('div');
        plotLabel.textContent = label;
        plotLabel.style.fontSize = '12px';
        plotLabel.style.fontWeight = 'bold';
        plotLabel.style.marginBottom = '5px';
        plotLabel.style.textAlign = 'center';
        plotContainer.appendChild(plotLabel);

        // Create sparkline
        const sparklineContainer = document.createElement('div');
        sparklineContainer.className = 'sparkline-cell';

        // Calculate bin centers for x-axis
        const binCenters = calculateBinCenters(histData.bin_edges);

        const min = histData.bin_edges[0];
        const max = histData.bin_edges[histData.bin_edges.length - 1];

        // Set x-axis limits to [0, 1] if data is in that range
        const xlims = (min >= 0 && max <= 1) ? [0, 1] : null;

        const svg = sparkbars(binCenters, histData.bin_counts, {
            width: CONFIG.visualization.sparklineWidth || 200,
            height: CONFIG.visualization.sparklineHeight || 60,
            color: color,
            shading: true,
            lineWidth: 0,
            markers: '',
            margin: 2,
            xlims: xlims,
            ylims: [0, null],
            logScale: true,
            xAxis: {line: true, ticks: true, label_margin: 10},
            yAxis: {line: true, ticks: true, label_margin: CONFIG.visualization.sparklineYAxisMargin || 35}
        });

        sparklineContainer.innerHTML = svg;

        // Add tooltip with statistics
        const mean = calculateHistogramMean(histData);
        const median = calculateHistogramMedian(histData);
        const totalCount = histData.bin_counts.reduce((a, b) => a + b, 0);
        sparklineContainer.title = `${label} (n=${totalCount})\n\nMin: ${min.toFixed(4)}\nMax: ${max.toFixed(4)}\nMean: ${mean.toFixed(4)}\nMedian: ${median.toFixed(4)}`;

        plotContainer.appendChild(sparklineContainer);
        histogramPlots.appendChild(plotContainer);
    });
}
+
/**
 * Show the token-activation statistics section: reveals the container and
 * builds the "top tokens" DataTable (rank, token, percentage of total
 * activations, colored by relative frequency).
 */
function displayTokenActivations() {
    const tokenStats = clusterData.stats.token_activations;

    // Show the section
    const tokenActivations = document.getElementById('tokenActivations');
    if (!tokenActivations) {
        const msg = 'Fatal error: tokenActivations element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    tokenActivations.style.display = 'block';

    // Setup top tokens table
    if (tokenStats.top_tokens && tokenStats.top_tokens.length > 0) {
        const tableData = tokenStats.top_tokens.map((item, idx) => ({
            rank: idx + 1,
            token: item.token,
            count: item.count,
            percentage: ((item.count / tokenStats.total_activations) * 100)
        }));

        // top_tokens appears pre-sorted descending; first row carries the max.
        const maxPercentage = tableData.length > 0 ? tableData[0].percentage : 0;

        const tableConfig = {
            data: tableData,
            columns: [
                {
                    key: 'rank',
                    label: '#',
                    type: 'number',
                    width: '40px',
                    align: 'right'
                },
                {
                    key: 'token',
                    label: 'Token',
                    type: 'string',
                    width: '120px',
                    renderer: (value) => {
                        // Show token in a monospace box with visual formatting
                        // (spaces as middle dots, newlines as return arrows)
                        // NOTE(review): returns plain text despite the comment —
                        // wrapping markup may have been lost; confirm whether a
                        // <code>/<span> wrapper was intended here.
                        const tokenDisplay = value.replace(/ /g, '·').replace(/\n/g, '↵');
                        return `${tokenDisplay}`;
                    }
                },
                {
                    key: 'percentage',
                    label: '%',
                    type: 'number',
                    width: '70px',
                    align: 'right',
                    renderer: (value) => {
                        const percentageValue = value;
                        const percentage = percentageValue.toFixed(1);

                        // Color based on percentage (normalized by max percentage)
                        const normalizedPct = maxPercentage > 0 ? percentageValue / maxPercentage : 0;
                        const intensity = Math.floor((1 - normalizedPct) * 255);
                        const bgColor = `rgb(255, ${intensity}, ${intensity})`;

                        const span = document.createElement('span');
                        span.textContent = `${percentage}%`;
                        span.style.backgroundColor = bgColor;
                        span.style.padding = '2px 4px';
                        span.style.borderRadius = '2px';

                        return span;
                    },
                    infoFunction: () => {
                        return `Unique: ${tokenStats.total_unique_tokens.toLocaleString()} | Total: ${tokenStats.total_activations.toLocaleString()} | Entropy: ${tokenStats.entropy.toFixed(2)} | Conc: ${(tokenStats.concentration_ratio * 100).toFixed(1)}%`;
                    }
                }
            ],
            pageSize: 10,
            showFilters: false,
            showInfo: true
        };

        new DataTable('#topTokensTable', tableConfig);
    }
}
+
/**
 * Build the components DataTable: one row per component with an
 * enable/disable checkbox (wired to onComponentToggle), module name,
 * and component index.
 */
function setupComponentsTable() {
    const tableData = clusterData.components.map(comp => ({
        label: comp.label,
        module: comp.module,
        index: comp.index,
        enabled: enabledComponents.has(comp.label)
    }));

    const tableConfig = {
        data: tableData,
        columns: [
            {
                key: 'enabled',
                label: '✓',
                type: 'boolean',
                width: '40px',
                align: 'center',
                renderer: (value, row) => {
                    const checkbox = document.createElement('input');
                    checkbox.type = 'checkbox';
                    checkbox.checked = value;
                    checkbox.style.cursor = 'pointer';
                    // Toggling updates enabledComponents and re-renders samples.
                    checkbox.addEventListener('change', (e) => {
                        onComponentToggle(row.label, e.target.checked);
                    });
                    return checkbox;
                },
                filterable: false
            },
            {
                key: 'module',
                label: 'Module',
                type: 'string',
                width: '250px'
            },
            {
                key: 'index',
                label: 'Index',
                type: 'number',
                width: '80px',
                align: 'right'
            }
        ],
        pageSize: CONFIG.clusterPage.pageSize,
        showFilters: false
    };

    new DataTable('#componentsTable', tableConfig);
}
+
/**
 * Checkbox handler: sync the enabled-component set with the checkbox
 * state, then refresh the displayed activations.
 */
function onComponentToggle(componentLabel, isEnabled) {
    const operation = isEnabled ? 'add' : 'delete';
    enabledComponents[operation](componentLabel);

    // Recompute and redisplay activations
    recomputeDisplayedActivations();
}
+
/**
 * Re-render the samples list after the component selection changes.
 *
 * NOTE(review): all three branches currently end in displaySamples() — the
 * branching documents intent only. The actual component-level combination
 * happens inside displaySamples(), which reads enabledComponents itself.
 * Declared async but performs no awaits.
 */
async function recomputeDisplayedActivations() {
    // If no components are enabled or component activations not available, use cluster-level
    if (enabledComponents.size === 0 || !componentActivations || Object.keys(componentActivations).length === 0) {
        // Just redisplay with cluster-level activations (default)
        displaySamples();
        return;
    }

    // If all components are enabled, use cluster-level activations (faster)
    if (enabledComponents.size === clusterData.components.length) {
        displaySamples();
        return;
    }

    // Recompute activations based on enabled components
    displaySamples();
}
+
/**
 * Combine per-component activation arrays into a single array.
 *
 * @param {number[][]} componentActsList - Arrays of per-token activations,
 *   all of the same length n_ctx.
 * @param {string} strategy - 'max' | 'sum' | 'mean'.
 * @returns {number[]|null} Combined activations of length n_ctx, or null
 *   for an empty input list.
 * @throws {Error} On an unknown strategy. (The original silently returned
 *   an all-zeros array in that case, masking the bug.)
 */
function combineComponentActivations(componentActsList, strategy) {
    if (componentActsList.length === 0) {
        return null;
    }

    if (componentActsList.length === 1) {
        return componentActsList[0];
    }

    if (!['max', 'sum', 'mean'].includes(strategy)) {
        throw new Error(`Unknown combination strategy: ${strategy}`);
    }

    const nCtx = componentActsList[0].length;
    const combined = new Array(nCtx);

    for (let i = 0; i < nCtx; i++) {
        const column = componentActsList.map((acts) => acts[i]);
        if (strategy === 'max') {
            combined[i] = Math.max(...column);
        } else if (strategy === 'sum') {
            combined[i] = column.reduce((a, b) => a + b, 0);
        } else {
            // strategy === 'mean'
            combined[i] = column.reduce((a, b) => a + b, 0) / column.length;
        }
    }

    return combined;
}
+
/**
 * Cross-highlighting: hovering a module cell in the model view highlights
 * the matching module rows in the components table; leaving clears all
 * highlights.
 */
function setupModelViewHighlighting() {
    // Get all model view cells
    const modelViewCells = document.querySelectorAll('.modelview-module-cell');

    // Get components table
    const componentsTable = document.querySelector('#componentsTable');
    if (!componentsTable) return;

    modelViewCells.forEach(cell => {
        cell.addEventListener('mouseenter', (e) => {
            const moduleName = e.target.dataset.module;
            if (!moduleName) return;

            // Find and highlight all rows in the components table that match this module
            const tableRows = componentsTable.querySelectorAll('.tablejs-data-row');
            tableRows.forEach(row => {
                const cells = row.querySelectorAll('td');
                if (cells.length > 1) {
                    const moduleCell = cells[1]; // Second column is module name (first is checkbox)
                    if (moduleCell && moduleCell.textContent === moduleName) {
                        row.style.backgroundColor = '#fff3cd'; // Light yellow highlight
                    }
                }
            });
        });

        cell.addEventListener('mouseleave', () => {
            // Remove highlighting from all rows
            const tableRows = componentsTable.querySelectorAll('.tablejs-data-row');
            tableRows.forEach(row => {
                row.style.backgroundColor = '';
            });
        });
    });
}
+
/**
 * Render the activation samples table for the current cluster.
 *
 * Uses cluster-level activations by default; when only a subset of
 * components is enabled (and per-component data exists), combines the
 * enabled components' activations via combinationStrategy instead.
 *
 * NOTE(review): the table-row markup in the original was corrupted
 * (unterminated string literal, bare "|" cell separators); reconstructed
 * here as standard two-column <tr>/<td> rows — confirm against the
 * original template.
 */
function displaySamples() {
    const tbody = document.getElementById('samplesTableBody');
    if (!tbody) {
        const msg = 'Fatal error: samplesTableBody element not found in HTML';
        NOTIF.error(msg, null, null);
        console.error(msg);
        return;
    }
    tbody.innerHTML = '';

    // Get the main criterion samples (max_activation)
    const criterionKey = Object.keys(clusterData.criterion_samples)[0];
    if (!criterionKey) {
        tbody.innerHTML = '<tr><td colspan="2">No samples available</td></tr>';
        return;
    }

    const sampleHashes = clusterData.criterion_samples[criterionKey];
    const samplesToShow = Math.min(CONFIG.clusterPage.maxSamplesPerCluster, sampleHashes.length);

    // Component-level recombination only applies when some (not all) components are enabled
    const useComponentActivations = componentActivations &&
        Object.keys(componentActivations).length > 0 &&
        enabledComponents.size < clusterData.components.length;

    for (let i = 0; i < samplesToShow; i++) {
        const textHash = sampleHashes[i];
        const textSample = textSamples[textHash];

        if (!textSample) {
            console.warn(`Text sample not found for hash: ${textHash}`);
            continue;
        }

        let activationsData;

        if (useComponentActivations) {
            // Compute combined activations from enabled components
            const componentActsList = [];

            for (const comp of clusterData.components) {
                if (enabledComponents.has(comp.label) && componentActivations[comp.label]) {
                    const compData = componentActivations[comp.label];
                    // Find the activation for this text sample
                    const hashIdx = compData.activation_sample_hashes.indexOf(`${currentClusterHash}:${comp.label}:${textHash}`);
                    if (hashIdx !== -1) {
                        const activationIdx = compData.activation_indices[hashIdx];
                        if (activationIdx !== undefined && activationsArray) {
                            const compActivations = activationsArray.get(activationIdx);
                            componentActsList.push(Array.from(compActivations.data));
                        }
                    }
                }
            }

            if (componentActsList.length > 0) {
                activationsData = combineComponentActivations(componentActsList, combinationStrategy);
            }
        }

        // Fall back to cluster-level activations if component activations not available
        if (!activationsData) {
            const fullHash = `${currentClusterHash}:${textHash}`;
            const activationIdx = activationsMap[fullHash];

            if (activationIdx !== undefined && activationsArray) {
                const activations = activationsArray.get(activationIdx);
                activationsData = Array.from(activations.data);
            }
        }

        let tokenViz;
        if (activationsData) {
            // Find max position
            const maxPosition = activationsData.indexOf(Math.max(...activationsData));

            // Use the proper token visualization with coloring and tooltips
            tokenViz = createTokenVisualizationWithTooltip(
                textSample.tokens,
                activationsData,
                maxPosition
            );
        } else {
            // Fallback to simple visualization if no activations
            console.warn(`No activations found for sample ${i}`);
            tokenViz = createSimpleTokenViz(textSample.tokens);
        }

        const tr = document.createElement('tr');
        tr.innerHTML = `
            <td>${i + 1}</td>
            <td></td>
        `;

        // Add token visualization to last cell
        tr.lastElementChild.appendChild(tokenViz);

        tbody.appendChild(tr);
    }

    if (sampleHashes.length > CONFIG.clusterPage.maxSamplesPerCluster) {
        const tr = document.createElement('tr');
        tr.innerHTML = `
            <td colspan="2">... and ${sampleHashes.length - CONFIG.clusterPage.maxSamplesPerCluster} more samples</td>`;
        tbody.appendChild(tr);
    }
}
+
/**
 * Plain fallback rendering for a token sequence: no activation coloring,
 * just the tokens joined by spaces inside a .token-container div.
 */
function createSimpleTokenViz(tokens) {
    const wrapper = document.createElement('div');
    wrapper.className = 'token-container';
    wrapper.textContent = tokens.join(' ');
    return wrapper;
}
+
// Initialize config and load data on page load.
// init() is awaited (the original left it as a floating promise) so any
// rejection is caught and logged instead of becoming an unhandled rejection.
(async () => {
    try {
        await initConfig();
        await init();
    } catch (error) {
        console.error('Page initialization failed:', error);
    }
})();
\ No newline at end of file
diff --git a/spd/clustering/ci_dt/js/cluster-selection.js b/spd/clustering/ci_dt/js/cluster-selection.js
new file mode 100644
index 000000000..6a5ce1142
--- /dev/null
+++ b/spd/clustering/ci_dt/js/cluster-selection.js
@@ -0,0 +1,841 @@
+// ---- Page-level mutable state ----
+let clusterData = {};   // cluster hash -> cluster record; filled by loadData()
+let modelInfo = {};     // model architecture info; filled by modelInfoData.loadData()
+let dataTable = null;   // DataTable instance for the cluster index table
+let explanations = {};  // cluster explanations keyed on load; may stay empty
+
+// Alpine.js data component for model info (bound to the #modelInfo element).
+// Holds the fetched model-info JSON and formatting helpers used by the template.
+const modelInfoData = {
+ data: {},
+ hasData: false,
+
+ // Fetch model info JSON and mirror it into the global `modelInfo` so the
+ // DataTable renderers (which are plain functions, not Alpine) can see it.
+ async loadData() {
+ try {
+ const response = await fetch(CONFIG.getDataPath('modelInfo'));
+ this.data = await response.json();
+ this.hasData = Object.keys(this.data).length > 0;
+
+ // Also populate global modelInfo for DataTable renderers
+ modelInfo = this.data;
+
+ console.log('Model info loaded:', this.hasData, Object.keys(this.data));
+ } catch (error) {
+ // Non-fatal: the page renders without the model-info panel.
+ console.error('Failed to load model info:', error);
+ this.hasData = false;
+ }
+ },
+
+ // Human-readable parameter count (1.2M / 3.4K style).
+ // Note: a count of 0 (or any falsy value) renders as '-'.
+ formatParameters(totalParams) {
+ if (!totalParams) return '-';
+ if (totalParams >= 1000000) return (totalParams / 1000000).toFixed(1) + 'M';
+ if (totalParams >= 1000) return (totalParams / 1000).toFixed(1) + 'K';
+ return totalParams.toString();
+ },
+
+ // Format a "wandb:entity/project/..." path as display text for a link.
+ formatWandBLink(path) {
+ if (!path) return '-';
+
+ // Remove "wandb:" prefix if present
+ const cleanPath = path.replace(/^wandb:/, '');
+
+ // Convert to WandB URL
+ const url = `https://wandb.ai/${cleanPath}`;
+
+ // Show shortened path in link text
+ const displayText = cleanPath.length > 60
+ ? cleanPath.substring(0, 57) + '...'
+ : cleanPath;
+
+ // NOTE(review): `url` is computed but unused in the returned text — the
+ // anchor markup appears to have been stripped/garbled here; verify
+ // against the original source (likely `<a href="${url}">${displayText}</a>`).
+ return `${displayText}`;
+ }
+};
+
+// Custom column renderers for the cluster index DataTable.
+// Each renderer receives (cellValue, row, column) and returns either a DOM
+// node or a string for the cell. Histogram renderers draw SVG sparklines
+// via the external sparkbars() helper.
+const columnRenderers = {
+ // Miniature per-module model view for one cluster (delegates to renderModelView).
+ modelView: function(value, row, col) {
+ const container = document.createElement('div');
+ container.className = 'modelview-cell';
+
+ renderModelView(container, row.clusterHash, clusterData, modelInfo, CONFIG.visualization.colormap, CONFIG.visualization.modelViewCellSizeTable);
+
+ return container;
+ },
+
+ // Compact textual summary of the cluster's modules; full list in tooltip.
+ modulesSummary: function(value, row, col) {
+ const modules = row.modules;
+ const container = document.createElement('div');
+ container.className = 'module-summary';
+
+ // <=3 modules: show each (trimmed to its last two dotted segments);
+ // more: show just the count.
+ if (modules.length === 1) {
+ const parts = modules[0].split('.');
+ container.textContent = parts.length > 2 ? parts.slice(-2).join('.') : modules[0];
+ } else if (modules.length <= 3) {
+ container.textContent = modules.map(m => {
+ const parts = m.split('.');
+ return parts.length > 2 ? parts.slice(-2).join('.') : m;
+ }).join(', ');
+ } else {
+ container.textContent = `${modules.length} modules`;
+ }
+
+ container.title = modules.join('\n');
+ return container;
+ },
+
+ // Sparkline histogram over all activations of the cluster.
+ activationHistogram: function(value, row, col) {
+ const histData = row.stats.all_activations;
+ if (!histData) {
+ return 'No data';
+ }
+
+ const container = document.createElement('div');
+ container.className = 'sparkline-cell';
+
+ // Calculate bin centers for x-axis
+ const binCenters = calculateBinCenters(histData.bin_edges);
+
+ const min = row.stats.min_activation;
+ const max = row.stats.max_activation;
+
+ // Set x-axis limits to [0, 1] if data is in that range
+ const xlims = (min >= 0 && max <= 1) ? [0, 1] : null;
+
+ // Pass bin centers as x-values and counts as y-values
+ const svg = sparkbars(binCenters, histData.bin_counts, {
+ width: CONFIG.visualization.sparklineWidth,
+ height: CONFIG.visualization.sparklineHeight,
+ color: '#4169E1',
+ shading: true,
+ lineWidth: 0,
+ markers: '',
+ margin: 2,
+ xlims: xlims,
+ ylims: [0, null],
+ logScale: true,
+ xAxis: {line: true, ticks: true, label_margin: 10},
+ yAxis: {line: true, ticks: true, label_margin: CONFIG.visualization.sparklineYAxisMargin}
+ });
+
+ container.innerHTML = svg;
+
+ // Summary statistics shown in the hover tooltip.
+ const mean = row.stats.mean_activation;
+ const median = calculateHistogramMedian(histData);
+ const n = row.stats.n_tokens;
+
+ container.title = `All Activations Histogram (n=${n})\n\nMin: ${min.toFixed(4)}\nMax: ${max.toFixed(4)}\nMean: ${mean.toFixed(4)}\nMedian: ${median.toFixed(4)}`;
+
+ return container;
+ },
+
+ // Sparkline histogram over the per-sample maximum activations
+ // (the 'max_activation-max-16' precomputed stat).
+ maxActivationDistribution: function(value, row, col) {
+ const histData = row.stats['max_activation-max-16'];
+ if (!histData) {
+ return 'No data';
+ }
+
+ const container = document.createElement('div');
+ container.className = 'sparkline-cell';
+
+ // Calculate bin centers for x-axis
+ const binCenters = calculateBinCenters(histData.bin_edges);
+
+ // Min/max taken from the bin edges (no precomputed values for this stat).
+ const min = histData.bin_edges[0];
+ const max = histData.bin_edges[histData.bin_edges.length - 1];
+
+ // Set x-axis limits to [0, 1] if data is in that range
+ const xlims = (min >= 0 && max <= 1) ? [0, 1] : null;
+
+ // Pass bin centers as x-values and counts as y-values
+ const svg = sparkbars(binCenters, histData.bin_counts, {
+ width: CONFIG.visualization.sparklineWidth,
+ height: CONFIG.visualization.sparklineHeight,
+ color: '#DC143C',
+ shading: true,
+ lineWidth: 0,
+ markers: '',
+ margin: 2,
+ xlims: xlims,
+ ylims: [0, null],
+ logScale: true,
+ xAxis: {line: true, ticks: true, label_margin: 10},
+ yAxis: {line: true, ticks: true, label_margin: CONFIG.visualization.sparklineYAxisMargin}
+ });
+
+ container.innerHTML = svg;
+
+ const n = row.stats.n_samples;
+ const mean = calculateHistogramMean(histData);
+ const median = calculateHistogramMedian(histData);
+
+ container.title = `Max Activation Distribution (n=${n} samples)\n\nMin: ${min.toFixed(4)}\nMax: ${max.toFixed(4)}\nMean: ${mean.toFixed(4)}\nMedian: ${median.toFixed(4)}`;
+
+ return container;
+ },
+
+ // Link to the per-cluster detail page.
+ // NOTE(review): the anchor markup appears stripped/garbled here — likely
+ // originally an <a href> built from row data; verify against the original.
+ clusterLink: function(value, row, col) {
+ return `View →`;
+ },
+
+ // Truncated explanation text with the full text in a hover tooltip.
+ explanation: function(value, row, col) {
+ if (!value) {
+ return '—';
+ }
+ // Truncate long explanations
+ const maxLength = 60;
+ if (value.length > maxLength) {
+ const truncated = value.substring(0, maxLength) + '...';
+ const span = document.createElement('span');
+ span.textContent = truncated;
+ span.title = value; // Show full text on hover
+ return span;
+ }
+ return value;
+ },
+
+ // Shannon-style entropy of the token-activation distribution (precomputed).
+ tokenEntropy: function(value, row, col) {
+ const tokenStats = row.stats.token_activations;
+ if (!tokenStats) {
+ return 'N/A';
+ }
+ return tokenStats.entropy.toFixed(2);
+ },
+
+ // Concentration ratio rendered as a percentage.
+ tokenConcentration: function(value, row, col) {
+ const tokenStats = row.stats.token_activations;
+ if (!tokenStats) {
+ return 'N/A';
+ }
+ return (tokenStats.concentration_ratio * 100).toFixed(1) + '%';
+ },
+
+ // Top-5 most frequent tokens, each with its share of all activations;
+ // share cell is shaded red proportionally to the top token's share.
+ topToken: function(value, row, col) {
+ const tokenStats = row.stats.token_activations;
+ if (!tokenStats || !tokenStats.top_tokens || tokenStats.top_tokens.length === 0) {
+ return 'N/A';
+ }
+
+ const container = document.createElement('div');
+ container.style.fontFamily = 'monospace';
+ container.style.fontSize = '11px';
+ container.style.lineHeight = '1.4';
+
+ const topN = Math.min(5, tokenStats.top_tokens.length);
+ const maxPercentage = tokenStats.top_tokens.length > 0
+ ? ((tokenStats.top_tokens[0].count / tokenStats.total_activations) * 100)
+ : 0;
+
+ for (let i = 0; i < topN; i++) {
+ const token = tokenStats.top_tokens[i];
+ // Make whitespace visible: middle dot for spaces, ↵ for newlines.
+ const tokenDisplay = token.token.replace(/ /g, '·').replace(/\n/g, '↵');
+ const percentageValue = ((token.count / tokenStats.total_activations) * 100);
+ const percentage = percentageValue.toFixed(1);
+
+ // Color based on percentage (normalized by max percentage)
+ const normalizedPct = maxPercentage > 0 ? percentageValue / maxPercentage : 0;
+ const intensity = Math.floor((1 - normalizedPct) * 255);
+ const bgColor = `rgb(255, ${intensity}, ${intensity})`;
+
+ const line = document.createElement('div');
+ line.style.display = 'flex';
+ line.style.justifyContent = 'space-between';
+ line.style.gap = '8px';
+
+ const tokenSpan = document.createElement('span');
+ // NOTE(review): token text is assigned via innerHTML — any markup that
+ // wrapped ${tokenDisplay} appears stripped/garbled, and raw tokens
+ // containing '<' would be parsed as HTML; confirm original markup and
+ // consider textContent.
+ tokenSpan.innerHTML = `${tokenDisplay}`;
+ tokenSpan.style.textAlign = 'left';
+
+ const pctSpan = document.createElement('span');
+ pctSpan.textContent = `${percentage}%`;
+ pctSpan.style.textAlign = 'right';
+ pctSpan.style.backgroundColor = bgColor;
+ pctSpan.style.padding = '2px 4px';
+ pctSpan.style.borderRadius = '2px';
+
+ line.appendChild(tokenSpan);
+ line.appendChild(pctSpan);
+ container.appendChild(line);
+ }
+
+ return container;
+ },
+
+ // Generic histogram renderer for any BinnedData stat.
+ // Returns a renderer closure bound to (statKey, color, title).
+ genericHistogram: function(statKey, color, title) {
+ return function(value, row, col) {
+ const histData = row.stats[statKey];
+ if (!histData || !histData.bin_counts) {
+ return 'No data';
+ }
+
+ const container = document.createElement('div');
+ container.className = 'sparkline-cell';
+
+ // Calculate bin centers for x-axis
+ const binCenters = calculateBinCenters(histData.bin_edges);
+
+ // Calculate statistics of underlying data
+ const min = histData.bin_edges[0];
+ const max = histData.bin_edges[histData.bin_edges.length - 1];
+
+ // Set x-axis limits to [0, 1] if data is in that range
+ const xlims = (min >= 0 && max <= 1) ? [0, 1] : null;
+
+ // Pass bin centers as x-values and counts as y-values
+ const svg = sparkbars(binCenters, histData.bin_counts, {
+ width: CONFIG.visualization.sparklineWidth,
+ height: CONFIG.visualization.sparklineHeight,
+ color: color,
+ shading: true,
+ lineWidth: 0,
+ markers: '',
+ margin: 2,
+ xlims: xlims,
+ ylims: [0, null],
+ logScale: true,
+ xAxis: {line: true, ticks: true, label_margin: 10},
+ yAxis: {line: true, ticks: true, label_margin: CONFIG.visualization.sparklineYAxisMargin}
+ });
+
+ container.innerHTML = svg;
+
+ const mean = calculateHistogramMean(histData);
+ const median = calculateHistogramMedian(histData);
+ const totalCount = histData.bin_counts.reduce((a, b) => a + b, 0);
+
+ container.title = `${title} (n=${totalCount})\n\nMin: ${min.toFixed(4)}\nMax: ${max.toFixed(4)}\nMean: ${mean.toFixed(4)}\nMedian: ${median.toFixed(4)}`;
+
+ return container;
+ };
+ }
+};
+
+// ============================================================================
+// Helper Functions for Filtering and Sorting
+// ============================================================================
+
+/**
+ * Create a filter function for module arrays that supports wildcards, multiple patterns, and negation
+ * @param {string} filterValue - The filter pattern (supports * wildcards, , for OR, & for AND, @ for all-match, ! for negation)
+ * @returns {Function|null} Filter function or null if invalid
+ */
+function createModuleFilter(filterValue) {
+ if (!filterValue || !filterValue.trim()) return null;
+
+ // Split by comma for OR groups
+ const orGroups = filterValue.split(',').map(g => g.trim()).filter(g => g);
+
+ // Parse each OR group (which may contain & for AND)
+ const parsedOrGroups = orGroups.map(group => {
+ // Split by & for AND conditions within this OR group
+ const andConditions = group.split('&').map(c => c.trim()).filter(c => c);
+
+ return andConditions.map(condition => {
+ let mode = 'some'; // default: at least one module matches
+ let negate = false;
+ let pattern = condition.toLowerCase();
+
+ // Check for @ prefix (all modules must match)
+ if (pattern.startsWith('@')) {
+ mode = 'every';
+ pattern = pattern.substring(1);
+ }
+ // Check for ! prefix (no modules can match)
+ else if (pattern.startsWith('!')) {
+ negate = true;
+ pattern = pattern.substring(1);
+ }
+
+ const regex = pattern.includes('*')
+ ? new RegExp('^' + pattern.replace(/\*/g, '.*') + '$')
+ : null;
+
+ return { mode, negate, pattern, regex };
+ });
+ });
+
+ return (cellValue) => {
+ // cellValue is the modules array
+ if (!Array.isArray(cellValue)) return false;
+
+ // OR logic across groups
+ return parsedOrGroups.some(andGroup => {
+ // AND logic within group
+ return andGroup.every(condition => {
+ const matchFn = (module) => {
+ const moduleLower = module.toLowerCase();
+ return condition.regex
+ ? condition.regex.test(moduleLower)
+ : moduleLower.includes(condition.pattern);
+ };
+
+ if (condition.mode === 'every') {
+ // ALL modules must match
+ const result = cellValue.every(matchFn);
+ return condition.negate ? !result : result;
+ } else {
+ // At least ONE module must match (or none if negated)
+ const result = cellValue.some(matchFn);
+ return condition.negate ? !result : result;
+ }
+ });
+ });
+ };
+}
+
+/**
+ * Sort function for module arrays
+ * Primary: number of modules (ascending)
+ * Secondary: alphabetically by first module name
+ * @param {Array} modules - Array of module names
+ * @returns {string} Sortable string representation
+ */
+function sortModules(modules) {
+ if (!Array.isArray(modules) || modules.length === 0) return '';
+
+ // Pad module count for proper numeric sorting, then add first module name
+ const count = modules.length.toString().padStart(5, '0');
+ const firstName = modules[0].toLowerCase();
+ return `${count}_${firstName}`;
+}
+
+/**
+ * Parse extended histogram filter syntax (e.g., "mean>0.5", "max<10", "mean>0.5, max<10")
+ * @param {string} filterValue - The filter string (can be comma-separated for multiple conditions)
+ * @returns {Array|null} Array of parsed filters [{ statType, operator, value }] or null if plain numeric
+ */
+function parseHistogramFilter(filterValue) {
+ const trimmed = filterValue.trim();
+ if (!trimmed) return null;
+
+ // Split by comma to support multiple conditions
+ const conditions = trimmed.split(',').map(c => c.trim());
+ const parsedConditions = [];
+
+ for (const condition of conditions) {
+ // Match pattern: statType operator value (e.g., "mean>0.5", "median<=0.2")
+ const match = condition.match(/^(mean|median|max|min|range|sum)\s*(==|!=|>=|<=|>|<)\s*(-?\d+\.?\d*)$/i);
+
+ if (match) {
+ parsedConditions.push({
+ statType: match[1].toLowerCase(),
+ operator: match[2],
+ value: parseFloat(match[3])
+ });
+ } else {
+ // If any condition doesn't match, return null to use default filter
+ return null;
+ }
+ }
+
+ // Return array of conditions, or null if none were found
+ return parsedConditions.length > 0 ? parsedConditions : null;
+}
+
+/**
+ * Create a filter function for histogram columns with extended syntax
+ * Supports multiple comma-separated conditions (AND logic)
+ * @param {string} statKey - The statistics key
+ * @param {string} filterValue - The filter string (e.g., "mean>0.5, max<10")
+ * @returns {Function|null} Filter function or null to use default
+ */
+function createHistogramFilter(statKey, filterValue) {
+ const parsedConditions = parseHistogramFilter(filterValue);
+
+ if (!parsedConditions) {
+ // Return null to let default numeric filter handle it
+ // Default will filter on the sort value (mean by default)
+ return null;
+ }
+
+ return (cellValue, row) => {
+ // All conditions must be satisfied (AND logic)
+ for (const condition of parsedConditions) {
+ const { statType, operator, value } = condition;
+ const histData = row.stats[statKey];
+
+ if (!histData || !histData.bin_counts || !histData.bin_edges) return false;
+
+ // Calculate the requested statistic
+ let statValue;
+ switch (statType) {
+ case 'mean':
+ // For all_activations, use precomputed mean
+ if (statKey === 'all_activations' && row.stats.mean_activation !== undefined) {
+ statValue = row.stats.mean_activation;
+ } else {
+ statValue = calculateHistogramMean(histData);
+ }
+ break;
+ case 'median':
+ statValue = calculateHistogramMedian(histData);
+ break;
+ case 'max':
+ statValue = histData.bin_edges[histData.bin_edges.length - 1];
+ break;
+ case 'min':
+ statValue = histData.bin_edges[0];
+ break;
+ case 'range':
+ statValue = histData.bin_edges[histData.bin_edges.length - 1] - histData.bin_edges[0];
+ break;
+ case 'sum':
+ statValue = histData.bin_counts.reduce((a, b) => a + b, 0);
+ break;
+ default:
+ return false;
+ }
+
+ if (statValue === null || statValue === undefined) return false;
+
+ let conditionMet = false;
+ switch (operator) {
+ case '==': conditionMet = Math.abs(statValue - value) < 0.0001; break;
+ case '!=': conditionMet = Math.abs(statValue - value) >= 0.0001; break;
+ case '>': conditionMet = statValue > value; break;
+ case '<': conditionMet = statValue < value; break;
+ case '>=': conditionMet = statValue >= value; break;
+ case '<=': conditionMet = statValue <= value; break;
+ default: conditionMet = false;
+ }
+
+ // If any condition fails, return false
+ if (!conditionMet) return false;
+ }
+
+ // All conditions passed
+ return true;
+ };
+}
+
+/**
+ * Get the top token string for sorting
+ * @param {object} value - Cell value (stats object)
+ * @param {object} row - The data row
+ * @returns {string} The top token string for sorting
+ */
+function sortTopToken(value, row) {
+ const tokenStats = row.stats.token_activations;
+ if (!tokenStats || !tokenStats.top_tokens || tokenStats.top_tokens.length === 0) {
+ return '';
+ }
+ return tokenStats.top_tokens[0].token.toLowerCase();
+}
+
+/**
+ * Create a filter function for top tokens
+ * @param {string} filterValue - The filter string
+ * @returns {Function|null} Filter function or null if invalid
+ */
+function createTopTokenFilter(filterValue) {
+ if (!filterValue || !filterValue.trim()) return null;
+
+ const pattern = filterValue.toLowerCase().trim();
+
+ return (cellValue, row) => {
+ const tokenStats = row.stats.token_activations;
+ if (!tokenStats || !tokenStats.top_tokens) return false;
+
+ // Search in top 10 tokens
+ const topN = Math.min(10, tokenStats.top_tokens.length);
+ for (let i = 0; i < topN; i++) {
+ const token = tokenStats.top_tokens[i].token.toLowerCase();
+ if (token.includes(pattern)) {
+ return true;
+ }
+ }
+ return false;
+ };
+}
+
+/**
+ * Create a filter function for numeric comparisons with operators
+ * @param {string} filterValue - The filter string (e.g., ">2.5", "<=0.8")
+ * @param {Function} valueExtractor - Function to extract numeric value from cellValue
+ * @returns {Function|null} Filter function or null if invalid
+ */
+function createNumericFilter(filterValue, valueExtractor) {
+ if (!filterValue || !filterValue.trim()) return null;
+
+ const trimmed = filterValue.trim();
+
+ // Match pattern: operator value (e.g., ">2.5", "<=0.8")
+ const match = trimmed.match(/^(==|!=|>=|<=|>|<)\s*(-?\d+\.?\d*)$/);
+
+ if (!match) {
+ // Try plain number (defaults to ==)
+ const plainNum = parseFloat(trimmed);
+ if (!isNaN(plainNum)) {
+ return (cellValue, row) => {
+ const value = valueExtractor(cellValue);
+ if (value === null || value === undefined) return false;
+ return Math.abs(value - plainNum) < 0.0001;
+ };
+ }
+ return null;
+ }
+
+ const operator = match[1];
+ const targetValue = parseFloat(match[2]);
+
+ return (cellValue, row) => {
+ const value = valueExtractor(cellValue);
+ if (value === null || value === undefined) return false;
+
+ switch (operator) {
+ case '==': return Math.abs(value - targetValue) < 0.0001;
+ case '!=': return Math.abs(value - targetValue) >= 0.0001;
+ case '>': return value > targetValue;
+ case '<': return value < targetValue;
+ case '>=': return value >= targetValue;
+ case '<=': return value <= targetValue;
+ default: return false;
+ }
+ };
+}
+
+function processClusterData() {
+ const tableData = [];
+
+ for (const [clusterHash, cluster] of Object.entries(clusterData)) {
+ const modules = new Set();
+ cluster.components.forEach(comp => {
+ modules.add(comp.module);
+ });
+
+ const stats = cluster.stats;
+
+ // Extract cluster ID from hash (format: "runid-iteration-clusteridx")
+ const parts = clusterHash.split('-');
+ const clusterId = parseInt(parts[parts.length - 1]);
+
+ // Get explanation for this cluster
+ const explanationData = explanations[clusterHash];
+ const explanation = explanationData ? explanationData.explanation : null;
+
+ tableData.push({
+ id: clusterId,
+ clusterHash: clusterHash,
+ componentCount: cluster.components.length,
+ modules: Array.from(modules),
+ stats: stats,
+ explanation: explanation
+ });
+ }
+
+ return tableData;
+}
+
+// Load the cluster index, discover histogram stats, build the column set,
+// and instantiate the DataTable. Model info is loaded separately by the
+// Alpine.js component. Called once from the DOMContentLoaded handler.
+async function loadData() {
+ // Load cluster data (model info is handled by Alpine.js)
+ const clusters = await loadJSONL(CONFIG.getDataPath('clusters'), 'cluster_hash');
+
+ clusterData = clusters;
+
+ // Load explanations (non-critical, don't fail if missing)
+ // NOTE(review): indexed by 'cluster_id' here but read by cluster hash in
+ // processClusterData() — presumably those keys coincide; verify.
+ explanations = await loadJSONL(CONFIG.getDataPath('explanations'), 'cluster_id').catch(() => ({}));
+
+ const tableData = processClusterData();
+
+ // Discover histogram stats from first cluster
+ // (any stats entry with bin_counts/bin_edges is treated as BinnedData).
+ const firstCluster = Object.values(clusterData)[0];
+ const histogramStats = [];
+ if (firstCluster && firstCluster.stats) {
+ for (const [key, value] of Object.entries(firstCluster.stats)) {
+ if (value && typeof value === 'object' && 'bin_counts' in value && 'bin_edges' in value) {
+ histogramStats.push(key);
+ }
+ }
+ }
+
+ // Base columns
+ const columns = [
+ {
+ key: 'id',
+ label: 'ID',
+ type: 'number',
+ width: '10px',
+ align: 'center'
+ },
+ {
+ key: 'componentCount',
+ label: 'Comps',
+ type: 'number',
+ width: '10px',
+ align: 'right'
+ },
+ {
+ key: 'modules',
+ label: 'Model View',
+ type: 'string',
+ width: '21px',
+ align: 'center',
+ renderer: columnRenderers.modelView,
+ sortFunction: (modules) => sortModules(modules),
+ filterFunction: (filterValue) => createModuleFilter(filterValue),
+ filterTooltip: 'Filter by module. Separate with , (OR) or & (AND). Use * for wildcards. Prefix @ for all-match, ! to exclude. Examples: *mlp*,*attn* (OR), *mlp*&*attn* (AND), @*proj* (all), !*o_proj* (exclude)'
+ },
+ {
+ key: 'modules',
+ label: 'Modules',
+ type: 'string',
+ width: '10px',
+ renderer: columnRenderers.modulesSummary,
+ sortFunction: (modules) => sortModules(modules),
+ filterFunction: (filterValue) => createModuleFilter(filterValue),
+ filterTooltip: 'Filter by module. Separate with , (OR) or & (AND). Use * for wildcards. Prefix @ for all-match, ! to exclude. Examples: *mlp*,*attn* (OR), *mlp*&*attn* (AND), @*proj* (all), !*o_proj* (exclude)'
+ }
+ ];
+
+ // Add histogram columns dynamically
+ // (one sparkline column per discovered BinnedData stat; color by stat name).
+ const statColors = {
+ 'all_activations': '#4169E1',
+ 'max_activation-max-16': '#DC143C',
+ 'max_activation-max-32': '#DC143C',
+ 'mean_activation-max-16': '#228B22',
+ 'median_activation-max-16': '#FF8C00',
+ 'min_activation-max-16': '#9370DB',
+ 'max_activation_position': '#FF6347'
+ };
+
+ histogramStats.forEach(statKey => {
+ const color = statColors[statKey] || '#808080';
+ // Turn e.g. "max_activation-max-16" into "Max Activation Max 16".
+ const label = statKey.replace(/-/g, ' ').replace(/_/g, ' ')
+ .split(' ')
+ .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+ .join(' ');
+
+ columns.push({
+ id: 'histogram_' + statKey,
+ key: 'stats',
+ label: label,
+ type: 'number',
+ width: '200px',
+ align: 'center',
+ renderer: columnRenderers.genericHistogram(statKey, color, label),
+ // Sort by the histogram mean (precomputed for all_activations).
+ sortFunction: (value, row) => {
+ const histData = row.stats[statKey];
+ if (!histData || !histData.bin_counts || !histData.bin_edges) return -Infinity;
+ // For all_activations, use precomputed mean
+ if (statKey === 'all_activations' && row.stats.mean_activation !== undefined) {
+ return row.stats.mean_activation;
+ }
+ // Otherwise calculate mean from histogram
+ return calculateHistogramMean(histData);
+ },
+ filterFunction: (filterValue) => createHistogramFilter(statKey, filterValue),
+ filterTooltip: 'Filter by statistics. Use: mean>0.5, median<0.2, max>=1.0, min>-0.1, range<5, sum>100. Combine with commas (e.g., mean>0.5, max<10)'
+ });
+ });
+
+ // Token activation columns
+ columns.push({
+ id: 'top_tokens',
+ key: 'stats',
+ label: 'Top Tokens',
+ type: 'string',
+ width: '150px',
+ align: 'left',
+ renderer: columnRenderers.topToken,
+ sortFunction: (value, row) => sortTopToken(value, row),
+ filterFunction: (filterValue) => createTopTokenFilter(filterValue),
+ filterTooltip: 'Search for tokens (case-insensitive substring match)'
+ });
+
+ columns.push({
+ id: 'token_entropy',
+ key: 'stats',
+ label: 'Token Entropy',
+ type: 'number',
+ width: '60px',
+ align: 'right',
+ renderer: columnRenderers.tokenEntropy,
+ sortFunction: (value, row) => {
+ const tokenStats = row.stats.token_activations;
+ return tokenStats ? tokenStats.entropy : -Infinity;
+ },
+ filterFunction: (filterValue) => createNumericFilter(filterValue, (stats) => {
+ const tokenStats = stats?.token_activations;
+ return tokenStats ? tokenStats.entropy : null;
+ }),
+ filterTooltip: 'Filter by entropy. Use operators: >, <, >=, <=, ==, != (e.g., >2.5)'
+ });
+
+ columns.push({
+ id: 'token_concentration',
+ key: 'stats',
+ label: 'Token Conc.',
+ type: 'number',
+ width: '60px',
+ align: 'right',
+ renderer: columnRenderers.tokenConcentration,
+ sortFunction: (value, row) => {
+ const tokenStats = row.stats.token_activations;
+ return tokenStats ? tokenStats.concentration_ratio : -Infinity;
+ },
+ filterFunction: (filterValue) => createNumericFilter(filterValue, (stats) => {
+ const tokenStats = stats?.token_activations;
+ return tokenStats ? tokenStats.concentration_ratio : null;
+ }),
+ filterTooltip: 'Filter by concentration (0-1). Use operators: >, <, >=, <=, ==, != (e.g., >0.5)'
+ });
+
+ // Explanation column
+ columns.push({
+ key: 'explanation',
+ label: 'Explanation',
+ type: 'string',
+ width: '200px',
+ align: 'left',
+ renderer: columnRenderers.explanation,
+ filterTooltip: 'Filter by explanation text (case-insensitive substring match)'
+ });
+
+ // Actions column
+ columns.push({
+ key: 'id',
+ label: 'Actions',
+ type: 'string',
+ width: '20px',
+ align: 'center',
+ renderer: columnRenderers.clusterLink,
+ filterable: false
+ });
+
+ const tableConfig = {
+ data: tableData,
+ columns: columns,
+ pageSize: CONFIG.indexPage.pageSize,
+ pageSizeOptions: CONFIG.indexPage.pageSizeOptions,
+ showFilters: CONFIG.indexPage.showFilters
+ };
+
+ dataTable = new DataTable('#clusterTableContainer', tableConfig);
+
+ // Hide the loading indicator; its absence is a fatal page-markup error.
+ const loading = document.getElementById('loading');
+ if (!loading) {
+ const msg = 'Fatal error: loading element not found in HTML';
+ NOTIF.error(msg, null, null);
+ console.error(msg);
+ return;
+ }
+ loading.style.display = 'none';
+}
+
+// Page bootstrap: initialize CONFIG first, then kick off the Alpine model-info
+// component (if Alpine loaded) and the main table load.
+document.addEventListener('DOMContentLoaded', async () => {
+    await initConfig();
+
+    // Check if Alpine.js loaded
+    if (typeof Alpine === 'undefined') {
+        const msg = 'Fatal error: Alpine.js failed to load. Check your internet connection or CDN.';
+        NOTIF.error(msg, null, null);
+        console.error(msg);
+    } else {
+        // Manually trigger Alpine component's loadData now that CONFIG is ready
+        const modelInfoEl = document.getElementById('modelInfo');
+        if (modelInfoEl && Alpine.$data(modelInfoEl)) {
+            Alpine.$data(modelInfoEl).loadData();
+        }
+    }
+
+    // Load cluster data and render table. Await it so a failure surfaces here
+    // instead of becoming an unhandled (floating) promise rejection.
+    try {
+        await loadData();
+    } catch (err) {
+        console.error('Failed to load cluster data:', err);
+    }
+});
diff --git a/spd/clustering/ci_dt/js/model-visualization.js b/spd/clustering/ci_dt/js/model-visualization.js
new file mode 100644
index 000000000..f42e55922
--- /dev/null
+++ b/spd/clustering/ci_dt/js/model-visualization.js
@@ -0,0 +1,222 @@
+// Self-contained utilities for model visualization
+// No global variables, all functions take necessary data as parameters
+
+function getClusterModuleStats(clusterId, clusterData) {
+ if (!clusterData || !clusterData[clusterId]) return {};
+
+ const cluster = clusterData[clusterId];
+ const moduleStats = {};
+
+ // Count components per module for this specific cluster
+ cluster.components.forEach(comp => {
+ const module = comp.module;
+ if (!moduleStats[module]) {
+ moduleStats[module] = {
+ componentCount: 0,
+ components: []
+ };
+ }
+ moduleStats[module].componentCount++;
+ moduleStats[module].components.push(comp);
+ });
+
+ return moduleStats;
+}
+
+function getModuleOrder(moduleName) {
+ if (moduleName.includes('q_proj')) return 0;
+ if (moduleName.includes('k_proj')) return 1;
+ if (moduleName.includes('v_proj')) return 2;
+ if (moduleName.includes('o_proj')) return 3;
+ if (moduleName.includes('gate_proj')) return 10;
+ if (moduleName.includes('up_proj')) return 11;
+ if (moduleName.includes('down_proj')) return 12;
+ return 999;
+}
+
+function renderModelArchitecture(clusterId, clusterData, modelInfo, colormap = 'blues') {
+ if (!modelInfo || !modelInfo.module_list) {
+ throw new Error('Model info not loaded');
+ }
+
+ const moduleStats = clusterData && clusterData[clusterId] ? getClusterModuleStats(clusterId, clusterData) : {};
+ const maxComponents = Math.max(...Object.values(moduleStats).map(s => s.componentCount), 1);
+
+ // Group ALL modules from model_info by layer and type
+ const layerGroups = {};
+
+ modelInfo.module_list.forEach(moduleName => {
+ const parts = moduleName.split('.');
+ let layerNum = -1;
+ let moduleType = 'other';
+
+ for (let i = 0; i < parts.length; i++) {
+ if (parts[i] === 'layers' && i + 1 < parts.length) {
+ layerNum = parseInt(parts[i + 1]);
+ }
+ }
+
+ if (moduleName.includes('self_attn')) {
+ moduleType = 'attention';
+ } else if (moduleName.includes('mlp')) {
+ moduleType = 'mlp';
+ }
+
+ if (!layerGroups[layerNum]) {
+ layerGroups[layerNum] = { attention: [], mlp: [], other: [] };
+ }
+
+ const count = moduleStats[moduleName] ? moduleStats[moduleName].componentCount : 0;
+ const components = moduleStats[moduleName] ? moduleStats[moduleName].components : [];
+
+ layerGroups[layerNum][moduleType].push({
+ name: moduleName,
+ count: count,
+ components: components
+ });
+ });
+
+ // Sort modules within each group by desired order
+ Object.values(layerGroups).forEach(layer => {
+ layer.attention.sort((a, b) => getModuleOrder(a.name) - getModuleOrder(b.name));
+ layer.mlp.sort((a, b) => getModuleOrder(a.name) - getModuleOrder(b.name));
+ });
+
+ const sortedLayers = Object.keys(layerGroups).sort((a, b) => a - b);
+ const cellSize = 12;
+
+ const moduleElements = [];
+
+ sortedLayers.forEach(layerNum => {
+ const layer = layerGroups[layerNum];
+ const layerElements = [];
+
+ // Attention row (above MLP)
+ if (layer.attention.length > 0) {
+ const attentionRow = layer.attention.map(module => ({
+ type: 'cell',
+ module: module.name,
+ count: module.count,
+ components: module.components.map(c => c.index).join(','),
+ color: getColorForValue(module.count, maxComponents, colormap),
+ size: cellSize
+ }));
+ layerElements.push({ type: 'row', cells: attentionRow });
+ }
+
+ // MLP row (below attention)
+ if (layer.mlp.length > 0) {
+ const mlpRow = layer.mlp.map(module => ({
+ type: 'cell',
+ module: module.name,
+ count: module.count,
+ components: module.components.map(c => c.index).join(','),
+ color: getColorForValue(module.count, maxComponents, colormap),
+ size: cellSize
+ }));
+ layerElements.push({ type: 'row', cells: mlpRow });
+ }
+
+ // Other modules
+ if (layer.other.length > 0) {
+ const otherRow = layer.other.map(module => ({
+ type: 'cell',
+ module: module.name,
+ count: module.count,
+ components: module.components.map(c => c.index).join(','),
+ color: getColorForValue(module.count, maxComponents, colormap),
+ size: cellSize
+ }));
+ layerElements.push({ type: 'row', cells: otherRow });
+ }
+
+ if (layerElements.length > 0) {
+ moduleElements.push({ type: 'layer', rows: layerElements });
+ }
+ });
+
+ return {
+ elements: moduleElements,
+ maxComponents: maxComponents
+ };
+}
+
+// Serialize the architecture description produced by renderModelArchitecture
+// into an HTML string: one wrapper per layer, one row per module group, one
+// cell per module (carrying data-module / data-count / data-components for
+// the tooltip handlers).
+// NOTE(review): the tag markup inside these string literals appears to have
+// been stripped/garbled during extraction (literals span lines and contain
+// no tags) — verify against the original source before relying on output.
+function renderToHTML(architecture) {
+ let html = '';
+
+ architecture.elements.forEach(layer => {
+ html += '';
+ layer.rows.forEach(row => {
+ html += '
 ';
+ row.cells.forEach(cell => {
+ html += `
 `;
+ });
+ html += '
 ';
+ });
+ html += '
 ';
+ });
+
+ return html;
+}
+
+// Consolidated tooltip setup - works for all model visualizations
+function setupTooltips(containerElement) {
+ const tooltip = document.getElementById('tooltip');
+ if (!tooltip) return;
+
+ const cells = containerElement.querySelectorAll('.modelview-module-cell');
+
+ cells.forEach(cell => {
+ cell.addEventListener('mouseenter', (e) => {
+ const module = e.target.dataset.module;
+ const count = e.target.dataset.count;
+ const components = e.target.dataset.components;
+
+ if (module) {
+ tooltip.textContent = `${module}\nComponents: ${count}\nIndices: ${components || 'none'}`;
+ tooltip.style.display = 'block';
+ tooltip.style.left = (e.pageX + 10) + 'px';
+ tooltip.style.top = (e.pageY + 10) + 'px';
+ }
+ });
+
+ cell.addEventListener('mouseleave', () => {
+ tooltip.style.display = 'none';
+ });
+
+ cell.addEventListener('mousemove', (e) => {
+ tooltip.style.left = (e.pageX + 10) + 'px';
+ tooltip.style.top = (e.pageY + 10) + 'px';
+ });
+ });
+}
+
+// Consolidated render function - creates model visualization in a container.
+// Guards on missing model info / cluster data with placeholder text, renders
+// the architecture HTML, applies an optional per-call cell size via a CSS
+// custom property, and wires up tooltips.
+// NOTE(review): the placeholder strings assigned to innerHTML below look like
+// they originally carried markup that was stripped/garbled — verify.
+function renderModelView(containerElement, clusterHash, clusterData, modelInfo, colormap = 'blues', cellSize = null) {
+ if (!modelInfo || !modelInfo.module_list) {
+ containerElement.innerHTML = 'Model info loading...';
+ return;
+ }
+
+ if (!clusterData || !clusterData[clusterHash]) {
+ containerElement.innerHTML = 'Cluster data missing';
+ return;
+ }
+
+ try {
+ const architecture = renderModelArchitecture(clusterHash, clusterData, modelInfo, colormap);
+ const html = renderToHTML(architecture);
+ containerElement.innerHTML = html;
+
+ // Apply cell size from config if provided
+ if (cellSize !== null) {
+ containerElement.style.setProperty('--modelview-cell-size', cellSize + 'px');
+ }
+
+ // Setup tooltips after a brief delay to ensure DOM is ready
+ setTimeout(() => setupTooltips(containerElement), 0);
+ } catch (error) {
+ // Rendering failures degrade to a placeholder rather than breaking the table.
+ console.error('Failed to render model visualization:', error);
+ containerElement.innerHTML = 'Model visualization error';
+ }
+}
\ No newline at end of file
diff --git a/spd/clustering/ci_dt/js/pkg/jszip.js b/spd/clustering/ci_dt/js/pkg/jszip.js
new file mode 100644
index 000000000..60fbb41a6
--- /dev/null
+++ b/spd/clustering/ci_dt/js/pkg/jszip.js
@@ -0,0 +1,11577 @@
+/*!
+
+JSZip v3.10.1 - A JavaScript class for generating and reading zip files
+
+
+(c) 2009-2016 Stuart Knightley
+Dual licenced under the MIT license or GPLv3. See https://raw.github.com/Stuk/jszip/main/LICENSE.markdown.
+
+JSZip uses the library pako released under the MIT license :
+https://github.com/nodeca/pako/blob/main/LICENSE
+*/
+
+(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.JSZip = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o> 2;
+ enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
+ enc3 = remainingBytes > 1 ? (((chr2 & 15) << 2) | (chr3 >> 6)) : 64;
+ enc4 = remainingBytes > 2 ? (chr3 & 63) : 64;
+
+ output.push(_keyStr.charAt(enc1) + _keyStr.charAt(enc2) + _keyStr.charAt(enc3) + _keyStr.charAt(enc4));
+
+ }
+
+ return output.join("");
+};
+
+// public method for decoding
+exports.decode = function(input) {
+ var chr1, chr2, chr3;
+ var enc1, enc2, enc3, enc4;
+ var i = 0, resultIndex = 0;
+
+ var dataUrlPrefix = "data:";
+
+ if (input.substr(0, dataUrlPrefix.length) === dataUrlPrefix) {
+ // This is a common error: people give a data url
+ // (data:image/png;base64,iVBOR...) with a {base64: true} and
+ // wonders why things don't work.
+ // We can detect that the string input looks like a data url but we
+ // *can't* be sure it is one: removing everything up to the comma would
+ // be too dangerous.
+ throw new Error("Invalid base64 input, it looks like a data url.");
+ }
+
+ input = input.replace(/[^A-Za-z0-9+/=]/g, "");
+
+ var totalLength = input.length * 3 / 4;
+ if(input.charAt(input.length - 1) === _keyStr.charAt(64)) {
+ totalLength--;
+ }
+ if(input.charAt(input.length - 2) === _keyStr.charAt(64)) {
+ totalLength--;
+ }
+ if (totalLength % 1 !== 0) {
+ // totalLength is not an integer, the length does not match a valid
+ // base64 content. That can happen if:
+ // - the input is not a base64 content
+ // - the input is *almost* a base64 content, with a extra chars at the
+ // beginning or at the end
+ // - the input uses a base64 variant (base64url for example)
+ throw new Error("Invalid base64 input, bad content length.");
+ }
+ var output;
+ if (support.uint8array) {
+ output = new Uint8Array(totalLength|0);
+ } else {
+ output = new Array(totalLength|0);
+ }
+
+ while (i < input.length) {
+
+ enc1 = _keyStr.indexOf(input.charAt(i++));
+ enc2 = _keyStr.indexOf(input.charAt(i++));
+ enc3 = _keyStr.indexOf(input.charAt(i++));
+ enc4 = _keyStr.indexOf(input.charAt(i++));
+
+ chr1 = (enc1 << 2) | (enc2 >> 4);
+ chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
+ chr3 = ((enc3 & 3) << 6) | enc4;
+
+ output[resultIndex++] = chr1;
+
+ if (enc3 !== 64) {
+ output[resultIndex++] = chr2;
+ }
+ if (enc4 !== 64) {
+ output[resultIndex++] = chr3;
+ }
+
+ }
+
+ return output;
+};
+
+},{"./support":30,"./utils":32}],2:[function(require,module,exports){
+"use strict";
+
+var external = require("./external");
+var DataWorker = require("./stream/DataWorker");
+var Crc32Probe = require("./stream/Crc32Probe");
+var DataLengthProbe = require("./stream/DataLengthProbe");
+
+/**
+ * Represent a compressed object, with everything needed to decompress it.
+ * @constructor
+ * @param {number} compressedSize the size of the data compressed.
+ * @param {number} uncompressedSize the size of the data after decompression.
+ * @param {number} crc32 the crc32 of the decompressed file.
+ * @param {object} compression the type of compression, see lib/compressions.js.
+ * @param {String|ArrayBuffer|Uint8Array|Buffer} data the compressed data.
+ */
+function CompressedObject(compressedSize, uncompressedSize, crc32, compression, data) {
+ this.compressedSize = compressedSize;
+ this.uncompressedSize = uncompressedSize;
+ this.crc32 = crc32;
+ this.compression = compression;
+ this.compressedContent = data;
+}
+
+CompressedObject.prototype = {
+ /**
+ * Create a worker to get the uncompressed content.
+ * @return {GenericWorker} the worker.
+ */
+ getContentWorker: function () {
+ var worker = new DataWorker(external.Promise.resolve(this.compressedContent))
+ .pipe(this.compression.uncompressWorker())
+ .pipe(new DataLengthProbe("data_length"));
+
+ var that = this;
+ worker.on("end", function () {
+ if (this.streamInfo["data_length"] !== that.uncompressedSize) {
+ throw new Error("Bug : uncompressed data size mismatch");
+ }
+ });
+ return worker;
+ },
+ /**
+ * Create a worker to get the compressed content.
+ * @return {GenericWorker} the worker.
+ */
+ getCompressedWorker: function () {
+ return new DataWorker(external.Promise.resolve(this.compressedContent))
+ .withStreamInfo("compressedSize", this.compressedSize)
+ .withStreamInfo("uncompressedSize", this.uncompressedSize)
+ .withStreamInfo("crc32", this.crc32)
+ .withStreamInfo("compression", this.compression)
+ ;
+ }
+};
+
+/**
+ * Chain the given worker with other workers to compress the content with the
+ * given compression.
+ * @param {GenericWorker} uncompressedWorker the worker to pipe.
+ * @param {Object} compression the compression object.
+ * @param {Object} compressionOptions the options to use when compressing.
+ * @return {GenericWorker} the new worker compressing the content.
+ */
+CompressedObject.createWorkerFrom = function (uncompressedWorker, compression, compressionOptions) {
+ return uncompressedWorker
+ .pipe(new Crc32Probe())
+ .pipe(new DataLengthProbe("uncompressedSize"))
+ .pipe(compression.compressWorker(compressionOptions))
+ .pipe(new DataLengthProbe("compressedSize"))
+ .withStreamInfo("compression", compression);
+};
+
+module.exports = CompressedObject;
+
+},{"./external":6,"./stream/Crc32Probe":25,"./stream/DataLengthProbe":26,"./stream/DataWorker":27}],3:[function(require,module,exports){
+"use strict";
+
+var GenericWorker = require("./stream/GenericWorker");
+
+exports.STORE = {
+ magic: "\x00\x00",
+ compressWorker : function () {
+ return new GenericWorker("STORE compression");
+ },
+ uncompressWorker : function () {
+ return new GenericWorker("STORE decompression");
+ }
+};
+exports.DEFLATE = require("./flate");
+
+},{"./flate":7,"./stream/GenericWorker":28}],4:[function(require,module,exports){
+"use strict";
+
+var utils = require("./utils");
+
+/**
+ * The following functions come from pako, from pako/lib/zlib/crc32.js
+ * released under the MIT license, see pako https://github.com/nodeca/pako/
+ */
+
+// Use ordinary array, since untyped makes no boost here
+function makeTable() {
+ var c, table = [];
+
+ for(var n =0; n < 256; n++){
+ c = n;
+ for(var k =0; k < 8; k++){
+ c = ((c&1) ? (0xEDB88320 ^ (c >>> 1)) : (c >>> 1));
+ }
+ table[n] = c;
+ }
+
+ return table;
+}
+
+// Create table on load. Just 255 signed longs. Not a problem.
+var crcTable = makeTable();
+
+
+function crc32(crc, buf, len, pos) {
+ var t = crcTable, end = pos + len;
+
+ crc = crc ^ (-1);
+
+ for (var i = pos; i < end; i++ ) {
+ crc = (crc >>> 8) ^ t[(crc ^ buf[i]) & 0xFF];
+ }
+
+ return (crc ^ (-1)); // >>> 0;
+}
+
+// That's all for the pako functions.
+
+/**
+ * Compute the crc32 of a string.
+ * This is almost the same as the function crc32, but for strings. Using the
+ * same function for the two use cases leads to horrible performances.
+ * @param {Number} crc the starting value of the crc.
+ * @param {String} str the string to use.
+ * @param {Number} len the length of the string.
+ * @param {Number} pos the starting position for the crc32 computation.
+ * @return {Number} the computed crc32.
+ */
+function crc32str(crc, str, len, pos) {
+ var t = crcTable, end = pos + len;
+
+ crc = crc ^ (-1);
+
+ for (var i = pos; i < end; i++ ) {
+ crc = (crc >>> 8) ^ t[(crc ^ str.charCodeAt(i)) & 0xFF];
+ }
+
+ return (crc ^ (-1)); // >>> 0;
+}
+
+module.exports = function crc32wrapper(input, crc) {
+ if (typeof input === "undefined" || !input.length) {
+ return 0;
+ }
+
+ var isArray = utils.getTypeOf(input) !== "string";
+
+ if(isArray) {
+ return crc32(crc|0, input, input.length, 0);
+ } else {
+ return crc32str(crc|0, input, input.length, 0);
+ }
+};
+
+},{"./utils":32}],5:[function(require,module,exports){
+"use strict";
+exports.base64 = false;
+exports.binary = false;
+exports.dir = false;
+exports.createFolders = true;
+exports.date = null;
+exports.compression = null;
+exports.compressionOptions = null;
+exports.comment = null;
+exports.unixPermissions = null;
+exports.dosPermissions = null;
+
+},{}],6:[function(require,module,exports){
+"use strict";
+
+// load the global object first:
+// - it should be better integrated in the system (unhandledRejection in node)
+// - the environment may have a custom Promise implementation (see zone.js)
+var ES6Promise = null;
+if (typeof Promise !== "undefined") {
+ ES6Promise = Promise;
+} else {
+ ES6Promise = require("lie");
+}
+
+/**
+ * Let the user use/change some implementations.
+ */
+module.exports = {
+ Promise: ES6Promise
+};
+
+},{"lie":37}],7:[function(require,module,exports){
+"use strict";
+var USE_TYPEDARRAY = (typeof Uint8Array !== "undefined") && (typeof Uint16Array !== "undefined") && (typeof Uint32Array !== "undefined");
+
+var pako = require("pako");
+var utils = require("./utils");
+var GenericWorker = require("./stream/GenericWorker");
+
+var ARRAY_TYPE = USE_TYPEDARRAY ? "uint8array" : "array";
+
+exports.magic = "\x08\x00";
+
+/**
+ * Create a worker that uses pako to inflate/deflate.
+ * @constructor
+ * @param {String} action the name of the pako function to call : either "Deflate" or "Inflate".
+ * @param {Object} options the options to use when (de)compressing.
+ */
+function FlateWorker(action, options) {
+ GenericWorker.call(this, "FlateWorker/" + action);
+
+ this._pako = null;
+ this._pakoAction = action;
+ this._pakoOptions = options;
+ // the `meta` object from the last chunk received
+ // this allow this worker to pass around metadata
+ this.meta = {};
+}
+
+utils.inherits(FlateWorker, GenericWorker);
+
+/**
+ * @see GenericWorker.processChunk
+ */
+FlateWorker.prototype.processChunk = function (chunk) {
+ this.meta = chunk.meta;
+ if (this._pako === null) {
+ this._createPako();
+ }
+ this._pako.push(utils.transformTo(ARRAY_TYPE, chunk.data), false);
+};
+
+/**
+ * @see GenericWorker.flush
+ */
+FlateWorker.prototype.flush = function () {
+ GenericWorker.prototype.flush.call(this);
+ if (this._pako === null) {
+ this._createPako();
+ }
+ this._pako.push([], true);
+};
+/**
+ * @see GenericWorker.cleanUp
+ */
+FlateWorker.prototype.cleanUp = function () {
+ GenericWorker.prototype.cleanUp.call(this);
+ this._pako = null;
+};
+
+/**
+ * Create the _pako object.
+ * TODO: lazy-loading this object isn't the best solution but it's the
+ * quickest. The best solution is to lazy-load the worker list. See also the
+ * issue #446.
+ */
+FlateWorker.prototype._createPako = function () {
+ this._pako = new pako[this._pakoAction]({
+ raw: true,
+ level: this._pakoOptions.level || -1 // default compression
+ });
+ var self = this;
+ this._pako.onData = function(data) {
+ self.push({
+ data : data,
+ meta : self.meta
+ });
+ };
+};
+
+exports.compressWorker = function (compressionOptions) {
+ return new FlateWorker("Deflate", compressionOptions);
+};
+exports.uncompressWorker = function () {
+ return new FlateWorker("Inflate", {});
+};
+
+},{"./stream/GenericWorker":28,"./utils":32,"pako":38}],8:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var GenericWorker = require("../stream/GenericWorker");
+var utf8 = require("../utf8");
+var crc32 = require("../crc32");
+var signature = require("../signature");
+
+/**
+ * Transform an integer into a string in hexadecimal.
+ * @private
+ * @param {number} dec the number to convert.
+ * @param {number} bytes the number of bytes to generate.
+ * @returns {string} the result.
+ */
+var decToHex = function(dec, bytes) {
+ var hex = "", i;
+ for (i = 0; i < bytes; i++) {
+ hex += String.fromCharCode(dec & 0xff);
+ dec = dec >>> 8;
+ }
+ return hex;
+};
+
+/**
+ * Generate the UNIX part of the external file attributes.
+ * @param {Object} unixPermissions the unix permissions or null.
+ * @param {Boolean} isDir true if the entry is a directory, false otherwise.
+ * @return {Number} a 32 bit integer.
+ *
+ * adapted from http://unix.stackexchange.com/questions/14705/the-zip-formats-external-file-attribute :
+ *
+ * TTTTsstrwxrwxrwx0000000000ADVSHR
+ * ^^^^____________________________ file type, see zipinfo.c (UNX_*)
+ * ^^^_________________________ setuid, setgid, sticky
+ * ^^^^^^^^^________________ permissions
+ * ^^^^^^^^^^______ not used ?
+ * ^^^^^^ DOS attribute bits : Archive, Directory, Volume label, System file, Hidden, Read only
+ */
+var generateUnixExternalFileAttr = function (unixPermissions, isDir) {
+
+ var result = unixPermissions;
+ if (!unixPermissions) {
+ // I can't use octal values in strict mode, hence the hexa.
+ // 040775 => 0x41fd
+ // 0100664 => 0x81b4
+ result = isDir ? 0x41fd : 0x81b4;
+ }
+ return (result & 0xFFFF) << 16;
+};
+
+/**
+ * Generate the DOS part of the external file attributes.
+ * @param {Object} dosPermissions the dos permissions or null.
+ * @param {Boolean} isDir true if the entry is a directory, false otherwise.
+ * @return {Number} a 32 bit integer.
+ *
+ * Bit 0 Read-Only
+ * Bit 1 Hidden
+ * Bit 2 System
+ * Bit 3 Volume Label
+ * Bit 4 Directory
+ * Bit 5 Archive
+ */
+var generateDosExternalFileAttr = function (dosPermissions) {
+ // the dir flag is already set for compatibility
+ return (dosPermissions || 0) & 0x3F;
+};
+
+/**
+ * Generate the various parts used in the construction of the final zip file.
+ * @param {Object} streamInfo the hash with information about the compressed file.
+ * @param {Boolean} streamedContent is the content streamed ?
+ * @param {Boolean} streamingEnded is the stream finished ?
+ * @param {number} offset the current offset from the start of the zip file.
+ * @param {String} platform let's pretend we are this platform (change platform dependents fields)
+ * @param {Function} encodeFileName the function to encode the file name / comment.
+ * @return {Object} the zip parts.
+ */
+var generateZipParts = function(streamInfo, streamedContent, streamingEnded, offset, platform, encodeFileName) {
+ var file = streamInfo["file"],
+ compression = streamInfo["compression"],
+ useCustomEncoding = encodeFileName !== utf8.utf8encode,
+ encodedFileName = utils.transformTo("string", encodeFileName(file.name)),
+ utfEncodedFileName = utils.transformTo("string", utf8.utf8encode(file.name)),
+ comment = file.comment,
+ encodedComment = utils.transformTo("string", encodeFileName(comment)),
+ utfEncodedComment = utils.transformTo("string", utf8.utf8encode(comment)),
+ useUTF8ForFileName = utfEncodedFileName.length !== file.name.length,
+ useUTF8ForComment = utfEncodedComment.length !== comment.length,
+ dosTime,
+ dosDate,
+ extraFields = "",
+ unicodePathExtraField = "",
+ unicodeCommentExtraField = "",
+ dir = file.dir,
+ date = file.date;
+
+
+ var dataInfo = {
+ crc32 : 0,
+ compressedSize : 0,
+ uncompressedSize : 0
+ };
+
+ // if the content is streamed, the sizes/crc32 are only available AFTER
+ // the end of the stream.
+ if (!streamedContent || streamingEnded) {
+ dataInfo.crc32 = streamInfo["crc32"];
+ dataInfo.compressedSize = streamInfo["compressedSize"];
+ dataInfo.uncompressedSize = streamInfo["uncompressedSize"];
+ }
+
+ var bitflag = 0;
+ if (streamedContent) {
+ // Bit 3: the sizes/crc32 are set to zero in the local header.
+ // The correct values are put in the data descriptor immediately
+ // following the compressed data.
+ bitflag |= 0x0008;
+ }
+ if (!useCustomEncoding && (useUTF8ForFileName || useUTF8ForComment)) {
+ // Bit 11: Language encoding flag (EFS).
+ bitflag |= 0x0800;
+ }
+
+
+ var extFileAttr = 0;
+ var versionMadeBy = 0;
+ if (dir) {
+ // dos or unix, we set the dos dir flag
+ extFileAttr |= 0x00010;
+ }
+ if(platform === "UNIX") {
+ versionMadeBy = 0x031E; // UNIX, version 3.0
+ extFileAttr |= generateUnixExternalFileAttr(file.unixPermissions, dir);
+ } else { // DOS or other, fallback to DOS
+ versionMadeBy = 0x0014; // DOS, version 2.0
+ extFileAttr |= generateDosExternalFileAttr(file.dosPermissions, dir);
+ }
+
+ // date
+ // @see http://www.delorie.com/djgpp/doc/rbinter/it/52/13.html
+ // @see http://www.delorie.com/djgpp/doc/rbinter/it/65/16.html
+ // @see http://www.delorie.com/djgpp/doc/rbinter/it/66/16.html
+
+ dosTime = date.getUTCHours();
+ dosTime = dosTime << 6;
+ dosTime = dosTime | date.getUTCMinutes();
+ dosTime = dosTime << 5;
+ dosTime = dosTime | date.getUTCSeconds() / 2;
+
+ dosDate = date.getUTCFullYear() - 1980;
+ dosDate = dosDate << 4;
+ dosDate = dosDate | (date.getUTCMonth() + 1);
+ dosDate = dosDate << 5;
+ dosDate = dosDate | date.getUTCDate();
+
+ if (useUTF8ForFileName) {
+ // set the unicode path extra field. unzip needs at least one extra
+ // field to correctly handle unicode path, so using the path is as good
+ // as any other information. This could improve the situation with
+ // other archive managers too.
+ // This field is usually used without the utf8 flag, with a non
+ // unicode path in the header (winrar, winzip). This helps (a bit)
+ // with the messy Windows' default compressed folders feature but
+ // breaks on p7zip which doesn't seek the unicode path extra field.
+ // So for now, UTF-8 everywhere !
+ unicodePathExtraField =
+ // Version
+ decToHex(1, 1) +
+ // NameCRC32
+ decToHex(crc32(encodedFileName), 4) +
+ // UnicodeName
+ utfEncodedFileName;
+
+ extraFields +=
+ // Info-ZIP Unicode Path Extra Field
+ "\x75\x70" +
+ // size
+ decToHex(unicodePathExtraField.length, 2) +
+ // content
+ unicodePathExtraField;
+ }
+
+ if(useUTF8ForComment) {
+
+ unicodeCommentExtraField =
+ // Version
+ decToHex(1, 1) +
+ // CommentCRC32
+ decToHex(crc32(encodedComment), 4) +
+ // UnicodeName
+ utfEncodedComment;
+
+ extraFields +=
+ // Info-ZIP Unicode Path Extra Field
+ "\x75\x63" +
+ // size
+ decToHex(unicodeCommentExtraField.length, 2) +
+ // content
+ unicodeCommentExtraField;
+ }
+
+ var header = "";
+
+ // version needed to extract
+ header += "\x0A\x00";
+ // general purpose bit flag
+ header += decToHex(bitflag, 2);
+ // compression method
+ header += compression.magic;
+ // last mod file time
+ header += decToHex(dosTime, 2);
+ // last mod file date
+ header += decToHex(dosDate, 2);
+ // crc-32
+ header += decToHex(dataInfo.crc32, 4);
+ // compressed size
+ header += decToHex(dataInfo.compressedSize, 4);
+ // uncompressed size
+ header += decToHex(dataInfo.uncompressedSize, 4);
+ // file name length
+ header += decToHex(encodedFileName.length, 2);
+ // extra field length
+ header += decToHex(extraFields.length, 2);
+
+
+ var fileRecord = signature.LOCAL_FILE_HEADER + header + encodedFileName + extraFields;
+
+ var dirRecord = signature.CENTRAL_FILE_HEADER +
+ // version made by (00: DOS)
+ decToHex(versionMadeBy, 2) +
+ // file header (common to file and central directory)
+ header +
+ // file comment length
+ decToHex(encodedComment.length, 2) +
+ // disk number start
+ "\x00\x00" +
+ // internal file attributes TODO
+ "\x00\x00" +
+ // external file attributes
+ decToHex(extFileAttr, 4) +
+ // relative offset of local header
+ decToHex(offset, 4) +
+ // file name
+ encodedFileName +
+ // extra field
+ extraFields +
+ // file comment
+ encodedComment;
+
+ return {
+ fileRecord: fileRecord,
+ dirRecord: dirRecord
+ };
+};
+
+/**
+ * Generate the EOCD record.
+ * @param {Number} entriesCount the number of entries in the zip file.
+ * @param {Number} centralDirLength the length (in bytes) of the central dir.
+ * @param {Number} localDirLength the length (in bytes) of the local dir.
+ * @param {String} comment the zip file comment as a binary string.
+ * @param {Function} encodeFileName the function to encode the comment.
+ * @return {String} the EOCD record.
+ */
+var generateCentralDirectoryEnd = function (entriesCount, centralDirLength, localDirLength, comment, encodeFileName) {
+ var dirEnd = "";
+ var encodedComment = utils.transformTo("string", encodeFileName(comment));
+
+ // end of central dir signature
+ dirEnd = signature.CENTRAL_DIRECTORY_END +
+ // number of this disk
+ "\x00\x00" +
+ // number of the disk with the start of the central directory
+ "\x00\x00" +
+ // total number of entries in the central directory on this disk
+ decToHex(entriesCount, 2) +
+ // total number of entries in the central directory
+ decToHex(entriesCount, 2) +
+ // size of the central directory 4 bytes
+ decToHex(centralDirLength, 4) +
+ // offset of start of central directory with respect to the starting disk number
+ decToHex(localDirLength, 4) +
+ // .ZIP file comment length
+ decToHex(encodedComment.length, 2) +
+ // .ZIP file comment
+ encodedComment;
+
+ return dirEnd;
+};
+
+/**
+ * Generate data descriptors for a file entry.
+ * @param {Object} streamInfo the hash generated by a worker, containing information
+ * on the file entry.
+ * @return {String} the data descriptors.
+ */
+var generateDataDescriptors = function (streamInfo) {
+ var descriptor = "";
+ descriptor = signature.DATA_DESCRIPTOR +
+ // crc-32 4 bytes
+ decToHex(streamInfo["crc32"], 4) +
+ // compressed size 4 bytes
+ decToHex(streamInfo["compressedSize"], 4) +
+ // uncompressed size 4 bytes
+ decToHex(streamInfo["uncompressedSize"], 4);
+
+ return descriptor;
+};
+
+
+/**
+ * A worker to concatenate other workers to create a zip file.
+ * @param {Boolean} streamFiles `true` to stream the content of the files,
+ * `false` to accumulate it.
+ * @param {String} comment the comment to use.
+ * @param {String} platform the platform to use, "UNIX" or "DOS".
+ * @param {Function} encodeFileName the function to encode file names and comments.
+ */
+function ZipFileWorker(streamFiles, comment, platform, encodeFileName) {
+ GenericWorker.call(this, "ZipFileWorker");
+ // The number of bytes written so far. This doesn't count accumulated chunks.
+ this.bytesWritten = 0;
+ // The comment of the zip file
+ this.zipComment = comment;
+ // The platform "generating" the zip file.
+ this.zipPlatform = platform;
+ // the function to encode file names and comments.
+ this.encodeFileName = encodeFileName;
+ // Should we stream the content of the files ?
+ this.streamFiles = streamFiles;
+ // If `streamFiles` is false, we will need to accumulate the content of the
+ // files to calculate sizes / crc32 (and write them *before* the content).
+ // This boolean indicates if we are accumulating chunks (it will change a lot
+ // during the lifetime of this worker).
+ this.accumulate = false;
+ // The buffer receiving chunks when accumulating content.
+ this.contentBuffer = [];
+ // The list of generated directory records.
+ this.dirRecords = [];
+ // The offset (in bytes) from the beginning of the zip file for the current source.
+ this.currentSourceOffset = 0;
+ // The total number of entries in this zip file.
+ this.entriesCount = 0;
+ // the name of the file currently being added, null when handling the end of the zip file.
+ // Used for the emitted metadata.
+ this.currentFile = null;
+
+
+
+ this._sources = [];
+}
+utils.inherits(ZipFileWorker, GenericWorker);
+
+/**
+ * @see GenericWorker.push
+ */
+ZipFileWorker.prototype.push = function (chunk) {
+
+ var currentFilePercent = chunk.meta.percent || 0;
+ var entriesCount = this.entriesCount;
+ var remainingFiles = this._sources.length;
+
+ if(this.accumulate) {
+ this.contentBuffer.push(chunk);
+ } else {
+ this.bytesWritten += chunk.data.length;
+
+ GenericWorker.prototype.push.call(this, {
+ data : chunk.data,
+ meta : {
+ currentFile : this.currentFile,
+ percent : entriesCount ? (currentFilePercent + 100 * (entriesCount - remainingFiles - 1)) / entriesCount : 100
+ }
+ });
+ }
+};
+
+/**
+ * The worker started a new source (an other worker).
+ * @param {Object} streamInfo the streamInfo object from the new source.
+ */
+ZipFileWorker.prototype.openedSource = function (streamInfo) {
+ this.currentSourceOffset = this.bytesWritten;
+ this.currentFile = streamInfo["file"].name;
+
+ var streamedContent = this.streamFiles && !streamInfo["file"].dir;
+
+ // don't stream folders (because they don't have any content)
+ if(streamedContent) {
+ var record = generateZipParts(streamInfo, streamedContent, false, this.currentSourceOffset, this.zipPlatform, this.encodeFileName);
+ this.push({
+ data : record.fileRecord,
+ meta : {percent:0}
+ });
+ } else {
+ // we need to wait for the whole file before pushing anything
+ this.accumulate = true;
+ }
+};
+
+/**
+ * The worker finished a source (an other worker).
+ * @param {Object} streamInfo the streamInfo object from the finished source.
+ */
+ZipFileWorker.prototype.closedSource = function (streamInfo) {
+ this.accumulate = false;
+ var streamedContent = this.streamFiles && !streamInfo["file"].dir;
+ var record = generateZipParts(streamInfo, streamedContent, true, this.currentSourceOffset, this.zipPlatform, this.encodeFileName);
+
+ this.dirRecords.push(record.dirRecord);
+ if(streamedContent) {
+ // after the streamed file, we put data descriptors
+ this.push({
+ data : generateDataDescriptors(streamInfo),
+ meta : {percent:100}
+ });
+ } else {
+ // the content wasn't streamed, we need to push everything now
+ // first the file record, then the content
+ this.push({
+ data : record.fileRecord,
+ meta : {percent:0}
+ });
+ while(this.contentBuffer.length) {
+ this.push(this.contentBuffer.shift());
+ }
+ }
+ this.currentFile = null;
+};
+
+/**
+ * @see GenericWorker.flush
+ */
+ZipFileWorker.prototype.flush = function () {
+
+ var localDirLength = this.bytesWritten;
+ for(var i = 0; i < this.dirRecords.length; i++) {
+ this.push({
+ data : this.dirRecords[i],
+ meta : {percent:100}
+ });
+ }
+ var centralDirLength = this.bytesWritten - localDirLength;
+
+ var dirEnd = generateCentralDirectoryEnd(this.dirRecords.length, centralDirLength, localDirLength, this.zipComment, this.encodeFileName);
+
+ this.push({
+ data : dirEnd,
+ meta : {percent:100}
+ });
+};
+
+/**
+ * Prepare the next source to be read.
+ */
+ZipFileWorker.prototype.prepareNextSource = function () {
+ this.previous = this._sources.shift();
+ this.openedSource(this.previous.streamInfo);
+ if (this.isPaused) {
+ this.previous.pause();
+ } else {
+ this.previous.resume();
+ }
+};
+
+/**
+ * @see GenericWorker.registerPrevious
+ */
+ZipFileWorker.prototype.registerPrevious = function (previous) {
+ this._sources.push(previous);
+ var self = this;
+
+ previous.on("data", function (chunk) {
+ self.processChunk(chunk);
+ });
+ previous.on("end", function () {
+ self.closedSource(self.previous.streamInfo);
+ if(self._sources.length) {
+ self.prepareNextSource();
+ } else {
+ self.end();
+ }
+ });
+ previous.on("error", function (e) {
+ self.error(e);
+ });
+ return this;
+};
+
+/**
+ * @see GenericWorker.resume
+ */
+ZipFileWorker.prototype.resume = function () {
+ if(!GenericWorker.prototype.resume.call(this)) {
+ return false;
+ }
+
+ if (!this.previous && this._sources.length) {
+ this.prepareNextSource();
+ return true;
+ }
+ if (!this.previous && !this._sources.length && !this.generatedError) {
+ this.end();
+ return true;
+ }
+};
+
+/**
+ * @see GenericWorker.error
+ */
+ZipFileWorker.prototype.error = function (e) {
+ var sources = this._sources;
+ if(!GenericWorker.prototype.error.call(this, e)) {
+ return false;
+ }
+ for(var i = 0; i < sources.length; i++) {
+ try {
+ sources[i].error(e);
+ } catch(e) {
+ // the `error` exploded, nothing to do
+ }
+ }
+ return true;
+};
+
+/**
+ * @see GenericWorker.lock
+ */
+ZipFileWorker.prototype.lock = function () {
+ GenericWorker.prototype.lock.call(this);
+ var sources = this._sources;
+ for(var i = 0; i < sources.length; i++) {
+ sources[i].lock();
+ }
+};
+
+module.exports = ZipFileWorker;
+
+},{"../crc32":4,"../signature":23,"../stream/GenericWorker":28,"../utf8":31,"../utils":32}],9:[function(require,module,exports){
+"use strict";
+
+var compressions = require("../compressions");
+var ZipFileWorker = require("./ZipFileWorker");
+
+/**
+ * Find the compression to use.
+ * @param {String} fileCompression the compression defined at the file level, if any.
+ * @param {String} zipCompression the compression defined at the load() level.
+ * @return {Object} the compression object to use.
+ */
+var getCompression = function (fileCompression, zipCompression) {
+
+ var compressionName = fileCompression || zipCompression;
+ var compression = compressions[compressionName];
+ if (!compression) {
+ throw new Error(compressionName + " is not a valid compression method !");
+ }
+ return compression;
+};
+
+/**
+ * Create a worker to generate a zip file.
+ * @param {JSZip} zip the JSZip instance at the right root level.
+ * @param {Object} options to generate the zip file.
+ * @param {String} comment the comment to use.
+ */
+exports.generateWorker = function (zip, options, comment) {
+
+ var zipFileWorker = new ZipFileWorker(options.streamFiles, comment, options.platform, options.encodeFileName);
+ var entriesCount = 0;
+ try {
+
+ zip.forEach(function (relativePath, file) {
+ entriesCount++;
+ var compression = getCompression(file.options.compression, options.compression);
+ var compressionOptions = file.options.compressionOptions || options.compressionOptions || {};
+ var dir = file.dir, date = file.date;
+
+ file._compressWorker(compression, compressionOptions)
+ .withStreamInfo("file", {
+ name : relativePath,
+ dir : dir,
+ date : date,
+ comment : file.comment || "",
+ unixPermissions : file.unixPermissions,
+ dosPermissions : file.dosPermissions
+ })
+ .pipe(zipFileWorker);
+ });
+ zipFileWorker.entriesCount = entriesCount;
+ } catch (e) {
+ zipFileWorker.error(e);
+ }
+
+ return zipFileWorker;
+};
+
+},{"../compressions":3,"./ZipFileWorker":8}],10:[function(require,module,exports){
+"use strict";
+
+/**
+ * Representation a of zip file in js
+ * @constructor
+ */
+function JSZip() {
+ // if this constructor is used without `new`, it adds `new` before itself:
+ if(!(this instanceof JSZip)) {
+ return new JSZip();
+ }
+
+ if(arguments.length) {
+ throw new Error("The constructor with parameters has been removed in JSZip 3.0, please check the upgrade guide.");
+ }
+
+ // object containing the files :
+ // {
+ // "folder/" : {...},
+ // "folder/data.txt" : {...}
+ // }
+ // NOTE: we use a null prototype because we do not
+ // want filenames like "toString" coming from a zip file
+ // to overwrite methods and attributes in a normal Object.
+ this.files = Object.create(null);
+
+ this.comment = null;
+
+ // Where we are in the hierarchy
+ this.root = "";
+ this.clone = function() {
+ var newObj = new JSZip();
+ for (var i in this) {
+ if (typeof this[i] !== "function") {
+ newObj[i] = this[i];
+ }
+ }
+ return newObj;
+ };
+}
+JSZip.prototype = require("./object");
+JSZip.prototype.loadAsync = require("./load");
+JSZip.support = require("./support");
+JSZip.defaults = require("./defaults");
+
+// TODO find a better way to handle this version,
+// a require('package.json').version doesn't work with webpack, see #327
+JSZip.version = "3.10.1";
+
+JSZip.loadAsync = function (content, options) {
+ return new JSZip().loadAsync(content, options);
+};
+
+JSZip.external = require("./external");
+module.exports = JSZip;
+
+},{"./defaults":5,"./external":6,"./load":11,"./object":15,"./support":30}],11:[function(require,module,exports){
+"use strict";
+var utils = require("./utils");
+var external = require("./external");
+var utf8 = require("./utf8");
+var ZipEntries = require("./zipEntries");
+var Crc32Probe = require("./stream/Crc32Probe");
+var nodejsUtils = require("./nodejsUtils");
+
/**
 * Check the CRC32 of an entry.
 * Streams the entry's decompressed content through a Crc32Probe and compares
 * the computed checksum with the CRC32 recorded in the zip entry.
 * @param {ZipEntry} zipEntry the zip entry to check.
 * @return {Promise} resolves if the checksums match, rejects with an Error
 * on stream failure or CRC32 mismatch.
 */
function checkEntryCRC32(zipEntry) {
    return new external.Promise(function (resolve, reject) {
        var worker = zipEntry.decompressed.getContentWorker().pipe(new Crc32Probe());
        worker.on("error", function (e) {
            reject(e);
        })
        .on("end", function () {
            if (worker.streamInfo.crc32 !== zipEntry.decompressed.crc32) {
                reject(new Error("Corrupted zip : CRC32 mismatch"));
            } else {
                resolve();
            }
        })
        // Workers start paused; resume() actually starts the data flow.
        .resume();
    });
}
+
/**
 * Load a zip archive into this JSZip instance (implementation of loadAsync).
 * Called with `this` bound to a JSZip instance.
 * @param {String|ArrayBuffer|Uint8Array|Buffer} data the serialized zip file.
 * @param {Object} options loading options (base64, checkCRC32, createFolders,
 * optimizedBinaryString, decodeFileName).
 * @return {Promise<JSZip>} resolves with `this` once all entries are added.
 */
module.exports = function (data, options) {
    var zip = this;
    options = utils.extend(options || {}, {
        base64: false,
        checkCRC32: false,
        optimizedBinaryString: false,
        createFolders: false,
        decodeFileName: utf8.utf8decode
    });

    // Streams can't be parsed: parsing needs random access to the whole content.
    if (nodejsUtils.isNode && nodejsUtils.isStream(data)) {
        return external.Promise.reject(new Error("JSZip can't accept a stream when loading a zip file."));
    }

    return utils.prepareContent("the loaded zip file", data, true, options.optimizedBinaryString, options.base64)
        .then(function (data) {
            var zipEntries = new ZipEntries(options);
            zipEntries.load(data);
            return zipEntries;
        }).then(function checkCRC32(zipEntries) {
            // First promise carries zipEntries through Promise.all so the next
            // step can shift() it off the results array.
            var promises = [external.Promise.resolve(zipEntries)];
            var files = zipEntries.files;
            if (options.checkCRC32) {
                for (var i = 0; i < files.length; i++) {
                    promises.push(checkEntryCRC32(files[i]));
                }
            }
            return external.Promise.all(promises);
        }).then(function addFiles(results) {
            var zipEntries = results.shift();
            var files = zipEntries.files;
            for (var i = 0; i < files.length; i++) {
                var input = files[i];

                // utils.resolve() normalizes "."/".." segments: protection
                // against path traversal ("zip slip") in entry names.
                var unsafeName = input.fileNameStr;
                var safeName = utils.resolve(input.fileNameStr);

                zip.file(safeName, input.decompressed, {
                    binary: true,
                    optimizedBinaryString: true,
                    date: input.date,
                    dir: input.dir,
                    comment: input.fileCommentStr.length ? input.fileCommentStr : null,
                    unixPermissions: input.unixPermissions,
                    dosPermissions: input.dosPermissions,
                    createFolders: options.createFolders
                });
                // Keep the original (possibly unsafe) name available to callers.
                if (!input.dir) {
                    zip.file(safeName).unsafeOriginalName = unsafeName;
                }
            }
            if (zipEntries.zipComment.length) {
                zip.comment = zipEntries.zipComment;
            }

            return zip;
        });
};
+
+},{"./external":6,"./nodejsUtils":14,"./stream/Crc32Probe":25,"./utf8":31,"./utils":32,"./zipEntries":33}],12:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var GenericWorker = require("../stream/GenericWorker");
+
/**
 * A worker that uses a nodejs stream as source.
 * @constructor
 * @param {String} filename the name of the file entry for this stream.
 * @param {Readable} stream the nodejs stream.
 */
function NodejsStreamInputAdapter(filename, stream) {
    GenericWorker.call(this, "Nodejs stream input adapter for " + filename);
    // Set when the source stream ends while this worker is paused,
    // so resume() knows to finish instead of resuming the stream.
    this._upstreamEnded = false;
    this._bindStream(stream);
}

utils.inherits(NodejsStreamInputAdapter, GenericWorker);
+
/**
 * Prepare the stream and bind the callbacks on it.
 * Do this ASAP on node 0.10 ! A lazy binding doesn't always work.
 * @param {Stream} stream the nodejs stream to use.
 */
NodejsStreamInputAdapter.prototype._bindStream = function (stream) {
    var self = this;
    this._stream = stream;
    stream.pause();
    stream
        .on("data", function (chunk) {
            self.push({
                data: chunk,
                meta : {
                    percent : 0
                }
            });
        })
        .on("error", function (e) {
            if(self.isPaused) {
                // Record the error on the WORKER so GenericWorker.resume()
                // can surface it later. EventEmitter invokes listeners with
                // the emitter as `this`, so the previous `this.generatedError`
                // stored the error on the stream and silently lost it.
                self.generatedError = e;
            } else {
                self.error(e);
            }
        })
        .on("end", function () {
            if(self.isPaused) {
                self._upstreamEnded = true;
            } else {
                self.end();
            }
        });
};
/**
 * @see GenericWorker.pause
 * Also pauses the underlying nodejs stream so no chunk arrives while paused.
 */
NodejsStreamInputAdapter.prototype.pause = function () {
    if(!GenericWorker.prototype.pause.call(this)) {
        return false;
    }
    this._stream.pause();
    return true;
};
/**
 * @see GenericWorker.resume
 * If the source stream ended while this worker was paused, finish now
 * instead of resuming the (already ended) stream.
 */
NodejsStreamInputAdapter.prototype.resume = function () {
    if(!GenericWorker.prototype.resume.call(this)) {
        return false;
    }

    if(this._upstreamEnded) {
        this.end();
    } else {
        this._stream.resume();
    }

    return true;
};

module.exports = NodejsStreamInputAdapter;
+
+},{"../stream/GenericWorker":28,"../utils":32}],13:[function(require,module,exports){
+"use strict";
+
+var Readable = require("readable-stream").Readable;
+
+var utils = require("../utils");
+utils.inherits(NodejsStreamOutputAdapter, Readable);
+
/**
* A nodejs stream using a worker as source.
* @see the SourceWrapper in http://nodejs.org/api/stream.html
* @constructor
* @param {StreamHelper} helper the helper wrapping the worker
* @param {Object} options the nodejs stream options
* @param {Function} updateCb the update callback.
*/
function NodejsStreamOutputAdapter(helper, options, updateCb) {
    Readable.call(this, options);
    this._helper = helper;

    var self = this;
    helper.on("data", function (data, meta) {
        // push() returning false means the consumer's buffer is full:
        // apply backpressure by pausing the source worker. _read() resumes it.
        if (!self.push(data)) {
            self._helper.pause();
        }
        if(updateCb) {
            updateCb(meta);
        }
    })
    .on("error", function(e) {
        self.emit("error", e);
    })
    .on("end", function () {
        // null signals end-of-stream to the Readable machinery.
        self.push(null);
    });
}


// Called by the Readable machinery when the consumer wants more data.
NodejsStreamOutputAdapter.prototype._read = function() {
    this._helper.resume();
};

module.exports = NodejsStreamOutputAdapter;
+
+},{"../utils":32,"readable-stream":16}],14:[function(require,module,exports){
+"use strict";
+
module.exports = {
    /**
     * True if this is running in Nodejs, will be undefined in a browser.
     * In a browser, browserify won't include this file and the whole module
     * will be resolved to an empty object.
     */
    isNode : typeof Buffer !== "undefined",
    /**
     * Create a new nodejs Buffer from an existing content.
     * Falls back to the deprecated Buffer constructor on old Node.js
     * versions where Buffer.from is missing or is the inherited
     * Uint8Array.from (which has different semantics).
     * @param {Object} data the data to pass to the constructor.
     * @param {String} encoding the encoding to use.
     * @return {Buffer} a new Buffer.
     */
    newBufferFrom: function(data, encoding) {
        if (Buffer.from && Buffer.from !== Uint8Array.from) {
            return Buffer.from(data, encoding);
        } else {
            if (typeof data === "number") {
                // Safeguard for old Node.js versions. On newer versions,
                // Buffer.from(number) / Buffer(number, encoding) already throw.
                throw new Error("The \"data\" argument must not be a number");
            }
            return new Buffer(data, encoding);
        }
    },
    /**
     * Create a new nodejs Buffer with the specified size.
     * Zero-filled in both branches (Buffer.alloc zero-fills by contract;
     * the legacy path fills explicitly because `new Buffer(size)` may not).
     * @param {Integer} size the size of the buffer.
     * @return {Buffer} a new Buffer.
     */
    allocBuffer: function (size) {
        if (Buffer.alloc) {
            return Buffer.alloc(size);
        } else {
            var buf = new Buffer(size);
            buf.fill(0);
            return buf;
        }
    },
    /**
     * Find out if an object is a Buffer.
     * @param {Object} b the object to test.
     * @return {Boolean} true if the object is a Buffer, false otherwise.
     */
    isBuffer : function(b){
        return Buffer.isBuffer(b);
    },

    /**
     * Duck-typed stream detection: anything with on/pause/resume methods
     * is treated as a stream.
     * @param {Object} obj the object to test.
     * @return {Boolean} true if the object looks like a stream.
     */
    isStream : function (obj) {
        return obj &&
            typeof obj.on === "function" &&
            typeof obj.pause === "function" &&
            typeof obj.resume === "function";
    }
};
+
+},{}],15:[function(require,module,exports){
+"use strict";
+var utf8 = require("./utf8");
+var utils = require("./utils");
+var GenericWorker = require("./stream/GenericWorker");
+var StreamHelper = require("./stream/StreamHelper");
+var defaults = require("./defaults");
+var CompressedObject = require("./compressedObject");
+var ZipObject = require("./zipObject");
+var generate = require("./generate");
+var nodejsUtils = require("./nodejsUtils");
+var NodejsStreamInputAdapter = require("./nodejs/NodejsStreamInputAdapter");
+
+
/**
 * Add a file in the current folder.
 * Called with `this` bound to a JSZip instance (via fileAdd.call).
 * @private
 * @param {string} name the name of the file
 * @param {String|ArrayBuffer|Uint8Array|Buffer} data the data of the file
 * @param {Object} originalOptions the options of the file
 * @return {Object} the new file.
 */
var fileAdd = function(name, data, originalOptions) {
    // be sure sub folders exist
    var dataType = utils.getTypeOf(data),
        parent;


    /*
     * Correct options.
     */

    var o = utils.extend(originalOptions || {}, defaults);
    o.date = o.date || new Date();
    if (o.compression !== null) {
        o.compression = o.compression.toUpperCase();
    }

    // Permissions may come as an octal string ("755"); normalize to a number.
    if (typeof o.unixPermissions === "string") {
        o.unixPermissions = parseInt(o.unixPermissions, 8);
    }

    // UNX_IFDIR 0040000 see zipinfo.c
    if (o.unixPermissions && (o.unixPermissions & 0x4000)) {
        o.dir = true;
    }
    // Bit 4 Directory
    if (o.dosPermissions && (o.dosPermissions & 0x0010)) {
        o.dir = true;
    }

    if (o.dir) {
        name = forceTrailingSlash(name);
    }
    // Optionally materialize intermediate folders ("a/b/c.txt" -> "a/", "a/b/").
    if (o.createFolders && (parent = parentFolder(name))) {
        folderAdd.call(this, parent, true);
    }

    // A string is treated as text (utf-8) unless the caller said otherwise.
    var isUnicodeString = dataType === "string" && o.binary === false && o.base64 === false;
    if (!originalOptions || typeof originalOptions.binary === "undefined") {
        o.binary = !isUnicodeString;
    }


    var isCompressedEmpty = (data instanceof CompressedObject) && data.uncompressedSize === 0;

    // Empty content and folders are normalized to an empty STORE'd string.
    if (isCompressedEmpty || o.dir || !data || data.length === 0) {
        o.base64 = false;
        o.binary = true;
        data = "";
        o.compression = "STORE";
        dataType = "string";
    }

    /*
     * Convert content to fit.
     */

    var zipObjectContent = null;
    if (data instanceof CompressedObject || data instanceof GenericWorker) {
        zipObjectContent = data;
    } else if (nodejsUtils.isNode && nodejsUtils.isStream(data)) {
        zipObjectContent = new NodejsStreamInputAdapter(name, data);
    } else {
        zipObjectContent = utils.prepareContent(name, data, o.binary, o.optimizedBinaryString, o.base64);
    }

    var object = new ZipObject(name, zipObjectContent, o);
    this.files[name] = object;
    /*
    TODO: we can't throw an exception because we have async promises
    (we can have a promise of a Date() for example) but returning a
    promise is useless because file(name, data) returns the JSZip
    object for chaining. Should we break that to allow the user
    to catch the error ?

    return external.Promise.resolve(zipObjectContent)
        .then(function () {
            return object;
        });
    */
};
+
/**
 * Return the parent folder of a path, ignoring any trailing slash.
 * @private
 * @param {string} path the path to inspect (may end with "/")
 * @return {string} the parent folder without trailing slash, or "" when
 * the path has no parent (root-level entry).
 */
var parentFolder = function (path) {
    // Normalize "a/b/" to "a/b" so folders and files behave alike.
    var normalized = path;
    if (normalized.slice(-1) === "/") {
        normalized = normalized.substring(0, normalized.length - 1);
    }
    var lastSlash = normalized.lastIndexOf("/");
    if (lastSlash > 0) {
        return normalized.substring(0, lastSlash);
    }
    return "";
};
+
/**
 * Returns the path with a slash at the end.
 * @private
 * @param {String} path the path to check.
 * @return {String} the path, unchanged if it already ends with "/",
 * otherwise with a "/" appended.
 */
var forceTrailingSlash = function(path) {
    return path.slice(-1) === "/" ? path : path + "/";
};
+
/**
 * Add a (sub) folder in the current folder.
 * Called with `this` bound to a JSZip instance (via folderAdd.call).
 * Idempotent: returns the existing entry if the folder is already present.
 * @private
 * @param {string} name the folder's name
 * @param {boolean=} [createFolders] If true, automatically create sub
 * folders. Defaults to false.
 * @return {Object} the new folder.
 */
var folderAdd = function(name, createFolders) {
    createFolders = (typeof createFolders !== "undefined") ? createFolders : defaults.createFolders;

    // Folder entries are always stored with a trailing slash.
    name = forceTrailingSlash(name);

    // Does this folder already exist?
    if (!this.files[name]) {
        fileAdd.call(this, name, null, {
            dir: true,
            createFolders: createFolders
        });
    }
    return this.files[name];
};
+
/**
* Cross-window, cross-Node-context regular expression detection.
* Uses the Object.prototype.toString tag instead of `instanceof`, so a
* RegExp created in another realm (iframe/vm context) is still detected.
* @param {Object} object Anything
* @return {Boolean} true if the object is a regular expression,
* false otherwise
*/
function isRegExp(object) {
    var tag = Object.prototype.toString.call(object);
    return tag === "[object RegExp]";
}
+
// return the actual prototype of JSZip
var out = {
    /**
     * @see loadAsync
     */
    load: function() {
        throw new Error("This method has been removed in JSZip 3.0, please check the upgrade guide.");
    },


    /**
     * Call a callback function for each entry at this folder level.
     * @param {Function} cb the callback function:
     * function (relativePath, file) {...}
     * It takes 2 arguments : the relative path and the file.
     */
    forEach: function(cb) {
        var filename, relativePath, file;
        // ignore warning about unwanted properties because this.files is a null prototype object
        /* eslint-disable-next-line guard-for-in */
        for (filename in this.files) {
            file = this.files[filename];
            relativePath = filename.slice(this.root.length, filename.length);
            if (relativePath && filename.slice(0, this.root.length) === this.root) { // the file is in the current root
                cb(relativePath, file); // TODO reverse the parameters ? need to be clean AND consistent with the filter search fn...
            }
        }
    },

    /**
     * Filter nested files/folders with the specified function.
     * @param {Function} search the predicate to use :
     * function (relativePath, file) {...}
     * It takes 2 arguments : the relative path and the file.
     * @return {Array} An array of matching elements.
     */
    filter: function(search) {
        var result = [];
        this.forEach(function (relativePath, entry) {
            if (search(relativePath, entry)) { // the file matches the function
                result.push(entry);
            }

        });
        return result;
    },

    /**
     * Add a file to the zip file, or search a file.
     * @param {string|RegExp} name The name of the file to add (if data is defined),
     * the name of the file to find (if no data) or a regex to match files.
     * @param {String|ArrayBuffer|Uint8Array|Buffer} data The file data, either raw or base64 encoded
     * @param {Object} o File options
     * @return {JSZip|Object|Array} this JSZip object (when adding a file),
     * a file (when searching by string) or an array of files (when searching by regex).
     */
    file: function(name, data, o) {
        if (arguments.length === 1) {
            // Single argument: search mode (regex or exact name).
            if (isRegExp(name)) {
                var regexp = name;
                return this.filter(function(relativePath, file) {
                    return !file.dir && regexp.test(relativePath);
                });
            }
            else { // text
                var obj = this.files[this.root + name];
                if (obj && !obj.dir) {
                    return obj;
                } else {
                    return null;
                }
            }
        }
        else { // more than one argument : we have data !
            name = this.root + name;
            fileAdd.call(this, name, data, o);
        }
        return this;
    },

    /**
     * Add a directory to the zip file, or search.
     * @param {String|RegExp} arg The name of the directory to add, or a regex to search folders.
     * @return {JSZip} an object with the new directory as the root, or an array containing matching folders.
     */
    folder: function(arg) {
        if (!arg) {
            return this;
        }

        if (isRegExp(arg)) {
            return this.filter(function(relativePath, file) {
                return file.dir && arg.test(relativePath);
            });
        }

        // else, name is a new folder
        var name = this.root + arg;
        var newFolder = folderAdd.call(this, name);

        // Allow chaining by returning a new object with this folder as the root
        var ret = this.clone();
        ret.root = newFolder.name;
        return ret;
    },

    /**
     * Delete a file, or a directory and all sub-files, from the zip
     * @param {string} name the name of the file to delete
     * @return {JSZip} this JSZip object
     */
    remove: function(name) {
        name = this.root + name;
        var file = this.files[name];
        if (!file) {
            // Look for any folders
            if (name.slice(-1) !== "/") {
                name += "/";
            }
            file = this.files[name];
        }

        if (file && !file.dir) {
            // file
            delete this.files[name];
        } else {
            // maybe a folder, delete recursively
            // (prefix match removes the folder entry and everything under it)
            var kids = this.filter(function(relativePath, file) {
                return file.name.slice(0, name.length) === name;
            });
            for (var i = 0; i < kids.length; i++) {
                delete this.files[kids[i].name];
            }
        }

        return this;
    },

    /**
     * @deprecated This method has been removed in JSZip 3.0, please check the upgrade guide.
     */
    generate: function() {
        throw new Error("This method has been removed in JSZip 3.0, please check the upgrade guide.");
    },

    /**
     * Generate the complete zip file as an internal stream.
     * @param {Object} options the options to generate the zip file :
     * - compression, "STORE" by default.
     * - type, "base64" by default. Values are : string, base64, uint8array, arraybuffer, blob.
     * @return {StreamHelper} the streamed zip file.
     */
    generateInternalStream: function(options) {
        var worker, opts = {};
        try {
            opts = utils.extend(options || {}, {
                streamFiles: false,
                compression: "STORE",
                compressionOptions : null,
                type: "",
                platform: "DOS",
                comment: null,
                mimeType: "application/zip",
                encodeFileName: utf8.utf8encode
            });

            opts.type = opts.type.toLowerCase();
            opts.compression = opts.compression.toUpperCase();

            // "binarystring" is preferred but the internals use "string".
            if(opts.type === "binarystring") {
                opts.type = "string";
            }

            if (!opts.type) {
                throw new Error("No output type specified.");
            }

            utils.checkSupport(opts.type);

            // accept nodejs `process.platform`
            // (unix-like platforms map to "UNIX" so unix permissions are written)
            if(
                opts.platform === "darwin" ||
                opts.platform === "freebsd" ||
                opts.platform === "linux" ||
                opts.platform === "sunos"
            ) {
                opts.platform = "UNIX";
            }
            if (opts.platform === "win32") {
                opts.platform = "DOS";
            }

            var comment = opts.comment || this.comment || "";
            worker = generate.generateWorker(this, opts, comment);
        } catch (e) {
            // Surface configuration errors through the stream, not synchronously.
            worker = new GenericWorker("error");
            worker.error(e);
        }
        return new StreamHelper(worker, opts.type || "string", opts.mimeType);
    },
    /**
     * Generate the complete zip file asynchronously.
     * @see generateInternalStream
     */
    generateAsync: function(options, onUpdate) {
        return this.generateInternalStream(options).accumulate(onUpdate);
    },
    /**
     * Generate the complete zip file asynchronously.
     * @see generateInternalStream
     */
    generateNodeStream: function(options, onUpdate) {
        options = options || {};
        if (!options.type) {
            options.type = "nodebuffer";
        }
        return this.generateInternalStream(options).toNodejsStream(onUpdate);
    }
};
module.exports = out;
+
+},{"./compressedObject":2,"./defaults":5,"./generate":9,"./nodejs/NodejsStreamInputAdapter":12,"./nodejsUtils":14,"./stream/GenericWorker":28,"./stream/StreamHelper":29,"./utf8":31,"./utils":32,"./zipObject":35}],16:[function(require,module,exports){
+"use strict";
/*
 * This file is used by module bundlers (browserify/webpack/etc) when
 * including a stream implementation. We use "readable-stream" to get a
 * consistent behavior between nodejs versions but bundlers often have a shim
 * for "stream". Using this shim greatly improve the compatibility and greatly
 * reduce the final size of the bundle (only one stream implementation, not
 * two).
 */
// Re-export the bundler-provided "stream" shim in place of "readable-stream".
module.exports = require("stream");
+
+},{"stream":undefined}],17:[function(require,module,exports){
+"use strict";
+var DataReader = require("./DataReader");
+var utils = require("../utils");
+
// Reader over a plain array of bytes.
// NOTE(review): this masks each element with 0xFF IN PLACE, mutating the
// caller's array — intentional upstream behavior (other readers rely on the
// mask having been applied), but worth knowing for callers.
function ArrayReader(data) {
    DataReader.call(this, data);
    for(var i = 0; i < this.data.length; i++) {
        data[i] = data[i] & 0xFF;
    }
}
utils.inherits(ArrayReader, DataReader);
/**
 * @see DataReader.byteAt
 * `zero` is the offset of the start of the logical view into the data.
 */
ArrayReader.prototype.byteAt = function(i) {
    return this.data[this.zero + i];
};
/**
 * @see DataReader.lastIndexOfSignature
 * Scans backward for the 4-byte signature; returns the index relative
 * to `zero`, or -1 if not found.
 */
ArrayReader.prototype.lastIndexOfSignature = function(sig) {
    var sig0 = sig.charCodeAt(0),
        sig1 = sig.charCodeAt(1),
        sig2 = sig.charCodeAt(2),
        sig3 = sig.charCodeAt(3);
    for (var i = this.length - 4; i >= 0; --i) {
        if (this.data[i] === sig0 && this.data[i + 1] === sig1 && this.data[i + 2] === sig2 && this.data[i + 3] === sig3) {
            return i - this.zero;
        }
    }

    return -1;
};
/**
 * @see DataReader.readAndCheckSignature
 * Consumes 4 bytes and compares them with the expected signature.
 */
ArrayReader.prototype.readAndCheckSignature = function (sig) {
    var sig0 = sig.charCodeAt(0),
        sig1 = sig.charCodeAt(1),
        sig2 = sig.charCodeAt(2),
        sig3 = sig.charCodeAt(3),
        data = this.readData(4);
    return sig0 === data[0] && sig1 === data[1] && sig2 === data[2] && sig3 === data[3];
};
/**
 * @see DataReader.readData
 * Returns a COPY (Array.prototype.slice) of the next `size` bytes and
 * advances the index.
 */
ArrayReader.prototype.readData = function(size) {
    this.checkOffset(size);
    if(size === 0) {
        return [];
    }
    var result = this.data.slice(this.zero + this.index, this.zero + this.index + size);
    this.index += size;
    return result;
};
module.exports = ArrayReader;
+
+},{"../utils":32,"./DataReader":18}],18:[function(require,module,exports){
+"use strict";
+var utils = require("../utils");
+
// Abstract base class for the zip parsers' sequential readers.
// Subclasses (ArrayReader, StringReader, Uint8ArrayReader, NodeBufferReader)
// implement byteAt / readData / lastIndexOfSignature / readAndCheckSignature.
function DataReader(data) {
    this.data = data; // type : see implementation
    this.length = data.length;
    this.index = 0;   // current read position, relative to `zero`
    this.zero = 0;    // offset of the logical start of the data
}
DataReader.prototype = {
    /**
     * Check that the offset will not go too far.
     * @param {string} offset the additional offset to check.
     * @throws {Error} an Error if the offset is out of bounds.
     */
    checkOffset: function(offset) {
        this.checkIndex(this.index + offset);
    },
    /**
     * Check that the specified index will not be too far.
     * @param {string} newIndex the index to check.
     * @throws {Error} an Error if the index is out of bounds.
     */
    checkIndex: function(newIndex) {
        if (this.length < this.zero + newIndex || newIndex < 0) {
            throw new Error("End of data reached (data length = " + this.length + ", asked index = " + (newIndex) + "). Corrupted zip ?");
        }
    },
    /**
     * Change the index.
     * @param {number} newIndex The new index.
     * @throws {Error} if the new index is out of the data.
     */
    setIndex: function(newIndex) {
        this.checkIndex(newIndex);
        this.index = newIndex;
    },
    /**
     * Skip the next n bytes.
     * @param {number} n the number of bytes to skip.
     * @throws {Error} if the new index is out of the data.
     */
    skip: function(n) {
        this.setIndex(this.index + n);
    },
    /**
     * Get the byte at the specified index.
     * @param {number} i the index to use.
     * @return {number} a byte.
     */
    byteAt: function() {
        // see implementations
    },
    /**
     * Get the next number with a given byte size.
     * Bytes are combined little-endian (least significant byte first),
     * per the zip format.
     * @param {number} size the number of bytes to read.
     * @return {number} the corresponding number.
     */
    readInt: function(size) {
        var result = 0,
            i;
        this.checkOffset(size);
        // Walk backward so the highest-address byte becomes the most significant.
        for (i = this.index + size - 1; i >= this.index; i--) {
            result = (result << 8) + this.byteAt(i);
        }
        this.index += size;
        return result;
    },
    /**
     * Get the next string with a given byte size.
     * @param {number} size the number of bytes to read.
     * @return {string} the corresponding string.
     */
    readString: function(size) {
        return utils.transformTo("string", this.readData(size));
    },
    /**
     * Get raw data without conversion, bytes.
     * @param {number} size the number of bytes to read.
     * @return {Object} the raw data, implementation specific.
     */
    readData: function() {
        // see implementations
    },
    /**
     * Find the last occurrence of a zip signature (4 bytes).
     * @param {string} sig the signature to find.
     * @return {number} the index of the last occurrence, -1 if not found.
     */
    lastIndexOfSignature: function() {
        // see implementations
    },
    /**
     * Read the signature (4 bytes) at the current position and compare it with sig.
     * @param {string} sig the expected signature
     * @return {boolean} true if the signature matches, false otherwise.
     */
    readAndCheckSignature: function() {
        // see implementations
    },
    /**
     * Get the next date.
     * Decodes the 4-byte MS-DOS date/time format (2-second resolution,
     * years offset from 1980) as a UTC Date.
     * @return {Date} the date.
     */
    readDate: function() {
        var dostime = this.readInt(4);
        return new Date(Date.UTC(
            ((dostime >> 25) & 0x7f) + 1980, // year
            ((dostime >> 21) & 0x0f) - 1, // month
            (dostime >> 16) & 0x1f, // day
            (dostime >> 11) & 0x1f, // hour
            (dostime >> 5) & 0x3f, // minute
            (dostime & 0x1f) << 1)); // second
    }
};
module.exports = DataReader;
+
+},{"../utils":32}],19:[function(require,module,exports){
+"use strict";
+var Uint8ArrayReader = require("./Uint8ArrayReader");
+var utils = require("../utils");
+
// Reader over a nodejs Buffer; inherits everything from Uint8ArrayReader
// except readData, since Buffer.slice gives a view without copying.
function NodeBufferReader(data) {
    Uint8ArrayReader.call(this, data);
}
utils.inherits(NodeBufferReader, Uint8ArrayReader);

/**
 * @see DataReader.readData
 * Note: Buffer#slice returns a view sharing memory with the source buffer.
 */
NodeBufferReader.prototype.readData = function(size) {
    this.checkOffset(size);
    var result = this.data.slice(this.zero + this.index, this.zero + this.index + size);
    this.index += size;
    return result;
};
module.exports = NodeBufferReader;
+
+},{"../utils":32,"./Uint8ArrayReader":21}],20:[function(require,module,exports){
+"use strict";
+var DataReader = require("./DataReader");
+var utils = require("../utils");
+
// Reader over a binary string (one character per byte).
function StringReader(data) {
    DataReader.call(this, data);
}
utils.inherits(StringReader, DataReader);
/**
 * @see DataReader.byteAt
 */
StringReader.prototype.byteAt = function(i) {
    return this.data.charCodeAt(this.zero + i);
};
/**
 * @see DataReader.lastIndexOfSignature
 */
StringReader.prototype.lastIndexOfSignature = function(sig) {
    return this.data.lastIndexOf(sig) - this.zero;
};
/**
 * @see DataReader.readAndCheckSignature
 * Direct string comparison: readData returns a substring here.
 */
StringReader.prototype.readAndCheckSignature = function (sig) {
    var data = this.readData(4);
    return sig === data;
};
/**
 * @see DataReader.readData
 */
StringReader.prototype.readData = function(size) {
    this.checkOffset(size);
    // String#slice returns the requested substring; no byte masking is
    // needed here (the "& 0xff" mask mentioned elsewhere is applied by
    // ArrayReader's constructor, not this one).
    var result = this.data.slice(this.zero + this.index, this.zero + this.index + size);
    this.index += size;
    return result;
};
module.exports = StringReader;
+
+},{"../utils":32,"./DataReader":18}],21:[function(require,module,exports){
+"use strict";
+var ArrayReader = require("./ArrayReader");
+var utils = require("../utils");
+
// Reader over a Uint8Array; shares ArrayReader's logic but returns
// zero-copy subarray views from readData.
function Uint8ArrayReader(data) {
    ArrayReader.call(this, data);
}
utils.inherits(Uint8ArrayReader, ArrayReader);
/**
 * @see DataReader.readData
 * Note: subarray() returns a view sharing memory with the source array.
 */
Uint8ArrayReader.prototype.readData = function(size) {
    this.checkOffset(size);
    if(size === 0) {
        // in IE10, when using subarray(idx, idx), we get the array [0x00] instead of [].
        return new Uint8Array(0);
    }
    var result = this.data.subarray(this.zero + this.index, this.zero + this.index + size);
    this.index += size;
    return result;
};
module.exports = Uint8ArrayReader;
+
+},{"../utils":32,"./ArrayReader":17}],22:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var support = require("../support");
+var ArrayReader = require("./ArrayReader");
+var StringReader = require("./StringReader");
+var NodeBufferReader = require("./NodeBufferReader");
+var Uint8ArrayReader = require("./Uint8ArrayReader");
+
/**
 * Create a reader adapted to the data.
 * Picks the most capable reader available: StringReader only when
 * Uint8Array is unsupported, NodeBufferReader for Buffers, otherwise a
 * (possibly converted) Uint8ArrayReader or ArrayReader fallback.
 * @param {String|ArrayBuffer|Uint8Array|Buffer} data the data to read.
 * @return {DataReader} the data reader.
 */
module.exports = function (data) {
    var type = utils.getTypeOf(data);
    utils.checkSupport(type);
    if (type === "string" && !support.uint8array) {
        return new StringReader(data);
    }
    if (type === "nodebuffer") {
        return new NodeBufferReader(data);
    }
    if (support.uint8array) {
        return new Uint8ArrayReader(utils.transformTo("uint8array", data));
    }
    return new ArrayReader(utils.transformTo("array", data));
};
+
+},{"../support":30,"../utils":32,"./ArrayReader":17,"./NodeBufferReader":19,"./StringReader":20,"./Uint8ArrayReader":21}],23:[function(require,module,exports){
+"use strict";
// Magic numbers of the zip format ("PK" followed by two record-type bytes).
exports.LOCAL_FILE_HEADER = "PK\x03\x04";
exports.CENTRAL_FILE_HEADER = "PK\x01\x02";
exports.CENTRAL_DIRECTORY_END = "PK\x05\x06";
exports.ZIP64_CENTRAL_DIRECTORY_LOCATOR = "PK\x06\x07";
exports.ZIP64_CENTRAL_DIRECTORY_END = "PK\x06\x06";
exports.DATA_DESCRIPTOR = "PK\x07\x08";
+
+},{}],24:[function(require,module,exports){
+"use strict";
+
+var GenericWorker = require("./GenericWorker");
+var utils = require("../utils");
+
/**
 * A worker which convert chunks to a specified type.
 * @constructor
 * @param {String} destType the destination type.
 */
function ConvertWorker(destType) {
    GenericWorker.call(this, "ConvertWorker to " + destType);
    this.destType = destType;
}
utils.inherits(ConvertWorker, GenericWorker);

/**
 * @see GenericWorker.processChunk
 * Converts each chunk's data to `destType`, passing `meta` through untouched.
 */
ConvertWorker.prototype.processChunk = function (chunk) {
    this.push({
        data : utils.transformTo(this.destType, chunk.data),
        meta : chunk.meta
    });
};
module.exports = ConvertWorker;
+
+},{"../utils":32,"./GenericWorker":28}],25:[function(require,module,exports){
+"use strict";
+
+var GenericWorker = require("./GenericWorker");
+var crc32 = require("../crc32");
+var utils = require("../utils");
+
/**
 * A worker which calculate the crc32 of the data flowing through.
 * The running checksum is exposed on streamInfo.crc32; chunks pass
 * through unchanged.
 * @constructor
 */
function Crc32Probe() {
    GenericWorker.call(this, "Crc32Probe");
    this.withStreamInfo("crc32", 0);
}
utils.inherits(Crc32Probe, GenericWorker);

/**
 * @see GenericWorker.processChunk
 */
Crc32Probe.prototype.processChunk = function (chunk) {
    // Fold this chunk into the running CRC32 and forward the chunk as-is.
    this.streamInfo.crc32 = crc32(chunk.data, this.streamInfo.crc32 || 0);
    this.push(chunk);
};
module.exports = Crc32Probe;
+
+},{"../crc32":4,"../utils":32,"./GenericWorker":28}],26:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var GenericWorker = require("./GenericWorker");
+
/**
 * A worker which calculate the total length of the data flowing through.
 * The running total is exposed on streamInfo under `propName`; chunks
 * pass through unchanged.
 * @constructor
 * @param {String} propName the name used to expose the length
 */
function DataLengthProbe(propName) {
    GenericWorker.call(this, "DataLengthProbe for " + propName);
    this.propName = propName;
    this.withStreamInfo(propName, 0);
}
utils.inherits(DataLengthProbe, GenericWorker);

/**
 * @see GenericWorker.processChunk
 */
DataLengthProbe.prototype.processChunk = function (chunk) {
    if(chunk) {
        var length = this.streamInfo[this.propName] || 0;
        this.streamInfo[this.propName] = length + chunk.data.length;
    }
    // GenericWorker's default processChunk forwards the chunk downstream.
    GenericWorker.prototype.processChunk.call(this, chunk);
};
module.exports = DataLengthProbe;
+
+
+},{"../utils":32,"./GenericWorker":28}],27:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var GenericWorker = require("./GenericWorker");
+
+// the size of the generated chunks
+// TODO expose this as a public variable
+var DEFAULT_BLOCK_SIZE = 16 * 1024;
+
/**
 * A worker that reads a content and emits chunks.
 * @constructor
 * @param {Promise} dataP the promise of the data to split
 */
function DataWorker(dataP) {
    GenericWorker.call(this, "DataWorker");
    var self = this;
    // Until the promise resolves, ticking must not start.
    this.dataIsReady = false;
    this.index = 0;   // current read position in `data`
    this.max = 0;     // total length of `data`
    this.data = null;
    this.type = "";   // result of utils.getTypeOf(data), drives the slicing in _tick

    // Guards against scheduling more than one pending tick at a time.
    this._tickScheduled = false;

    dataP.then(function (data) {
        self.dataIsReady = true;
        self.data = data;
        self.max = data && data.length || 0;
        self.type = utils.getTypeOf(data);
        // Start emitting immediately unless the worker was paused meanwhile;
        // a later resume() will start the ticking in that case.
        if(!self.isPaused) {
            self._tickAndRepeat();
        }
    }, function (e) {
        self.error(e);
    });
}

utils.inherits(DataWorker, GenericWorker);
+
/**
 * @see GenericWorker.cleanUp
 */
DataWorker.prototype.cleanUp = function () {
    GenericWorker.prototype.cleanUp.call(this);
    // Drop the reference so the (possibly large) source can be garbage collected.
    this.data = null;
};

/**
 * @see GenericWorker.resume
 */
DataWorker.prototype.resume = function () {
    if(!GenericWorker.prototype.resume.call(this)) {
        return false;
    }

    // Schedule ticking only if the data promise already resolved; otherwise
    // the promise callback in the constructor will start it.
    if (!this._tickScheduled && this.dataIsReady) {
        this._tickScheduled = true;
        utils.delay(this._tickAndRepeat, [], this);
    }
    return true;
};
+
/**
 * Trigger a tick a schedule an other call to this function.
 * Each tick emits one chunk asynchronously (via utils.delay) so large
 * inputs don't block the event loop.
 */
DataWorker.prototype._tickAndRepeat = function() {
    this._tickScheduled = false;
    if(this.isPaused || this.isFinished) {
        return;
    }
    this._tick();
    if(!this.isFinished) {
        utils.delay(this._tickAndRepeat, [], this);
        this._tickScheduled = true;
    }
};

/**
 * Read and push a chunk.
 * Emits up to DEFAULT_BLOCK_SIZE bytes, sliced according to the data type,
 * and reports progress in chunk.meta.percent. Ends the worker at EOF.
 */
DataWorker.prototype._tick = function() {

    if(this.isPaused || this.isFinished) {
        return false;
    }

    var size = DEFAULT_BLOCK_SIZE;
    var data = null, nextIndex = Math.min(this.max, this.index + size);
    if (this.index >= this.max) {
        // EOF
        return this.end();
    } else {
        switch(this.type) {
        case "string":
            data = this.data.substring(this.index, nextIndex);
            break;
        case "uint8array":
            data = this.data.subarray(this.index, nextIndex);
            break;
        case "array":
        case "nodebuffer":
            data = this.data.slice(this.index, nextIndex);
            break;
        }
        this.index = nextIndex;
        return this.push({
            data : data,
            meta : {
                percent : this.max ? this.index / this.max * 100 : 0
            }
        });
    }
};

module.exports = DataWorker;
+
+},{"../utils":32,"./GenericWorker":28}],28:[function(require,module,exports){
+"use strict";
+
+/**
+ * A worker that does nothing but passing chunks to the next one. This is like
+ * a nodejs stream but with some differences. On the good side :
+ * - it works on IE 6-9 without any issue / polyfill
+ * - it weights less than the full dependencies bundled with browserify
+ * - it forwards errors (no need to declare an error handler EVERYWHERE)
+ *
+ * A chunk is an object with 2 attributes : `meta` and `data`. The former is an
+ * object containing anything (`percent` for example), see each worker for more
+ * details. The latter is the real data (String, Uint8Array, etc).
+ *
+ * @constructor
+ * @param {String} name the name of the stream (mainly used for debugging purposes)
+ */
+function GenericWorker(name) {
+ // the name of the worker
+ this.name = name || "default";
+ // an object containing metadata about the workers chain
+ this.streamInfo = {};
+ // an error which happened when the worker was paused
+ this.generatedError = null;
+ // an object containing metadata to be merged by this worker into the general metadata
+ this.extraStreamInfo = {};
+ // true if the stream is paused (and should not do anything), false otherwise
+ this.isPaused = true;
+ // true if the stream is finished (and should not do anything), false otherwise
+ this.isFinished = false;
+ // true if the stream is locked to prevent further structure updates (pipe), false otherwise
+ this.isLocked = false;
+ // the event listeners
+ this._listeners = {
+ "data":[],
+ "end":[],
+ "error":[]
+ };
+ // the previous worker, if any
+ this.previous = null;
+}
+
+GenericWorker.prototype = {
+ /**
+ * Push a chunk to the next workers.
+ * @param {Object} chunk the chunk to push
+ */
+ push : function (chunk) {
+ this.emit("data", chunk);
+ },
+ /**
+ * End the stream.
+ * @return {Boolean} true if this call ended the worker, false otherwise.
+ */
+ end : function () {
+ if (this.isFinished) {
+ return false;
+ }
+
+ this.flush();
+ try {
+ this.emit("end");
+ this.cleanUp();
+ this.isFinished = true;
+ } catch (e) {
+ this.emit("error", e);
+ }
+ return true;
+ },
+ /**
+ * End the stream with an error.
+ * @param {Error} e the error which caused the premature end.
+ * @return {Boolean} true if this call ended the worker with an error, false otherwise.
+ */
+ error : function (e) {
+ if (this.isFinished) {
+ return false;
+ }
+
+ if(this.isPaused) {
+ this.generatedError = e;
+ } else {
+ this.isFinished = true;
+
+ this.emit("error", e);
+
+ // in the workers chain exploded in the middle of the chain,
+ // the error event will go downward but we also need to notify
+ // workers upward that there has been an error.
+ if(this.previous) {
+ this.previous.error(e);
+ }
+
+ this.cleanUp();
+ }
+ return true;
+ },
+ /**
+ * Add a callback on an event.
+ * @param {String} name the name of the event (data, end, error)
+ * @param {Function} listener the function to call when the event is triggered
+ * @return {GenericWorker} the current object for chainability
+ */
+ on : function (name, listener) {
+ this._listeners[name].push(listener);
+ return this;
+ },
+ /**
+ * Clean any references when a worker is ending.
+ */
+ cleanUp : function () {
+ this.streamInfo = this.generatedError = this.extraStreamInfo = null;
+ this._listeners = [];
+ },
+ /**
+ * Trigger an event. This will call registered callback with the provided arg.
+ * @param {String} name the name of the event (data, end, error)
+ * @param {Object} arg the argument to call the callback with.
+ */
+ emit : function (name, arg) {
+ if (this._listeners[name]) {
+ for(var i = 0; i < this._listeners[name].length; i++) {
+ this._listeners[name][i].call(this, arg);
+ }
+ }
+ },
+ /**
+ * Chain a worker with an other.
+ * @param {Worker} next the worker receiving events from the current one.
+ * @return {worker} the next worker for chainability
+ */
+ pipe : function (next) {
+ return next.registerPrevious(this);
+ },
+ /**
+ * Same as `pipe` in the other direction.
+ * Using an API with `pipe(next)` is very easy.
+ * Implementing the API with the point of view of the next one registering
+ * a source is easier, see the ZipFileWorker.
+ * @param {Worker} previous the previous worker, sending events to this one
+ * @return {Worker} the current worker for chainability
+ */
+ registerPrevious : function (previous) {
+ if (this.isLocked) {
+ throw new Error("The stream '" + this + "' has already been used.");
+ }
+
+ // sharing the streamInfo...
+ this.streamInfo = previous.streamInfo;
+ // ... and adding our own bits
+ this.mergeStreamInfo();
+ this.previous = previous;
+ var self = this;
+ previous.on("data", function (chunk) {
+ self.processChunk(chunk);
+ });
+ previous.on("end", function () {
+ self.end();
+ });
+ previous.on("error", function (e) {
+ self.error(e);
+ });
+ return this;
+ },
+ /**
+ * Pause the stream so it doesn't send events anymore.
+ * @return {Boolean} true if this call paused the worker, false otherwise.
+ */
+ pause : function () {
+ if(this.isPaused || this.isFinished) {
+ return false;
+ }
+ this.isPaused = true;
+
+ if(this.previous) {
+ this.previous.pause();
+ }
+ return true;
+ },
+ /**
+ * Resume a paused stream.
+ * @return {Boolean} true if this call resumed the worker, false otherwise.
+ */
+ resume : function () {
+ if(!this.isPaused || this.isFinished) {
+ return false;
+ }
+ this.isPaused = false;
+
+ // if true, the worker tried to resume but failed
+ var withError = false;
+ if(this.generatedError) {
+ this.error(this.generatedError);
+ withError = true;
+ }
+ if(this.previous) {
+ this.previous.resume();
+ }
+
+ return !withError;
+ },
+ /**
+ * Flush any remaining bytes as the stream is ending.
+ */
+ flush : function () {},
+ /**
+ * Process a chunk. This is usually the method overridden.
+ * @param {Object} chunk the chunk to process.
+ */
+ processChunk : function(chunk) {
+ this.push(chunk);
+ },
+ /**
+ * Add a key/value to be added in the workers chain streamInfo once activated.
+ * @param {String} key the key to use
+ * @param {Object} value the associated value
+ * @return {Worker} the current worker for chainability
+ */
+ withStreamInfo : function (key, value) {
+ this.extraStreamInfo[key] = value;
+ this.mergeStreamInfo();
+ return this;
+ },
+ /**
+ * Merge this worker's streamInfo into the chain's streamInfo.
+ */
+ mergeStreamInfo : function () {
+ for(var key in this.extraStreamInfo) {
+ if (!Object.prototype.hasOwnProperty.call(this.extraStreamInfo, key)) {
+ continue;
+ }
+ this.streamInfo[key] = this.extraStreamInfo[key];
+ }
+ },
+
+ /**
+ * Lock the stream to prevent further updates on the workers chain.
+ * After calling this method, all calls to pipe will fail.
+ */
+ lock: function () {
+ if (this.isLocked) {
+ throw new Error("The stream '" + this + "' has already been used.");
+ }
+ this.isLocked = true;
+ if (this.previous) {
+ this.previous.lock();
+ }
+ },
+
+ /**
+ *
+ * Pretty print the workers chain.
+ */
+ toString : function () {
+ var me = "Worker " + this.name;
+ if (this.previous) {
+ return this.previous + " -> " + me;
+ } else {
+ return me;
+ }
+ }
+};
+
+module.exports = GenericWorker;
+
+},{}],29:[function(require,module,exports){
+"use strict";
+
+var utils = require("../utils");
+var ConvertWorker = require("./ConvertWorker");
+var GenericWorker = require("./GenericWorker");
+var base64 = require("../base64");
+var support = require("../support");
+var external = require("../external");
+
+var NodejsStreamOutputAdapter = null;
+if (support.nodestream) {
+ try {
+ NodejsStreamOutputAdapter = require("../nodejs/NodejsStreamOutputAdapter");
+ } catch(e) {
+ // ignore
+ }
+}
+
+/**
+ * Apply the final transformation of the data. If the user wants a Blob for
+ * example, it's easier to work with an U8intArray and finally do the
+ * ArrayBuffer/Blob conversion.
+ * @param {String} type the name of the final type
+ * @param {String|Uint8Array|Buffer} content the content to transform
+ * @param {String} mimeType the mime type of the content, if applicable.
+ * @return {String|Uint8Array|ArrayBuffer|Buffer|Blob} the content in the right format.
+ */
+function transformZipOutput(type, content, mimeType) {
+ switch(type) {
+ case "blob" :
+ return utils.newBlob(utils.transformTo("arraybuffer", content), mimeType);
+ case "base64" :
+ return base64.encode(content);
+ default :
+ return utils.transformTo(type, content);
+ }
+}
+
+/**
+ * Concatenate an array of data of the given type.
+ * @param {String} type the type of the data in the given array.
+ * @param {Array} dataArray the array containing the data chunks to concatenate
+ * @return {String|Uint8Array|Buffer} the concatenated data
+ * @throws Error if the asked type is unsupported
+ */
+function concat (type, dataArray) {
+ var i, index = 0, res = null, totalLength = 0;
+ for(i = 0; i < dataArray.length; i++) {
+ totalLength += dataArray[i].length;
+ }
+ switch(type) {
+ case "string":
+ return dataArray.join("");
+ case "array":
+ return Array.prototype.concat.apply([], dataArray);
+ case "uint8array":
+ res = new Uint8Array(totalLength);
+ for(i = 0; i < dataArray.length; i++) {
+ res.set(dataArray[i], index);
+ index += dataArray[i].length;
+ }
+ return res;
+ case "nodebuffer":
+ return Buffer.concat(dataArray);
+ default:
+ throw new Error("concat : unsupported type '" + type + "'");
+ }
+}
+
+/**
+ * Listen a StreamHelper, accumulate its content and concatenate it into a
+ * complete block.
+ * @param {StreamHelper} helper the helper to use.
+ * @param {Function} updateCallback a callback called on each update. Called
+ * with one arg :
+ * - the metadata linked to the update received.
+ * @return Promise the promise for the accumulation.
+ */
+function accumulate(helper, updateCallback) {
+ return new external.Promise(function (resolve, reject){
+ var dataArray = [];
+ var chunkType = helper._internalType,
+ resultType = helper._outputType,
+ mimeType = helper._mimeType;
+ helper
+ .on("data", function (data, meta) {
+ dataArray.push(data);
+ if(updateCallback) {
+ updateCallback(meta);
+ }
+ })
+ .on("error", function(err) {
+ dataArray = [];
+ reject(err);
+ })
+ .on("end", function (){
+ try {
+ var result = transformZipOutput(resultType, concat(chunkType, dataArray), mimeType);
+ resolve(result);
+ } catch (e) {
+ reject(e);
+ }
+ dataArray = [];
+ })
+ .resume();
+ });
+}
+
+/**
+ * An helper to easily use workers outside of JSZip.
+ * @constructor
+ * @param {Worker} worker the worker to wrap
+ * @param {String} outputType the type of data expected by the use
+ * @param {String} mimeType the mime type of the content, if applicable.
+ */
+function StreamHelper(worker, outputType, mimeType) {
+ var internalType = outputType;
+ switch(outputType) {
+ case "blob":
+ case "arraybuffer":
+ internalType = "uint8array";
+ break;
+ case "base64":
+ internalType = "string";
+ break;
+ }
+
+ try {
+ // the type used internally
+ this._internalType = internalType;
+ // the type used to output results
+ this._outputType = outputType;
+ // the mime type
+ this._mimeType = mimeType;
+ utils.checkSupport(internalType);
+ this._worker = worker.pipe(new ConvertWorker(internalType));
+ // the last workers can be rewired without issues but we need to
+ // prevent any updates on previous workers.
+ worker.lock();
+ } catch(e) {
+ this._worker = new GenericWorker("error");
+ this._worker.error(e);
+ }
+}
+
+StreamHelper.prototype = {
+ /**
+ * Listen a StreamHelper, accumulate its content and concatenate it into a
+ * complete block.
+ * @param {Function} updateCb the update callback.
+ * @return Promise the promise for the accumulation.
+ */
+ accumulate : function (updateCb) {
+ return accumulate(this, updateCb);
+ },
+ /**
+ * Add a listener on an event triggered on a stream.
+ * @param {String} evt the name of the event
+ * @param {Function} fn the listener
+ * @return {StreamHelper} the current helper.
+ */
+ on : function (evt, fn) {
+ var self = this;
+
+ if(evt === "data") {
+ this._worker.on(evt, function (chunk) {
+ fn.call(self, chunk.data, chunk.meta);
+ });
+ } else {
+ this._worker.on(evt, function () {
+ utils.delay(fn, arguments, self);
+ });
+ }
+ return this;
+ },
+ /**
+ * Resume the flow of chunks.
+ * @return {StreamHelper} the current helper.
+ */
+ resume : function () {
+ utils.delay(this._worker.resume, [], this._worker);
+ return this;
+ },
+ /**
+ * Pause the flow of chunks.
+ * @return {StreamHelper} the current helper.
+ */
+ pause : function () {
+ this._worker.pause();
+ return this;
+ },
+ /**
+ * Return a nodejs stream for this helper.
+ * @param {Function} updateCb the update callback.
+ * @return {NodejsStreamOutputAdapter} the nodejs stream.
+ */
+ toNodejsStream : function (updateCb) {
+ utils.checkSupport("nodestream");
+ if (this._outputType !== "nodebuffer") {
+ // an object stream containing blob/arraybuffer/uint8array/string
+ // is strange and I don't know if it would be useful.
+ // I you find this comment and have a good usecase, please open a
+ // bug report !
+ throw new Error(this._outputType + " is not supported by this method");
+ }
+
+ return new NodejsStreamOutputAdapter(this, {
+ objectMode : this._outputType !== "nodebuffer"
+ }, updateCb);
+ }
+};
+
+
+module.exports = StreamHelper;
+
+},{"../base64":1,"../external":6,"../nodejs/NodejsStreamOutputAdapter":13,"../support":30,"../utils":32,"./ConvertWorker":24,"./GenericWorker":28}],30:[function(require,module,exports){
+"use strict";
+
+exports.base64 = true;
+exports.array = true;
+exports.string = true;
+exports.arraybuffer = typeof ArrayBuffer !== "undefined" && typeof Uint8Array !== "undefined";
+exports.nodebuffer = typeof Buffer !== "undefined";
+// contains true if JSZip can read/generate Uint8Array, false otherwise.
+exports.uint8array = typeof Uint8Array !== "undefined";
+
+if (typeof ArrayBuffer === "undefined") {
+ exports.blob = false;
+}
+else {
+ var buffer = new ArrayBuffer(0);
+ try {
+ exports.blob = new Blob([buffer], {
+ type: "application/zip"
+ }).size === 0;
+ }
+ catch (e) {
+ try {
+ var Builder = self.BlobBuilder || self.WebKitBlobBuilder || self.MozBlobBuilder || self.MSBlobBuilder;
+ var builder = new Builder();
+ builder.append(buffer);
+ exports.blob = builder.getBlob("application/zip").size === 0;
+ }
+ catch (e) {
+ exports.blob = false;
+ }
+ }
+}
+
+try {
+ exports.nodestream = !!require("readable-stream").Readable;
+} catch(e) {
+ exports.nodestream = false;
+}
+
+},{"readable-stream":16}],31:[function(require,module,exports){
+"use strict";
+
+var utils = require("./utils");
+var support = require("./support");
+var nodejsUtils = require("./nodejsUtils");
+var GenericWorker = require("./stream/GenericWorker");
+
+/**
+ * The following functions come from pako, from pako/lib/utils/strings
+ * released under the MIT license, see pako https://github.com/nodeca/pako/
+ */
+
+// Table with utf8 lengths (calculated by first byte of sequence)
+// Note, that 5 & 6-byte values and some 4-byte values can not be represented in JS,
+// because max possible codepoint is 0x10ffff
+var _utf8len = new Array(256);
+for (var i=0; i<256; i++) {
+ _utf8len[i] = (i >= 252 ? 6 : i >= 248 ? 5 : i >= 240 ? 4 : i >= 224 ? 3 : i >= 192 ? 2 : 1);
+}
+_utf8len[254]=_utf8len[254]=1; // Invalid sequence start
+
+// convert string to array (typed, when possible)
+var string2buf = function (str) {
+ var buf, c, c2, m_pos, i, str_len = str.length, buf_len = 0;
+
+ // count binary size
+ for (m_pos = 0; m_pos < str_len; m_pos++) {
+ c = str.charCodeAt(m_pos);
+ if ((c & 0xfc00) === 0xd800 && (m_pos+1 < str_len)) {
+ c2 = str.charCodeAt(m_pos+1);
+ if ((c2 & 0xfc00) === 0xdc00) {
+ c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
+ m_pos++;
+ }
+ }
+ buf_len += c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4;
+ }
+
+ // allocate buffer
+ if (support.uint8array) {
+ buf = new Uint8Array(buf_len);
+ } else {
+ buf = new Array(buf_len);
+ }
+
+ // convert
+ for (i=0, m_pos = 0; i < buf_len; m_pos++) {
+ c = str.charCodeAt(m_pos);
+ if ((c & 0xfc00) === 0xd800 && (m_pos+1 < str_len)) {
+ c2 = str.charCodeAt(m_pos+1);
+ if ((c2 & 0xfc00) === 0xdc00) {
+ c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
+ m_pos++;
+ }
+ }
+ if (c < 0x80) {
+ /* one byte */
+ buf[i++] = c;
+ } else if (c < 0x800) {
+ /* two bytes */
+ buf[i++] = 0xC0 | (c >>> 6);
+ buf[i++] = 0x80 | (c & 0x3f);
+ } else if (c < 0x10000) {
+ /* three bytes */
+ buf[i++] = 0xE0 | (c >>> 12);
+ buf[i++] = 0x80 | (c >>> 6 & 0x3f);
+ buf[i++] = 0x80 | (c & 0x3f);
+ } else {
+ /* four bytes */
+ buf[i++] = 0xf0 | (c >>> 18);
+ buf[i++] = 0x80 | (c >>> 12 & 0x3f);
+ buf[i++] = 0x80 | (c >>> 6 & 0x3f);
+ buf[i++] = 0x80 | (c & 0x3f);
+ }
+ }
+
+ return buf;
+};
+
+// Calculate max possible position in utf8 buffer,
+// that will not break sequence. If that's not possible
+// - (very small limits) return max size as is.
+//
+// buf[] - utf8 bytes array
+// max - length limit (mandatory);
+var utf8border = function(buf, max) {
+ var pos;
+
+ max = max || buf.length;
+ if (max > buf.length) { max = buf.length; }
+
+ // go back from last position, until start of sequence found
+ pos = max-1;
+ while (pos >= 0 && (buf[pos] & 0xC0) === 0x80) { pos--; }
+
+ // Fuckup - very small and broken sequence,
+ // return max, because we should return something anyway.
+ if (pos < 0) { return max; }
+
+ // If we came to start of buffer - that means vuffer is too small,
+ // return max too.
+ if (pos === 0) { return max; }
+
+ return (pos + _utf8len[buf[pos]] > max) ? pos : max;
+};
+
+// convert array to string
+var buf2string = function (buf) {
+ var i, out, c, c_len;
+ var len = buf.length;
+
+ // Reserve max possible length (2 words per char)
+ // NB: by unknown reasons, Array is significantly faster for
+ // String.fromCharCode.apply than Uint16Array.
+ var utf16buf = new Array(len*2);
+
+ for (out=0, i=0; i 4) { utf16buf[out++] = 0xfffd; i += c_len-1; continue; }
+
+ // apply mask on first byte
+ c &= c_len === 2 ? 0x1f : c_len === 3 ? 0x0f : 0x07;
+ // join the rest
+ while (c_len > 1 && i < len) {
+ c = (c << 6) | (buf[i++] & 0x3f);
+ c_len--;
+ }
+
+ // terminated by end of string?
+ if (c_len > 1) { utf16buf[out++] = 0xfffd; continue; }
+
+ if (c < 0x10000) {
+ utf16buf[out++] = c;
+ } else {
+ c -= 0x10000;
+ utf16buf[out++] = 0xd800 | ((c >> 10) & 0x3ff);
+ utf16buf[out++] = 0xdc00 | (c & 0x3ff);
+ }
+ }
+
+ // shrinkBuf(utf16buf, out)
+ if (utf16buf.length !== out) {
+ if(utf16buf.subarray) {
+ utf16buf = utf16buf.subarray(0, out);
+ } else {
+ utf16buf.length = out;
+ }
+ }
+
+ // return String.fromCharCode.apply(null, utf16buf);
+ return utils.applyFromCharCode(utf16buf);
+};
+
+
+// That's all for the pako functions.
+
+
+/**
+ * Transform a javascript string into an array (typed if possible) of bytes,
+ * UTF-8 encoded.
+ * @param {String} str the string to encode
+ * @return {Array|Uint8Array|Buffer} the UTF-8 encoded string.
+ */
+exports.utf8encode = function utf8encode(str) {
+ if (support.nodebuffer) {
+ return nodejsUtils.newBufferFrom(str, "utf-8");
+ }
+
+ return string2buf(str);
+};
+
+
+/**
+ * Transform a bytes array (or a representation) representing an UTF-8 encoded
+ * string into a javascript string.
+ * @param {Array|Uint8Array|Buffer} buf the data de decode
+ * @return {String} the decoded string.
+ */
+exports.utf8decode = function utf8decode(buf) {
+ if (support.nodebuffer) {
+ return utils.transformTo("nodebuffer", buf).toString("utf-8");
+ }
+
+ buf = utils.transformTo(support.uint8array ? "uint8array" : "array", buf);
+
+ return buf2string(buf);
+};
+
+/**
+ * A worker to decode utf8 encoded binary chunks into string chunks.
+ * @constructor
+ */
+function Utf8DecodeWorker() {
+ GenericWorker.call(this, "utf-8 decode");
+ // the last bytes if a chunk didn't end with a complete codepoint.
+ this.leftOver = null;
+}
+utils.inherits(Utf8DecodeWorker, GenericWorker);
+
+/**
+ * @see GenericWorker.processChunk
+ */
+Utf8DecodeWorker.prototype.processChunk = function (chunk) {
+
+ var data = utils.transformTo(support.uint8array ? "uint8array" : "array", chunk.data);
+
+ // 1st step, re-use what's left of the previous chunk
+ if (this.leftOver && this.leftOver.length) {
+ if(support.uint8array) {
+ var previousData = data;
+ data = new Uint8Array(previousData.length + this.leftOver.length);
+ data.set(this.leftOver, 0);
+ data.set(previousData, this.leftOver.length);
+ } else {
+ data = this.leftOver.concat(data);
+ }
+ this.leftOver = null;
+ }
+
+ var nextBoundary = utf8border(data);
+ var usableData = data;
+ if (nextBoundary !== data.length) {
+ if (support.uint8array) {
+ usableData = data.subarray(0, nextBoundary);
+ this.leftOver = data.subarray(nextBoundary, data.length);
+ } else {
+ usableData = data.slice(0, nextBoundary);
+ this.leftOver = data.slice(nextBoundary, data.length);
+ }
+ }
+
+ this.push({
+ data : exports.utf8decode(usableData),
+ meta : chunk.meta
+ });
+};
+
+/**
+ * @see GenericWorker.flush
+ */
+Utf8DecodeWorker.prototype.flush = function () {
+ if(this.leftOver && this.leftOver.length) {
+ this.push({
+ data : exports.utf8decode(this.leftOver),
+ meta : {}
+ });
+ this.leftOver = null;
+ }
+};
+exports.Utf8DecodeWorker = Utf8DecodeWorker;
+
+/**
+ * A worker to endcode string chunks into utf8 encoded binary chunks.
+ * @constructor
+ */
+function Utf8EncodeWorker() {
+ GenericWorker.call(this, "utf-8 encode");
+}
+utils.inherits(Utf8EncodeWorker, GenericWorker);
+
+/**
+ * @see GenericWorker.processChunk
+ */
+Utf8EncodeWorker.prototype.processChunk = function (chunk) {
+ this.push({
+ data : exports.utf8encode(chunk.data),
+ meta : chunk.meta
+ });
+};
+exports.Utf8EncodeWorker = Utf8EncodeWorker;
+
+},{"./nodejsUtils":14,"./stream/GenericWorker":28,"./support":30,"./utils":32}],32:[function(require,module,exports){
+"use strict";
+
+var support = require("./support");
+var base64 = require("./base64");
+var nodejsUtils = require("./nodejsUtils");
+var external = require("./external");
+require("setimmediate");
+
+
+/**
+ * Convert a string that pass as a "binary string": it should represent a byte
+ * array but may have > 255 char codes. Be sure to take only the first byte
+ * and returns the byte array.
+ * @param {String} str the string to transform.
+ * @return {Array|Uint8Array} the string in a binary format.
+ */
+function string2binary(str) {
+ var result = null;
+ if (support.uint8array) {
+ result = new Uint8Array(str.length);
+ } else {
+ result = new Array(str.length);
+ }
+ return stringToArrayLike(str, result);
+}
+
+/**
+ * Create a new blob with the given content and the given type.
+ * @param {String|ArrayBuffer} part the content to put in the blob. DO NOT use
+ * an Uint8Array because the stock browser of android 4 won't accept it (it
+ * will be silently converted to a string, "[object Uint8Array]").
+ *
+ * Use only ONE part to build the blob to avoid a memory leak in IE11 / Edge:
+ * when a large amount of Array is used to create the Blob, the amount of
+ * memory consumed is nearly 100 times the original data amount.
+ *
+ * @param {String} type the mime type of the blob.
+ * @return {Blob} the created blob.
+ */
+exports.newBlob = function(part, type) {
+ exports.checkSupport("blob");
+
+ try {
+ // Blob constructor
+ return new Blob([part], {
+ type: type
+ });
+ }
+ catch (e) {
+
+ try {
+ // deprecated, browser only, old way
+ var Builder = self.BlobBuilder || self.WebKitBlobBuilder || self.MozBlobBuilder || self.MSBlobBuilder;
+ var builder = new Builder();
+ builder.append(part);
+ return builder.getBlob(type);
+ }
+ catch (e) {
+
+ // well, fuck ?!
+ throw new Error("Bug : can't construct the Blob.");
+ }
+ }
+
+
+};
+/**
+ * The identity function.
+ * @param {Object} input the input.
+ * @return {Object} the same input.
+ */
+function identity(input) {
+ return input;
+}
+
+/**
+ * Fill in an array with a string.
+ * @param {String} str the string to use.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} array the array to fill in (will be mutated).
+ * @return {Array|ArrayBuffer|Uint8Array|Buffer} the updated array.
+ */
+function stringToArrayLike(str, array) {
+ for (var i = 0; i < str.length; ++i) {
+ array[i] = str.charCodeAt(i) & 0xFF;
+ }
+ return array;
+}
+
+/**
+ * An helper for the function arrayLikeToString.
+ * This contains static information and functions that
+ * can be optimized by the browser JIT compiler.
+ */
+var arrayToStringHelper = {
+ /**
+ * Transform an array of int into a string, chunk by chunk.
+ * See the performances notes on arrayLikeToString.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} array the array to transform.
+ * @param {String} type the type of the array.
+ * @param {Integer} chunk the chunk size.
+ * @return {String} the resulting string.
+ * @throws Error if the chunk is too big for the stack.
+ */
+ stringifyByChunk: function(array, type, chunk) {
+ var result = [], k = 0, len = array.length;
+ // shortcut
+ if (len <= chunk) {
+ return String.fromCharCode.apply(null, array);
+ }
+ while (k < len) {
+ if (type === "array" || type === "nodebuffer") {
+ result.push(String.fromCharCode.apply(null, array.slice(k, Math.min(k + chunk, len))));
+ }
+ else {
+ result.push(String.fromCharCode.apply(null, array.subarray(k, Math.min(k + chunk, len))));
+ }
+ k += chunk;
+ }
+ return result.join("");
+ },
+ /**
+ * Call String.fromCharCode on every item in the array.
+ * This is the naive implementation, which generate A LOT of intermediate string.
+ * This should be used when everything else fail.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} array the array to transform.
+ * @return {String} the result.
+ */
+ stringifyByChar: function(array){
+ var resultStr = "";
+ for(var i = 0; i < array.length; i++) {
+ resultStr += String.fromCharCode(array[i]);
+ }
+ return resultStr;
+ },
+ applyCanBeUsed : {
+ /**
+ * true if the browser accepts to use String.fromCharCode on Uint8Array
+ */
+ uint8array : (function () {
+ try {
+ return support.uint8array && String.fromCharCode.apply(null, new Uint8Array(1)).length === 1;
+ } catch (e) {
+ return false;
+ }
+ })(),
+ /**
+ * true if the browser accepts to use String.fromCharCode on nodejs Buffer.
+ */
+ nodebuffer : (function () {
+ try {
+ return support.nodebuffer && String.fromCharCode.apply(null, nodejsUtils.allocBuffer(1)).length === 1;
+ } catch (e) {
+ return false;
+ }
+ })()
+ }
+};
+
+/**
+ * Transform an array-like object to a string.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} array the array to transform.
+ * @return {String} the result.
+ */
+function arrayLikeToString(array) {
+ // Performances notes :
+ // --------------------
+ // String.fromCharCode.apply(null, array) is the fastest, see
+ // see http://jsperf.com/converting-a-uint8array-to-a-string/2
+ // but the stack is limited (and we can get huge arrays !).
+ //
+ // result += String.fromCharCode(array[i]); generate too many strings !
+ //
+ // This code is inspired by http://jsperf.com/arraybuffer-to-string-apply-performance/2
+ // TODO : we now have workers that split the work. Do we still need that ?
+ var chunk = 65536,
+ type = exports.getTypeOf(array),
+ canUseApply = true;
+ if (type === "uint8array") {
+ canUseApply = arrayToStringHelper.applyCanBeUsed.uint8array;
+ } else if (type === "nodebuffer") {
+ canUseApply = arrayToStringHelper.applyCanBeUsed.nodebuffer;
+ }
+
+ if (canUseApply) {
+ while (chunk > 1) {
+ try {
+ return arrayToStringHelper.stringifyByChunk(array, type, chunk);
+ } catch (e) {
+ chunk = Math.floor(chunk / 2);
+ }
+ }
+ }
+
+ // no apply or chunk error : slow and painful algorithm
+ // default browser on android 4.*
+ return arrayToStringHelper.stringifyByChar(array);
+}
+
+exports.applyFromCharCode = arrayLikeToString;
+
+
+/**
+ * Copy the data from an array-like to an other array-like.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} arrayFrom the origin array.
+ * @param {Array|ArrayBuffer|Uint8Array|Buffer} arrayTo the destination array which will be mutated.
+ * @return {Array|ArrayBuffer|Uint8Array|Buffer} the updated destination array.
+ */
+function arrayLikeToArrayLike(arrayFrom, arrayTo) {
+ for (var i = 0; i < arrayFrom.length; i++) {
+ arrayTo[i] = arrayFrom[i];
+ }
+ return arrayTo;
+}
+
+// a matrix containing functions to transform everything into everything.
+var transform = {};
+
+// string to ?
+transform["string"] = {
+ "string": identity,
+ "array": function(input) {
+ return stringToArrayLike(input, new Array(input.length));
+ },
+ "arraybuffer": function(input) {
+ return transform["string"]["uint8array"](input).buffer;
+ },
+ "uint8array": function(input) {
+ return stringToArrayLike(input, new Uint8Array(input.length));
+ },
+ "nodebuffer": function(input) {
+ return stringToArrayLike(input, nodejsUtils.allocBuffer(input.length));
+ }
+};
+
+// array to ?
+transform["array"] = {
+ "string": arrayLikeToString,
+ "array": identity,
+ "arraybuffer": function(input) {
+ return (new Uint8Array(input)).buffer;
+ },
+ "uint8array": function(input) {
+ return new Uint8Array(input);
+ },
+ "nodebuffer": function(input) {
+ return nodejsUtils.newBufferFrom(input);
+ }
+};
+
+// arraybuffer to ?
+transform["arraybuffer"] = {
+ "string": function(input) {
+ return arrayLikeToString(new Uint8Array(input));
+ },
+ "array": function(input) {
+ return arrayLikeToArrayLike(new Uint8Array(input), new Array(input.byteLength));
+ },
+ "arraybuffer": identity,
+ "uint8array": function(input) {
+ return new Uint8Array(input);
+ },
+ "nodebuffer": function(input) {
+ return nodejsUtils.newBufferFrom(new Uint8Array(input));
+ }
+};
+
+// uint8array to ?
+transform["uint8array"] = {
+ "string": arrayLikeToString,
+ "array": function(input) {
+ return arrayLikeToArrayLike(input, new Array(input.length));
+ },
+ "arraybuffer": function(input) {
+ return input.buffer;
+ },
+ "uint8array": identity,
+ "nodebuffer": function(input) {
+ return nodejsUtils.newBufferFrom(input);
+ }
+};
+
+// nodebuffer to ?
+transform["nodebuffer"] = {
+ "string": arrayLikeToString,
+ "array": function(input) {
+ return arrayLikeToArrayLike(input, new Array(input.length));
+ },
+ "arraybuffer": function(input) {
+ return transform["nodebuffer"]["uint8array"](input).buffer;
+ },
+ "uint8array": function(input) {
+ return arrayLikeToArrayLike(input, new Uint8Array(input.length));
+ },
+ "nodebuffer": identity
+};
+
+/**
+ * Transform an input into any type.
+ * The supported output type are : string, array, uint8array, arraybuffer, nodebuffer.
+ * If no output type is specified, the unmodified input will be returned.
+ * @param {String} outputType the output type.
+ * @param {String|Array|ArrayBuffer|Uint8Array|Buffer} input the input to convert.
+ * @throws {Error} an Error if the browser doesn't support the requested output type.
+ */
+exports.transformTo = function(outputType, input) {
+ if (!input) {
+ // undefined, null, etc
+ // an empty string won't harm.
+ input = "";
+ }
+ if (!outputType) {
+ return input;
+ }
+ exports.checkSupport(outputType);
+ var inputType = exports.getTypeOf(input);
+ var result = transform[inputType][outputType](input);
+ return result;
+};
+
+/**
+ * Resolve all relative path components, "." and "..", in a path. If these relative components
+ * traverse above the root then the resulting path will only contain the final path component.
+ *
+ * All empty components, e.g. "//", are removed.
+ * @param {string} path A path with / or \ separators
+ * @returns {string} The path with all relative path components resolved.
+ */
+exports.resolve = function(path) {
+ var parts = path.split("/");
+ var result = [];
+ for (var index = 0; index < parts.length; index++) {
+ var part = parts[index];
+ // Allow the first and last component to be empty for trailing slashes.
+ if (part === "." || (part === "" && index !== 0 && index !== parts.length - 1)) {
+ continue;
+ } else if (part === "..") {
+ result.pop();
+ } else {
+ result.push(part);
+ }
+ }
+ return result.join("/");
+};
+
+/**
+ * Return the type of the input.
+ * The type will be in a format valid for JSZip.utils.transformTo : string, array, uint8array, arraybuffer.
+ * @param {Object} input the input to identify.
+ * @return {String} the (lowercase) type of the input.
+ */
+exports.getTypeOf = function(input) {
+ if (typeof input === "string") {
+ return "string";
+ }
+ if (Object.prototype.toString.call(input) === "[object Array]") {
+ return "array";
+ }
+ if (support.nodebuffer && nodejsUtils.isBuffer(input)) {
+ return "nodebuffer";
+ }
+ if (support.uint8array && input instanceof Uint8Array) {
+ return "uint8array";
+ }
+ if (support.arraybuffer && input instanceof ArrayBuffer) {
+ return "arraybuffer";
+ }
+};
+
+/**
+ * Throw an exception if the type is not supported.
+ * @param {String} type the type to check.
+ * @throws {Error} an Error if the browser doesn't support the requested type.
+ */
+exports.checkSupport = function(type) {
+ var supported = support[type.toLowerCase()];
+ if (!supported) {
+ throw new Error(type + " is not supported by this platform");
+ }
+};
+
// Largest value storable in an unsigned 16 bits field (0xFFFF).
exports.MAX_VALUE_16BITS = 65535;
// Saturated 32 bits field, as produced by the reader on all-ones bytes.
exports.MAX_VALUE_32BITS = -1; // well, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" is parsed as -1
+
+/**
+ * Prettify a string read as binary.
+ * @param {string} str the string to prettify.
+ * @return {string} a pretty string.
+ */
+exports.pretty = function(str) {
+ var res = "",
+ code, i;
+ for (i = 0; i < (str || "").length; i++) {
+ code = str.charCodeAt(i);
+ res += "\\x" + (code < 16 ? "0" : "") + code.toString(16).toUpperCase();
+ }
+ return res;
+};
+
+/**
+ * Defer the call of a function.
+ * @param {Function} callback the function to call asynchronously.
+ * @param {Array} args the arguments to give to the callback.
+ */
+exports.delay = function(callback, args, self) {
+ setImmediate(function () {
+ callback.apply(self || null, args || []);
+ });
+};
+
+/**
+ * Extends a prototype with an other, without calling a constructor with
+ * side effects. Inspired by nodejs' `utils.inherits`
+ * @param {Function} ctor the constructor to augment
+ * @param {Function} superCtor the parent constructor to use
+ */
+exports.inherits = function (ctor, superCtor) {
+ var Obj = function() {};
+ Obj.prototype = superCtor.prototype;
+ ctor.prototype = new Obj();
+};
+
+/**
+ * Merge the objects passed as parameters into a new one.
+ * @private
+ * @param {...Object} var_args All objects to merge.
+ * @return {Object} a new object with the data of the others.
+ */
+exports.extend = function() {
+ var result = {}, i, attr;
+ for (i = 0; i < arguments.length; i++) { // arguments is not enumerable in some browsers
+ for (attr in arguments[i]) {
+ if (Object.prototype.hasOwnProperty.call(arguments[i], attr) && typeof result[attr] === "undefined") {
+ result[attr] = arguments[i][attr];
+ }
+ }
+ }
+ return result;
+};
+
/**
 * Transform arbitrary content into a Promise.
 * @param {String} name a name for the content being processed.
 * @param {Object} inputData the content to process.
 * @param {Boolean} isBinary true if the content is not an unicode string
 * @param {Boolean} isOptimizedBinaryString true if the string content only has one byte per character.
 * @param {Boolean} isBase64 true if the string content is encoded with base64.
 * @return {Promise} a promise in a format usable by JSZip.
 */
exports.prepareContent = function(name, inputData, isBinary, isOptimizedBinaryString, isBase64) {

    // if inputData is already a promise, this flatten it.
    var promise = external.Promise.resolve(inputData).then(function(data) {

        // a Blob (or File) must be read asynchronously through a FileReader ;
        // detect it with instanceof or, as a fallback, with its toString tag.
        var isBlob = support.blob && (data instanceof Blob || ["[object File]", "[object Blob]"].indexOf(Object.prototype.toString.call(data)) !== -1);

        if (isBlob && typeof FileReader !== "undefined") {
            // wrap the FileReader callbacks into a promise resolving
            // to the file content as an ArrayBuffer
            return new external.Promise(function (resolve, reject) {
                var reader = new FileReader();

                reader.onload = function(e) {
                    resolve(e.target.result);
                };
                reader.onerror = function(e) {
                    reject(e.target.error);
                };
                reader.readAsArrayBuffer(data);
            });
        } else {
            return data;
        }
    });

    return promise.then(function(data) {
        var dataType = exports.getTypeOf(data);

        // getTypeOf returned nothing : the input is not a supported type
        if (!dataType) {
            return external.Promise.reject(
                new Error("Can't read the data of '" + name + "'. Is it " +
                          "in a supported JavaScript type (String, Blob, ArrayBuffer, etc) ?")
            );
        }
        // special case : it's way easier to work with Uint8Array than with ArrayBuffer
        if (dataType === "arraybuffer") {
            data = exports.transformTo("uint8array", data);
        } else if (dataType === "string") {
            if (isBase64) {
                data = base64.decode(data);
            }
            else if (isBinary) {
                // optimizedBinaryString === true means that the file has already been filtered with a 0xFF mask
                if (isOptimizedBinaryString !== true) {
                    // this is a string, not in a base64 format.
                    // Be sure that this is a correct "binary string"
                    data = string2binary(data);
                }
            }
        }
        return data;
    });
};
+
+},{"./base64":1,"./external":6,"./nodejsUtils":14,"./support":30,"setimmediate":54}],33:[function(require,module,exports){
+"use strict";
+var readerFor = require("./reader/readerFor");
+var utils = require("./utils");
+var sig = require("./signature");
+var ZipEntry = require("./zipEntry");
+var support = require("./support");
+// class ZipEntries {{{
/**
 * All the entries in the zip file.
 * @constructor
 * @param {Object} loadOptions Options for loading the stream.
 */
function ZipEntries(loadOptions) {
    this.loadOptions = loadOptions;
    // ZipEntry objects, filled when reading the central directory
    this.files = [];
}
+ZipEntries.prototype = {
+ /**
+ * Check that the reader is on the specified signature.
+ * @param {string} expectedSignature the expected signature.
+ * @throws {Error} if it is an other signature.
+ */
+ checkSignature: function(expectedSignature) {
+ if (!this.reader.readAndCheckSignature(expectedSignature)) {
+ this.reader.index -= 4;
+ var signature = this.reader.readString(4);
+ throw new Error("Corrupted zip or bug: unexpected signature " + "(" + utils.pretty(signature) + ", expected " + utils.pretty(expectedSignature) + ")");
+ }
+ },
+ /**
+ * Check if the given signature is at the given index.
+ * @param {number} askedIndex the index to check.
+ * @param {string} expectedSignature the signature to expect.
+ * @return {boolean} true if the signature is here, false otherwise.
+ */
+ isSignature: function(askedIndex, expectedSignature) {
+ var currentIndex = this.reader.index;
+ this.reader.setIndex(askedIndex);
+ var signature = this.reader.readString(4);
+ var result = signature === expectedSignature;
+ this.reader.setIndex(currentIndex);
+ return result;
+ },
    /**
     * Read the end of the central directory.
     * Fills the disk/record counters, the central directory size and offset,
     * and decodes the archive comment.
     */
    readBlockEndOfCentral: function() {
        // fixed-size fields, read sequentially in record order
        this.diskNumber = this.reader.readInt(2);
        this.diskWithCentralDirStart = this.reader.readInt(2);
        this.centralDirRecordsOnThisDisk = this.reader.readInt(2);
        this.centralDirRecords = this.reader.readInt(2);
        this.centralDirSize = this.reader.readInt(4);
        this.centralDirOffset = this.reader.readInt(4);

        this.zipCommentLength = this.reader.readInt(2);
        // warning : the encoding depends of the system locale
        // On a linux machine with LANG=en_US.utf8, this field is utf8 encoded.
        // On a windows machine, this field is encoded with the localized windows code page.
        var zipComment = this.reader.readData(this.zipCommentLength);
        var decodeParamType = support.uint8array ? "uint8array" : "array";
        // To get consistent behavior with the generation part, we will assume that
        // this is utf8 encoded unless specified otherwise.
        var decodeContent = utils.transformTo(decodeParamType, zipComment);
        this.zipComment = this.loadOptions.decodeFileName(decodeContent);
    },
+ /**
+ * Read the end of the Zip 64 central directory.
+ * Not merged with the method readEndOfCentral :
+ * The end of central can coexist with its Zip64 brother,
+ * I don't want to read the wrong number of bytes !
+ */
+ readBlockZip64EndOfCentral: function() {
+ this.zip64EndOfCentralSize = this.reader.readInt(8);
+ this.reader.skip(4);
+ // this.versionMadeBy = this.reader.readString(2);
+ // this.versionNeeded = this.reader.readInt(2);
+ this.diskNumber = this.reader.readInt(4);
+ this.diskWithCentralDirStart = this.reader.readInt(4);
+ this.centralDirRecordsOnThisDisk = this.reader.readInt(8);
+ this.centralDirRecords = this.reader.readInt(8);
+ this.centralDirSize = this.reader.readInt(8);
+ this.centralDirOffset = this.reader.readInt(8);
+
+ this.zip64ExtensibleData = {};
+ var extraDataSize = this.zip64EndOfCentralSize - 44,
+ index = 0,
+ extraFieldId,
+ extraFieldLength,
+ extraFieldValue;
+ while (index < extraDataSize) {
+ extraFieldId = this.reader.readInt(2);
+ extraFieldLength = this.reader.readInt(4);
+ extraFieldValue = this.reader.readData(extraFieldLength);
+ this.zip64ExtensibleData[extraFieldId] = {
+ id: extraFieldId,
+ length: extraFieldLength,
+ value: extraFieldValue
+ };
+ }
+ },
    /**
     * Read the end of the Zip 64 central directory locator.
     * @throws {Error} if the archive spans more than one disk.
     */
    readBlockZip64EndOfCentralLocator: function() {
        this.diskWithZip64CentralDirStart = this.reader.readInt(4);
        this.relativeOffsetEndOfZip64CentralDir = this.reader.readInt(8);
        this.disksCount = this.reader.readInt(4);
        // split/spanned archives are not handled by this library
        if (this.disksCount > 1) {
            throw new Error("Multi-volumes zip are not supported");
        }
    },
+ /**
+ * Read the local files, based on the offset read in the central part.
+ */
+ readLocalFiles: function() {
+ var i, file;
+ for (i = 0; i < this.files.length; i++) {
+ file = this.files[i];
+ this.reader.setIndex(file.localHeaderOffset);
+ this.checkSignature(sig.LOCAL_FILE_HEADER);
+ file.readLocalPart(this.reader);
+ file.handleUTF8();
+ file.processAttributes();
+ }
+ },
+ /**
+ * Read the central directory.
+ */
+ readCentralDir: function() {
+ var file;
+
+ this.reader.setIndex(this.centralDirOffset);
+ while (this.reader.readAndCheckSignature(sig.CENTRAL_FILE_HEADER)) {
+ file = new ZipEntry({
+ zip64: this.zip64
+ }, this.loadOptions);
+ file.readCentralPart(this.reader);
+ this.files.push(file);
+ }
+
+ if (this.centralDirRecords !== this.files.length) {
+ if (this.centralDirRecords !== 0 && this.files.length === 0) {
+ // We expected some records but couldn't find ANY.
+ // This is really suspicious, as if something went wrong.
+ throw new Error("Corrupted zip or bug: expected " + this.centralDirRecords + " records in central dir, got " + this.files.length);
+ } else {
+ // We found some records but not all.
+ // Something is wrong but we got something for the user: no error here.
+ // console.warn("expected", this.centralDirRecords, "records in central dir, got", this.files.length);
+ }
+ }
+ },
    /**
     * Read the end of central directory, following the zip64 records if the
     * regular record is saturated, then sanity-check the offsets.
     */
    readEndOfCentral: function() {
        // the record has a variable-size trailing comment, so search for its
        // signature backwards from the end of the data
        var offset = this.reader.lastIndexOfSignature(sig.CENTRAL_DIRECTORY_END);
        if (offset < 0) {
            // Check if the content is a truncated zip or complete garbage.
            // A "LOCAL_FILE_HEADER" is not required at the beginning (auto
            // extractible zip for example) but it can give a good hint.
            // If an ajax request was used without responseType, we will also
            // get unreadable data.
            var isGarbage = !this.isSignature(0, sig.LOCAL_FILE_HEADER);

            if (isGarbage) {
                throw new Error("Can't find end of central directory : is this a zip file ? " +
                                "If it is, see https://stuk.github.io/jszip/documentation/howto/read_zip.html");
            } else {
                throw new Error("Corrupted zip: can't find end of central directory");
            }

        }
        this.reader.setIndex(offset);
        var endOfCentralDirOffset = offset;
        this.checkSignature(sig.CENTRAL_DIRECTORY_END);
        this.readBlockEndOfCentral();


        /* extract from the zip spec :
            4) If one of the fields in the end of central directory
            record is too small to hold required data, the field
            should be set to -1 (0xFFFF or 0xFFFFFFFF) and the
            ZIP64 format record should be created.
            5) The end of central directory record and the
            Zip64 end of central directory locator record must
            reside on the same disk when splitting or spanning
            an archive.
        */
        // any saturated field means the real values live in the zip64 records
        if (this.diskNumber === utils.MAX_VALUE_16BITS || this.diskWithCentralDirStart === utils.MAX_VALUE_16BITS || this.centralDirRecordsOnThisDisk === utils.MAX_VALUE_16BITS || this.centralDirRecords === utils.MAX_VALUE_16BITS || this.centralDirSize === utils.MAX_VALUE_32BITS || this.centralDirOffset === utils.MAX_VALUE_32BITS) {
            this.zip64 = true;

            /*
            Warning : the zip64 extension is supported, but ONLY if the 64bits integer read from
            the zip file can fit into a 32bits integer. This cannot be solved : JavaScript represents
            all numbers as 64-bit double precision IEEE 754 floating point numbers.
            So, we have 53bits for integers and bitwise operations treat everything as 32bits.
            see https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Operators/Bitwise_Operators
            and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf section 8.5
            */

            // should look for a zip64 EOCD locator
            offset = this.reader.lastIndexOfSignature(sig.ZIP64_CENTRAL_DIRECTORY_LOCATOR);
            if (offset < 0) {
                throw new Error("Corrupted zip: can't find the ZIP64 end of central directory locator");
            }
            this.reader.setIndex(offset);
            this.checkSignature(sig.ZIP64_CENTRAL_DIRECTORY_LOCATOR);
            this.readBlockZip64EndOfCentralLocator();

            // now the zip64 EOCD record
            if (!this.isSignature(this.relativeOffsetEndOfZip64CentralDir, sig.ZIP64_CENTRAL_DIRECTORY_END)) {
                // console.warn("ZIP64 end of central directory not where expected.");
                this.relativeOffsetEndOfZip64CentralDir = this.reader.lastIndexOfSignature(sig.ZIP64_CENTRAL_DIRECTORY_END);
                if (this.relativeOffsetEndOfZip64CentralDir < 0) {
                    throw new Error("Corrupted zip: can't find the ZIP64 end of central directory");
                }
            }
            this.reader.setIndex(this.relativeOffsetEndOfZip64CentralDir);
            this.checkSignature(sig.ZIP64_CENTRAL_DIRECTORY_END);
            this.readBlockZip64EndOfCentral();
        }

        // sanity check : where the central directory should end (declared
        // offset + declared size) versus where the EOCD record actually starts
        var expectedEndOfCentralDirOffset = this.centralDirOffset + this.centralDirSize;
        if (this.zip64) {
            expectedEndOfCentralDirOffset += 20; // end of central dir 64 locator
            expectedEndOfCentralDirOffset += 12 /* should not include the leading 12 bytes */ + this.zip64EndOfCentralSize;
        }

        var extraBytes = endOfCentralDirOffset - expectedEndOfCentralDirOffset;

        if (extraBytes > 0) {
            // console.warn(extraBytes, "extra bytes at beginning or within zipfile");
            if (this.isSignature(endOfCentralDirOffset, sig.CENTRAL_FILE_HEADER)) {
                // The offsets seem wrong, but we have something at the specified offset.
                // So… we keep it.
            } else {
                // the offset is wrong, update the "zero" of the reader
                // this happens if data has been prepended (crx files for example)
                this.reader.zero = extraBytes;
            }
        } else if (extraBytes < 0) {
            throw new Error("Corrupted zip: missing " + Math.abs(extraBytes) + " bytes.");
        }
    },
    /**
     * Create the reader used to parse the given data.
     * @param {String|ArrayBuffer|Uint8Array|Buffer} data the data to read.
     */
    prepareReader: function(data) {
        this.reader = readerFor(data);
    },
    /**
     * Read a zip file and create ZipEntries.
     * @param {String|ArrayBuffer|Uint8Array|Buffer} data the binary string representing a zip file.
     */
    load: function(data) {
        this.prepareReader(data);
        // parse back to front : the end of central directory locates the
        // central directory, which in turn locates each local file header
        this.readEndOfCentral();
        this.readCentralDir();
        this.readLocalFiles();
    }
+};
+// }}} end of ZipEntries
+module.exports = ZipEntries;
+
+},{"./reader/readerFor":22,"./signature":23,"./support":30,"./utils":32,"./zipEntry":34}],34:[function(require,module,exports){
+"use strict";
+var readerFor = require("./reader/readerFor");
+var utils = require("./utils");
+var CompressedObject = require("./compressedObject");
+var crc32fn = require("./crc32");
+var utf8 = require("./utf8");
+var compressions = require("./compressions");
+var support = require("./support");
+
// Host system values, compared against the high byte of versionMadeBy
// in processAttributes.
var MADE_BY_DOS = 0x00;
var MADE_BY_UNIX = 0x03;
+
/**
 * Find a compression registered in JSZip.
 * @param {string} compressionMethod the method magic to find.
 * @return {Object|null} the JSZip compression object, null if none found.
 */
var findCompression = function(compressionMethod) {
    // scan the registered compressions for a matching magic
    var names = Object.keys(compressions);
    for (var i = 0; i < names.length; i++) {
        var candidate = compressions[names[i]];
        if (candidate.magic === compressionMethod) {
            return candidate;
        }
    }
    return null;
};
+
+// class ZipEntry {{{
/**
 * An entry in the zip file.
 * @constructor
 * @param {Object} options Options of the current file.
 * @param {Object} loadOptions Options for loading the stream.
 */
function ZipEntry(options, loadOptions) {
    this.loadOptions = loadOptions;
    this.options = options;
}
+ZipEntry.prototype = {
+ /**
+ * say if the file is encrypted.
+ * @return {boolean} true if the file is encrypted, false otherwise.
+ */
+ isEncrypted: function() {
+ // bit 1 is set
+ return (this.bitFlag & 0x0001) === 0x0001;
+ },
+ /**
+ * say if the file has utf-8 filename/comment.
+ * @return {boolean} true if the filename/comment is in utf-8, false otherwise.
+ */
+ useUTF8: function() {
+ // bit 11 is set
+ return (this.bitFlag & 0x0800) === 0x0800;
+ },
    /**
     * Read the local part of a zip file and add the info in this object.
     * @param {DataReader} reader the reader to use.
     * @throws {Error} if the central directory info is incomplete or the
     *         compression method is unknown.
     */
    readLocalPart: function(reader) {
        var compression, localExtraFieldsLength;

        // we already know everything from the central dir !
        // If the central dir data are false, we are doomed.
        // On the bright side, the local part is scary : zip64, data descriptors, both, etc.
        // The less data we get here, the more reliable this should be.
        // Let's skip the whole header and dash to the data !
        reader.skip(22);
        // in some zip created on windows, the filename stored in the central dir contains \ instead of /.
        // Strangely, the filename here is OK.
        // I would love to treat these zip files as corrupted (see http://www.info-zip.org/FAQ.html#backslashes
        // or APPNOTE#4.4.17.1, "All slashes MUST be forward slashes '/'") but there are a lot of bad zip generators...
        // Search "unzip mismatching "local" filename continuing with "central" filename version" on
        // the internet.
        //
        // I think I see the logic here : the central directory is used to display
        // content and the local directory is used to extract the files. Mixing / and \
        // may be used to display \ to windows users and use / when extracting the files.
        // Unfortunately, this lead also to some issues : http://seclists.org/fulldisclosure/2009/Sep/394
        this.fileNameLength = reader.readInt(2);
        localExtraFieldsLength = reader.readInt(2); // can't be sure this will be the same as the central dir
        // the fileName is stored as binary data, the handleUTF8 method will take care of the encoding.
        this.fileName = reader.readData(this.fileNameLength);
        reader.skip(localExtraFieldsLength);

        // sizes come from the central directory ; -1 marks a field we could not resolve
        if (this.compressedSize === -1 || this.uncompressedSize === -1) {
            throw new Error("Bug or corrupted zip : didn't get enough information from the central directory " + "(compressedSize === -1 || uncompressedSize === -1)");
        }

        // resolve the compression algorithm from the magic read in the central part
        compression = findCompression(this.compressionMethod);
        if (compression === null) { // no compression found
            throw new Error("Corrupted zip : compression " + utils.pretty(this.compressionMethod) + " unknown (inner file : " + utils.transformTo("string", this.fileName) + ")");
        }
        this.decompressed = new CompressedObject(this.compressedSize, this.uncompressedSize, this.crc32, compression, reader.readData(this.compressedSize));
    },
+
    /**
     * Read the central part of a zip file and add the info in this object.
     * @param {DataReader} reader the reader to use.
     * @throws {Error} if the entry is encrypted.
     */
    readCentralPart: function(reader) {
        // fixed-size central directory header fields, read sequentially
        this.versionMadeBy = reader.readInt(2);
        reader.skip(2);
        // this.versionNeeded = reader.readInt(2);
        this.bitFlag = reader.readInt(2);
        this.compressionMethod = reader.readString(2);
        this.date = reader.readDate();
        this.crc32 = reader.readInt(4);
        this.compressedSize = reader.readInt(4);
        this.uncompressedSize = reader.readInt(4);
        var fileNameLength = reader.readInt(2);
        this.extraFieldsLength = reader.readInt(2);
        this.fileCommentLength = reader.readInt(2);
        this.diskNumberStart = reader.readInt(2);
        this.internalFileAttributes = reader.readInt(2);
        this.externalFileAttributes = reader.readInt(4);
        this.localHeaderOffset = reader.readInt(4);

        // encrypted entries cannot be decoded by this library : fail early
        if (this.isEncrypted()) {
            throw new Error("Encrypted zip are not supported");
        }

        // will be read in the local part, see the comments there
        reader.skip(fileNameLength);
        this.readExtraFields(reader);
        this.parseZIP64ExtraField(reader);
        this.fileComment = reader.readData(this.fileCommentLength);
    },
+
+ /**
+ * Parse the external file attributes and get the unix/dos permissions.
+ */
+ processAttributes: function () {
+ this.unixPermissions = null;
+ this.dosPermissions = null;
+ var madeBy = this.versionMadeBy >> 8;
+
+ // Check if we have the DOS directory flag set.
+ // We look for it in the DOS and UNIX permissions
+ // but some unknown platform could set it as a compatibility flag.
+ this.dir = this.externalFileAttributes & 0x0010 ? true : false;
+
+ if(madeBy === MADE_BY_DOS) {
+ // first 6 bits (0 to 5)
+ this.dosPermissions = this.externalFileAttributes & 0x3F;
+ }
+
+ if(madeBy === MADE_BY_UNIX) {
+ this.unixPermissions = (this.externalFileAttributes >> 16) & 0xFFFF;
+ // the octal permissions are in (this.unixPermissions & 0x01FF).toString(8);
+ }
+
+ // fail safe : if the name ends with a / it probably means a folder
+ if (!this.dir && this.fileNameStr.slice(-1) === "/") {
+ this.dir = true;
+ }
+ },
+
    /**
     * Parse the ZIP64 extra field and merge the info in the current ZipEntry.
     * Takes no parameter : it reads this.extraFields, filled by readExtraFields.
     */
    parseZIP64ExtraField: function() {
        // 0x0001 is the id of the ZIP64 extended information extra field
        if (!this.extraFields[0x0001]) {
            return;
        }

        // should be something, preparing the extra reader
        var extraReader = readerFor(this.extraFields[0x0001].value);

        // Each value below is only present in the extra field when the
        // corresponding regular field was saturated (stored as all-ones).
        // I really hope that these 64bits integer can fit in 32 bits integer, because js
        // won't let us have more.
        if (this.uncompressedSize === utils.MAX_VALUE_32BITS) {
            this.uncompressedSize = extraReader.readInt(8);
        }
        if (this.compressedSize === utils.MAX_VALUE_32BITS) {
            this.compressedSize = extraReader.readInt(8);
        }
        if (this.localHeaderOffset === utils.MAX_VALUE_32BITS) {
            this.localHeaderOffset = extraReader.readInt(8);
        }
        if (this.diskNumberStart === utils.MAX_VALUE_32BITS) {
            this.diskNumberStart = extraReader.readInt(4);
        }
    },
    /**
     * Read the extra fields of the central part and index them by id
     * in this.extraFields.
     * @param {DataReader} reader the reader to use.
     */
    readExtraFields: function(reader) {
        var end = reader.index + this.extraFieldsLength,
            extraFieldId,
            extraFieldLength,
            extraFieldValue;

        if (!this.extraFields) {
            this.extraFields = {};
        }

        // each extra field is a 2-byte id, a 2-byte length, then the payload ;
        // stop when fewer than 4 bytes remain (truncated or padding tail)
        while (reader.index + 4 < end) {
            extraFieldId = reader.readInt(2);
            extraFieldLength = reader.readInt(2);
            extraFieldValue = reader.readData(extraFieldLength);

            this.extraFields[extraFieldId] = {
                id: extraFieldId,
                length: extraFieldLength,
                value: extraFieldValue
            };
        }

        // always leave the reader right after the extra fields block
        reader.setIndex(end);
    },
+ /**
+ * Apply an UTF8 transformation if needed.
+ */
+ handleUTF8: function() {
+ var decodeParamType = support.uint8array ? "uint8array" : "array";
+ if (this.useUTF8()) {
+ this.fileNameStr = utf8.utf8decode(this.fileName);
+ this.fileCommentStr = utf8.utf8decode(this.fileComment);
+ } else {
+ var upath = this.findExtraFieldUnicodePath();
+ if (upath !== null) {
+ this.fileNameStr = upath;
+ } else {
+ // ASCII text or unsupported code page
+ var fileNameByteArray = utils.transformTo(decodeParamType, this.fileName);
+ this.fileNameStr = this.loadOptions.decodeFileName(fileNameByteArray);
+ }
+
+ var ucomment = this.findExtraFieldUnicodeComment();
+ if (ucomment !== null) {
+ this.fileCommentStr = ucomment;
+ } else {
+ // ASCII text or unsupported code page
+ var commentByteArray = utils.transformTo(decodeParamType, this.fileComment);
+ this.fileCommentStr = this.loadOptions.decodeFileName(commentByteArray);
+ }
+ }
+ },
+
+ /**
+ * Find the unicode path declared in the extra field, if any.
+ * @return {String} the unicode path, null otherwise.
+ */
+ findExtraFieldUnicodePath: function() {
+ var upathField = this.extraFields[0x7075];
+ if (upathField) {
+ var extraReader = readerFor(upathField.value);
+
+ // wrong version
+ if (extraReader.readInt(1) !== 1) {
+ return null;
+ }
+
+ // the crc of the filename changed, this field is out of date.
+ if (crc32fn(this.fileName) !== extraReader.readInt(4)) {
+ return null;
+ }
+
+ return utf8.utf8decode(extraReader.readData(upathField.length - 5));
+ }
+ return null;
+ },
+
+ /**
+ * Find the unicode comment declared in the extra field, if any.
+ * @return {String} the unicode comment, null otherwise.
+ */
+ findExtraFieldUnicodeComment: function() {
+ var ucommentField = this.extraFields[0x6375];
+ if (ucommentField) {
+ var extraReader = readerFor(ucommentField.value);
+
+ // wrong version
+ if (extraReader.readInt(1) !== 1) {
+ return null;
+ }
+
+ // the crc of the comment changed, this field is out of date.
+ if (crc32fn(this.fileComment) !== extraReader.readInt(4)) {
+ return null;
+ }
+
+ return utf8.utf8decode(extraReader.readData(ucommentField.length - 5));
+ }
+ return null;
+ }
+};
+module.exports = ZipEntry;
+
+},{"./compressedObject":2,"./compressions":3,"./crc32":4,"./reader/readerFor":22,"./support":30,"./utf8":31,"./utils":32}],35:[function(require,module,exports){
+"use strict";
+
+var StreamHelper = require("./stream/StreamHelper");
+var DataWorker = require("./stream/DataWorker");
+var utf8 = require("./utf8");
+var CompressedObject = require("./compressedObject");
+var GenericWorker = require("./stream/GenericWorker");
+
/**
 * A simple object representing a file in the zip file.
 * @constructor
 * @param {string} name the name of the file
 * @param {String|ArrayBuffer|Uint8Array|Buffer} data the data
 * @param {Object} options the options of the file
 */
var ZipObject = function(name, data, options) {
    // metadata copied from the file options
    this.name = name;
    this.dir = options.dir;
    this.date = options.date;
    this.comment = options.comment;
    this.unixPermissions = options.unixPermissions;
    this.dosPermissions = options.dosPermissions;

    // raw content and its "already binary" marker
    this._data = data;
    this._dataBinary = options.binary;

    // keep only the compression related options
    this.options = {
        compression : options.compression,
        compressionOptions : options.compressionOptions
    };
};
+
+ZipObject.prototype = {
    /**
     * Create an internal stream for the content of this object.
     * @param {String} type the type of each chunk.
     * @return StreamHelper the stream.
     */
    internalStream: function (type) {
        var result = null, outputType = "string";
        try {
            if (!type) {
                throw new Error("No output type specified.");
            }
            outputType = type.toLowerCase();
            // "string" and "text" both ask for a unicode string ;
            // "binarystring" and "text" are normalized to the "string" output type
            var askUnicodeString = outputType === "string" || outputType === "text";
            if (outputType === "binarystring" || outputType === "text") {
                outputType = "string";
            }
            result = this._decompressWorker();

            var isUnicodeString = !this._dataBinary;

            // re-encode or decode only when the stored form and the asked form disagree
            if (isUnicodeString && !askUnicodeString) {
                result = result.pipe(new utf8.Utf8EncodeWorker());
            }
            if (!isUnicodeString && askUnicodeString) {
                result = result.pipe(new utf8.Utf8DecodeWorker());
            }
        } catch (e) {
            // surface the error through an error worker instead of throwing
            result = new GenericWorker("error");
            result.error(e);
        }

        return new StreamHelper(result, outputType, "");
    },
+
    /**
     * Prepare the content in the asked type.
     * @param {String} type the type of the result.
     * @param {Function} onUpdate a function to call on each internal update.
     * @return Promise the promise of the result.
     */
    async: function (type, onUpdate) {
        // accumulate the whole internal stream into a single result
        return this.internalStream(type).accumulate(onUpdate);
    },
+
    /**
     * Prepare the content as a nodejs stream.
     * @param {String} type the type of each chunk, "nodebuffer" by default.
     * @param {Function} onUpdate a function to call on each internal update.
     * @return Stream the stream.
     */
    nodeStream: function (type, onUpdate) {
        return this.internalStream(type || "nodebuffer").toNodejsStream(onUpdate);
    },
+
    /**
     * Return a worker for the compressed content.
     * @private
     * @param {Object} compression the compression object to use.
     * @param {Object} compressionOptions the options to use when compressing.
     * @return Worker the worker.
     */
    _compressWorker: function (compression, compressionOptions) {
        // already compressed with the same algorithm : reuse the data as-is
        if (
            this._data instanceof CompressedObject &&
            this._data.compression.magic === compression.magic
        ) {
            return this._data.getCompressedWorker();
        } else {
            var result = this._decompressWorker();
            if(!this._dataBinary) {
                // unicode content must be utf8 encoded before compression
                result = result.pipe(new utf8.Utf8EncodeWorker());
            }
            return CompressedObject.createWorkerFrom(result, compression, compressionOptions);
        }
    },
+ /**
+ * Return a worker for the decompressed content.
+ * @private
+ * @return Worker the worker.
+ */
+ _decompressWorker : function () {
+ if (this._data instanceof CompressedObject) {
+ return this._data.getContentWorker();
+ } else if (this._data instanceof GenericWorker) {
+ return this._data;
+ } else {
+ return new DataWorker(this._data);
+ }
+ }
+};
+
// Accessors removed in JSZip 3.0 : calling any of them now fails with a
// pointer to the upgrade guide.
var removedMethods = ["asText", "asBinary", "asNodeBuffer", "asUint8Array", "asArrayBuffer"];
var removedFn = function () {
    throw new Error("This method has been removed in JSZip 3.0, please check the upgrade guide.");
};

removedMethods.forEach(function (methodName) {
    ZipObject.prototype[methodName] = removedFn;
});
+module.exports = ZipObject;
+
+},{"./compressedObject":2,"./stream/DataWorker":27,"./stream/GenericWorker":28,"./stream/StreamHelper":29,"./utf8":31}],36:[function(require,module,exports){
+(function (global){
+'use strict';
+var Mutation = global.MutationObserver || global.WebKitMutationObserver;
+
+var scheduleDrain;
+
+{
+ if (Mutation) {
+ var called = 0;
+ var observer = new Mutation(nextTick);
+ var element = global.document.createTextNode('');
+ observer.observe(element, {
+ characterData: true
+ });
+ scheduleDrain = function () {
+ element.data = (called = ++called % 2);
+ };
+ } else if (!global.setImmediate && typeof global.MessageChannel !== 'undefined') {
+ var channel = new global.MessageChannel();
+ channel.port1.onmessage = nextTick;
+ scheduleDrain = function () {
+ channel.port2.postMessage(0);
+ };
+ } else if ('document' in global && 'onreadystatechange' in global.document.createElement('script')) {
+ scheduleDrain = function () {
+
+ // Create a
+