diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..34aebaf
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,59 @@
+name: Docs
+
+# Build the MkDocs site on docs changes to main and publish it to GitHub Pages.
+on:
+  push:
+    branches: [main]
+    paths:
+      - "docs/**"
+      - "mkdocs.yml"
+      - ".github/workflows/docs.yml"
+  workflow_dispatch:
+
+# Default to read-only; the deploy job opts in to the extra scopes it needs.
+permissions:
+  contents: read
+
+# Serialize Pages deployments; let an in-progress deploy finish rather than cancel it.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  build:
+    name: Build Docs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: pip install -r docs/requirements.txt
+
+      # --strict turns MkDocs warnings into build failures.
+      - name: Build site
+        run: mkdocs build --strict
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: site/
+
+  deploy:
+    name: Deploy Docs
+    needs: build
+    runs-on: ubuntu-latest
+    # Only this job can publish to Pages and request the OIDC token deploy-pages uses.
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
index b7ab767..84a3cea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,9 @@ Thumbs.db
 *.db-shm
 *.db-wal
 
+# Docs build
+site/
+
 # Build
 dist/
 build/output/
diff --git a/Makefile b/Makefile
index 696c98a..025da1a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: build test test-short lint fmt vet vulncheck clean dev setup docker docker-run helm-lint release-dry
+.PHONY: build test test-short lint fmt vet vulncheck clean dev setup docker docker-run helm-lint release-dry docs docs-serve
 
 # Build variables
 VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
@@ -84,6 +84,16 @@ release-dry:
 ## check: Run all checks (what CI runs)
 check: fmt vet lint test vulncheck
 
+## docs: Build documentation site into site/ (installs docs deps, fails on warnings)
+docs:
+	pip install -q -r docs/requirements.txt
+	mkdocs build --strict
+
+## docs-serve: Serve documentation locally with live reload (installs docs deps first)
+docs-serve:
+	pip install -q -r docs/requirements.txt
+	mkdocs serve
+
 ## help: Show this help
 help:
 	@echo "Usage: make [target]"
diff --git a/docs/admin-api.md b/docs/admin-api.md
new file mode 100644
index 0000000..d1b6db6
--- /dev/null
+++ b/docs/admin-api.md
@@ -0,0 +1,67 @@
+# Admin API
+
+Manage budget rules and view API key usage at runtime without restarting the proxy.
+
+## Enable
+
+```yaml
+admin:
+  enabled: true
+  token: "your-secret-admin-token"
+```
+
+## Authentication
+
+All admin endpoints require a Bearer token:
+
+```bash
+curl -H "Authorization: Bearer your-secret-admin-token" \
+  http://localhost:8787/api/admin/budgets/rules
+```
+
+## Endpoints
+
+### Budget Rules
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/api/admin/budgets/rules` | List all budget rules |
+| `POST` | `/api/admin/budgets/rules` | Create a budget rule |
+| `DELETE` | `/api/admin/budgets/rules?pattern=...` | Delete a rule by pattern |
+
+#### Create a Rule
+
+```bash
+curl -X POST http://localhost:8787/api/admin/budgets/rules \
+  -H "Authorization: Bearer your-secret-admin-token" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "api_key_pattern": "sk-proj-dev-*",
+    "daily_limit_usd": 5.0,
+    "monthly_limit_usd": 50.0,
+    "action": "block"
+  }'
+```
+
+#### Delete a Rule
+
+```bash
+curl -X DELETE "http://localhost:8787/api/admin/budgets/rules?pattern=sk-proj-dev-*" \
+  -H "Authorization: Bearer your-secret-admin-token"
+```
+
+### API Keys
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/api/admin/api-keys` | List API key hashes with monthly spend |
+
+### Providers
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/api/admin/providers` | List provider status |
+
+## Persistence
+
+Runtime rules take effect immediately and persist across restarts. They are stored in the database and take precedence over YAML config rules.
diff --git a/docs/configuration/index.md b/docs/configuration/index.md
new file mode 100644
index 0000000..eee7d02
--- /dev/null
+++ b/docs/configuration/index.md
@@ -0,0 +1,47 @@
+# Configuration Overview
+
+AgentLedger works out of the box with sensible defaults. All configuration is optional — the proxy starts with OpenAI and Anthropic enabled, SQLite storage, and the dashboard on.
+
+## Config File Locations
+
+AgentLedger looks for config in these locations (in order):
+
+1. Path passed via `--config` / `-c` flag
+2. `./agentledger.yaml`
+3. `./configs/agentledger.yaml`
+4. `~/.config/agentledger/agentledger.yaml`
+5. `/etc/agentledger/agentledger.yaml`
+
+## Minimal Config
+
+No config file is needed for basic usage. To customize:
+
+```yaml
+listen: ":8787"
+
+providers:
+  openai:
+    upstream: "https://api.openai.com"
+    enabled: true
+  anthropic:
+    upstream: "https://api.anthropic.com"
+    enabled: true
+
+storage:
+  driver: "sqlite"
+  dsn: "data/agentledger.db"
+```
+
+## Environment Variable Overrides
+
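+All settings can be overridden with environment variables prefixed `AGENTLEDGER_`. The variable name is the config key path in upper case, joined with underscores (for example, `storage.dsn` becomes `AGENTLEDGER_STORAGE_DSN`):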
+
+```bash
+AGENTLEDGER_LISTEN=":9090"
+AGENTLEDGER_STORAGE_DSN="/tmp/ledger.db"
+AGENTLEDGER_LOG_LEVEL="debug"
+```
+
+## Full Reference
+
+See [Full Reference](reference.md) for every configuration option with descriptions and defaults.
diff --git a/docs/configuration/reference.md b/docs/configuration/reference.md
new file mode 100644
index 0000000..2622ea4
--- /dev/null
+++ b/docs/configuration/reference.md
@@ -0,0 +1,202 @@
+# Configuration Reference
+
+Full annotated configuration. All sections are optional — only configure what you need.
+
+```yaml
+# Proxy listen address
+listen: ":8787"
+
+# ─── Providers ───────────────────────────────────────────────────────
+
+providers:
+  openai:
+    upstream: "https://api.openai.com"
+    enabled: true
+  anthropic:
+    upstream: "https://api.anthropic.com"
+    enabled: true
+
+  # Additional providers — route via path prefix
+  # e.g., /groq/v1/chat/completions → api.groq.com
+  extra:
+    groq:
+      type: "openai"                    # OpenAI-compatible API format
+      upstream: "https://api.groq.com/openai"
+      path_prefix: "/groq"
+      enabled: true
+    mistral:
+      type: "openai"
+      upstream: "https://api.mistral.ai"
+      path_prefix: "/mistral"
+      enabled: true
+    deepseek:
+      type: "openai"
+      upstream: "https://api.deepseek.com"
+      path_prefix: "/deepseek"
+      enabled: true
+    gemini:
+      type: "gemini"                    # Custom Gemini parser
+      upstream: "https://generativelanguage.googleapis.com"
+      path_prefix: "/gemini"
+      enabled: true
+    cohere:
+      type: "cohere"                    # Custom Cohere parser
+      upstream: "https://api.cohere.com"
+      path_prefix: "/cohere"
+      enabled: true
+    azure:
+      type: "azure"                     # Azure OpenAI
+      upstream: "https://my-resource.openai.azure.com"
+      path_prefix: "/azure"
+      enabled: true
+    together:
+      type: "openai"
+      upstream: "https://api.together.xyz"
+      path_prefix: "/together"
+      enabled: true
+    fireworks:
+      type: "openai"
+      upstream: "https://api.fireworks.ai/inference"
+      path_prefix: "/fireworks"
+      enabled: true
+    perplexity:
+      type: "openai"
+      upstream: "https://api.perplexity.ai"
+      path_prefix: "/perplexity"
+      enabled: true
+    openrouter:
+      type: "openai"
+      upstream: "https://openrouter.ai/api"
+      path_prefix: "/openrouter"
+      enabled: true
+    xai:
+      type: "openai"                    # xAI (Grok)
+      upstream: "https://api.x.ai"
+      path_prefix: "/xai"
+      enabled: true
+    cerebras:
+      type: "openai"
+      upstream: "https://api.cerebras.ai"
+      path_prefix: "/cerebras"
+      enabled: true
+    sambanova:
+      type: "openai"
+      upstream: "https://api.sambanova.ai"
+      path_prefix: "/sambanova"
+      enabled: true
+
+# ─── Storage ─────────────────────────────────────────────────────────
+
+storage:
+  driver: "sqlite"                      # "sqlite" or "postgres"
+  dsn: "data/agentledger.db"            # SQLite path or Postgres DSN
+  # max_open_conns: 25                  # Postgres only
+  # max_idle_conns: 5                   # Postgres only
+  # Example Postgres DSN:
+  # dsn: "postgres://user:pass@localhost:5432/agentledger?sslmode=disable"
+
+# ─── Logging ─────────────────────────────────────────────────────────
+
+log:
+  level: "info"                         # debug, info, warn, error
+  format: "text"                        # text or json
+
+# ─── Async Recording ────────────────────────────────────────────────
+
+recording:
+  buffer_size: 10000                    # channel buffer for async writes
+  workers: 4                            # recording goroutines
+
+# ─── Budget Enforcement ─────────────────────────────────────────────
+
+budgets:
+  default:
+    daily_limit_usd: 50.0
+    monthly_limit_usd: 500.0
+    soft_limit_pct: 0.8                 # warn at 80% of limit
+    action: "block"                     # "block" returns 429, "warn" adds header only
+  rules:
+    - api_key_pattern: "sk-proj-dev-*"  # glob pattern
+      daily_limit_usd: 5.0
+      monthly_limit_usd: 50.0
+      action: "block"
+    - tenant_id: "alpha"                # tenant-scoped rule
+      daily_limit_usd: 100.0
+      monthly_limit_usd: 1000.0
+      action: "block"
+
+# ─── Circuit Breaker ────────────────────────────────────────────────
+
+circuit_breaker:
+  max_failures: 5                       # consecutive 5xx before opening
+  timeout_secs: 30                      # seconds before half-open retry
+
+# ─── Agent Session Tracking ─────────────────────────────────────────
+
+agent:
+  session_timeout_mins: 30              # auto-expire idle sessions
+  loop_threshold: 20                    # same path N times = loop (0 = disabled)
+  loop_window_mins: 5                   # sliding window
+  loop_action: "warn"                   # "warn" or "block"
+  ghost_max_age_mins: 60                # sessions older than this = ghost (0 = disabled)
+  ghost_min_calls: 50
+  ghost_min_cost_usd: 1.0
+
+# ─── Dashboard ───────────────────────────────────────────────────────
+
+dashboard:
+  enabled: true
+
+# ─── Multi-Tenancy ──────────────────────────────────────────────────
+
+tenants:
+  enabled: true
+  key_mappings:
+    - api_key_pattern: "sk-proj-team-alpha-*"
+      tenant_id: "alpha"
+    - api_key_pattern: "sk-proj-team-beta-*"
+      tenant_id: "beta"
+
+# ─── Alerting ────────────────────────────────────────────────────────
+
+alerts:
+  slack:
+    webhook_url: "https://hooks.slack.com/services/T00/B00/xxx"
+  webhooks:
+    - url: "https://api.example.com/alerts"
+      headers:
+        Authorization: "Bearer token"
+  cooldown_mins: 5                      # deduplication window per alert
+
+# ─── Rate Limiting ──────────────────────────────────────────────────
+
+rate_limits:
+  default:
+    requests_per_minute: 60
+    requests_per_hour: 1000
+  rules:
+    - api_key_pattern: "sk-proj-dev-*"
+      requests_per_minute: 10
+
+# ─── Admin API ───────────────────────────────────────────────────────
+
+admin:
+  enabled: true
+  token: "your-secret-admin-token"      # Bearer token for auth
+
+# ─── MCP Tool Metering ──────────────────────────────────────────────
+
+mcp:
+  enabled: true
+  upstream: "http://localhost:3000"
+  pricing:
+    - server: "filesystem"
+      tool: "read_file"
+      cost_per_call: 0.01
+    - server: "filesystem"
+      tool: ""                          # wildcard: all tools on server
+      cost_per_call: 0.005
+    - server: "github"
+      tool: ""
+      cost_per_call: 0.02
+```
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
new file mode 100644
index 0000000..3b05ede
--- /dev/null
+++ b/docs/deployment/docker.md
@@ -0,0 +1,90 @@
+# Docker Deployment
+
+## Quick Start
+
+```bash
+docker run --rm -p 8787:8787 ghcr.io/wdz-dev/agent-ledger:latest
+```
+
+## With Persistent Storage
+
+```bash
+docker run -d \
+  --name agentledger \
+  -p 8787:8787 \
+  -v agentledger-data:/data \
+  ghcr.io/wdz-dev/agent-ledger:latest
+```
+
+## Docker Compose
+
+A `docker-compose.yml` is included in the `deploy/` directory:
+
+```bash
+cd deploy && docker compose up
+```
+
+This starts AgentLedger with persistent volume storage and health checks.
+
+## Custom Configuration
+
+Mount a config file:
+
+```bash
+docker run -d \
+  --name agentledger \
+  -p 8787:8787 \
+  -v agentledger-data:/data \
+  -v "$(pwd)/agentledger.yaml:/etc/agentledger/agentledger.yaml" \
+  ghcr.io/wdz-dev/agent-ledger:latest
+```
+
+Or use environment variables:
+
+```bash
+docker run -d \
+  --name agentledger \
+  -p 8787:8787 \
+  -e AGENTLEDGER_LISTEN=":8787" \
+  -e AGENTLEDGER_STORAGE_DSN="/data/agentledger.db" \
+  -e AGENTLEDGER_LOG_LEVEL="info" \
+  ghcr.io/wdz-dev/agent-ledger:latest
+```
+
+## With PostgreSQL
+
+For production deployments with multiple replicas, use PostgreSQL instead of SQLite:
+
+```yaml
+# docker-compose.yml
+services:
+  agentledger:
+    image: ghcr.io/wdz-dev/agent-ledger:latest
+    ports:
+      - "8787:8787"
+    environment:
+      AGENTLEDGER_STORAGE_DRIVER: "postgres"
+      AGENTLEDGER_STORAGE_DSN: "postgres://user:pass@postgres:5432/agentledger?sslmode=disable"
+    depends_on:
+      - postgres
+
+  postgres:
+    image: postgres:16
+    environment:
+      POSTGRES_USER: user
+      POSTGRES_PASSWORD: pass
+      POSTGRES_DB: agentledger
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+
+volumes:
+  pgdata:
+```
+
+## Health Check
+
+The proxy responds to health checks at:
+
+```
+GET http://localhost:8787/healthz
+```
diff --git a/docs/deployment/kubernetes.md b/docs/deployment/kubernetes.md
new file mode 100644
index 0000000..65830d2
--- /dev/null
+++ b/docs/deployment/kubernetes.md
@@ -0,0 +1,74 @@
+# Kubernetes / Helm
+
+## Install
+
+```bash
+helm install agentledger deploy/helm/agentledger
+```
+
+## Key Values
+
+| Value | Description | Default |
+|-------|-------------|---------|
+| `replicaCount` | Number of proxy replicas | `1` |
+| `image.repository` | Container image | `ghcr.io/wdz-dev/agent-ledger` |
+| `image.tag` | Image tag | Chart appVersion |
+| `service.port` | Service port | `8787` |
+| `ingress.enabled` | Enable ingress | `false` |
+| `persistence.enabled` | Enable PVC for SQLite | `true` |
+| `persistence.size` | PVC size | `1Gi` |
+
+## Custom Values
+
+```bash
+helm install agentledger deploy/helm/agentledger \
+  --set replicaCount=2 \
+  --set ingress.enabled=true \
+  --set 'ingress.hosts[0].host=agentledger.example.com'
+```
+
+Or with a values file (running more than one replica requires PostgreSQL — see the warning below):
+
+```yaml
+# values-prod.yaml
+replicaCount: 3
+
+ingress:
+  enabled: true
+  hosts:
+    - host: agentledger.example.com
+      paths:
+        - path: /
+          pathType: Prefix
+
+resources:
+  requests:
+    cpu: 100m
+    memory: 128Mi
+  limits:
+    cpu: 500m
+    memory: 256Mi
+```
+
+```bash
+helm install agentledger deploy/helm/agentledger -f values-prod.yaml
+```
+
+## SQLite vs PostgreSQL
+
+!!! warning "SQLite with Multiple Replicas"
+    SQLite is single-writer. If you run multiple replicas, use PostgreSQL instead.
+
+For PostgreSQL:
+
+```yaml
+# values-prod.yaml
+env:
+  - name: AGENTLEDGER_STORAGE_DRIVER
+    value: "postgres"
+  - name: AGENTLEDGER_STORAGE_DSN
+    value: "postgres://user:pass@postgres:5432/agentledger?sslmode=disable"
+
+persistence:
+  enabled: false
+```
diff --git a/docs/features/agent-tracking.md b/docs/features/agent-tracking.md
new file mode 100644
index 0000000..5990350
--- /dev/null
+++ b/docs/features/agent-tracking.md
@@ -0,0 +1,76 @@
+# Agent Session Tracking
+
+AgentLedger groups multi-call agent runs into sessions, enabling per-execution cost attribution. This is the key differentiator — most tools track cost per API key or per user. AgentLedger tracks cost per agent execution.
+
+## Agent Headers
+
+Tag requests with agent metadata using HTTP headers:
+
+```
+X-Agent-Id: code-reviewer
+X-Agent-Session: sess_abc123
+X-Agent-User: user@example.com
+X-Agent-Task: Review PR #456
+```
+
+All agent headers are stripped before forwarding to the upstream provider.
+
+### Python Example
+
+```python
+client = openai.OpenAI(
+    base_url="http://localhost:8787/v1",
+    default_headers={
+        "X-Agent-Id": "code-reviewer",
+        "X-Agent-Session": f"sess_{run_id}",
+        "X-Agent-User": "alice@company.com",
+        "X-Agent-Task": "Review PR #456",
+    }
+)
+```
+
+### Fallback
+
+Without agent headers, costs are tracked at the API-key level. Headers are optional but recommended for per-agent visibility.
+
+## Loop Detection
+
+Automatically detects runaway agents making repetitive calls to the same endpoint.
+
+```yaml
+agent:
+  loop_threshold: 20        # same path N times in window = loop
+  loop_window_mins: 5       # sliding window
+  loop_action: "warn"       # "warn" logs + alert, "block" returns 429
+```
+
+When a loop is detected:
+
+- **warn**: logs a warning and sends an alert (if [alerting](alerting.md) is configured)
+- **block**: returns `429 Too Many Requests` to stop the agent
+
+Set `loop_threshold: 0` to disable.
+
+## Ghost Agent Detection
+
+Finds agents that are still running but may have been forgotten — burning tokens silently.
+
+```yaml
+agent:
+  ghost_max_age_mins: 60    # sessions older than this are candidates
+  ghost_min_calls: 50       # minimum calls before flagging
+  ghost_min_cost_usd: 1.0   # minimum spend before flagging
+```
+
+A session is flagged as a ghost when all three thresholds are met: it has been running longer than `ghost_max_age_mins` minutes, has made more than `ghost_min_calls` calls, and has spent more than `ghost_min_cost_usd` US dollars.
+
+Set `ghost_max_age_mins: 0` to disable.
+
+## Session Lifecycle
+
+```yaml
+agent:
+  session_timeout_mins: 30  # auto-expire idle sessions
+```
+
+Sessions are automatically expired after `session_timeout_mins` of inactivity. Active sessions are visible in the dashboard and via the sessions API endpoint.
diff --git a/docs/features/alerting.md b/docs/features/alerting.md
new file mode 100644
index 0000000..99e00df
--- /dev/null
+++ b/docs/features/alerting.md
@@ -0,0 +1,37 @@
+# Alerting
+
+Get notified when budgets are approaching limits or agents are misbehaving.
+
+## Configuration
+
+```yaml
+alerts:
+  slack:
+    webhook_url: "https://hooks.slack.com/services/T00/B00/xxx"
+  webhooks:
+    - url: "https://api.example.com/alerts"
+      headers:
+        Authorization: "Bearer token"
+  cooldown_mins: 5
+```
+
+## Alert Types
+
+| Type | Trigger |
+|------|---------|
+| `budget_warning` | Spend exceeds soft limit threshold |
+| `budget_exceeded` | Spend exceeds hard limit |
+| `loop_detected` | Agent making repetitive calls |
+| `ghost_detected` | Long-running agent with high spend |
+
+## Slack Notifications
+
+Provide a Slack webhook URL and alerts are posted as formatted messages with severity, details, and timestamps.
+
+## Webhook Notifications
+
+Generic webhook support for any HTTP endpoint. Alerts are sent as JSON POST requests with custom headers for authentication.
+
+## Deduplication
+
+The `cooldown_mins` setting prevents alert spam. Once an alert is sent for a specific key (e.g., a particular API key exceeding its budget), the same alert won't fire again until the cooldown period expires.
diff --git a/docs/features/budgets.md b/docs/features/budgets.md
new file mode 100644
index 0000000..383d9d8
--- /dev/null
+++ b/docs/features/budgets.md
@@ -0,0 +1,80 @@
+# Budget Enforcement
+
+Set daily and monthly spend limits per API key, per agent, or per tenant. When exceeded, requests are blocked before they reach the LLM provider.
+
+## How It Works
+
+1. Request arrives at the proxy
+2. Budget manager checks current spend against limits
+3. If over limit and `action` is `"block"`: return `429` immediately (no API call, no cost); with `action: "warn"` only a warning header is added
+4. If approaching limit (soft limit): add warning header, forward request
+5. If under limit: forward request normally
+
+## Configuration
+
+```yaml
+budgets:
+  default:
+    daily_limit_usd: 50.0
+    monthly_limit_usd: 500.0
+    soft_limit_pct: 0.8        # warn at 80%
+    action: "block"            # "block" returns 429, "warn" adds header only
+  rules:
+    - api_key_pattern: "sk-proj-dev-*"
+      daily_limit_usd: 5.0
+      monthly_limit_usd: 50.0
+      action: "block"
+    - tenant_id: "alpha"
+      daily_limit_usd: 100.0
+      monthly_limit_usd: 1000.0
+      action: "block"
+```
+
+## Block Response
+
+When a limit is hit:
+
+```json
+{
+  "error": {
+    "type": "budget_exceeded",
+    "message": "spending limit exceeded",
+    "daily_spent": 12.50,
+    "daily_limit": 10.00,
+    "monthly_spent": 45.00,
+    "monthly_limit": 500.00
+  }
+}
+```
+
+HTTP status: `429 Too Many Requests`
+
+## Soft Limits
+
+When `soft_limit_pct` is configured, AgentLedger adds a response header when approaching the threshold:
+
+```
+X-AgentLedger-Budget-Warning: daily spend at 82% of limit
+```
+
+The request is still forwarded — soft limits are informational only.
+
+## Pre-Flight Estimation
+
+AgentLedger calculates worst-case cost from `max_tokens` before forwarding to the API. If the estimated cost would exceed the remaining budget, the request is rejected immediately — no wasted spend.
+
+## Per-Key Rules
+
+Rules use glob patterns to match API keys:
+
+| Pattern | Matches |
+|---------|---------|
+| `sk-proj-dev-*` | All keys starting with `sk-proj-dev-` |
+| `sk-*` | All keys starting with `sk-` |
+| `*` | All keys |
+
+Rules are evaluated in order. The first matching rule wins. If no rule matches, the default applies.
+
+## Runtime Management
+
+Budget rules can be managed at runtime via the [Admin API](../admin-api.md) without restarting the proxy.
diff --git a/docs/features/circuit-breaker.md b/docs/features/circuit-breaker.md
new file mode 100644
index 0000000..180937e
--- /dev/null
+++ b/docs/features/circuit-breaker.md
@@ -0,0 +1,31 @@
+# Circuit Breaker
+
+Protects against upstream provider failures. After a configurable number of consecutive 5xx responses, the circuit opens and rejects requests immediately — preventing wasted spend on failing APIs.
+
+## Configuration
+
+```yaml
+circuit_breaker:
+  max_failures: 5           # consecutive 5xx before opening
+  timeout_secs: 30          # seconds before half-open retry
+```
+
+## States
+
+| State | Behavior |
+|-------|----------|
+| **Closed** | Normal operation — requests forwarded to upstream |
+| **Open** | Circuit tripped — requests rejected immediately with `503` |
+| **Half-Open** | After `timeout_secs`, one request is allowed through to test recovery |
+
+If the half-open request succeeds, the circuit closes. If it fails, the circuit opens again.
+
+## When to Use
+
+The circuit breaker is useful when:
+
+- An upstream provider is experiencing an outage
+- You want to fail fast rather than wait for timeouts
+- You want to prevent agents from burning budget on requests that will fail
+
+Omit the `circuit_breaker` section to disable.
diff --git a/docs/features/dashboard.md b/docs/features/dashboard.md
new file mode 100644
index 0000000..bb379f2
--- /dev/null
+++ b/docs/features/dashboard.md
@@ -0,0 +1,50 @@
+# Dashboard
+
+AgentLedger includes an embedded web dashboard for real-time cost visibility. No external tools needed.
+
+## Accessing the Dashboard
+
+The dashboard is served at the proxy's root URL:
+
+```
+http://localhost:8787/
+```
+
+Enabled by default. To disable:
+
+```yaml
+dashboard:
+  enabled: false
+```
+
+## Features
+
+- **Summary cards** — today's spend, month's spend, request count, avg cost per request, active sessions, error rate
+- **Cost over time** — line/area chart with selectable time ranges (30 min to 30 days)
+- **Spend by provider** — doughnut chart showing cost distribution across providers
+- **Cost breakdown** — table grouped by model, provider, agent, or session
+- **Most expensive requests** — top 10 costliest individual API calls
+- **Active sessions** — live view of running agent sessions with cost and status
+- **Error breakdown** — 429s, 5xx errors, avg latency
+- **API key usage** — spend per API key hash
+- **Budget rules** — view and manage rules (requires admin token)
+
+## Multi-Tenant Filtering
+
+All dashboard views support tenant filtering. Enter a tenant ID in the filter bar to see costs for a specific team or organization.
+
+The dashboard REST API endpoints also accept a `?tenant=` query parameter.
+
+## Dashboard API
+
+The dashboard exposes these REST endpoints:
+
+| Endpoint | Description |
+|----------|-------------|
+| `GET /api/dashboard/summary` | Summary cards data |
+| `GET /api/dashboard/timeseries` | Cost over time chart data |
+| `GET /api/dashboard/costs` | Cost breakdown table data |
+| `GET /api/dashboard/sessions` | Active sessions list |
+| `GET /api/dashboard/expensive` | Most expensive requests |
+| `GET /api/dashboard/stats` | Error stats and avg cost |
+| `GET /api/dashboard/export` | Export as CSV or JSON |
diff --git a/docs/features/mcp.md b/docs/features/mcp.md
new file mode 100644
index 0000000..635dd27
--- /dev/null
+++ b/docs/features/mcp.md
@@ -0,0 +1,52 @@
+# MCP Tool Metering
+
+Track costs of MCP (Model Context Protocol) tool calls alongside LLM usage. Two modes are available.
+
+## HTTP Proxy Mode
+
+Forward JSON-RPC requests to an upstream MCP server, metering each tool call:
+
+```yaml
+mcp:
+  enabled: true
+  upstream: "http://localhost:3000"
+  pricing:
+    - server: "filesystem"
+      tool: "read_file"
+      cost_per_call: 0.01
+    - server: "filesystem"
+      tool: ""                  # wildcard: all tools on this server
+      cost_per_call: 0.005
+    - server: "github"
+      tool: ""
+      cost_per_call: 0.02
+```
+
+## Stdio Wrapper Mode
+
+Wrap any MCP server process and intercept tool calls via stdio:
+
+```bash
+agentledger mcp-wrap -- npx @modelcontextprotocol/server-filesystem /tmp
+```
+
+This launches the MCP server as a child process, intercepts JSON-RPC messages on stdin/stdout, records tool call costs, and forwards everything transparently.
+
+## Pricing Rules
+
+Rules are matched in order. The first matching `server` + `tool` combination wins.
+
+| server | tool | Matches |
+|--------|------|---------|
+| `"filesystem"` | `"read_file"` | Exact match: filesystem server, read_file tool |
+| `"filesystem"` | `""` | Wildcard: any tool on the filesystem server |
+| `""` | `""` | Catch-all: any server, any tool |
+
+## Viewing MCP Costs
+
+MCP tool costs appear alongside LLM costs in:
+
+- The CLI: `agentledger costs`
+- The dashboard
+- Prometheus metrics: `agentledger_mcp_calls_total`
+- Export: `agentledger export`
diff --git a/docs/features/multi-tenancy.md b/docs/features/multi-tenancy.md
new file mode 100644
index 0000000..f3e8a25
--- /dev/null
+++ b/docs/features/multi-tenancy.md
@@ -0,0 +1,38 @@
+# Multi-Tenancy
+
+Isolate costs, budgets, and dashboards by team or organization.
+
+## Configuration
+
+Enable tenancy and map API keys to tenants using glob patterns:
+
+```yaml
+tenants:
+  enabled: true
+  key_mappings:
+    - api_key_pattern: "sk-proj-team-alpha-*"
+      tenant_id: "alpha"
+    - api_key_pattern: "sk-proj-team-beta-*"
+      tenant_id: "beta"
+```
+
+## Header-Based Tenancy
+
+Set the tenant per-request via header:
+
+```
+X-AgentLedger-Tenant: alpha
+```
+
+Header-based tenancy takes precedence over config-based key mapping.
+
+## What's Isolated
+
+- **Costs** — each tenant's spend is tracked separately
+- **Budgets** — tenant-scoped budget rules (see [budgets](budgets.md))
+- **Dashboard** — filter by tenant in the web UI
+- **API endpoints** — all cost and stats endpoints accept `?tenant=` filter
+
+## Without Tenancy
+
+When tenancy is disabled (default), all costs are tracked globally. Existing behavior is unchanged — tenancy is fully opt-in.
diff --git a/docs/features/observability.md b/docs/features/observability.md
new file mode 100644
index 0000000..2ec7c01
--- /dev/null
+++ b/docs/features/observability.md
@@ -0,0 +1,45 @@
+# Observability
+
+AgentLedger exports OpenTelemetry metrics via a Prometheus endpoint for integration with your existing monitoring stack.
+
+## Prometheus Endpoint
+
+Metrics are exposed at:
+
+```
+http://localhost:8787/metrics
+```
+
+## Key Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `agentledger_requests_total` | Counter | Total proxied requests by provider, model, status |
+| `agentledger_request_duration_ms` | Histogram | Request latency |
+| `agentledger_input_tokens_total` | Counter | Total input tokens |
+| `agentledger_output_tokens_total` | Counter | Total output tokens |
+| `agentledger_cost_usd_total` | Counter | Total cost in USD |
+| `agentledger_sessions_active` | Gauge | Currently active agent sessions |
+| `agentledger_loop_detected_total` | Counter | Loop detection events |
+| `agentledger_ghost_detected_total` | Counter | Ghost detection events |
+| `agentledger_rate_limited_total` | Counter | Rate-limited requests |
+| `agentledger_mcp_calls_total` | Counter | MCP tool calls |
+
+## Grafana
+
+A pre-built Grafana dashboard template is included at `deploy/grafana/agentledger.json`. Import it into your Grafana instance for panels covering:
+
+- Total spend (gauge)
+- Spend rate over time (graph)
+- Requests by provider (pie chart)
+- Top models by cost (table)
+- Active sessions (stat)
+
+## Prometheus Scrape Config
+
+```yaml
+scrape_configs:
+  - job_name: "agentledger"
+    static_configs:
+      - targets: ["localhost:8787"]
+```
diff --git a/docs/features/providers.md b/docs/features/providers.md
new file mode 100644
index 0000000..038fdb5
--- /dev/null
+++ b/docs/features/providers.md
@@ -0,0 +1,67 @@
+# Providers
+
+AgentLedger supports 15 LLM providers with 83+ models and built-in pricing.
+
+## Supported Providers
+
+| Provider | Routing | Type | Models |
+|----------|---------|------|--------|
+| OpenAI | `/v1/` (default) | Native | GPT-4.1, GPT-4.1-mini, GPT-4o, o3, o4-mini, GPT-3.5-turbo |
+| Anthropic | `/v1/messages` | Native | Claude Opus 4, Sonnet 4, Haiku 4, Claude 3.5/3.x |
+| Azure OpenAI | `/azure/` | Custom | All Azure-hosted OpenAI models |
+| Google Gemini | `/gemini/` | Custom | Gemini 2.5 Pro/Flash, 2.0, 1.5 |
+| Cohere | `/cohere/` | Custom | Command R+, Command R, Command Light |
+| Groq | `/groq/v1/` | OpenAI-compat | Llama 3.3 70B, Mixtral, Gemma |
+| Mistral | `/mistral/v1/` | OpenAI-compat | Large, Small, Codestral, Nemo |
+| DeepSeek | `/deepseek/v1/` | OpenAI-compat | DeepSeek Chat, Reasoner |
+| Together AI | `/together/v1/` | OpenAI-compat | Llama, Qwen, DeepSeek |
+| Fireworks AI | `/fireworks/v1/` | OpenAI-compat | Llama, Qwen |
+| Perplexity | `/perplexity/v1/` | OpenAI-compat | Sonar Pro, Sonar, Reasoning |
+| OpenRouter | `/openrouter/v1/` | OpenAI-compat | 200+ models via routing |
+| xAI (Grok) | `/xai/v1/` | OpenAI-compat | Grok 3, Grok 3 Mini, Grok 2 |
+| Cerebras | `/cerebras/v1/` | OpenAI-compat | Llama 3.3 70B, Llama 3.1 8B |
+| SambaNova | `/sambanova/v1/` | OpenAI-compat | Llama 3.3 70B, Llama 3.1 8B |
+
+## How Routing Works
+
+**OpenAI** is the default — requests to `/v1/chat/completions` route to OpenAI.
+
+**Anthropic** is detected by the `/v1/messages` path.
+
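+**All other providers** use path-prefix routing. A request to `/groq/v1/chat/completions` is routed to Groq. The prefix is stripped and the rest of the path is appended to the provider's `upstream`, so with `upstream: "https://api.groq.com/openai"` Groq receives `/openai/v1/chat/completions`.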
+
+## Provider Types
+
+- **Native** — OpenAI and Anthropic have dedicated parsers for their specific API formats.
+- **OpenAI-compatible** — Groq, Mistral, DeepSeek, Together, Fireworks, Perplexity, OpenRouter, xAI, Cerebras, and SambaNova all use the OpenAI `/v1/chat/completions` format. They share a common parser.
+- **Custom** — Azure OpenAI, Gemini, and Cohere differ from the standard OpenAI format; each has its own provider type (`azure`, `gemini`, `cohere`) and dedicated handling.
+
+## Configuration
+
+OpenAI and Anthropic are enabled by default. Additional providers go in `providers.extra`:
+
+```yaml
+providers:
+  openai:
+    upstream: "https://api.openai.com"
+    enabled: true
+  anthropic:
+    upstream: "https://api.anthropic.com"
+    enabled: true
+  extra:
+    groq:
+      type: "openai"
+      upstream: "https://api.groq.com/openai"
+      path_prefix: "/groq"
+      enabled: true
+```
+
+See the [full reference](../configuration/reference.md) for all provider entries.
+
+## API Key Handling
+
+API keys pass through to the upstream provider untouched. AgentLedger never stores raw keys — it creates a SHA-256 fingerprint from the first 8 and last 4 characters for attribution and reporting.
+
+## Model Matching
+
+Versioned model names (e.g., `gpt-4o-2024-11-20`) are matched via longest prefix to the pricing table. If a model isn't found, costs are recorded with a fallback of $0 and a warning is logged.
diff --git a/docs/features/rate-limiting.md b/docs/features/rate-limiting.md
new file mode 100644
index 0000000..6365755
--- /dev/null
+++ b/docs/features/rate-limiting.md
@@ -0,0 +1,30 @@
+# Rate Limiting
+
+Throttle request volume per API key with sliding window counters. Budget enforcement limits spend; rate limiting limits request frequency.
+
+## Configuration
+
+```yaml
+rate_limits:
+  default:
+    requests_per_minute: 60
+    requests_per_hour: 1000
+  rules:
+    - api_key_pattern: "sk-proj-dev-*"
+      requests_per_minute: 10
+```
+
+## How It Works
+
+Rate limits use an in-memory sliding window counter keyed by API key hash. When a limit is exceeded, the request is rejected immediately with:
+
+- HTTP status: `429 Too Many Requests`
+- `Retry-After` header with seconds until the window resets
+
+## Per-Key Rules
+
+Rules use the same glob pattern matching as [budget rules](budgets.md). Rules are evaluated in order — the first match wins. If no rule matches, the default applies.
+
+## Metrics
+
+Rate-limited requests are tracked via the `agentledger_rate_limited_total` Prometheus metric.
diff --git a/docs/getting-started/cli.md b/docs/getting-started/cli.md
new file mode 100644
index 0000000..b5e8ec6
--- /dev/null
+++ b/docs/getting-started/cli.md
@@ -0,0 +1,84 @@
+# CLI Reference
+
+## `agentledger serve`
+
+Start the reverse proxy.
+
+```bash
+agentledger serve [flags]
+```
+
+| Flag | Description | Default |
+|------|-------------|---------|
+| `-c, --config` | Path to config file | Auto-detect (see [config overview](../configuration/index.md)) |
+
+## `agentledger costs`
+
+Show a cost report from the ledger.
+
+```bash
+agentledger costs [flags]
+```
+
+| Flag | Description | Default |
+|------|-------------|---------|
+| `-c, --config` | Path to config file | Auto-detect |
+| `--last` | Time window: `1h`, `24h`, `7d`, `30d` | `24h` |
+| `--by` | Group by: `model`, `provider`, `key` | `model` |
+
+Example:
+
+```bash
+agentledger costs --last 7d --by provider
+```
+
+## `agentledger export`
+
+Export cost data as CSV or JSON.
+
+```bash
+agentledger export [flags]
+```
+
+| Flag | Description | Default |
+|------|-------------|---------|
+| `-c, --config` | Path to config file | Auto-detect |
+| `--format` | Output format: `csv`, `json` | `json` |
+| `--last` | Time window: `1h`, `24h`, `7d`, `30d` | `30d` |
+| `--by` | Group by: `model`, `provider`, `key`, `agent`, `session` | `model` |
+
+## `agentledger mcp-wrap`
+
+Wrap an MCP server process for tool call metering via stdio.
+
+```bash
+agentledger mcp-wrap [flags] -- command [args...]
+```
+
+| Flag | Description | Default |
+|------|-------------|---------|
+| `-c, --config` | Path to config file | Auto-detect |
+
+Example:
+
+```bash
+agentledger mcp-wrap -- npx @modelcontextprotocol/server-filesystem /tmp
+```
+
+## `agentledger version`
+
+Print the version string.
+
+```bash
+agentledger version
+```
+
+## Environment Variables
+
+All config settings can be overridden with environment variables prefixed `AGENTLEDGER_`:
+
+```bash
+AGENTLEDGER_LISTEN=":9090"
+AGENTLEDGER_STORAGE_DSN="/tmp/ledger.db"
+AGENTLEDGER_LOG_LEVEL="debug"
+```
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
new file mode 100644
index 0000000..eee1d5b
--- /dev/null
+++ b/docs/getting-started/installation.md
@@ -0,0 +1,48 @@
+# Installation
+
+## Homebrew
+
+```bash
+brew install wdz-dev/tap/agentledger
+```
+
+## Binary Download
+
+Download the latest release from [GitHub Releases](https://github.com/WDZ-Dev/agent-ledger/releases):
+
+```bash
+curl -sSL https://github.com/WDZ-Dev/agent-ledger/releases/latest/download/agentledger_$(uname -s)_$(uname -m).tar.gz | tar xz
+sudo mv agentledger /usr/local/bin/
+```
+
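+Available for Linux, macOS, and Windows on both amd64 and arm64. The commands above apply to Linux and macOS; on Windows, download the matching archive from the Releases page and place the binary on your `PATH`.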
+
+## Docker
+
+```bash
+docker run --rm -p 8787:8787 ghcr.io/wdz-dev/agent-ledger:latest
+```
+
+See [Docker deployment](../deployment/docker.md) for persistence and configuration.
+
+## Helm (Kubernetes)
+
+```bash
+helm install agentledger deploy/helm/agentledger
+```
+
+See [Kubernetes deployment](../deployment/kubernetes.md) for full details.
+
+## From Source
+
+Requires Go 1.25+:
+
+```bash
+go install github.com/WDZ-Dev/agent-ledger/cmd/agentledger@latest
+```
+
+## Verify
+
+```bash
+agentledger version
+```
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
new file mode 100644
index 0000000..ec6b73f
--- /dev/null
+++ b/docs/getting-started/quickstart.md
@@ -0,0 +1,131 @@
+# Quick Start
+
+## 1. Start the proxy
+
+```bash
+# With defaults (listens on :8787, SQLite storage)
+agentledger serve
+
+# Or with a config file
+agentledger serve -c agentledger.yaml
+```
+
+## 2. Point your agents at it
+
+=== "Python (OpenAI)"
+
+    ```bash
+    export OPENAI_BASE_URL=http://localhost:8787/v1
+    ```
+
+    ```python
+    import openai
+    client = openai.OpenAI()  # picks up OPENAI_BASE_URL automatically
+    ```
+
+=== "Node.js"
+
+    ```javascript
+    const openai = new OpenAI({ baseURL: 'http://localhost:8787/v1' });
+    ```
+
+=== "Claude Code"
+
+    ```bash
+    export ANTHROPIC_BASE_URL=http://localhost:8787
+    ```
+
+=== "curl"
+
+    ```bash
+    curl http://localhost:8787/v1/chat/completions \
+      -H "Authorization: Bearer $OPENAI_API_KEY" \
+      -H "Content-Type: application/json" \
+      -d '{"model": "gpt-4.1-mini", "messages": [{"role": "user", "content": "Hello"}]}'
+    ```
+
+For other providers, use path-prefix routing:
+
+```bash
+# Groq
+curl http://localhost:8787/groq/v1/chat/completions
+
+# Mistral
+curl http://localhost:8787/mistral/v1/chat/completions
+
+# DeepSeek
+curl http://localhost:8787/deepseek/v1/chat/completions
+
+# Gemini
+curl http://localhost:8787/gemini/v1beta/models/gemini-2.5-pro:generateContent
+
+# Cohere
+curl http://localhost:8787/cohere/v2/chat
+```
+
+## 3. Check your costs
+
+```bash
+# Last 24 hours, grouped by model
+agentledger costs
+
+# Last 7 days, grouped by API key
+agentledger costs --last 7d --by key
+```
+
+```
+PROVIDER   MODEL            REQUESTS   INPUT TOKENS   OUTPUT TOKENS   COST (USD)
+--------   -----            --------   ------------   -------------   ----------
+openai     gpt-4.1-mini     142        28400          14200           $0.0341
+openai     gpt-4.1          38         19000          9500            $0.1140
+anthropic  claude-sonnet-4  12         6000           3000            $0.0630
+--------   -----            --------   ------------   -------------   ----------
+TOTAL                       192        53400          26700           $0.2111
+```
+
+## 4. Set budgets (optional)
+
+```yaml
+# agentledger.yaml
+budgets:
+  default:
+    daily_limit_usd: 50.0
+    monthly_limit_usd: 500.0
+    soft_limit_pct: 0.8
+    action: "block"
+  rules:
+    - api_key_pattern: "sk-proj-dev-*"
+      daily_limit_usd: 5.0
+      action: "block"
+```
+
+When a limit is hit, the agent receives a `429` with a clear JSON error — no surprise charges.
+
+## 5. Add agent tracking (optional)
+
+Add headers to get per-agent cost attribution:
+
+```python
+client = openai.OpenAI(
+    base_url="http://localhost:8787/v1",
+    default_headers={
+        "X-Agent-Id": "code-reviewer",
+        "X-Agent-Session": f"sess_{run_id}",
+        "X-Agent-User": "alice@company.com",
+        "X-Agent-Task": "Review PR #456",
+    }
+)
+```
+
+These `X-Agent-*` headers are stripped before the request is forwarded to the provider. Without them, costs are tracked at the API-key level.
+
+## 6. View the dashboard
+
+Open [http://localhost:8787/](http://localhost:8787/) for real-time cost breakdowns, session views, and spending trends.
+
+## Next steps
+
+- [Configuration reference](../configuration/reference.md) — full YAML config
+- [Providers](../features/providers.md) — all 15 supported providers
+- [Budget enforcement](../features/budgets.md) — per-key limits, pre-flight estimation
+- [Agent tracking](../features/agent-tracking.md) — sessions, loop detection, ghost detection
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..747e945
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,94 @@
+# AgentLedger
+
+**Know what your agents cost.** Meter. Budget. Control.
+
+AgentLedger is an open-source reverse proxy that gives you real-time cost attribution, budget enforcement, and financial observability for AI agents — without changing a single line of code.
+
+```bash
+export OPENAI_BASE_URL=http://localhost:8787/v1
+# That's it. Your agents now have cost tracking and budget enforcement.
+```
+
+---
+
+## Why AgentLedger?
+
+AI agents make dozens of LLM calls per task. Costs compound fast, loops happen silently, and provider dashboards only show you the damage after the fact.
+
+- **Real-time cost tracking** — every request metered, every token counted
+- **Budget enforcement** — daily and monthly limits with automatic blocking
+- **Pre-flight estimation** — rejects requests that would exceed your budget before they hit the API
+- **Agent session tracking** — group multi-call agent runs into sessions, detect loops and ghost agents
+- **15 LLM providers** — OpenAI, Anthropic, Gemini, Groq, Mistral, DeepSeek, Cohere, and more
+- **Zero code changes** — works with any OpenAI/Anthropic SDK via base URL override
+
+---
+
+## Architecture
+
+```
+  Agents (any SDK) --+
+                     |     +--------------------+     OpenAI
+  MCP Servers -------+---> |  AgentLedger :8787 | --> Anthropic
+  (stdio/HTTP)       |     |                    |     Groq, Mistral
+                     |     |  Budget limits     |     DeepSeek, Gemini
+                     |     |  Rate limiting     |     Cohere, + 8 more
+                     |     |  Token metering    |
+                     |     |  Agent sessions    | --> Slack / Webhooks
+                     |     |  Cost recording    |
+                     |     |                    |
+                     |     |  SQLite/Postgres   |
+                     |     |  Dashboard         |
+                     |     |  Prometheus        |
+                     |     +--------------------+
+```
+
+---
+
+## At a Glance
+
+| | |
+|---|---|
+| **Providers** | 15 LLM providers, 83+ models with built-in pricing |
+| **Overhead** | Sub-10ms proxy latency (~0.1ms typical) |
+| **Dependencies** | Zero — single Go binary with embedded SQLite and dashboard |
+| **Setup** | One environment variable, zero code changes |
+| **License** | Apache 2.0 — all features free and open-source |
+| **Platforms** | Linux, macOS, Windows (amd64 + arm64) |
+
+---
+
+## vs LiteLLM
+
+| | AgentLedger | LiteLLM |
+|---|---|---|
+| **Architecture** | Go single binary, sub-10ms overhead | Python, documented memory leaks |
+| **Cost model** | Per-agent-execution tracking | Per-key/user/team only |
+| **Loop detection** | Built-in, zero-config | Not available |
+| **Ghost agent detection** | Built-in | Not available |
+| **Pre-flight estimation** | Rejects before API call | Post-hoc only |
+| **Budget enforcement** | Free, included | Enterprise paywall |
+| **Audit logs** | Free, included | Enterprise paywall |
+| **Setup** | `brew install` + one env var | Python + pip + database server |
+| **Dependencies** | Zero (embedded SQLite + dashboard) | PostgreSQL required, Redis recommended |
+
+---
+
+## Quick Start
+
+```bash
+# Install
+brew install wdz-dev/tap/agentledger
+
+# Start the proxy
+agentledger serve
+
+# Point your agents at it
+export OPENAI_BASE_URL=http://localhost:8787/v1
+
+# Check your costs
+agentledger costs
+```
+
+[Get started :material-arrow-right:](getting-started/installation.md){ .md-button .md-button--primary }
+[View on GitHub :material-github:](https://github.com/WDZ-Dev/agent-ledger){ .md-button }
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..2951c67
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+mkdocs-material==9.6.7
+mkdocs-minify-plugin==0.8.0
diff --git a/docs/roadmap.md b/docs/roadmap.md
new file mode 100644
index 0000000..296d702
--- /dev/null
+++ b/docs/roadmap.md
@@ -0,0 +1,26 @@
+# Roadmap
+
+## Completed
+
+- [x] **Phase 1: Core Proxy** — Reverse proxy, token metering, cost calculation, SQLite storage, CLI
+- [x] **Phase 2: Budget Enforcement** — Per-key budgets, pre-flight estimation, circuit breaker
+- [x] **Phase 3: Agent Attribution** — Session tracking, loop detection, ghost agent detection
+- [x] **Phase 4: Observability** — OpenTelemetry metrics, Prometheus endpoint, web dashboard
+- [x] **Phase 5: MCP Integration** — Meter MCP tool calls alongside LLM costs
+- [x] **Phase 6: Polish & Launch** — Docker, GoReleaser, Helm chart, docs
+- [x] **Phase 7: Multi-Provider** — Groq, Mistral, DeepSeek, Gemini, Cohere with path-prefix routing
+- [x] **Phase 8: Postgres** — Production-grade PostgreSQL storage backend
+- [x] **Phase 9: Multi-Tenancy** — Tenant isolation with header- and config-based resolution
+- [x] **Phase 10: Alerting** — Slack and webhook notifications with deduplication
+- [x] **Phase 11: Rate Limiting** — Per-key request throttling + Homebrew tap
+- [x] **Phase 12: Admin API** — Runtime budget rule management
+
+## Future
+
+| Feature | Value | Complexity |
+|---------|-------|------------|
+| Response Caching | High | High |
+| WebSocket Live Dashboard | Medium | Medium |
+| Plugin / Middleware System | Medium | Very High |
+
+See [ROADMAP.md](https://github.com/WDZ-Dev/agent-ledger/blob/main/ROADMAP.md) on GitHub for detailed descriptions of each planned feature.
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
new file mode 100644
index 0000000..590bfe6
--- /dev/null
+++ b/docs/stylesheets/extra.css
@@ -0,0 +1,13 @@
+[data-md-color-scheme="slate"] {
+  --md-default-bg-color: #0d1117;
+  --md-default-bg-color--light: #161b22;
+  --md-default-fg-color: #e6edf3;
+  --md-primary-fg-color: #161b22;
+  --md-primary-bg-color: #e6edf3;
+  --md-accent-fg-color: #3fb950;
+}
+
+.md-typeset h1,
+.md-typeset h2 {
+  font-family: "JetBrains Mono", monospace;
+}
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..53ba6c9
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,104 @@
+site_name: AgentLedger
+site_url: https://wdz-dev.github.io/agent-ledger/
+site_description: "Know what your agents cost. Real-time cost attribution, budget enforcement, and financial observability for AI agents."
+repo_url: https://github.com/WDZ-Dev/agent-ledger
+repo_name: WDZ-Dev/agent-ledger
+edit_uri: edit/main/docs/
+
+theme:
+  name: material
+  palette:
+    - scheme: slate
+      primary: custom
+      accent: teal
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+    - scheme: default
+      primary: custom
+      accent: teal
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+  font:
+    text: Inter
+    code: JetBrains Mono
+  icon:
+    repo: fontawesome/brands/github
+  features:
+    - navigation.instant
+    - navigation.instant.prefetch
+    - navigation.tracking
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - navigation.sections
+    - navigation.top
+    - navigation.footer
+    - search.suggest
+    - search.highlight
+    - content.code.copy
+    - content.code.annotate
+    - content.tabs.link
+    - toc.follow
+
+extra_css:
+  - stylesheets/extra.css
+
+extra:
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/WDZ-Dev/agent-ledger
+
+copyright: Copyright &copy; 2025-2026 WDZ-Dev — Apache 2.0
+
+markdown_extensions:
+  - admonition
+  - pymdownx.details
+  - pymdownx.superfences
+  - pymdownx.highlight:
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.snippets
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+  - tables
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: true
+
+plugins:
+  - search
+  - minify:
+      minify_html: true
+
+nav:
+  - Home: index.md
+  - Getting Started:
+      - Installation: getting-started/installation.md
+      - Quick Start: getting-started/quickstart.md
+      - CLI Reference: getting-started/cli.md
+  - Configuration:
+      - Overview: configuration/index.md
+      - Full Reference: configuration/reference.md
+  - Features:
+      - Providers: features/providers.md
+      - Budget Enforcement: features/budgets.md
+      - Agent Tracking: features/agent-tracking.md
+      - MCP Tool Metering: features/mcp.md
+      - Dashboard: features/dashboard.md
+      - Multi-Tenancy: features/multi-tenancy.md
+      - Alerting: features/alerting.md
+      - Rate Limiting: features/rate-limiting.md
+      - Circuit Breaker: features/circuit-breaker.md
+      - Observability: features/observability.md
+  - Admin API: admin-api.md
+  - Deployment:
+      - Docker: deployment/docker.md
+      - Kubernetes / Helm: deployment/kubernetes.md
+  - Roadmap: roadmap.md