diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..34aebaf --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,53 @@ +name: Docs + +on: + push: + branches: [main] + paths: + - "docs/**" + - "mkdocs.yml" + - "docs/requirements.txt" + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + name: Build Docs + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r docs/requirements.txt + + - name: Build site + run: mkdocs build --strict + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: site/ + + deploy: + name: Deploy Docs + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index b7ab767..84a3cea 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,9 @@ Thumbs.db *.db-shm *.db-wal +# Docs build +site/ + # Build dist/ build/output/ diff --git a/Makefile b/Makefile index 696c98a..025da1a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build test test-short lint fmt vet vulncheck clean dev setup docker docker-run helm-lint release-dry +.PHONY: build test test-short lint fmt vet vulncheck clean dev setup docker docker-run helm-lint release-dry docs docs-serve # Build variables VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") @@ -84,6 +84,16 @@ release-dry: ## check: Run all checks (what CI runs) check: fmt vet lint test vulncheck +## docs: Build documentation site +docs: + pip install -q -r docs/requirements.txt + mkdocs build --strict + +## docs-serve: Serve documentation locally with live reload 
+docs-serve: + pip install -q -r docs/requirements.txt + mkdocs serve + ## help: Show this help help: @echo "Usage: make [target]" diff --git a/docs/admin-api.md b/docs/admin-api.md new file mode 100644 index 0000000..d1b6db6 --- /dev/null +++ b/docs/admin-api.md @@ -0,0 +1,67 @@ +# Admin API + +Manage budget rules and view API key usage at runtime without restarting the proxy. + +## Enable + +```yaml +admin: + enabled: true + token: "your-secret-admin-token" +``` + +## Authentication + +All admin endpoints require a Bearer token: + +```bash +curl -H "Authorization: Bearer your-secret-admin-token" \ + http://localhost:8787/api/admin/budgets/rules +``` + +## Endpoints + +### Budget Rules + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/admin/budgets/rules` | List all budget rules | +| `POST` | `/api/admin/budgets/rules` | Create a budget rule | +| `DELETE` | `/api/admin/budgets/rules?pattern=...` | Delete a rule by pattern | + +#### Create a Rule + +```bash +curl -X POST http://localhost:8787/api/admin/budgets/rules \ + -H "Authorization: Bearer your-secret-admin-token" \ + -H "Content-Type: application/json" \ + -d '{ + "api_key_pattern": "sk-proj-dev-*", + "daily_limit_usd": 5.0, + "monthly_limit_usd": 50.0, + "action": "block" + }' +``` + +#### Delete a Rule + +```bash +curl -X DELETE "http://localhost:8787/api/admin/budgets/rules?pattern=sk-proj-dev-*" \ + -H "Authorization: Bearer your-secret-admin-token" +``` + +### API Keys + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/admin/api-keys` | List API key hashes with monthly spend | + +### Providers + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/admin/providers` | List provider status | + +## Persistence + +Runtime rules take effect immediately and persist across restarts. They are stored in the database and take precedence over YAML config rules. 
diff --git a/docs/configuration/index.md b/docs/configuration/index.md new file mode 100644 index 0000000..eee7d02 --- /dev/null +++ b/docs/configuration/index.md @@ -0,0 +1,47 @@ +# Configuration Overview + +AgentLedger works out of the box with sensible defaults. All configuration is optional — the proxy starts with OpenAI and Anthropic enabled, SQLite storage, and the dashboard on. + +## Config File Locations + +AgentLedger looks for config in these locations (in order): + +1. Path passed via `--config` / `-c` flag +2. `./agentledger.yaml` +3. `./configs/agentledger.yaml` +4. `~/.config/agentledger/agentledger.yaml` +5. `/etc/agentledger/agentledger.yaml` + +## Minimal Config + +No config file is needed for basic usage. To customize: + +```yaml +listen: ":8787" + +providers: + openai: + upstream: "https://api.openai.com" + enabled: true + anthropic: + upstream: "https://api.anthropic.com" + enabled: true + +storage: + driver: "sqlite" + dsn: "data/agentledger.db" +``` + +## Environment Variable Overrides + +All settings can be overridden with environment variables prefixed `AGENTLEDGER_`: + +```bash +AGENTLEDGER_LISTEN=":9090" +AGENTLEDGER_STORAGE_DSN="/tmp/ledger.db" +AGENTLEDGER_LOG_LEVEL="debug" +``` + +## Full Reference + +See [Full Reference](reference.md) for every configuration option with descriptions and defaults. diff --git a/docs/configuration/reference.md b/docs/configuration/reference.md new file mode 100644 index 0000000..2622ea4 --- /dev/null +++ b/docs/configuration/reference.md @@ -0,0 +1,202 @@ +# Configuration Reference + +Full annotated configuration. All sections are optional — only configure what you need. 
+ +```yaml +# Proxy listen address +listen: ":8787" + +# ─── Providers ─────────────────────────────────────────────────────── + +providers: + openai: + upstream: "https://api.openai.com" + enabled: true + anthropic: + upstream: "https://api.anthropic.com" + enabled: true + + # Additional providers — route via path prefix + # e.g., /groq/v1/chat/completions → api.groq.com + extra: + groq: + type: "openai" # OpenAI-compatible API format + upstream: "https://api.groq.com/openai" + path_prefix: "/groq" + enabled: true + mistral: + type: "openai" + upstream: "https://api.mistral.ai" + path_prefix: "/mistral" + enabled: true + deepseek: + type: "openai" + upstream: "https://api.deepseek.com" + path_prefix: "/deepseek" + enabled: true + gemini: + type: "gemini" # Custom Gemini parser + upstream: "https://generativelanguage.googleapis.com" + path_prefix: "/gemini" + enabled: true + cohere: + type: "cohere" # Custom Cohere parser + upstream: "https://api.cohere.com" + path_prefix: "/cohere" + enabled: true + azure: + type: "azure" # Azure OpenAI + upstream: "https://my-resource.openai.azure.com" + path_prefix: "/azure" + enabled: true + together: + type: "openai" + upstream: "https://api.together.xyz" + path_prefix: "/together" + enabled: true + fireworks: + type: "openai" + upstream: "https://api.fireworks.ai/inference" + path_prefix: "/fireworks" + enabled: true + perplexity: + type: "openai" + upstream: "https://api.perplexity.ai" + path_prefix: "/perplexity" + enabled: true + openrouter: + type: "openai" + upstream: "https://openrouter.ai/api" + path_prefix: "/openrouter" + enabled: true + xai: + type: "openai" # xAI (Grok) + upstream: "https://api.x.ai" + path_prefix: "/xai" + enabled: true + cerebras: + type: "openai" + upstream: "https://api.cerebras.ai" + path_prefix: "/cerebras" + enabled: true + sambanova: + type: "openai" + upstream: "https://api.sambanova.ai" + path_prefix: "/sambanova" + enabled: true + +# ─── Storage 
───────────────────────────────────────────────────────── + +storage: + driver: "sqlite" # "sqlite" or "postgres" + dsn: "data/agentledger.db" # SQLite path or Postgres DSN + # max_open_conns: 25 # Postgres only + # max_idle_conns: 5 # Postgres only + # Example Postgres DSN: + # dsn: "postgres://user:pass@localhost:5432/agentledger?sslmode=disable" + +# ─── Logging ───────────────────────────────────────────────────────── + +log: + level: "info" # debug, info, warn, error + format: "text" # text or json + +# ─── Async Recording ──────────────────────────────────────────────── + +recording: + buffer_size: 10000 # channel buffer for async writes + workers: 4 # recording goroutines + +# ─── Budget Enforcement ───────────────────────────────────────────── + +budgets: + default: + daily_limit_usd: 50.0 + monthly_limit_usd: 500.0 + soft_limit_pct: 0.8 # warn at 80% of limit + action: "block" # "block" returns 429, "warn" adds header only + rules: + - api_key_pattern: "sk-proj-dev-*" # glob pattern + daily_limit_usd: 5.0 + monthly_limit_usd: 50.0 + action: "block" + - tenant_id: "alpha" # tenant-scoped rule + daily_limit_usd: 100.0 + monthly_limit_usd: 1000.0 + action: "block" + +# ─── Circuit Breaker ──────────────────────────────────────────────── + +circuit_breaker: + max_failures: 5 # consecutive 5xx before opening + timeout_secs: 30 # seconds before half-open retry + +# ─── Agent Session Tracking ───────────────────────────────────────── + +agent: + session_timeout_mins: 30 # auto-expire idle sessions + loop_threshold: 20 # same path N times = loop (0 = disabled) + loop_window_mins: 5 # sliding window + loop_action: "warn" # "warn" or "block" + ghost_max_age_mins: 60 # sessions older than this = ghost (0 = disabled) + ghost_min_calls: 50 + ghost_min_cost_usd: 1.0 + +# ─── Dashboard ─────────────────────────────────────────────────────── + +dashboard: + enabled: true + +# ─── Multi-Tenancy ────────────────────────────────────────────────── + +tenants: + enabled: true 
+ key_mappings: + - api_key_pattern: "sk-proj-team-alpha-*" + tenant_id: "alpha" + - api_key_pattern: "sk-proj-team-beta-*" + tenant_id: "beta" + +# ─── Alerting ──────────────────────────────────────────────────────── + +alerts: + slack: + webhook_url: "https://hooks.slack.com/services/T00/B00/xxx" + webhooks: + - url: "https://api.example.com/alerts" + headers: + Authorization: "Bearer token" + cooldown_mins: 5 # deduplication window per alert + +# ─── Rate Limiting ────────────────────────────────────────────────── + +rate_limits: + default: + requests_per_minute: 60 + requests_per_hour: 1000 + rules: + - api_key_pattern: "sk-proj-dev-*" + requests_per_minute: 10 + +# ─── Admin API ─────────────────────────────────────────────────────── + +admin: + enabled: true + token: "your-secret-admin-token" # Bearer token for auth + +# ─── MCP Tool Metering ────────────────────────────────────────────── + +mcp: + enabled: true + upstream: "http://localhost:3000" + pricing: + - server: "filesystem" + tool: "read_file" + cost_per_call: 0.01 + - server: "filesystem" + tool: "" # wildcard: all tools on server + cost_per_call: 0.005 + - server: "github" + tool: "" + cost_per_call: 0.02 +``` diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md new file mode 100644 index 0000000..3b05ede --- /dev/null +++ b/docs/deployment/docker.md @@ -0,0 +1,90 @@ +# Docker Deployment + +## Quick Start + +```bash +docker run --rm -p 8787:8787 ghcr.io/wdz-dev/agent-ledger:latest +``` + +## With Persistent Storage + +```bash +docker run -d \ + --name agentledger \ + -p 8787:8787 \ + -v agentledger-data:/data \ + ghcr.io/wdz-dev/agent-ledger:latest +``` + +## Docker Compose + +A `docker-compose.yml` is included in the `deploy/` directory: + +```bash +cd deploy && docker compose up +``` + +This starts AgentLedger with persistent volume storage and health checks. 
+ +## Custom Configuration + +Mount a config file: + +```bash +docker run -d \ + --name agentledger \ + -p 8787:8787 \ + -v agentledger-data:/data \ + -v ./agentledger.yaml:/etc/agentledger/agentledger.yaml \ + ghcr.io/wdz-dev/agent-ledger:latest +``` + +Or use environment variables: + +```bash +docker run -d \ + --name agentledger \ + -p 8787:8787 \ + -e AGENTLEDGER_LISTEN=":8787" \ + -e AGENTLEDGER_STORAGE_DSN="/data/agentledger.db" \ + -e AGENTLEDGER_LOG_LEVEL="info" \ + ghcr.io/wdz-dev/agent-ledger:latest +``` + +## With PostgreSQL + +For production deployments with multiple replicas, use PostgreSQL instead of SQLite: + +```yaml +# docker-compose.yml +services: + agentledger: + image: ghcr.io/wdz-dev/agent-ledger:latest + ports: + - "8787:8787" + environment: + AGENTLEDGER_STORAGE_DRIVER: "postgres" + AGENTLEDGER_STORAGE_DSN: "postgres://user:pass@postgres:5432/agentledger?sslmode=disable" + depends_on: + - postgres + + postgres: + image: postgres:16 + environment: + POSTGRES_USER: user + POSTGRES_PASSWORD: pass + POSTGRES_DB: agentledger + volumes: + - pgdata:/var/lib/postgresql/data + +volumes: + pgdata: +``` + +## Health Check + +The proxy responds to health checks at: + +``` +GET http://localhost:8787/healthz +``` diff --git a/docs/deployment/kubernetes.md b/docs/deployment/kubernetes.md new file mode 100644 index 0000000..65830d2 --- /dev/null +++ b/docs/deployment/kubernetes.md @@ -0,0 +1,74 @@ +# Kubernetes / Helm + +## Install + +```bash +helm install agentledger deploy/helm/agentledger +``` + +## Key Values + +| Value | Description | Default | +|-------|-------------|---------| +| `replicaCount` | Number of proxy replicas | `1` | +| `image.repository` | Container image | `ghcr.io/wdz-dev/agent-ledger` | +| `image.tag` | Image tag | Chart appVersion | +| `service.port` | Service port | `8787` | +| `ingress.enabled` | Enable ingress | `false` | +| `persistence.enabled` | Enable PVC for SQLite | `true` | +| `persistence.size` | PVC size | `1Gi` | + +## 
Custom Values + +```bash +helm install agentledger deploy/helm/agentledger \ + --set replicaCount=2 \ + --set ingress.enabled=true \ + --set ingress.hosts[0].host=agentledger.example.com +``` + +Or with a values file: + +```yaml +# values-prod.yaml +replicaCount: 3 + +ingress: + enabled: true + hosts: + - host: agentledger.example.com + paths: + - path: / + pathType: Prefix + +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 256Mi +``` + +```bash +helm install agentledger deploy/helm/agentledger -f values-prod.yaml +``` + +## SQLite vs PostgreSQL + +!!! warning "SQLite with Multiple Replicas" + SQLite is single-writer. If you run multiple replicas, use PostgreSQL instead. + +For PostgreSQL: + +```yaml +# values-prod.yaml +env: + - name: AGENTLEDGER_STORAGE_DRIVER + value: "postgres" + - name: AGENTLEDGER_STORAGE_DSN + value: "postgres://user:pass@postgres:5432/agentledger?sslmode=disable" + +persistence: + enabled: false +``` diff --git a/docs/features/agent-tracking.md b/docs/features/agent-tracking.md new file mode 100644 index 0000000..5990350 --- /dev/null +++ b/docs/features/agent-tracking.md @@ -0,0 +1,76 @@ +# Agent Session Tracking + +AgentLedger groups multi-call agent runs into sessions, enabling per-execution cost attribution. This is the key differentiator — most tools track cost per API key or per user. AgentLedger tracks cost per agent execution. + +## Agent Headers + +Tag requests with agent metadata using HTTP headers: + +``` +X-Agent-Id: code-reviewer +X-Agent-Session: sess_abc123 +X-Agent-User: user@example.com +X-Agent-Task: "Review PR #456" +``` + +All agent headers are stripped before forwarding to the upstream provider. 
+ +### Python Example + +```python +client = openai.OpenAI( + base_url="http://localhost:8787/v1", + default_headers={ + "X-Agent-Id": "code-reviewer", + "X-Agent-Session": f"sess_{run_id}", + "X-Agent-User": "alice@company.com", + "X-Agent-Task": "Review PR #456", + } +) +``` + +### Fallback + +Without agent headers, costs are tracked at the API-key level. Headers are optional but recommended for per-agent visibility. + +## Loop Detection + +Automatically detects runaway agents making repetitive calls to the same endpoint. + +```yaml +agent: + loop_threshold: 20 # same path N times in window = loop + loop_window_mins: 5 # sliding window + loop_action: "warn" # "warn" logs + alert, "block" returns 429 +``` + +When a loop is detected: + +- **warn**: logs a warning and sends an alert (if [alerting](alerting.md) is configured) +- **block**: returns `429 Too Many Requests` to stop the agent + +Set `loop_threshold: 0` to disable. + +## Ghost Agent Detection + +Finds agents that are still running but may have been forgotten — burning tokens silently. + +```yaml +agent: + ghost_max_age_mins: 60 # sessions older than this are candidates + ghost_min_calls: 50 # minimum calls before flagging + ghost_min_cost_usd: 1.0 # minimum spend before flagging +``` + +A session is flagged as a ghost when all three thresholds are met: it has been running for longer than `ghost_max_age_mins` minutes, has made more than `ghost_min_calls` calls, and has spent more than `ghost_min_cost_usd` US dollars. + +Set `ghost_max_age_mins: 0` to disable. + +## Session Lifecycle + +```yaml +agent: + session_timeout_mins: 30 # auto-expire idle sessions +``` + +Sessions are automatically expired after `session_timeout_mins` minutes of inactivity. Active sessions are visible in the dashboard and via the `GET /api/dashboard/sessions` endpoint. 
diff --git a/docs/features/alerting.md b/docs/features/alerting.md new file mode 100644 index 0000000..99e00df --- /dev/null +++ b/docs/features/alerting.md @@ -0,0 +1,37 @@ +# Alerting + +Get notified when budgets are approaching limits or agents are misbehaving. + +## Configuration + +```yaml +alerts: + slack: + webhook_url: "https://hooks.slack.com/services/T00/B00/xxx" + webhooks: + - url: "https://api.example.com/alerts" + headers: + Authorization: "Bearer token" + cooldown_mins: 5 +``` + +## Alert Types + +| Type | Trigger | +|------|---------| +| `budget_warning` | Spend exceeds soft limit threshold | +| `budget_exceeded` | Spend exceeds hard limit | +| `loop_detected` | Agent making repetitive calls | +| `ghost_detected` | Long-running agent with high spend | + +## Slack Notifications + +Provide a Slack webhook URL and alerts are posted as formatted messages with severity, details, and timestamps. + +## Webhook Notifications + +Generic webhook support for any HTTP endpoint. Alerts are sent as JSON POST requests with custom headers for authentication. + +## Deduplication + +The `cooldown_mins` setting prevents alert spam. Once an alert is sent for a specific key (e.g., a particular API key exceeding its budget), the same alert won't fire again until the cooldown period expires. diff --git a/docs/features/budgets.md b/docs/features/budgets.md new file mode 100644 index 0000000..383d9d8 --- /dev/null +++ b/docs/features/budgets.md @@ -0,0 +1,80 @@ +# Budget Enforcement + +Set daily and monthly spend limits per API key, per agent, or per tenant. When exceeded, requests are blocked before they reach the LLM provider. + +## How It Works + +1. Request arrives at the proxy +2. Budget manager checks current spend against limits +3. If over limit: return `429` immediately (no API call, no cost) +4. If approaching limit (soft limit): add warning header, forward request +5. 
If under limit: forward request normally + +## Configuration + +```yaml +budgets: + default: + daily_limit_usd: 50.0 + monthly_limit_usd: 500.0 + soft_limit_pct: 0.8 # warn at 80% + action: "block" # "block" returns 429, "warn" adds header only + rules: + - api_key_pattern: "sk-proj-dev-*" + daily_limit_usd: 5.0 + monthly_limit_usd: 50.0 + action: "block" + - tenant_id: "alpha" + daily_limit_usd: 100.0 + monthly_limit_usd: 1000.0 + action: "block" +``` + +## Block Response + +When a limit is hit: + +```json +{ + "error": { + "type": "budget_exceeded", + "message": "spending limit exceeded", + "daily_spent": 12.50, + "daily_limit": 10.00, + "monthly_spent": 45.00, + "monthly_limit": 500.00 + } +} +``` + +HTTP status: `429 Too Many Requests` + +## Soft Limits + +When `soft_limit_pct` is configured, AgentLedger adds a response header when approaching the threshold: + +``` +X-AgentLedger-Budget-Warning: daily spend at 82% of limit +``` + +The request is still forwarded — soft limits are informational only. + +## Pre-Flight Estimation + +AgentLedger calculates worst-case cost from `max_tokens` before forwarding to the API. If the estimated cost would exceed the remaining budget, the request is rejected immediately — no wasted spend. + +## Per-Key Rules + +Rules use glob patterns to match API keys: + +| Pattern | Matches | +|---------|---------| +| `sk-proj-dev-*` | All keys starting with `sk-proj-dev-` | +| `sk-*` | All keys starting with `sk-` | +| `*` | All keys | + +Rules are evaluated in order. The first matching rule wins. If no rule matches, the default applies. + +## Runtime Management + +Budget rules can be managed at runtime via the [Admin API](../admin-api.md) without restarting the proxy. diff --git a/docs/features/circuit-breaker.md b/docs/features/circuit-breaker.md new file mode 100644 index 0000000..180937e --- /dev/null +++ b/docs/features/circuit-breaker.md @@ -0,0 +1,31 @@ +# Circuit Breaker + +Protects against upstream provider failures. 
After a configurable number of consecutive 5xx responses, the circuit opens and rejects requests immediately — preventing wasted spend on failing APIs. + +## Configuration + +```yaml +circuit_breaker: + max_failures: 5 # consecutive 5xx before opening + timeout_secs: 30 # seconds before half-open retry +``` + +## States + +| State | Behavior | +|-------|----------| +| **Closed** | Normal operation — requests forwarded to upstream | +| **Open** | Circuit tripped — requests rejected immediately with `503` | +| **Half-Open** | After `timeout_secs`, one request is allowed through to test recovery | + +If the half-open request succeeds, the circuit closes. If it fails, the circuit opens again. + +## When to Use + +The circuit breaker is useful when: + +- An upstream provider is experiencing an outage +- You want to fail fast rather than wait for timeouts +- You want to prevent agents from burning budget on requests that will fail + +Omit the `circuit_breaker` section to disable. diff --git a/docs/features/dashboard.md b/docs/features/dashboard.md new file mode 100644 index 0000000..bb379f2 --- /dev/null +++ b/docs/features/dashboard.md @@ -0,0 +1,50 @@ +# Dashboard + +AgentLedger includes an embedded web dashboard for real-time cost visibility. No external tools needed. + +## Accessing the Dashboard + +The dashboard is served at the proxy's root URL: + +``` +http://localhost:8787/ +``` + +Enabled by default. 
To disable: + +```yaml +dashboard: + enabled: false +``` + +## Features + +- **Summary cards** — today's spend, month's spend, request count, avg cost per request, active sessions, error rate +- **Cost over time** — line/area chart with selectable time ranges (30 min to 30 days) +- **Spend by provider** — doughnut chart showing cost distribution across providers +- **Cost breakdown** — table grouped by model, provider, agent, or session +- **Most expensive requests** — top 10 costliest individual API calls +- **Active sessions** — live view of running agent sessions with cost and status +- **Error breakdown** — 429s, 5xx errors, avg latency +- **API key usage** — spend per API key hash +- **Budget rules** — view and manage rules (requires admin token) + +## Multi-Tenant Filtering + +All dashboard views support tenant filtering. Enter a tenant ID in the filter bar to see costs for a specific team or organization. + +The dashboard REST API endpoints also accept `?tenant=` query parameters. + +## Dashboard API + +The dashboard exposes these REST endpoints: + +| Endpoint | Description | +|----------|-------------| +| `GET /api/dashboard/summary` | Summary cards data | +| `GET /api/dashboard/timeseries` | Cost over time chart data | +| `GET /api/dashboard/costs` | Cost breakdown table data | +| `GET /api/dashboard/sessions` | Active sessions list | +| `GET /api/dashboard/expensive` | Most expensive requests | +| `GET /api/dashboard/stats` | Error stats and avg cost | +| `GET /api/dashboard/export` | Export as CSV or JSON | diff --git a/docs/features/mcp.md b/docs/features/mcp.md new file mode 100644 index 0000000..635dd27 --- /dev/null +++ b/docs/features/mcp.md @@ -0,0 +1,52 @@ +# MCP Tool Metering + +Track costs of MCP (Model Context Protocol) tool calls alongside LLM usage. Two modes are available. 
+ +## HTTP Proxy Mode + +Forward JSON-RPC requests to an upstream MCP server, metering each tool call: + +```yaml +mcp: + enabled: true + upstream: "http://localhost:3000" + pricing: + - server: "filesystem" + tool: "read_file" + cost_per_call: 0.01 + - server: "filesystem" + tool: "" # wildcard: all tools on this server + cost_per_call: 0.005 + - server: "github" + tool: "" + cost_per_call: 0.02 +``` + +## Stdio Wrapper Mode + +Wrap any MCP server process and intercept tool calls via stdio: + +```bash +agentledger mcp-wrap -- npx @modelcontextprotocol/server-filesystem /tmp +``` + +This launches the MCP server as a child process, intercepts JSON-RPC messages on stdin/stdout, records tool call costs, and forwards everything transparently. + +## Pricing Rules + +Rules are matched in order. The first matching `server` + `tool` combination wins. + +| server | tool | Matches | +|--------|------|---------| +| `"filesystem"` | `"read_file"` | Exact match: filesystem server, read_file tool | +| `"filesystem"` | `""` | Wildcard: any tool on the filesystem server | +| `""` | `""` | Catch-all: any server, any tool | + +## Viewing MCP Costs + +MCP tool costs appear alongside LLM costs in: + +- The CLI: `agentledger costs` +- The dashboard +- Prometheus metrics: `agentledger_mcp_calls_total` +- Export: `agentledger export` diff --git a/docs/features/multi-tenancy.md b/docs/features/multi-tenancy.md new file mode 100644 index 0000000..f3e8a25 --- /dev/null +++ b/docs/features/multi-tenancy.md @@ -0,0 +1,38 @@ +# Multi-Tenancy + +Isolate costs, budgets, and dashboards by team or organization. 
+ +## Configuration + +Enable tenancy and map API keys to tenants using glob patterns: + +```yaml +tenants: + enabled: true + key_mappings: + - api_key_pattern: "sk-proj-team-alpha-*" + tenant_id: "alpha" + - api_key_pattern: "sk-proj-team-beta-*" + tenant_id: "beta" +``` + +## Header-Based Tenancy + +Set the tenant per-request via header: + +``` +X-AgentLedger-Tenant: alpha +``` + +Header-based tenancy takes precedence over config-based key mapping. + +## What's Isolated + +- **Costs** — each tenant's spend is tracked separately +- **Budgets** — tenant-scoped budget rules (see [budgets](budgets.md)) +- **Dashboard** — filter by tenant in the web UI +- **API endpoints** — all cost and stats endpoints accept `?tenant=` filter + +## Without Tenancy + +When tenancy is disabled (default), all costs are tracked globally. Existing behavior is unchanged — tenancy is fully opt-in. diff --git a/docs/features/observability.md b/docs/features/observability.md new file mode 100644 index 0000000..2ec7c01 --- /dev/null +++ b/docs/features/observability.md @@ -0,0 +1,45 @@ +# Observability + +AgentLedger exports OpenTelemetry metrics via a Prometheus endpoint for integration with your existing monitoring stack. 
+ +## Prometheus Endpoint + +Metrics are exposed at: + +``` +http://localhost:8787/metrics +``` + +## Key Metrics + +| Metric | Type | Description | +|--------|------|-------------| +| `agentledger_requests_total` | Counter | Total proxied requests by provider, model, status | +| `agentledger_request_duration_ms` | Histogram | Request latency | +| `agentledger_input_tokens_total` | Counter | Total input tokens | +| `agentledger_output_tokens_total` | Counter | Total output tokens | +| `agentledger_cost_usd_total` | Counter | Total cost in USD | +| `agentledger_sessions_active` | Gauge | Currently active agent sessions | +| `agentledger_loop_detected_total` | Counter | Loop detection events | +| `agentledger_ghost_detected_total` | Counter | Ghost detection events | +| `agentledger_rate_limited_total` | Counter | Rate-limited requests | +| `agentledger_mcp_calls_total` | Counter | MCP tool calls | + +## Grafana + +A pre-built Grafana dashboard template is included at `deploy/grafana/agentledger.json`. Import it into your Grafana instance for panels covering: + +- Total spend (gauge) +- Spend rate over time (graph) +- Requests by provider (pie chart) +- Top models by cost (table) +- Active sessions (stat) + +## Prometheus Scrape Config + +```yaml +scrape_configs: + - job_name: "agentledger" + static_configs: + - targets: ["localhost:8787"] +``` diff --git a/docs/features/providers.md b/docs/features/providers.md new file mode 100644 index 0000000..038fdb5 --- /dev/null +++ b/docs/features/providers.md @@ -0,0 +1,67 @@ +# Providers + +AgentLedger supports 15 LLM providers with 83+ models and built-in pricing. 
+ +## Supported Providers + +| Provider | Routing | Type | Models | +|----------|---------|------|--------| +| OpenAI | `/v1/` (default) | Native | GPT-4.1, GPT-4.1-mini, GPT-4o, o3, o4-mini, GPT-3.5-turbo | +| Anthropic | `/v1/messages` | Native | Claude Opus 4, Sonnet 4, Haiku 4, Claude 3.5/3.x | +| Azure OpenAI | `/azure/` | Custom | All Azure-hosted OpenAI models | +| Google Gemini | `/gemini/` | Custom | Gemini 2.5 Pro/Flash, 2.0, 1.5 | +| Cohere | `/cohere/` | Custom | Command R+, Command R, Command Light | +| Groq | `/groq/v1/` | OpenAI-compat | Llama 3.3 70B, Mixtral, Gemma | +| Mistral | `/mistral/v1/` | OpenAI-compat | Large, Small, Codestral, Nemo | +| DeepSeek | `/deepseek/v1/` | OpenAI-compat | DeepSeek Chat, Reasoner | +| Together AI | `/together/v1/` | OpenAI-compat | Llama, Qwen, DeepSeek | +| Fireworks AI | `/fireworks/v1/` | OpenAI-compat | Llama, Qwen | +| Perplexity | `/perplexity/v1/` | OpenAI-compat | Sonar Pro, Sonar, Reasoning | +| OpenRouter | `/openrouter/v1/` | OpenAI-compat | 200+ models via routing | +| xAI (Grok) | `/xai/v1/` | OpenAI-compat | Grok 3, Grok 3 Mini, Grok 2 | +| Cerebras | `/cerebras/v1/` | OpenAI-compat | Llama 3.3 70B, Llama 3.1 8B | +| SambaNova | `/sambanova/v1/` | OpenAI-compat | Llama 3.3 70B, Llama 3.1 8B | + +## How Routing Works + +**OpenAI** is the default — requests to `/v1/chat/completions` route to OpenAI. + +**Anthropic** is detected by the `/v1/messages` path. + +**All other providers** use path-prefix routing. A request to `/groq/v1/chat/completions` is routed to Groq. The prefix is stripped before forwarding, so Groq's API sees `/v1/chat/completions`. + +## Provider Types + +- **Native** — OpenAI and Anthropic have dedicated parsers for their specific API formats. +- **OpenAI-compatible** — Groq, Mistral, DeepSeek, Together, Fireworks, Perplexity, OpenRouter, xAI, Cerebras, and SambaNova all use the OpenAI `/v1/chat/completions` format. They share a common parser. 
+- **Custom** — Azure OpenAI, Gemini, and Cohere each have their own provider type (`azure`, `gemini`, `cohere`) with dedicated handling for their API formats. + +## Configuration + +OpenAI and Anthropic are enabled by default. Additional providers go in `providers.extra`: + +```yaml +providers: + openai: + upstream: "https://api.openai.com" + enabled: true + anthropic: + upstream: "https://api.anthropic.com" + enabled: true + extra: + groq: + type: "openai" + upstream: "https://api.groq.com/openai" + path_prefix: "/groq" + enabled: true +``` + +See the [full reference](../configuration/reference.md) for all provider entries. + +## API Key Handling + +API keys pass through to the upstream provider untouched. AgentLedger never stores raw keys — it creates a SHA-256 fingerprint from the first 8 and last 4 characters for attribution and reporting. + +## Model Matching + +Versioned model names (e.g., `gpt-4o-2024-11-20`) are matched via longest prefix to the pricing table. If a model isn't found, costs are recorded with a fallback of $0 and a warning is logged. diff --git a/docs/features/rate-limiting.md b/docs/features/rate-limiting.md new file mode 100644 index 0000000..6365755 --- /dev/null +++ b/docs/features/rate-limiting.md @@ -0,0 +1,30 @@ +# Rate Limiting + +Throttle request volume per API key with sliding window counters. Budget enforcement limits spend; rate limiting limits request frequency. + +## Configuration + +```yaml +rate_limits: + default: + requests_per_minute: 60 + requests_per_hour: 1000 + rules: + - api_key_pattern: "sk-proj-dev-*" + requests_per_minute: 10 +``` + +## How It Works + +Rate limits use an in-memory sliding window counter keyed by API key hash. When a limit is exceeded, the request is rejected immediately with: + +- HTTP status: `429 Too Many Requests` +- `Retry-After` header with seconds until the window resets + +## Per-Key Rules + +Rules use the same glob pattern matching as [budget rules](budgets.md). Rules are evaluated in order — the first match wins. If no rule matches, the default applies. 
+ +## Metrics + +Rate-limited requests are tracked via the `agentledger_rate_limited_total` Prometheus metric. diff --git a/docs/getting-started/cli.md b/docs/getting-started/cli.md new file mode 100644 index 0000000..b5e8ec6 --- /dev/null +++ b/docs/getting-started/cli.md @@ -0,0 +1,84 @@ +# CLI Reference + +## `agentledger serve` + +Start the reverse proxy. + +```bash +agentledger serve [flags] +``` + +| Flag | Description | Default | +|------|-------------|---------| +| `-c, --config` | Path to config file | Auto-detect (see [config overview](../configuration/index.md)) | + +## `agentledger costs` + +Show a cost report from the ledger. + +```bash +agentledger costs [flags] +``` + +| Flag | Description | Default | +|------|-------------|---------| +| `-c, --config` | Path to config file | Auto-detect | +| `--last` | Time window: `1h`, `24h`, `7d`, `30d` | `24h` | +| `--by` | Group by: `model`, `provider`, `key` | `model` | + +Example: + +```bash +agentledger costs --last 7d --by provider +``` + +## `agentledger export` + +Export cost data as CSV or JSON. + +```bash +agentledger export [flags] +``` + +| Flag | Description | Default | +|------|-------------|---------| +| `-c, --config` | Path to config file | Auto-detect | +| `--format` | Output format: `csv`, `json` | `json` | +| `--last` | Time window: `1h`, `24h`, `7d`, `30d` | `30d` | +| `--by` | Group by: `model`, `provider`, `key`, `agent`, `session` | `model` | + +## `agentledger mcp-wrap` + +Wrap an MCP server process for tool call metering via stdio. + +```bash +agentledger mcp-wrap [flags] -- command [args...] +``` + +| Flag | Description | Default | +|------|-------------|---------| +| `-c, --config` | Path to config file | Auto-detect | + +Example: + +```bash +agentledger mcp-wrap -- npx @modelcontextprotocol/server-filesystem /tmp +``` + +## `agentledger version` + +Print the version string. 
+ +```bash +agentledger version +``` + +## Environment Variables + +All config settings can be overridden with environment variables prefixed `AGENTLEDGER_`: + +```bash +AGENTLEDGER_LISTEN=":9090" +AGENTLEDGER_STORAGE_DSN="/tmp/ledger.db" +AGENTLEDGER_LOG_LEVEL="debug" +``` diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..eee1d5b --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,48 @@ +# Installation + +## Homebrew + +```bash +brew install wdz-dev/tap/agentledger +``` + +## Binary Download + +Download the latest release from [GitHub Releases](https://github.com/WDZ-Dev/agent-ledger/releases): + +```bash +curl -sSL https://github.com/WDZ-Dev/agent-ledger/releases/latest/download/agentledger_$(uname -s)_$(uname -m).tar.gz | tar xz +sudo mv agentledger /usr/local/bin/ +``` + +Available for Linux, macOS, and Windows on both amd64 and arm64. + +## Docker + +```bash +docker run --rm -p 8787:8787 ghcr.io/wdz-dev/agent-ledger:latest +``` + +See [Docker deployment](../deployment/docker.md) for persistence and configuration. + +## Helm (Kubernetes) + +```bash +helm install agentledger deploy/helm/agentledger +``` + +See [Kubernetes deployment](../deployment/kubernetes.md) for full details. + +## From Source + +Requires Go 1.25+: + +```bash +go install github.com/WDZ-Dev/agent-ledger/cmd/agentledger@latest +``` + +## Verify + +```bash +agentledger version +``` diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..ec6b73f --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,131 @@ +# Quick Start + +## 1. Start the proxy + +```bash +# With defaults (listens on :8787, SQLite storage) +agentledger serve + +# Or with a config file +agentledger serve -c agentledger.yaml +``` + +## 2. 
Point your agents at it + +=== "Python (OpenAI)" + + ```bash + export OPENAI_BASE_URL=http://localhost:8787/v1 + ``` + + ```python + import openai + client = openai.OpenAI() # picks up OPENAI_BASE_URL automatically + ``` + +=== "Node.js" + + ```javascript + const openai = new OpenAI({ baseURL: 'http://localhost:8787/v1' }); + ``` + +=== "Claude Code" + + ```bash + export ANTHROPIC_BASE_URL=http://localhost:8787 + ``` + +=== "curl" + + ```bash + curl http://localhost:8787/v1/chat/completions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4.1-mini", "messages": [{"role": "user", "content": "Hello"}]}' + ``` + +For other providers, use path-prefix routing: + +```bash +# Groq +curl http://localhost:8787/groq/v1/chat/completions + +# Mistral +curl http://localhost:8787/mistral/v1/chat/completions + +# DeepSeek +curl http://localhost:8787/deepseek/v1/chat/completions + +# Gemini +curl http://localhost:8787/gemini/v1beta/models/gemini-2.5-pro:generateContent + +# Cohere +curl http://localhost:8787/cohere/v2/chat +``` + +## 3. Check your costs + +```bash +# Last 24 hours, grouped by model +agentledger costs + +# Last 7 days, grouped by API key +agentledger costs --last 7d --by key +``` + +``` +PROVIDER MODEL REQUESTS INPUT TOKENS OUTPUT TOKENS COST (USD) +-------- ----- -------- ------------ ------------- ---------- +openai gpt-4.1-mini 142 28400 14200 $0.0341 +openai gpt-4.1 38 19000 9500 $0.1140 +anthropic claude-sonnet-4 12 6000 3000 $0.0630 +-------- ----- -------- ------------ ------------- ---------- +TOTAL 192 53400 26700 $0.2111 +``` + +## 4. Set budgets (optional) + +```yaml +# agentledger.yaml +budgets: + default: + daily_limit_usd: 50.0 + monthly_limit_usd: 500.0 + soft_limit_pct: 0.8 + action: "block" + rules: + - api_key_pattern: "sk-proj-dev-*" + daily_limit_usd: 5.0 + action: "block" +``` + +When a limit is hit, the agent receives a `429` with a clear JSON error — no surprise charges. + +## 5. 
Add agent tracking (optional) + +Add headers to get per-agent cost attribution: + +```python +client = openai.OpenAI( + base_url="http://localhost:8787/v1", + default_headers={ + "X-Agent-Id": "code-reviewer", + "X-Agent-Session": f"sess_{run_id}", + "X-Agent-User": "alice@company.com", + "X-Agent-Task": "Review PR #456", + } +) +``` + +These agent headers are stripped by the proxy before the request is forwarded to the provider. Without them, costs are tracked at the API-key level. + +## 6. View the dashboard + +Open [http://localhost:8787/](http://localhost:8787/) for real-time cost breakdowns, session views, and spending trends. + +## Next steps + +- [Configuration reference](../configuration/reference.md) — full YAML config +- [Providers](../features/providers.md) — all 15 supported providers +- [Budget enforcement](../features/budgets.md) — per-key limits, pre-flight estimation +- [Agent tracking](../features/agent-tracking.md) — sessions, loop detection, ghost detection diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..747e945 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,94 @@ +# AgentLedger + +**Know what your agents cost.** Meter. Budget. Control. + +AgentLedger is an open-source reverse proxy that gives you real-time cost attribution, budget enforcement, and financial observability for AI agents — without changing a single line of code. + +```bash +export OPENAI_BASE_URL=http://localhost:8787/v1 +# That's it. Your agents now have cost tracking and budget enforcement. +``` + +--- + +## Why AgentLedger? + +AI agents make dozens of LLM calls per task. Costs compound fast, loops happen silently, and provider dashboards only show you the damage after the fact.
+ +- **Real-time cost tracking** — every request metered, every token counted +- **Budget enforcement** — daily and monthly limits with automatic blocking +- **Pre-flight estimation** — rejects requests that would exceed your budget before they hit the API +- **Agent session tracking** — group multi-call agent runs into sessions, detect loops and ghost agents +- **15 LLM providers** — OpenAI, Anthropic, Gemini, Groq, Mistral, DeepSeek, Cohere, and more +- **Zero code changes** — works with any OpenAI/Anthropic SDK via base URL override + +--- + +## Architecture + +``` + Agents (any SDK) --+ + | +--------------------+ OpenAI + MCP Servers -------+---> | AgentLedger :8787 | --> Anthropic + (stdio/HTTP) | | | Groq, Mistral + | | Budget limits | DeepSeek, Gemini + | | Rate limiting | Cohere, + 8 more + | | Token metering | + | | Agent sessions | --> Slack / Webhooks + | | Cost recording | + | | | + | | SQLite/Postgres | + | | Dashboard | + | | Prometheus | + | +--------------------+ +``` + +--- + +## At a Glance + +| | | +|---|---| +| **Providers** | 15 LLM providers, 83+ models with built-in pricing | +| **Overhead** | Sub-10ms proxy latency (~0.1ms typical) | +| **Dependencies** | Zero — single Go binary with embedded SQLite and dashboard | +| **Setup** | One environment variable, zero code changes | +| **License** | Apache 2.0 — all features free and open-source | +| **Platforms** | Linux, macOS, Windows (amd64 + arm64) | + +--- + +## vs LiteLLM + +| | AgentLedger | LiteLLM | +|---|---|---| +| **Architecture** | Go single binary, sub-10ms overhead | Python, documented memory leaks | +| **Cost model** | Per-agent-execution tracking | Per-key/user/team only | +| **Loop detection** | Built-in, zero-config | Not available | +| **Ghost agent detection** | Built-in | Not available | +| **Pre-flight estimation** | Rejects before API call | Post-hoc only | +| **Budget enforcement** | Free, included | Enterprise paywall | +| **Audit logs** | Free, included | Enterprise 
paywall | +| **Setup** | `brew install` + one env var | Python + pip + database server | +| **Dependencies** | Zero (embedded SQLite + dashboard) | PostgreSQL required, Redis recommended | + +--- + +## Quick Start + +```bash +# Install +brew install wdz-dev/tap/agentledger + +# Start the proxy +agentledger serve + +# Point your agents at it +export OPENAI_BASE_URL=http://localhost:8787/v1 + +# Check your costs +agentledger costs +``` + +[Get started :material-arrow-right:](getting-started/installation.md){ .md-button .md-button--primary } +[View on GitHub :material-github:](https://github.com/WDZ-Dev/agent-ledger){ .md-button } diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..2951c67 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +mkdocs-material==9.6.7 +mkdocs-minify-plugin==0.8.0 diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..296d702 --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,26 @@ +# Roadmap + +## Completed + +- [x] **Phase 1: Core Proxy** — Reverse proxy, token metering, cost calculation, SQLite storage, CLI +- [x] **Phase 2: Budget Enforcement** — Per-key budgets, pre-flight estimation, circuit breaker +- [x] **Phase 3: Agent Attribution** — Session tracking, loop detection, ghost agent detection +- [x] **Phase 4: Observability** — OpenTelemetry metrics, Prometheus endpoint, web dashboard +- [x] **Phase 5: MCP Integration** — Meter MCP tool calls alongside LLM costs +- [x] **Phase 6: Polish & Launch** — Docker, GoReleaser, Helm chart, docs +- [x] **Phase 7: Multi-Provider** — Groq, Mistral, DeepSeek, Gemini, Cohere with path-prefix routing +- [x] **Phase 8: Postgres** — Production-grade PostgreSQL storage backend +- [x] **Phase 9: Multi-Tenancy** — Tenant isolation with header and config-based resolution +- [x] **Phase 10: Alerting** — Slack and webhook notifications with deduplication +- [x] **Phase 11: Rate Limiting** — Per-key request throttling + Homebrew tap 
+- [x] **Phase 12: Admin API** — Runtime budget rule management + +## Future + +| Feature | Value | Complexity | +|---------|-------|------------| +| Response Caching | High | High | +| WebSocket Live Dashboard | Medium | Medium | +| Plugin / Middleware System | Medium | Very High | + +See [ROADMAP.md](https://github.com/WDZ-Dev/agent-ledger/blob/main/ROADMAP.md) on GitHub for detailed descriptions of each planned feature. diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..590bfe6 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,13 @@ +[data-md-color-scheme="slate"] { + --md-default-bg-color: #0d1117; + --md-default-bg-color--light: #161b22; + --md-default-fg-color: #e6edf3; + --md-primary-fg-color: #161b22; + --md-primary-bg-color: #e6edf3; + --md-accent-fg-color: #3fb950; +} + +.md-typeset h1, +.md-typeset h2 { + font-family: "JetBrains Mono", monospace; +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..53ba6c9 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,104 @@ +site_name: AgentLedger +site_url: https://wdz-dev.github.io/agent-ledger/ +site_description: "Know what your agents cost. Real-time cost attribution, budget enforcement, and financial observability for AI agents." 
+repo_url: https://github.com/WDZ-Dev/agent-ledger +repo_name: WDZ-Dev/agent-ledger +edit_uri: edit/main/docs/ + +theme: + name: material + palette: + - scheme: slate + primary: custom + accent: teal + toggle: + icon: material/brightness-4 + name: Switch to light mode + - scheme: default + primary: custom + accent: teal + toggle: + icon: material/brightness-7 + name: Switch to dark mode + font: + text: Inter + code: JetBrains Mono + icon: + repo: fontawesome/brands/github + features: + - navigation.instant + - navigation.instant.prefetch + - navigation.tracking + - navigation.tabs + - navigation.tabs.sticky + - navigation.sections + - navigation.top + - navigation.footer + - search.suggest + - search.highlight + - content.code.copy + - content.code.annotate + - content.tabs.link + - toc.follow + +extra_css: + - stylesheets/extra.css + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/WDZ-Dev/agent-ledger + +copyright: Copyright © 2025-2026 WDZ-Dev — Apache 2.0 + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - tables + - attr_list + - md_in_html + - toc: + permalink: true + +plugins: + - search + - minify: + minify_html: true + +nav: + - Home: index.md + - Getting Started: + - Installation: getting-started/installation.md + - Quick Start: getting-started/quickstart.md + - CLI Reference: getting-started/cli.md + - Configuration: + - Overview: configuration/index.md + - Full Reference: configuration/reference.md + - Features: + - Providers: features/providers.md + - Budget Enforcement: features/budgets.md + - Agent Tracking: features/agent-tracking.md + - MCP Tool 
Metering: features/mcp.md + - Dashboard: features/dashboard.md + - Multi-Tenancy: features/multi-tenancy.md + - Alerting: features/alerting.md + - Rate Limiting: features/rate-limiting.md + - Circuit Breaker: features/circuit-breaker.md + - Observability: features/observability.md + - Admin API: admin-api.md + - Deployment: + - Docker: deployment/docker.md + - Kubernetes / Helm: deployment/kubernetes.md + - Roadmap: roadmap.md