Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ A Go-based open-source reverse proxy that provides real-time cost attribution, b

## Quick Context

- **What:** Transparent reverse proxy between AI agents and LLM APIs (OpenAI, Anthropic, Groq, Mistral, DeepSeek, Gemini, Cohere)
- **What:** Transparent reverse proxy between AI agents and LLM APIs (OpenAI, Anthropic, Azure OpenAI, Groq, Mistral, DeepSeek, Gemini, Cohere, Together AI, Fireworks AI, Perplexity, OpenRouter, xAI, Cerebras, SambaNova)
- **How:** `export OPENAI_BASE_URL=http://localhost:8787/v1` — zero code changes
- **Why:** No tool tracks per-agent-execution costs, detects loops, or meters MCP calls
- **Language:** Go — single binary, zero runtime dependencies
Expand Down Expand Up @@ -37,7 +37,7 @@ Agents → AgentLedger (Go proxy :8787) → LLM APIs (OpenAI, Anthropic, Groq, M
|---------|---------|
| `cmd/agentledger/` | CLI entrypoint (cobra): `serve`, `costs`, `version` |
| `internal/proxy/` | Core reverse proxy (`httputil.ReverseProxy`), SSE streaming, middleware chain |
| `internal/provider/` | Provider interface + OpenAI/Anthropic/Gemini/Cohere parsers, OpenAI-compatible base type, path-prefix routing |
| `internal/provider/` | Provider interface + OpenAI/Anthropic/Azure/Gemini/Cohere parsers, OpenAI-compatible base type (Groq, Mistral, DeepSeek, Together, Fireworks, Perplexity, OpenRouter, xAI, Cerebras, SambaNova), path-prefix routing |
| `internal/meter/` | Cost calculation engine, model pricing table, tiktoken-go fallback |
| `internal/ledger/` | Storage interface, SQLite (modernc.org/sqlite, CGO-free) + Postgres impls, multi-tenant queries |
| `internal/budget/` | Budget enforcement middleware, circuit breaker |
Expand Down
40 changes: 40 additions & 0 deletions configs/agentledger.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,46 @@ providers:
# upstream: "https://api.cohere.com"
# path_prefix: "/cohere"
# enabled: true
# azure:
# type: "azure" # Azure OpenAI (custom URL scheme)
# upstream: "https://my-resource.openai.azure.com"
# path_prefix: "/azure"
# enabled: true
# together:
# type: "openai"
# upstream: "https://api.together.xyz"
# path_prefix: "/together"
# enabled: true
# fireworks:
# type: "openai"
# upstream: "https://api.fireworks.ai/inference"
# path_prefix: "/fireworks"
# enabled: true
# perplexity:
# type: "openai"
# upstream: "https://api.perplexity.ai"
# path_prefix: "/perplexity"
# enabled: true
# openrouter:
# type: "openai"
# upstream: "https://openrouter.ai/api"
# path_prefix: "/openrouter"
# enabled: true
# xai:
# type: "openai" # xAI (Grok)
# upstream: "https://api.x.ai"
# path_prefix: "/xai"
# enabled: true
# cerebras:
# type: "openai"
# upstream: "https://api.cerebras.ai"
# path_prefix: "/cerebras"
# enabled: true
# sambanova:
# type: "openai"
# upstream: "https://api.sambanova.ai"
# path_prefix: "/sambanova"
# enabled: true

storage:
driver: "sqlite" # "sqlite" or "postgres"
Expand Down
32 changes: 32 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,38 @@ func Load(path string) (*Config, error) {
v.SetDefault("providers.extra.cohere.upstream", "https://api.cohere.com")
v.SetDefault("providers.extra.cohere.path_prefix", "/cohere")
v.SetDefault("providers.extra.cohere.enabled", false)
v.SetDefault("providers.extra.azure.type", "azure")
v.SetDefault("providers.extra.azure.upstream", "")
v.SetDefault("providers.extra.azure.path_prefix", "/azure")
v.SetDefault("providers.extra.azure.enabled", false)
v.SetDefault("providers.extra.together.type", "openai")
v.SetDefault("providers.extra.together.upstream", "https://api.together.xyz")
v.SetDefault("providers.extra.together.path_prefix", "/together")
v.SetDefault("providers.extra.together.enabled", false)
v.SetDefault("providers.extra.fireworks.type", "openai")
v.SetDefault("providers.extra.fireworks.upstream", "https://api.fireworks.ai/inference")
v.SetDefault("providers.extra.fireworks.path_prefix", "/fireworks")
v.SetDefault("providers.extra.fireworks.enabled", false)
v.SetDefault("providers.extra.perplexity.type", "openai")
v.SetDefault("providers.extra.perplexity.upstream", "https://api.perplexity.ai")
v.SetDefault("providers.extra.perplexity.path_prefix", "/perplexity")
v.SetDefault("providers.extra.perplexity.enabled", false)
v.SetDefault("providers.extra.openrouter.type", "openai")
v.SetDefault("providers.extra.openrouter.upstream", "https://openrouter.ai/api")
v.SetDefault("providers.extra.openrouter.path_prefix", "/openrouter")
v.SetDefault("providers.extra.openrouter.enabled", false)
v.SetDefault("providers.extra.xai.type", "openai")
v.SetDefault("providers.extra.xai.upstream", "https://api.x.ai")
v.SetDefault("providers.extra.xai.path_prefix", "/xai")
v.SetDefault("providers.extra.xai.enabled", false)
v.SetDefault("providers.extra.cerebras.type", "openai")
v.SetDefault("providers.extra.cerebras.upstream", "https://api.cerebras.ai")
v.SetDefault("providers.extra.cerebras.path_prefix", "/cerebras")
v.SetDefault("providers.extra.cerebras.enabled", false)
v.SetDefault("providers.extra.sambanova.type", "openai")
v.SetDefault("providers.extra.sambanova.upstream", "https://api.sambanova.ai")
v.SetDefault("providers.extra.sambanova.path_prefix", "/sambanova")
v.SetDefault("providers.extra.sambanova.enabled", false)

v.SetDefault("storage.driver", "sqlite")
v.SetDefault("storage.dsn", "data/agentledger.db")
Expand Down
57 changes: 57 additions & 0 deletions internal/meter/meter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,60 @@ func TestPrefixMatchLongestWins(t *testing.T) {
t.Errorf("expected $0.15 (gpt-4o-mini pricing), got $%f", cost)
}
}

// TestNewestModelsKnown verifies the pricing table recognizes current model
// identifiers, including dated and "-thinking" suffixed variants that must
// resolve via prefix matching.
func TestNewestModelsKnown(t *testing.T) {
	m := New()

	groups := [][]string{
		// OpenAI — GPT-5 family
		{"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-5-pro", "gpt-5-codex",
			"gpt-5.1", "gpt-5.2", "gpt-5.2-pro", "gpt-5.4", "gpt-5.4-pro"},
		// OpenAI — reasoning
		{"o3-pro", "o1-pro", "o4-mini"},
		// OpenAI — GPT-4.1
		{"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"},
		// Anthropic 4.5/4.6
		{"claude-opus-4.6", "claude-sonnet-4.6", "claude-haiku-4.5"},
		// Anthropic — dated variants (prefix match)
		{"claude-opus-4.6-20260101", "claude-sonnet-4.6-20260101", "claude-haiku-4.5-20251001"},
		// Anthropic — thinking variants (prefix match)
		{"claude-opus-4.6-thinking", "claude-sonnet-4.5-thinking"},
		// xAI
		{"grok-3", "grok-3-mini"},
	}

	for _, group := range groups {
		for _, model := range group {
			if !m.KnownModel(model) {
				t.Errorf("%q should be a known model", model)
			}
		}
	}
}

// TestO3ProNotConfusedWithO3 guards against prefix matching resolving
// "o3-pro" to the cheaper "o3" table entry: o3 is $10/$40 per MTok while
// o3-pro is $150/$600. Both the input and output rates are pinned so a
// partial mismatch on either side is caught.
func TestO3ProNotConfusedWithO3(t *testing.T) {
	m := New()

	// Input side: 1M input tokens at $150 per MTok.
	in := m.Calculate("o3-pro", 1_000_000, 0)
	if math.Abs(in-150.00) > 1e-9 {
		t.Errorf("o3-pro: expected $150.00 input cost, got $%f (may have matched o3 instead)", in)
	}

	// Output side: 1M output tokens at $600 per MTok.
	out := m.Calculate("o3-pro", 0, 1_000_000)
	if math.Abs(out-600.00) > 1e-9 {
		t.Errorf("o3-pro: expected $600.00 output cost, got $%f (may have matched o3 instead)", out)
	}
}
84 changes: 77 additions & 7 deletions internal/meter/pricing.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,69 @@ func DefaultPricing() map[string]ModelPricing {

// OpenAI — reasoning models
"o3": {InputPerMTok: 10.00, OutputPerMTok: 40.00},
"o3-pro": {InputPerMTok: 150.00, OutputPerMTok: 600.00},
"o3-mini": {InputPerMTok: 1.10, OutputPerMTok: 4.40},
"o4-mini": {InputPerMTok: 1.10, OutputPerMTok: 4.40},
"o1": {InputPerMTok: 15.00, OutputPerMTok: 60.00},
"o1-pro": {InputPerMTok: 150.00, OutputPerMTok: 600.00},
"o1-mini": {InputPerMTok: 3.00, OutputPerMTok: 12.00},

// OpenAI — GPT-5.4 family
"gpt-5.4-pro": {InputPerMTok: 30.00, OutputPerMTok: 180.00},
"gpt-5.4": {InputPerMTok: 2.50, OutputPerMTok: 15.00},

// OpenAI — GPT-5.3 family
"gpt-5.3-codex": {InputPerMTok: 1.75, OutputPerMTok: 14.00},
"gpt-5.3-chat": {InputPerMTok: 1.75, OutputPerMTok: 14.00},

// OpenAI — GPT-5.2 family
"gpt-5.2-pro": {InputPerMTok: 10.50, OutputPerMTok: 84.00},
"gpt-5.2-codex": {InputPerMTok: 1.75, OutputPerMTok: 14.00},
"gpt-5.2-chat": {InputPerMTok: 0.875, OutputPerMTok: 7.00},
"gpt-5.2": {InputPerMTok: 1.75, OutputPerMTok: 14.00},

// OpenAI — GPT-5.1 family
"gpt-5.1-codex-max": {InputPerMTok: 1.25, OutputPerMTok: 10.00},
"gpt-5.1-codex-mini": {InputPerMTok: 0.25, OutputPerMTok: 2.00},
"gpt-5.1-codex": {InputPerMTok: 1.25, OutputPerMTok: 10.00},
"gpt-5.1-chat": {InputPerMTok: 0.625, OutputPerMTok: 5.00},
"gpt-5.1": {InputPerMTok: 0.625, OutputPerMTok: 5.00},

// OpenAI — GPT-5 family
"gpt-5-pro": {InputPerMTok: 15.00, OutputPerMTok: 120.00},
"gpt-5-codex": {InputPerMTok: 1.25, OutputPerMTok: 10.00},
"gpt-5-chat": {InputPerMTok: 1.25, OutputPerMTok: 10.00},
"gpt-5-mini": {InputPerMTok: 0.125, OutputPerMTok: 1.00},
"gpt-5-nano": {InputPerMTok: 0.05, OutputPerMTok: 0.40},
"gpt-5": {InputPerMTok: 1.25, OutputPerMTok: 10.00},

// OpenAI — legacy
"gpt-4-turbo": {InputPerMTok: 10.00, OutputPerMTok: 30.00},
"gpt-4": {InputPerMTok: 30.00, OutputPerMTok: 60.00},
"gpt-3.5-turbo": {InputPerMTok: 0.50, OutputPerMTok: 1.50},

// Anthropic — Claude 4 family
// Anthropic — Claude 4.5/4.6 family (reduced pricing from 4.0)
"claude-opus-4.6": {InputPerMTok: 5.00, OutputPerMTok: 25.00},
"claude-opus-4.5": {InputPerMTok: 5.00, OutputPerMTok: 25.00},
"claude-sonnet-4.6": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-sonnet-4.5": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-haiku-4.5": {InputPerMTok: 1.00, OutputPerMTok: 5.00},

// Anthropic — Claude 4.0/4.1 family
"claude-opus-4.1": {InputPerMTok: 15.00, OutputPerMTok: 75.00},
"claude-opus-4": {InputPerMTok: 15.00, OutputPerMTok: 75.00},
"claude-sonnet-4": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-haiku-4": {InputPerMTok: 0.80, OutputPerMTok: 4.00},

// Anthropic — Claude 3.7
"claude-3.7-sonnet": {InputPerMTok: 3.00, OutputPerMTok: 15.00},

// Anthropic — Claude 3.5
"claude-3-5-sonnet": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-3-5-haiku": {InputPerMTok: 0.80, OutputPerMTok: 4.00},
"claude-3.5-sonnet": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-3.5-haiku": {InputPerMTok: 0.80, OutputPerMTok: 4.00},

// Anthropic — Claude 3
"claude-3-opus": {InputPerMTok: 15.00, OutputPerMTok: 75.00},
"claude-3-sonnet": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"claude-3-haiku": {InputPerMTok: 0.25, OutputPerMTok: 1.25},
"claude-3-opus": {InputPerMTok: 15.00, OutputPerMTok: 75.00},
"claude-3-haiku": {InputPerMTok: 0.25, OutputPerMTok: 1.25},

// Google Gemini
"gemini-2.5-pro": {InputPerMTok: 1.25, OutputPerMTok: 10.00},
Expand Down Expand Up @@ -72,5 +112,35 @@ func DefaultPricing() map[string]ModelPricing {
"command-r-plus": {InputPerMTok: 2.50, OutputPerMTok: 10.00},
"command-r": {InputPerMTok: 0.15, OutputPerMTok: 0.60},
"command-light": {InputPerMTok: 0.30, OutputPerMTok: 0.60},

// xAI (Grok)
"grok-3": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"grok-3-mini": {InputPerMTok: 0.30, OutputPerMTok: 0.50},
"grok-2": {InputPerMTok: 2.00, OutputPerMTok: 10.00},

// Perplexity
"sonar-pro": {InputPerMTok: 3.00, OutputPerMTok: 15.00},
"sonar": {InputPerMTok: 1.00, OutputPerMTok: 1.00},
"sonar-reasoning": {InputPerMTok: 1.00, OutputPerMTok: 5.00},

// Together AI (hosted open-source models)
"meta-llama/Llama-3.3-70B-Instruct-Turbo": {InputPerMTok: 0.88, OutputPerMTok: 0.88},
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {InputPerMTok: 3.50, OutputPerMTok: 3.50},
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {InputPerMTok: 0.18, OutputPerMTok: 0.18},
"Qwen/Qwen2.5-72B-Instruct-Turbo": {InputPerMTok: 1.20, OutputPerMTok: 1.20},
"deepseek-ai/DeepSeek-V3": {InputPerMTok: 0.90, OutputPerMTok: 0.90},

// Fireworks AI
"accounts/fireworks/models/llama-v3p3-70b-instruct": {InputPerMTok: 0.90, OutputPerMTok: 0.90},
"accounts/fireworks/models/llama-v3p1-8b-instruct": {InputPerMTok: 0.20, OutputPerMTok: 0.20},
"accounts/fireworks/models/qwen2p5-72b-instruct": {InputPerMTok: 0.90, OutputPerMTok: 0.90},

// Cerebras
"llama-3.3-70b": {InputPerMTok: 0.85, OutputPerMTok: 0.85},
"llama-3.1-8b": {InputPerMTok: 0.10, OutputPerMTok: 0.10},

// SambaNova
"Meta-Llama-3.3-70B-Instruct": {InputPerMTok: 0.60, OutputPerMTok: 0.60},
"Meta-Llama-3.1-8B-Instruct": {InputPerMTok: 0.10, OutputPerMTok: 0.10},
}
}
119 changes: 119 additions & 0 deletions internal/provider/azure.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package provider

import (
"encoding/json"
"net/http"
"strings"
)

// Azure implements the Provider interface for Azure OpenAI Service.
// Azure uses a different URL scheme: /openai/deployments/{deployment}/chat/completions?api-version=...
// Auth is via api-key header instead of Bearer token.
type Azure struct {
upstream string
pathPrefix string
}

// NewAzure creates an Azure OpenAI provider. The upstream should be the Azure
// resource endpoint (e.g., https://my-resource.openai.azure.com).
// Requests arrive at /azure/openai/deployments/{deployment}/chat/completions.
func NewAzure(upstream string) *Azure {
return &Azure{
upstream: upstream,
pathPrefix: "/azure",
}
}

func (a *Azure) Name() string { return "azure" } //nolint:goconst
func (a *Azure) UpstreamURL() string { return a.upstream }
func (a *Azure) PathPrefix() string { return a.pathPrefix }

func (a *Azure) Match(r *http.Request) bool {
p := r.URL.Path
return strings.HasPrefix(p, a.pathPrefix+"/openai/deployments/")
}

// RewritePath strips the /azure prefix so upstream sees /openai/deployments/...
func (a *Azure) RewritePath(path string) string {
return strings.TrimPrefix(path, a.pathPrefix)
}

// azureRequest is the minimal subset of an Azure OpenAI request.
// Model is optional: classic deployment-routed requests omit it (the
// deployment name lives in the URL path), but v1-style Azure OpenAI
// requests include it in the body like standard OpenAI.
type azureRequest struct {
	Model     string `json:"model"`
	MaxTokens int    `json:"max_tokens"`
	Stream    bool   `json:"stream"`
}

// ParseRequest extracts model, max_tokens, and stream flag from the request
// body. When the body carries a model name it is used directly (so pricing
// lookup can work); otherwise the "azure-deployment" placeholder is kept,
// since ParseRequest has no access to the URL path where the deployment
// name lives.
func (a *Azure) ParseRequest(body []byte) (*RequestMeta, error) {
	var req azureRequest
	if err := json.Unmarshal(body, &req); err != nil {
		return nil, err
	}
	model := req.Model
	if model == "" {
		// Azure puts the model (deployment) in the URL path, not the body.
		model = "azure-deployment"
	}
	return &RequestMeta{
		Model:     model,
		MaxTokens: req.MaxTokens,
		Stream:    req.Stream,
	}, nil
}

// azureResponse mirrors the Azure OpenAI completion response (identical to
// the OpenAI format), reduced to the fields needed for usage accounting.
type azureResponse struct {
	Model string `json:"model"`
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

// ParseResponse extracts the model name and token usage counts from a
// non-streaming Azure OpenAI response body.
func (a *Azure) ParseResponse(body []byte) (*ResponseMeta, error) {
	parsed := azureResponse{}
	if err := json.Unmarshal(body, &parsed); err != nil {
		return nil, err
	}
	meta := &ResponseMeta{
		Model:        parsed.Model,
		InputTokens:  parsed.Usage.PromptTokens,
		OutputTokens: parsed.Usage.CompletionTokens,
		TotalTokens:  parsed.Usage.TotalTokens,
	}
	return meta, nil
}

// azureStreamChunk mirrors an Azure OpenAI streaming SSE chunk (identical to
// the OpenAI format): incremental delta text plus an optional trailing usage
// object on the final chunk.
type azureStreamChunk struct {
	Model   string `json:"model"`
	Choices []struct {
		Delta struct {
			Content string `json:"content"`
		} `json:"delta"`
	} `json:"choices"`
	Usage *struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage,omitempty"`
}

// ParseStreamChunk decodes one SSE data payload. The first parameter (the
// event name) is unused because Azure, like OpenAI, carries everything in
// the data payload. A chunk with a usage object marks the end of the stream.
func (a *Azure) ParseStreamChunk(_ string, data []byte) (*StreamChunkMeta, error) {
	chunk := azureStreamChunk{}
	if err := json.Unmarshal(data, &chunk); err != nil {
		return nil, err
	}

	meta := &StreamChunkMeta{Model: chunk.Model}

	if choices := chunk.Choices; len(choices) > 0 {
		meta.Text = choices[0].Delta.Content
	}

	if usage := chunk.Usage; usage != nil {
		// Usage only appears on the final chunk; treat it as stream end.
		meta.InputTokens = usage.PromptTokens
		meta.OutputTokens = usage.CompletionTokens
		meta.Done = true
	}

	return meta, nil
}
Loading
Loading