-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbilling.go
More file actions
105 lines (96 loc) · 4.06 KB
/
billing.go
File metadata and controls
105 lines (96 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package llmproxy
// CostInfo contains pricing information for a model.
// All rates are expressed in USD per 1M tokens.
type CostInfo struct {
// Input is the cost per 1M input tokens in USD.
Input float64
// Output is the cost per 1M output tokens in USD.
Output float64
// CacheRead is the cost per 1M cached input tokens in USD (optional).
// CalculateCost falls back to the Input rate when this is zero or negative.
CacheRead float64
// CacheWrite is the cost per 1M cache write tokens in USD (optional, Anthropic).
// NOTE(review): CalculateCost in this file does not read this field.
CacheWrite float64
}
// CostLookup is a function that returns the pricing for a given provider and model.
// The boolean result reports whether pricing was found: it is true together with
// the model's CostInfo, and false if the model is not known.
//
// Using a lookup function allows the pricing data to be managed externally,
// such as downloading it from models.dev or using a custom pricing database.
type CostLookup func(provider string, model string) (CostInfo, bool)
// BillingResult contains the calculated cost for a request.
// It is produced by CalculateCost; all cost fields are in USD.
type BillingResult struct {
// Provider is the provider name.
Provider string
// Model is the model identifier.
Model string
// PromptTokens is the number of input tokens. CalculateCost may raise this
// above the provider-reported count so that it also includes cached tokens
// that the provider reported separately from the prompt count.
PromptTokens int
// CompletionTokens is the number of output tokens.
CompletionTokens int
// CachedTokens is the number of prompt tokens served from cache.
CachedTokens int
// TotalTokens is the sum of PromptTokens and CompletionTokens.
TotalTokens int
// InputCost is the calculated input cost in USD (non-cached prompt tokens).
InputCost float64
// CachedInputCost is the cost for cached prompt tokens in USD.
CachedInputCost float64
// OutputCost is the calculated output cost in USD.
OutputCost float64
// TotalCost is the sum of InputCost, CachedInputCost and OutputCost in USD.
TotalCost float64
}
// CalculateCost computes the billing result from cost info, token usage, and cache usage.
//
// Cached tokens are billed at the CacheRead rate (falling back to the Input rate
// when no cache-read price is set), non-cached prompt tokens are billed at the
// Input rate, and completion tokens at the Output rate. All rates are USD per
// 1M tokens. Cache-write pricing (CostInfo.CacheWrite) is not applied here.
//
// Providers report prompt tokens differently:
//   - OpenAI/Fireworks/Bedrock set CacheUsage.CachedTokens, and promptTokens
//     INCLUDES those cached tokens, so non-cached = promptTokens - cached.
//   - Anthropic sets CacheUsage.CacheReadInputTokens, and promptTokens EXCLUDES
//     them (only new tokens), so non-cached = promptTokens and the cached
//     tokens are additional.
//
// The returned PromptTokens is always the full prompt size (non-cached plus
// cached), and TotalTokens is PromptTokens plus completionTokens. A nil
// cacheUsage means no tokens were served from cache.
func CalculateCost(provider, model string, costInfo CostInfo, promptTokens, completionTokens int, cacheUsage *CacheUsage) BillingResult {
	// includedCached counts cached tokens that are already part of promptTokens;
	// separateCached counts cached tokens reported on top of promptTokens.
	var includedCached, separateCached int
	if cacheUsage != nil {
		includedCached = cacheUsage.CachedTokens
		separateCached = cacheUsage.CacheReadInputTokens
	}

	// A cached count larger than the prompt count cannot be a subset of it, so
	// the provider must have reported non-cached prompt tokens only. Treat the
	// whole count as separate rather than producing a negative non-cached count.
	if includedCached > promptTokens {
		separateCached += includedCached
		includedCached = 0
	}

	cachedTokens := includedCached + separateCached
	nonCachedTokens := promptTokens - includedCached

	// Report the true prompt size: separately reported cached tokens were not
	// part of the provider's prompt count, so add them in.
	promptTokens += separateCached

	// Non-cached prompt tokens at the full input rate.
	inputCost := costInfo.Input * float64(nonCachedTokens) / 1_000_000

	// Cached tokens at the cache-read rate, or the full input rate when the
	// model has no cache pricing.
	var cachedInputCost float64
	if cachedTokens > 0 {
		cacheRate := costInfo.CacheRead
		if cacheRate <= 0 {
			cacheRate = costInfo.Input
		}
		cachedInputCost = cacheRate * float64(cachedTokens) / 1_000_000
	}

	outputCost := costInfo.Output * float64(completionTokens) / 1_000_000

	return BillingResult{
		Provider:         provider,
		Model:            model,
		PromptTokens:     promptTokens,
		CompletionTokens: completionTokens,
		CachedTokens:     cachedTokens,
		TotalTokens:      promptTokens + completionTokens,
		InputCost:        inputCost,
		CachedInputCost:  cachedInputCost,
		OutputCost:       outputCost,
		TotalCost:        inputCost + cachedInputCost + outputCost,
	}
}