diff --git a/deploy/src/db.ts b/deploy/src/db.ts
index 5e8f122..5373c4c 100644
--- a/deploy/src/db.ts
+++ b/deploy/src/db.ts
@@ -66,6 +66,7 @@ export class ConfigDB extends KeysDbD1 {
providers: providersWithKeys,
routingGroups,
otelSettings: user?.otel ?? project.otel,
+ cacheEnabled: keyInfo.cacheEnabled,
}
}
}
diff --git a/deploy/src/index.ts b/deploy/src/index.ts
index c9d868c..07fe94e 100644
--- a/deploy/src/index.ts
+++ b/deploy/src/index.ts
@@ -16,7 +16,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { env } from 'cloudflare:workers'
-import { type GatewayOptions, gatewayFetch, LimitDbD1 } from '@pydantic/ai-gateway'
+import { type GatewayOptions, gatewayFetch, KVCacheStorage, LimitDbD1 } from '@pydantic/ai-gateway'
import { instrument } from '@pydantic/logfire-cf-workers'
import logfire from 'logfire'
import { config } from './config'
@@ -40,6 +40,7 @@ const handler = {
kv: env.KV,
kvVersion: await hash(JSON.stringify(config)),
subFetch: fetch,
+ cache: { storage: new KVCacheStorage(env.KV) },
}
try {
return await gatewayFetch(request, url, ctx, gatewayEnv)
diff --git a/deploy/src/types.ts b/deploy/src/types.ts
index 62ddb84..64a1344 100644
--- a/deploy/src/types.ts
+++ b/deploy/src/types.ts
@@ -44,4 +44,5 @@ export interface ApiKey {
spendingLimitMonthly?: number
spendingLimitTotal?: number
providers: ProviderKey[] | '__all__'
+ cacheEnabled?: boolean
}
diff --git a/gateway/src/gateway.ts b/gateway/src/gateway.ts
index 2b38dd0..a49fc91 100644
--- a/gateway/src/gateway.ts
+++ b/gateway/src/gateway.ts
@@ -3,6 +3,7 @@ import { type GatewayOptions, noopLimiter } from '.'
import { apiKeyAuth, setApiKeyCache } from './auth'
import { currentScopeIntervals, type ExceededScope, endOfMonth, endOfWeek, type SpendScope } from './db'
import { type HandlerResponse, RequestHandler } from './handler'
+import { CacheMiddleware } from './middleware/cache'
import { OtelTrace } from './otel'
import { genAiOtelAttributes } from './otel/attributes'
import type { ApiKeyInfo, ProviderProxy } from './types'
@@ -174,6 +175,11 @@ export async function gatewayWithLimiter(
const otel = new OtelTrace(request, apiKeyInfo.otelSettings, options)
+ const middlewares = [...(options.proxyMiddlewares ?? [])]
+ if (options.cache) {
+ middlewares.push(new CacheMiddleware({ storage: options.cache.storage }))
+ }
+
let result: HandlerResponse | null = null
for (const providerProxy of providerProxies) {
@@ -187,7 +193,7 @@ export async function gatewayWithLimiter(
apiKeyInfo,
restOfPath,
otelSpan,
- middlewares: options.proxyMiddlewares,
+ middlewares,
})
try {
diff --git a/gateway/src/index.ts b/gateway/src/index.ts
index bc9a8a2..43eb398 100644
--- a/gateway/src/index.ts
+++ b/gateway/src/index.ts
@@ -18,6 +18,7 @@ import logfire from 'logfire'
import type { KeysDb, LimitDb } from './db'
import { gateway } from './gateway'
import type { Middleware, Next } from './handler'
+import type { CacheStorage as GatewayCacheStorage } from './middleware/storage'
import type { RateLimiter } from './rateLimiter'
import { refreshGenaiPrices } from './refreshGenaiPrices'
import type { SubFetch } from './types'
@@ -27,6 +28,8 @@ export { changeProjectState as setProjectState, deleteApiKeyCache, setApiKeyCach
export type { Middleware, Next }
export * from './db'
export type { RequestHandler } from './handler'
+export { CacheMiddleware, type CacheOptions } from './middleware/cache'
+export { type CachedResponse, type CacheStorage, KVCacheStorage } from './middleware/storage'
export * from './rateLimiter'
export * from './types'
@@ -42,6 +45,8 @@ export interface GatewayOptions {
proxyPrefixLength?: number
/** proxyMiddlewares: perform actions before and after the request is made to the providers */
proxyMiddlewares?: Middleware[]
+ /** Response cache configuration: provide a storage backend; responses are cached only for API keys with cacheEnabled set */
+ cache?: { storage: GatewayCacheStorage }
}
export async function gatewayFetch(
diff --git a/gateway/src/middleware/cache.ts b/gateway/src/middleware/cache.ts
new file mode 100644
index 0000000..7fced8f
--- /dev/null
+++ b/gateway/src/middleware/cache.ts
@@ -0,0 +1,160 @@
+import logfire from 'logfire'
+import type { HandlerResponse, Middleware, Next, RequestHandler } from '../handler'
+import type { CachedResponse, CacheStorage as GatewayCacheStorage } from './storage'
+
+export interface CacheOptions {
+ storage: GatewayCacheStorage
+}
+
+export class CacheMiddleware implements Middleware {
+ private options: CacheOptions
+
+ constructor(options: CacheOptions) {
+ this.options = options
+ }
+
+ dispatch(next: Next): Next {
+ return async (handler: RequestHandler) => {
+ if (!handler.apiKeyInfo.cacheEnabled) {
+ return await next(handler)
+ }
+
+ const { method, url, headers } = handler.request
+ // Clone the request to read the body without consuming the original
+ const requestBody = await handler.request.clone().text()
+ const requestUrl = new URL(url)
+ requestUrl.pathname = handler.restOfPath
+ const path = requestUrl.toString()
+
+ const apiKeyId = handler.apiKeyInfo.id
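+ // Cache key: SHA-256 over api key id, method, URL and body, so entries are scoped per API key and request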
+ const hash = await this.calculateHash(method, path, requestBody, apiKeyId)
+
+ const shouldBypassCache = this.shouldBypassCache(headers)
+
+ if (!shouldBypassCache) {
+ const cached = await this.getCachedResponse(hash)
+
+ if (cached) {
+ logfire.info('Cache hit', { hash, apiKeyId: handler.apiKeyInfo.id })
+ return this.toCachedHandlerResponse(requestBody, cached)
+ }
+ }
+
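+ // Miss or bypass: forward the request to the provider, then store the response out-of-band if it is cacheable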
+ const result = await next(handler)
+
+ const shouldStoreCache = this.shouldStoreCache(handler.request, result)
+ if (shouldStoreCache) {
+ handler.runAfter('cache-store', this.storeCachedResponse(hash, result))
+ }
+
+ return this.addCacheHeaders(result, shouldBypassCache ? 'BYPASS' : 'MISS')
+ }
+ }
+
+ private shouldBypassCache(requestHeaders: Headers): boolean {
+ const cacheControl = requestHeaders.get('cache-control')
+ return cacheControl?.includes('no-cache') || cacheControl?.includes('no-store') || false
+ }
+
+ private shouldStoreCache(request: Request, result: HandlerResponse): boolean {
+ const cacheControl = request.headers.get('cache-control')
+
+ if (cacheControl?.includes('no-store')) {
+ return false
+ }
+
+ if ('responseStream' in result) {
+ return false
+ }
+
+ if ('error' in result || 'unexpectedStatus' in result || 'response' in result || 'modelNotFound' in result) {
+ return false
+ }
+
+ return true
+ }
+
+ private async calculateHash(method: string, url: string, body: string, apiKeyId: number): Promise<string> {
+ const data = `${apiKeyId}:${method}:${url}:${body}`
+ const encoder = new TextEncoder()
+ const dataBuffer = encoder.encode(data)
+ const hashBuffer = await crypto.subtle.digest('SHA-256', dataBuffer)
+ const hashArray = Array.from(new Uint8Array(hashBuffer))
+ const hashHex = hashArray.map((b) => b.toString(16).padStart(2, '0')).join('')
+
+ return hashHex
+ }
+
+ private async getCachedResponse(hash: string): Promise<CachedResponse | null> {
+ try {
+ return await this.options.storage.get(hash)
+ } catch (error) {
+ logfire.reportError('Error getting cached response', error as Error, { hash })
+ return null
+ }
+ }
+
+ private async storeCachedResponse(hash: string, result: HandlerResponse): Promise<void> {
+ if (!('successStatus' in result) || 'responseStream' in result) {
+ return
+ }
+
+ try {
+ const { successStatus, responseHeaders, responseBody, requestModel, responseModel } = result
+
+ const headers: Record<string, string> = {}
+ responseHeaders.forEach((value, key) => {
+ headers[key] = value
+ })
+
+ const cached: CachedResponse = {
+ status: successStatus,
+ headers,
+ body: responseBody,
+ timestamp: Date.now(),
+ requestModel,
+ responseModel,
+ }
+
+ await this.options.storage.set(hash, cached)
+
+ const sizeBytes = new TextEncoder().encode(responseBody).length
+
+ logfire.info('Response cached', { hash, sizeBytes })
+ } catch (error) {
+ logfire.reportError('Error storing cached response', error as Error, { hash })
+ }
+ }
+
+ private toCachedHandlerResponse(
+ requestBody: string,
+ cached: CachedResponse,
+ ): Extract<HandlerResponse, { successStatus: number }> {
+ const responseHeaders = new Headers(cached.headers)
+ const age = Math.floor((Date.now() - cached.timestamp) / 1000)
+
+ responseHeaders.set('Age', age.toString())
+ responseHeaders.set('X-Cache-Status', 'HIT')
+
+ return {
+ successStatus: cached.status,
+ responseHeaders,
+ responseBody: cached.body,
+ requestBody,
+ requestModel: cached.requestModel,
+ responseModel: cached.responseModel ?? 'unknown',
+ usage: { input_tokens: 0, output_tokens: 0 },
+ cost: 0,
+ }
+ }
+
+ private addCacheHeaders(result: HandlerResponse, status: 'HIT' | 'MISS' | 'BYPASS'): HandlerResponse {
+ if ('responseHeaders' in result) {
+ result.responseHeaders.set('X-Cache-Status', status)
+ }
+
+ return result
+ }
+}
diff --git a/gateway/src/middleware/storage.ts b/gateway/src/middleware/storage.ts
new file mode 100644
index 0000000..ca5c187
--- /dev/null
+++ b/gateway/src/middleware/storage.ts
@@ -0,0 +1,38 @@
+export interface CachedResponse {
+ status: number
+ headers: Record<string, string>
+ body: string
+ timestamp: number
+ requestModel?: string
+ responseModel?: string
+}
+
+export interface CacheStorage {
+ get(hash: string): Promise<CachedResponse | null>
+ set(hash: string, response: CachedResponse): Promise<void>
+}
+
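+// CacheStorage backed by Workers KV: entries are keyed as "namespace:hash" and expire after ttl seconds (24 hours by default)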
+export class KVCacheStorage implements CacheStorage {
+ private kv: KVNamespace
+ private namespace: string
+ private ttl: number
+
+ constructor(kv: KVNamespace, namespace: string = 'response', ttl: number = 86400) {
+ this.kv = kv
+ this.namespace = namespace
+ this.ttl = ttl
+ }
+
+ async get(hash: string): Promise<CachedResponse | null> {
+ const kvKey = cacheKey(this.namespace, hash)
+ return await this.kv.get<CachedResponse>(kvKey, 'json')
+ }
+
+ async set(hash: string, response: CachedResponse): Promise<void> {
+ const kvKey = cacheKey(this.namespace, hash)
+ await this.kv.put(kvKey, JSON.stringify(response), { expirationTtl: this.ttl })
+ }
+}
+
+const cacheKey = (namespace: string, hash: string): string => `${namespace}:${hash}`
diff --git a/gateway/src/types.ts b/gateway/src/types.ts
index f3db208..3db368c 100644
--- a/gateway/src/types.ts
+++ b/gateway/src/types.ts
@@ -36,6 +36,7 @@ export interface ApiKeyInfo {
// among values with same priority, use weight for randomized load balancing; if missing, treat as 1
routingGroups: Record
otelSettings?: OtelSettings
+ cacheEnabled?: boolean
}
export type ProviderID =
diff --git a/gateway/test/cache.spec.ts b/gateway/test/cache.spec.ts
new file mode 100644
index 0000000..512864b
--- /dev/null
+++ b/gateway/test/cache.spec.ts
@@ -0,0 +1,121 @@
+import { describe, expect } from 'vitest'
+import { test } from './setup'
+
+const requestBody = {
+ model: 'gpt-5',
+ messages: [{ role: 'user', content: 'What is the capital of France?' }],
+ max_completion_tokens: 1200,
+}
+const requestHeaders = {
+ Authorization: 'cache-enabled',
+ 'Content-Type': 'application/json',
+ Accept: 'application/json',
+ 'Accept-Encoding': 'deflate',
+}
+
+describe('cache', () => {
+ test('should return MISS on first request', async ({ gateway }) => {
+ const { fetch } = gateway
+
+ const firstResponse = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify(requestBody),
+ })
+ expect(firstResponse.status).toBe(200)
+ expect(Object.fromEntries(firstResponse.headers.entries())).toMatchInlineSnapshot(`
+ {
+ "content-length": "564",
+ "content-type": "application/json",
+ "pydantic-ai-gateway-price-estimate": "0.0008USD",
+ "server": "uvicorn",
+ "x-cache-status": "MISS",
+ }
+ `)
+
+ const cachedResponse = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify(requestBody),
+ })
+
+ expect(cachedResponse.status).toBe(200)
+ expect(Object.fromEntries(cachedResponse.headers.entries())).toMatchInlineSnapshot(`
+ {
+ "age": "0",
+ "content-length": "564",
+ "content-type": "application/json",
+ "pydantic-ai-gateway-price-estimate": "0.0008USD",
+ "server": "uvicorn",
+ "x-cache-status": "HIT",
+ }
+ `)
+
+ // Sleep to get a different age.
+ await new Promise((resolve) => setTimeout(resolve, 1000))
+
+ const cachedResponse2 = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify(requestBody),
+ })
+ expect(Number(cachedResponse2.headers.get('Age'))).toBeGreaterThan(0)
+ })
+
+ test('should return BYPASS with cache-control: no-cache', async ({ gateway }) => {
+ const { fetch } = gateway
+
+ const response = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: { ...requestHeaders, 'Cache-Control': 'no-cache' },
+ body: JSON.stringify(requestBody),
+ })
+
+ expect(response.headers.get('X-Cache-Status')).toBe('BYPASS')
+ })
+
+ test('should return BYPASS with cache-control: no-store', async ({ gateway }) => {
+ const { fetch } = gateway
+
+ const response = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: { ...requestHeaders, 'Cache-Control': 'no-store' },
+ body: JSON.stringify(requestBody),
+ })
+
+ expect(response.headers.get('X-Cache-Status')).toBe('BYPASS')
+ })
+
+ test('should not cache streaming responses', async ({ gateway }) => {
+ const { fetch } = gateway
+
+ const response = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify({ ...requestBody, stream: true }),
+ })
+ expect(response.headers.get('X-Cache-Status')).toBe('MISS')
+ })
+
+ test('different request bodies should have different cache keys', async ({ gateway }) => {
+ const { fetch } = gateway
+
+ const response1 = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify({ ...requestBody, messages: [{ role: 'user', content: 'Message A' }] }),
+ })
+ expect(response1.headers.get('X-Cache-Status')).toBe('MISS')
+
+ const response2 = await fetch('https://example.com/openai/chat/completions', {
+ method: 'POST',
+ headers: requestHeaders,
+ body: JSON.stringify({ ...requestBody, messages: [{ role: 'user', content: 'Message B' }] }),
+ })
+ expect(response2.headers.get('X-Cache-Status')).toBe('MISS')
+
+ const body1 = await response1.json()
+ const body2 = await response2.json()
+ expect(body1).not.toEqual(body2)
+ })
+})
diff --git a/gateway/test/worker.ts b/gateway/test/worker.ts
index faade12..5cca989 100644
--- a/gateway/test/worker.ts
+++ b/gateway/test/worker.ts
@@ -4,6 +4,7 @@ import {
gatewayFetch,
type KeyStatus,
KeysDbD1,
+ KVCacheStorage,
LimitDbD1,
type Middleware,
type ProviderProxy,
@@ -43,6 +44,7 @@ export function buildGatewayEnv(
proxyPrefixLength,
proxyMiddlewares,
rateLimiter,
+ cache: { storage: new KVCacheStorage(env.KV) },
}
}
@@ -55,6 +57,7 @@ export namespace IDS {
export const keyTinyLimit = 6
export const keyFallbackTest = 7
export const keyFallbackAnthropicGoogleVertex = 8
+ export const keyCacheEnabled = 9
}
class TestKeysDB extends KeysDbD1 {
@@ -247,6 +250,23 @@ class TestKeysDB extends KeysDbD1 {
],
routingGroups: { anthropic: [{ key: 'anthropic' }, { key: 'google-vertex' }] },
}
+ case 'cache-enabled':
+ return {
+ id: IDS.keyCacheEnabled,
+ user: IDS.userDefault,
+ project: IDS.projectDefault,
+ org: IDS.orgDefault,
+ key,
+ status: 'active',
+ providers: this.allProviders,
+ routingGroups: { openai: [{ key: 'openai' }] },
+ cacheEnabled: true,
+ otelSettings: {
+ writeToken: 'write-token',
+ baseUrl: 'https://logfire.pydantic.dev',
+ exporterProtocol: 'http/json',
+ },
+ }
default:
return null
}
diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml
new file mode 100644
index 0000000..5f8f10e
--- /dev/null
+++ b/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml
@@ -0,0 +1,86 @@
+interactions:
+- request:
+ body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital
+ of France?"}],"max_completion_tokens":1200}'
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '118'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - python-httpx/0.28.1
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAAA3SSTU8CMRCG7/srmp53yYLyEW5ITFAveCFRQza1HaDQbZt21qiE/25aPrpGvPQw
+ z8yb953OPiOESkHHhPINQ15bVUx3i+3L3WT7PnlUg9nzN586qxf3D/O7p9cZzcOEed8Cx/NUh5va
+ KkBp9BFzBwwhqHaHg36v7HbLUQS1EaDC2Npi0S96Za9flKOiHJ7mNkZy8HRM3jJCCNnHNzjUAj7p
+ mJT5uVKD92wNdHxpIoQ6o0KFMu+lR6aR5glyoxF0ND1nTvpOGzpYNZ4Fa7pRqgWY1gZZiBZtLU/k
+ cDGyklr6TeWAeaODuEdjaaSHjJBlDNb88kqtM7XFCs0Oomz35ihH0yITHPZPEA0yleqjUX5FrRKA
+ TCrfWgzljG9ApMm0RdYIaVoga2X7a+aa9jG31OukMrj9Vz8BzsEiiMo6EJL/TpzaHIQ7+6/tsuTo
+ mHpwH5JDhRJc+AgBK9ao4w1Q/+UR6mol9RqcdTIeQvjr7JD9AAAA//8DAOJaZgUFAwAA
+ headers:
+ CF-RAY:
+ - 9aaca9d12fb3cc4d-MAD
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 08 Dec 2025 13:38:31 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - pydantic-28gund
+ openai-processing-ms:
+ - '2410'
+ openai-project:
+ - proj_dKobscVY9YJxeEaDJen54e3d
+ openai-version:
+ - '2020-10-01'
+ x-envoy-upstream-service-time:
+ - '2428'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '15000'
+ x-ratelimit-limit-tokens:
+ - '40000000'
+ x-ratelimit-remaining-requests:
+ - '14999'
+ x-ratelimit-remaining-tokens:
+ - '39999990'
+ x-ratelimit-reset-requests:
+ - 4ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_276a4c432c6c4f04902eca21289abb10
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml
new file mode 100644
index 0000000..e9e67f2
--- /dev/null
+++ b/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml
@@ -0,0 +1,88 @@
+interactions:
+- request:
+ body: '{"model":"gpt-5","messages":[{"role":"user","content":"Message A"}],"max_completion_tokens":1200}'
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '97'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - python-httpx/0.28.1
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAAA3STz04bMRDG73mKkU+tlKBk2/Anl4pSlapSVYlWtFJB0dQ7mxi8thnPQhcUiQcp
+ L8eTVHYCu6hw8WF+nm/G841vBgDKlGoGSi9RdB3s6OD8OB4eXRz/bL9XxZuz6Wcz4eKQ8OuF/DhS
+ w5Thf5+RloesLe3rYEmMd2usmVAoqU52tqfFeLL9djeD2pdkU9oiyGg6KsbFdDTeHY13NnlLbzRF
+ NYNfAwCAm3ymDl1Jf9QMxsOHSE0x4oLU7PESgGJvU0RhjCYKOlHDDmrvhFxu+pO/givf2BJa34A1
+ 5wQ1gXhYoistwf3t3y9rfdi/v717ByduBB8YKwHPYOrA/pLASAp/a+oa2VxTQsLookV5gPsObXud
+ pF3m2iIbaRM6yDMCBKZgW3jVq/n+/vbuddYWz0kKKs+QZBmYKmJymk7ciftIZKFiyr0HjEIgS4Kq
+ sRY2A0pVYyBtqjazhUe71R8LU9VETKa4xtoeQOe8YDI1G3K6IatHCyrjTFzOmTB6l8YaxQeV6WoA
+ cJotbZ64pAL7Oshc/Dll2d21muo2qGPF9mRDxQvaPtgbPiM3L0nQ2NjbCaVRL6nsUrsFwqY0vgcG
+ vcf9385z2uuHG7foVCZ7xYsFOqA1BaFyHphKo58+urvGlP7YS9cex5xbVpH40miaiyFOVpRUYWPX
+ +69iG4XqeWXcgjiwyZ8guT1YDf4BAAD//wMApzAU5wEEAAA=
+ headers:
+ CF-RAY:
+ - 9aacb6fae9ca24b7-MAD
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 08 Dec 2025 13:47:35 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - pydantic-28gund
+ openai-processing-ms:
+ - '7188'
+ openai-project:
+ - proj_dKobscVY9YJxeEaDJen54e3d
+ openai-version:
+ - '2020-10-01'
+ x-envoy-upstream-service-time:
+ - '7427'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '15000'
+ x-ratelimit-limit-tokens:
+ - '40000000'
+ x-ratelimit-remaining-requests:
+ - '14999'
+ x-ratelimit-remaining-tokens:
+ - '39999994'
+ x-ratelimit-reset-requests:
+ - 4ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_6fe764a946564a8e990747a7b308de1e
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml
new file mode 100644
index 0000000..2b49a4d
--- /dev/null
+++ b/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml
@@ -0,0 +1,90 @@
+interactions:
+- request:
+ body: '{"model":"gpt-5","messages":[{"role":"user","content":"Message B"}],"max_completion_tokens":1200}'
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '97'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - python-httpx/0.28.1
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAAA3RTy04bQRC8+ytacwJpDbYj28SXiKBEoCgCBSmXgKzOTK938Dw2M70Yg5D8G5HC
+ z/lLotk17KKQyxy6umr6Uf3QAxBaiRkIWSBLW5r+yfJ7PB9OTn/dVLefP53fV1/U8Mau7MidHp+J
+ LDH8zxuS/Mw6kN6Whlh718AyEDIl1eF0Mh4NhpPxpAasV2QSbVFyf9wfDUbj/uCoP5jueIXXkqKY
+ wY8eAMBD/aYKnaI7MYNB9hyxFCMuSMxekgBE8CZFBMaoI6NjkbWg9I7J1UWf+MooWPsKYoGBgAuC
+ vDIGmO4YfA7bzZ+vzQfwcbt5AnQKVgVy4mw3vxUYvSSwBOxBeVhpLkDzhyt35c7LNIcIZyDRQUGm
+ rOHZlevDRfA+D4QKfABSmiH3AaTBoHl9yN5RyvpGq6CZYM/6QCnDoslAeid1pAzyoMkps86AWB7s
+ J8ZlZS0GfU9JlwO6aJBrrZOAOQNCoNKs64C3ZWpZc1N1p9Pj7eYpdXBhCCOBdtJUigDdOn0eOaB2
+ HGEPK6XJScogVZyBIbfgIgNLSle2GQ1Z1Obw0qBc7kP0u2kwauMDaD7oLiZQXkVMtnCVMR0AnfOM
+ 9TiTJa53yOOLCXLtdCzmgTB6lxYb2ZeiRh97ANe1qapXPhFl8LbkOfsl1bJHjZpoPdxi78bvdyh7
+ RtMBJtPsDbm5otRi7LhSSJQFqZbaWjjN0XeAXqe5f8t5S7tpXLtFqzJqLu3ND1pASiqZ1LwMpLR8
+ 3XSbFihd+f/SXsZclywihVstac6aQlqFohwr01ygiOvIZOe5dgsKZdD1GaZt9x57fwEAAP//AwCV
+ ZWyCgwQAAA==
+ headers:
+ CF-RAY:
+ - 9aacb72ce91224b7-MAD
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 08 Dec 2025 13:47:45 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - pydantic-28gund
+ openai-processing-ms:
+ - '8718'
+ openai-project:
+ - proj_dKobscVY9YJxeEaDJen54e3d
+ openai-version:
+ - '2020-10-01'
+ x-envoy-upstream-service-time:
+ - '8883'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '15000'
+ x-ratelimit-limit-tokens:
+ - '40000000'
+ x-ratelimit-remaining-requests:
+ - '14999'
+ x-ratelimit-remaining-tokens:
+ - '39999994'
+ x-ratelimit-reset-requests:
+ - 4ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_79b538eabeb64e568ddf746f7b40f4d8
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml
new file mode 100644
index 0000000..ad7cc1b
--- /dev/null
+++ b/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml
@@ -0,0 +1,80 @@
+interactions:
+- request:
+ body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital
+ of France?"}],"max_completion_tokens":1}'
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '115'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - python-httpx/0.28.1
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: "{\n \"error\": {\n \"message\": \"Could not finish the message
+ because max_tokens or model output limit was reached. Please try again with
+ higher max_tokens.\",\n \"type\": \"invalid_request_error\",\n \"param\":
+ null,\n \"code\": null\n }\n}"
+ headers:
+ CF-RAY:
+ - 9aaca9bfaaf2cc4d-MAD
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '235'
+ Content-Type:
+ - application/json
+ Date:
+ - Mon, 08 Dec 2025 13:38:26 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - pydantic-28gund
+ openai-processing-ms:
+ - '306'
+ openai-project:
+ - proj_dKobscVY9YJxeEaDJen54e3d
+ openai-version:
+ - '2020-10-01'
+ x-envoy-upstream-service-time:
+ - '449'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '15000'
+ x-ratelimit-limit-tokens:
+ - '40000000'
+ x-ratelimit-remaining-requests:
+ - '14999'
+ x-ratelimit-remaining-tokens:
+ - '39999990'
+ x-ratelimit-reset-requests:
+ - 4ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_65f02d8d9a39474ab7ba122fc0ba2927
+ status:
+ code: 400
+ message: Bad Request
+version: 1
diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml
new file mode 100644
index 0000000..5f34dca
--- /dev/null
+++ b/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml
@@ -0,0 +1,95 @@
+interactions:
+- request:
+ body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital
+ of France?"}],"max_completion_tokens":1200,"stream":true,"stream_options":{"include_usage":true}}'
+ headers:
+ accept:
+ - '*/*'
+ accept-encoding:
+ - gzip, deflate
+ connection:
+ - keep-alive
+ content-length:
+ - '172'
+ content-type:
+ - application/json
+ host:
+ - api.openai.com
+ user-agent:
+ - python-httpx/0.28.1
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null,"obfuscation":"PIQTUtyOdh"}
+
+
+ data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Paris"},"finish_reason":null}],"usage":null,"obfuscation":"re3rLzM"}
+
+
+ data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"finish_reason":null}],"usage":null,"obfuscation":"sRTMX9sXRGg"}
+
+
+ data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null,"obfuscation":"q16gDt"}
+
+
+ data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"xR2uRrbBUrh"}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ CF-RAY:
+ - 9aacae5afd4e3ed1-MAD
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Mon, 08 Dec 2025 13:41:36 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ cf-cache-status:
+ - DYNAMIC
+ openai-organization:
+ - pydantic-28gund
+ openai-processing-ms:
+ - '1135'
+ openai-project:
+ - proj_dKobscVY9YJxeEaDJen54e3d
+ openai-version:
+ - '2020-10-01'
+ x-envoy-upstream-service-time:
+ - '1282'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '15000'
+ x-ratelimit-limit-tokens:
+ - '40000000'
+ x-ratelimit-remaining-requests:
+ - '14999'
+ x-ratelimit-remaining-tokens:
+ - '39999990'
+ x-ratelimit-reset-requests:
+ - 4ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_33fa052094294105968f87c4e65778f4
+ status:
+ code: 200
+ message: OK
+version: 1