From 75c31edbd28ac549cc1a0e64c6dfe8300d06a1c4 Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Mon, 8 Dec 2025 14:49:51 +0100 Subject: [PATCH] Support cache on response --- deploy/src/db.ts | 1 + deploy/src/index.ts | 3 +- deploy/src/types.ts | 1 + gateway/src/gateway.ts | 8 +- gateway/src/index.ts | 5 + gateway/src/middleware/cache.ts | 158 ++++++++++++++++++ gateway/src/middleware/storage.ts | 37 ++++ gateway/src/types.ts | 1 + gateway/test/cache.spec.ts | 121 ++++++++++++++ gateway/test/worker.ts | 20 +++ ...87cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml | 86 ++++++++++ ...8ce863c757e7d9339cd0badcab66d33ec5714.yaml | 88 ++++++++++ ...406fc9feeafb5264cc083cb1083a9c838e80c.yaml | 90 ++++++++++ ...b8273f9e45df86a46af3b6a1ef014074413a8.yaml | 80 +++++++++ ...383b111fa3b75552a1ad141a59bf64e6046cb.yaml | 95 +++++++++++ 15 files changed, 792 insertions(+), 2 deletions(-) create mode 100644 gateway/src/middleware/cache.ts create mode 100644 gateway/src/middleware/storage.ts create mode 100644 gateway/test/cache.spec.ts create mode 100644 proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml create mode 100644 proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml create mode 100644 proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml create mode 100644 proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml create mode 100644 proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml diff --git a/deploy/src/db.ts b/deploy/src/db.ts index 5e8f122..5373c4c 100644 --- a/deploy/src/db.ts +++ b/deploy/src/db.ts @@ -66,6 +66,7 @@ export class ConfigDB extends KeysDbD1 { providers: providersWithKeys, routingGroups, otelSettings: user?.otel ?? project.otel, + cacheEnabled: keyInfo.cacheEnabled, } } } diff --git a/deploy/src/index.ts b/deploy/src/index.ts index c9d868c..07fe94e 100644 --- a/deploy/src/index.ts +++ b/deploy/src/index.ts @@ -16,7 +16,7 @@ along with this program. If not, see . */ import { env } from 'cloudflare:workers' -import { type GatewayOptions, gatewayFetch, LimitDbD1 } from '@pydantic/ai-gateway' +import { type GatewayOptions, gatewayFetch, KVCacheStorage, LimitDbD1 } from '@pydantic/ai-gateway' import { instrument } from '@pydantic/logfire-cf-workers' import logfire from 'logfire' import { config } from './config' @@ -40,6 +40,7 @@ const handler = { kv: env.KV, kvVersion: await hash(JSON.stringify(config)), subFetch: fetch, + cache: { storage: new KVCacheStorage(env.KV) }, } try { return await gatewayFetch(request, url, ctx, gatewayEnv) diff --git a/deploy/src/types.ts b/deploy/src/types.ts index 62ddb84..64a1344 100644 --- a/deploy/src/types.ts +++ b/deploy/src/types.ts @@ -44,4 +44,5 @@ export interface ApiKey { spendingLimitMonthly?: number spendingLimitTotal?: number providers: ProviderKey[] | '__all__' + cacheEnabled?: boolean } diff --git a/gateway/src/gateway.ts b/gateway/src/gateway.ts index 2b38dd0..a49fc91 100644 --- a/gateway/src/gateway.ts +++ b/gateway/src/gateway.ts @@ -3,6 +3,7 @@ import { type GatewayOptions, noopLimiter } from '.' 
import { apiKeyAuth, setApiKeyCache } from './auth' import { currentScopeIntervals, type ExceededScope, endOfMonth, endOfWeek, type SpendScope } from './db' import { type HandlerResponse, RequestHandler } from './handler' +import { CacheMiddleware } from './middleware/cache' import { OtelTrace } from './otel' import { genAiOtelAttributes } from './otel/attributes' import type { ApiKeyInfo, ProviderProxy } from './types' @@ -174,6 +175,11 @@ export async function gatewayWithLimiter( const otel = new OtelTrace(request, apiKeyInfo.otelSettings, options) + const middlewares = options.proxyMiddlewares ?? [] + if (options.cache) { + middlewares.push(new CacheMiddleware({ storage: options.cache.storage })) + } + let result: HandlerResponse | null = null for (const providerProxy of providerProxies) { @@ -187,7 +193,7 @@ export async function gatewayWithLimiter( apiKeyInfo, restOfPath, otelSpan, - middlewares: options.proxyMiddlewares, + middlewares, }) try { diff --git a/gateway/src/index.ts b/gateway/src/index.ts index bc9a8a2..43eb398 100644 --- a/gateway/src/index.ts +++ b/gateway/src/index.ts @@ -18,6 +18,7 @@ import logfire from 'logfire' import type { KeysDb, LimitDb } from './db' import { gateway } from './gateway' import type { Middleware, Next } from './handler' +import type { CacheStorage as GatewayCacheStorage } from './middleware/storage' import type { RateLimiter } from './rateLimiter' import { refreshGenaiPrices } from './refreshGenaiPrices' import type { SubFetch } from './types' @@ -27,6 +28,8 @@ export { changeProjectState as setProjectState, deleteApiKeyCache, setApiKeyCach export type { Middleware, Next } export * from './db' export type { RequestHandler } from './handler' +export { CacheMiddleware, type CacheOptions } from './middleware/cache' +export { type CachedResponse, type CacheStorage, KVCacheStorage } from './middleware/storage' export * from './rateLimiter' export * from './types' @@ -42,6 +45,8 @@ export interface GatewayOptions { proxyPrefixLength?: number /** proxyMiddlewares: perform actions before and after the request is made to the providers */ proxyMiddlewares?: Middleware[] + /** Cache configuration */ + cache?: { storage: GatewayCacheStorage } } export async function gatewayFetch( diff --git a/gateway/src/middleware/cache.ts b/gateway/src/middleware/cache.ts new file mode 100644 index 0000000..7fced8f --- /dev/null +++ b/gateway/src/middleware/cache.ts @@ -0,0 +1,158 @@ +import logfire from 'logfire' +import type { HandlerResponse, Middleware, Next, RequestHandler } from '../handler' +import type { CachedResponse, CacheStorage as GatewayCacheStorage } from './storage' + +export interface CacheOptions { + storage: GatewayCacheStorage +} + +export class CacheMiddleware implements Middleware { + private options: CacheOptions + + constructor(options: CacheOptions) { + this.options = options + } + + dispatch(next: Next): Next { + return async (handler: RequestHandler) => { + if (!handler.apiKeyInfo.cacheEnabled) { + return await next(handler) + } + + const { method, url, headers } = handler.request + // Clone the request to read the body without consuming the original + const requestBody = await handler.request.clone().text() + const requestUrl = new URL(url) + requestUrl.pathname = handler.restOfPath + const path = requestUrl.toString() + + const apiKeyId = handler.apiKeyInfo.id + const hash = await this.calculateHash(method, path, requestBody, apiKeyId) + + const shouldBypassCache = this.shouldBypassCache(headers) + + if (!shouldBypassCache) { + const cached = 
await this.getCachedResponse(hash) + + if (cached) { + logfire.info('Cache hit', { hash, apiKeyId: handler.apiKeyInfo.id }) + return this.toCachedHandlerResponse(requestBody, cached) + } + } + + const result = await next(handler) + + const shouldStoreCache = this.shouldStoreCache(handler.request, result) + if (shouldStoreCache) { + handler.runAfter('cache-store', this.storeCachedResponse(hash, result)) + } + + return this.addCacheHeaders(result, shouldBypassCache ? 'BYPASS' : 'MISS') + } + } + + private shouldBypassCache(requestHeaders: Headers): boolean { + const cacheControl = requestHeaders.get('cache-control') + return cacheControl?.includes('no-cache') || cacheControl?.includes('no-store') || false + } + + private shouldStoreCache(request: Request, result: HandlerResponse): boolean { + const cacheControl = request.headers.get('cache-control') + + if (cacheControl?.includes('no-store')) { + return false + } + + if ('responseStream' in result) { + return false + } + + if ('error' in result || 'unexpectedStatus' in result || 'response' in result || 'modelNotFound' in result) { + return false + } + + return true + } + + private async calculateHash(method: string, url: string, body: string, apiKeyId: number): Promise<string> { + const data = `${apiKeyId}:${method}:${url}:${body}` + const encoder = new TextEncoder() + const dataBuffer = encoder.encode(data) + const hashBuffer = await crypto.subtle.digest('SHA-256', dataBuffer) + const hashArray = Array.from(new Uint8Array(hashBuffer)) + const hashHex = hashArray.map((b) => b.toString(16).padStart(2, '0')).join('') + + return hashHex + } + + private async getCachedResponse(hash: string): Promise<CachedResponse | null> { + try { + return await this.options.storage.get(hash) + } catch (error) { + logfire.reportError('Error getting cached response', error as Error, { hash }) + return null + } + } + + private async storeCachedResponse(hash: string, result: HandlerResponse): Promise<void> { + if (!('successStatus' in result) || 'responseStream' in result) { + return + } + + try { + const { successStatus, responseHeaders, responseBody, requestModel, responseModel } = result + + const headers: Record<string, string> = {} + responseHeaders.forEach((value, key) => { + headers[key] = value + }) + + const cached: CachedResponse = { + status: successStatus, + headers, + body: responseBody, + timestamp: Date.now(), + requestModel, + responseModel, + } + + await this.options.storage.set(hash, cached) + + const sizeBytes = new TextEncoder().encode(responseBody).length + + logfire.info('Response cached', { hash, sizeBytes }) + } catch (error) { + logfire.reportError('Error storing cached response', error as Error, { hash }) + } + } + + private toCachedHandlerResponse( + requestBody: string, + cached: CachedResponse, + ): Extract<HandlerResponse, { successStatus: number }> { + const responseHeaders = new Headers(cached.headers) + const age = Math.floor((Date.now() - cached.timestamp) / 1000) + + responseHeaders.set('Age', age.toString()) + responseHeaders.set('X-Cache-Status', 'HIT') + + return { + successStatus: cached.status, + responseHeaders, + responseBody: cached.body, + requestBody, + requestModel: cached.requestModel, + responseModel: cached.responseModel ?? 
'unknown', + usage: { input_tokens: 0, output_tokens: 0 }, + cost: 0, + } + } + + private addCacheHeaders(result: HandlerResponse, status: 'HIT' | 'MISS' | 'BYPASS'): HandlerResponse { + if ('responseHeaders' in result) { + result.responseHeaders.set('X-Cache-Status', status) + } + + return result + } +} diff --git a/gateway/src/middleware/storage.ts b/gateway/src/middleware/storage.ts new file mode 100644 index 0000000..ca5c187 --- /dev/null +++ b/gateway/src/middleware/storage.ts @@ -0,0 +1,37 @@ +export interface CachedResponse { + status: number + headers: Record<string, string> + body: string + timestamp: number + requestModel?: string + responseModel?: string +} + +export interface CacheStorage { + get(hash: string): Promise<CachedResponse | null> + set(hash: string, response: CachedResponse): Promise<void> +} + +export class KVCacheStorage implements CacheStorage { + private kv: KVNamespace + private namespace: string + private ttl: number + + constructor(kv: KVNamespace, namespace: string = 'response', ttl: number = 86400) { + this.kv = kv + this.namespace = namespace + this.ttl = ttl + } + + async get(hash: string): Promise<CachedResponse | null> { + const kvKey = cacheKey(this.namespace, hash) + return await this.kv.get(kvKey, 'json') + } + + async set(hash: string, response: CachedResponse): Promise<void> { + const kvKey = cacheKey(this.namespace, hash) + await this.kv.put(kvKey, JSON.stringify(response), { expirationTtl: this.ttl }) + } +} + +const cacheKey = (namespace: string, hash: string): string => `${namespace}:${hash}` diff --git a/gateway/src/types.ts b/gateway/src/types.ts index f3db208..3db368c 100644 --- a/gateway/src/types.ts +++ b/gateway/src/types.ts @@ -36,6 +36,7 @@ export interface ApiKeyInfo { // among values with same priority, use weight for randomized load balancing; if missing, treat as 1 routingGroups: Record otelSettings?: OtelSettings + cacheEnabled?: boolean } export type ProviderID = diff --git a/gateway/test/cache.spec.ts b/gateway/test/cache.spec.ts new file mode 100644 index 0000000..512864b --- /dev/null +++ b/gateway/test/cache.spec.ts @@ -0,0 +1,121 @@ +import { describe, expect } from 'vitest' +import { test } from './setup' + +const requestBody = { + model: 'gpt-5', + messages: [{ role: 'user', content: 'What is the capital of France?' 
}], + max_completion_tokens: 1200, +} +const requestHeaders = { + Authorization: 'cache-enabled', + 'Content-Type': 'application/json', + Accept: 'application/json', + 'Accept-Encoding': 'deflate', +} + +describe('cache', () => { + test('should return MISS on first request', async ({ gateway }) => { + const { fetch } = gateway + + const firstResponse = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify(requestBody), + }) + expect(firstResponse.status).toBe(200) + expect(Object.fromEntries(firstResponse.headers.entries())).toMatchInlineSnapshot(` + { + "content-length": "564", + "content-type": "application/json", + "pydantic-ai-gateway-price-estimate": "0.0008USD", + "server": "uvicorn", + "x-cache-status": "MISS", + } + `) + + const cachedResponse = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify(requestBody), + }) + + expect(cachedResponse.status).toBe(200) + expect(Object.fromEntries(cachedResponse.headers.entries())).toMatchInlineSnapshot(` + { + "age": "0", + "content-length": "564", + "content-type": "application/json", + "pydantic-ai-gateway-price-estimate": "0.0008USD", + "server": "uvicorn", + "x-cache-status": "HIT", + } + `) + + // Sleep to get a different age. + await new Promise((resolve) => setTimeout(resolve, 1000)) + + const cachedResponse2 = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify(requestBody), + }) + expect(Number(cachedResponse2.headers.get('Age'))).toBeGreaterThan(0) + }) + + test('should return BYPASS with cache-control: no-cache', async ({ gateway }) => { + const { fetch } = gateway + + const response = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: { ...requestHeaders, 'Cache-Control': 'no-cache' }, + body: JSON.stringify(requestBody), + }) + + expect(response.headers.get('X-Cache-Status')).toBe('BYPASS') + }) + + test('should return BYPASS with cache-control: no-store', async ({ gateway }) => { + const { fetch } = gateway + + const response = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: { ...requestHeaders, 'Cache-Control': 'no-store' }, + body: JSON.stringify(requestBody), + }) + + expect(response.headers.get('X-Cache-Status')).toBe('BYPASS') + }) + + test('should not cache streaming responses', async ({ gateway }) => { + const { fetch } = gateway + + const response = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify({ ...requestBody, stream: true }), + }) + expect(response.headers.get('X-Cache-Status')).toBe('MISS') + }) + + test('different request bodies should have different cache keys', async ({ gateway }) => { + const { fetch } = gateway + + const response1 = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify({ ...requestBody, messages: [{ role: 'user', content: 'Message A' }] }), + }) + expect(response1.headers.get('X-Cache-Status')).toBe('MISS') + + const response2 = await fetch('https://example.com/openai/chat/completions', { + method: 'POST', + headers: requestHeaders, + body: JSON.stringify({ ...requestBody, messages: [{ role: 'user', content: 'Message B' }] }), + }) + expect(response2.headers.get('X-Cache-Status')).toBe('MISS') + + const body1 = await response1.json() + 
const body2 = await response2.json() + expect(body1).not.toBe(body2) + }) +}) diff --git a/gateway/test/worker.ts b/gateway/test/worker.ts index faade12..5cca989 100644 --- a/gateway/test/worker.ts +++ b/gateway/test/worker.ts @@ -4,6 +4,7 @@ import { gatewayFetch, type KeyStatus, KeysDbD1, + KVCacheStorage, LimitDbD1, type Middleware, type ProviderProxy, @@ -43,6 +44,7 @@ export function buildGatewayEnv( proxyPrefixLength, proxyMiddlewares, rateLimiter, + cache: { storage: new KVCacheStorage(env.KV) }, } } @@ -55,6 +57,7 @@ export namespace IDS { export const keyTinyLimit = 6 export const keyFallbackTest = 7 export const keyFallbackAnthropicGoogleVertex = 8 + export const keyCacheEnabled = 9 } class TestKeysDB extends KeysDbD1 { @@ -247,6 +250,23 @@ class TestKeysDB extends KeysDbD1 { ], routingGroups: { anthropic: [{ key: 'anthropic' }, { key: 'google-vertex' }] }, } + case 'cache-enabled': + return { + id: IDS.keyCacheEnabled, + user: IDS.userDefault, + project: IDS.projectDefault, + org: IDS.orgDefault, + key, + status: 'active', + providers: this.allProviders, + routingGroups: { openai: [{ key: 'openai' }] }, + cacheEnabled: true, + otelSettings: { + writeToken: 'write-token', + baseUrl: 'https://logfire.pydantic.dev', + exporterProtocol: 'http/json', + }, + } default: return null } diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml new file mode 100644 index 0000000..5f8f10e --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/openai-159ef69d00bcffdce018936eaf487cc1a5c8eb66b52fcb8fc7fe0c79e37c3baa.yaml @@ -0,0 +1,86 @@ +interactions: +- request: + body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital + of France?"}],"max_completion_tokens":1200}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '118' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3SSTU8CMRCG7/srmp53yYLyEW5ITFAveCFRQza1HaDQbZt21qiE/25aPrpGvPQw + z8yb953OPiOESkHHhPINQ15bVUx3i+3L3WT7PnlUg9nzN586qxf3D/O7p9cZzcOEed8Cx/NUh5va + KkBp9BFzBwwhqHaHg36v7HbLUQS1EaDC2Npi0S96Za9flKOiHJ7mNkZy8HRM3jJCCNnHNzjUAj7p + mJT5uVKD92wNdHxpIoQ6o0KFMu+lR6aR5glyoxF0ND1nTvpOGzpYNZ4Fa7pRqgWY1gZZiBZtLU/k + cDGyklr6TeWAeaODuEdjaaSHjJBlDNb88kqtM7XFCs0Oomz35ihH0yITHPZPEA0yleqjUX5FrRKA + TCrfWgzljG9ApMm0RdYIaVoga2X7a+aa9jG31OukMrj9Vz8BzsEiiMo6EJL/TpzaHIQ7+6/tsuTo + mHpwH5JDhRJc+AgBK9ao4w1Q/+UR6mol9RqcdTIeQvjr7JD9AAAA//8DAOJaZgUFAwAA + headers: + CF-RAY: + - 9aaca9d12fb3cc4d-MAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 08 Dec 2025 13:38:31 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - pydantic-28gund + openai-processing-ms: + - '2410' + openai-project: + - proj_dKobscVY9YJxeEaDJen54e3d + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '2428' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - 
'40000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '39999990' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_276a4c432c6c4f04902eca21289abb10 + status: + code: 200 + message: OK +version: 1 diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml new file mode 100644 index 0000000..e9e67f2 --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/openai-281c234fea62893bcc6cf9938f48ce863c757e7d9339cd0badcab66d33ec5714.yaml @@ -0,0 +1,88 @@ +interactions: +- request: + body: '{"model":"gpt-5","messages":[{"role":"user","content":"Message A"}],"max_completion_tokens":1200}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '97' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3STz04bMRDG73mKkU+tlKBk2/Anl4pSlapSVYlWtFJB0dQ7mxi8thnPQhcUiQcp + L8eTVHYCu6hw8WF+nm/G841vBgDKlGoGSi9RdB3s6OD8OB4eXRz/bL9XxZuz6Wcz4eKQ8OuF/DhS + w5Thf5+RloesLe3rYEmMd2usmVAoqU52tqfFeLL9djeD2pdkU9oiyGg6KsbFdDTeHY13NnlLbzRF + NYNfAwCAm3ymDl1Jf9QMxsOHSE0x4oLU7PESgGJvU0RhjCYKOlHDDmrvhFxu+pO/givf2BJa34A1 + 5wQ1gXhYoistwf3t3y9rfdi/v717ByduBB8YKwHPYOrA/pLASAp/a+oa2VxTQsLookV5gPsObXud + pF3m2iIbaRM6yDMCBKZgW3jVq/n+/vbuddYWz0kKKs+QZBmYKmJymk7ciftIZKFiyr0HjEIgS4Kq + sRY2A0pVYyBtqjazhUe71R8LU9VETKa4xtoeQOe8YDI1G3K6IatHCyrjTFzOmTB6l8YaxQeV6WoA + cJotbZ64pAL7Oshc/Dll2d21muo2qGPF9mRDxQvaPtgbPiM3L0nQ2NjbCaVRL6nsUrsFwqY0vgcG + vcf9385z2uuHG7foVCZ7xYsFOqA1BaFyHphKo58+urvGlP7YS9cex5xbVpH40miaiyFOVpRUYWPX + +69iG4XqeWXcgjiwyZ8guT1YDf4BAAD//wMApzAU5wEEAAA= + headers: + CF-RAY: + - 9aacb6fae9ca24b7-MAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 08 Dec 2025 13:47:35 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - pydantic-28gund + openai-processing-ms: + - '7188' + openai-project: + - proj_dKobscVY9YJxeEaDJen54e3d + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '7427' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '40000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '39999994' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_6fe764a946564a8e990747a7b308de1e + status: + code: 200 + message: OK +version: 1 diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml new file mode 100644 index 0000000..2b49a4d --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/openai-4aa76245acbac6ffcef36851e34406fc9feeafb5264cc083cb1083a9c838e80c.yaml @@ -0,0 +1,90 @@ +interactions: +- request: + body: '{"model":"gpt-5","messages":[{"role":"user","content":"Message B"}],"max_completion_tokens":1200}' + 
headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '97' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RTy04bQRC8+ytacwJpDbYj28SXiKBEoCgCBSmXgKzOTK938Dw2M70Yg5D8G5HC + z/lLotk17KKQyxy6umr6Uf3QAxBaiRkIWSBLW5r+yfJ7PB9OTn/dVLefP53fV1/U8Mau7MidHp+J + LDH8zxuS/Mw6kN6Whlh718AyEDIl1eF0Mh4NhpPxpAasV2QSbVFyf9wfDUbj/uCoP5jueIXXkqKY + wY8eAMBD/aYKnaI7MYNB9hyxFCMuSMxekgBE8CZFBMaoI6NjkbWg9I7J1UWf+MooWPsKYoGBgAuC + vDIGmO4YfA7bzZ+vzQfwcbt5AnQKVgVy4mw3vxUYvSSwBOxBeVhpLkDzhyt35c7LNIcIZyDRQUGm + rOHZlevDRfA+D4QKfABSmiH3AaTBoHl9yN5RyvpGq6CZYM/6QCnDoslAeid1pAzyoMkps86AWB7s + J8ZlZS0GfU9JlwO6aJBrrZOAOQNCoNKs64C3ZWpZc1N1p9Pj7eYpdXBhCCOBdtJUigDdOn0eOaB2 + HGEPK6XJScogVZyBIbfgIgNLSle2GQ1Z1Obw0qBc7kP0u2kwauMDaD7oLiZQXkVMtnCVMR0AnfOM + 9TiTJa53yOOLCXLtdCzmgTB6lxYb2ZeiRh97ANe1qapXPhFl8LbkOfsl1bJHjZpoPdxi78bvdyh7 + RtMBJtPsDbm5otRi7LhSSJQFqZbaWjjN0XeAXqe5f8t5S7tpXLtFqzJqLu3ND1pASiqZ1LwMpLR8 + 3XSbFihd+f/SXsZclywihVstac6aQlqFohwr01ygiOvIZOe5dgsKZdD1GaZt9x57fwEAAP//AwCV + ZWyCgwQAAA== + headers: + CF-RAY: + - 9aacb72ce91224b7-MAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 08 Dec 2025 13:47:45 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - pydantic-28gund + openai-processing-ms: + - '8718' + openai-project: + - proj_dKobscVY9YJxeEaDJen54e3d + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '8883' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '40000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '39999994' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_79b538eabeb64e568ddf746f7b40f4d8 + status: + code: 200 + message: OK +version: 1 diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml new file mode 100644 index 0000000..ad7cc1b --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/openai-85c50c20d79a55446c8ee8cce3db8273f9e45df86a46af3b6a1ef014074413a8.yaml @@ -0,0 +1,80 @@ +interactions: +- request: + body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital + of France?"}],"max_completion_tokens":1}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '115' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"error\": {\n \"message\": \"Could not finish the message + because max_tokens or model output limit was reached. 
Please try again with + higher max_tokens.\",\n \"type\": \"invalid_request_error\",\n \"param\": + null,\n \"code\": null\n }\n}" + headers: + CF-RAY: + - 9aaca9bfaaf2cc4d-MAD + Connection: + - keep-alive + Content-Length: + - '235' + Content-Type: + - application/json + Date: + - Mon, 08 Dec 2025 13:38:26 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - pydantic-28gund + openai-processing-ms: + - '306' + openai-project: + - proj_dKobscVY9YJxeEaDJen54e3d + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '449' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '40000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '39999990' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_65f02d8d9a39474ab7ba122fc0ba2927 + status: + code: 400 + message: Bad Request +version: 1 diff --git a/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml b/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml new file mode 100644 index 0000000..5f34dca --- /dev/null +++ b/proxy-vcr/proxy_vcr/cassettes/openai-94748ba2f45b546b778e93a23f7383b111fa3b75552a1ad141a59bf64e6046cb.yaml @@ -0,0 +1,95 @@ +interactions: +- request: + body: '{"model":"gpt-5","messages":[{"role":"user","content":"What is the capital + of France?"}],"max_completion_tokens":1200,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '172' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null,"obfuscation":"PIQTUtyOdh"} + + + data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Paris"},"finish_reason":null}],"usage":null,"obfuscation":"re3rLzM"} + + + data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"finish_reason":null}],"usage":null,"obfuscation":"sRTMX9sXRGg"} + + + data: {"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null,"obfuscation":"q16gDt"} + + + data: 
{"id":"chatcmpl-CkVmZ8JZ8oof5i1bL0G7ixPcF9J7h","object":"chat.completion.chunk","created":1765201295,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"xR2uRrbBUrh"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9aacae5afd4e3ed1-MAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 08 Dec 2025 13:41:36 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - pydantic-28gund + openai-processing-ms: + - '1135' + openai-project: + - proj_dKobscVY9YJxeEaDJen54e3d + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1282' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '40000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '39999990' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_33fa052094294105968f87c4e65778f4 + status: + code: 200 + message: OK +version: 1