16 commits
b4df352
feat(proxy): add Anthropic Messages API endpoint for Claude Code comp…
FammasMaz Dec 10, 2025
7e229f4
feat(anthropic): add extended thinking support to /v1/messages endpoint
FammasMaz Dec 12, 2025
7aea08e
feat(anthropic): force high thinking budget for Opus models by default
FammasMaz Dec 12, 2025
05d89a2
fix: ensure max_tokens exceeds thinking budget and improve error hand…
FammasMaz Dec 13, 2025
e35f3f0
fix(anthropic): properly close all content blocks in streaming wrapper
FammasMaz Dec 14, 2025
4ec92ec
fix(anthropic): add missing uuid import for /v1/messages endpoint
FammasMaz Dec 14, 2025
b70efdf
fix(anthropic): always set custom_reasoning_budget when thinking is e…
FammasMaz Dec 14, 2025
4bd879b
feat(openai): auto-enable full thinking budget for Opus
FammasMaz Dec 14, 2025
758b4b5
fix(anthropic): add missing JSONResponse import for non-streaming res…
FammasMaz Dec 14, 2025
f2d7288
fix(anthropic): ensure message_start is sent before message_stop in s…
FammasMaz Dec 15, 2025
de88557
feat: add /context endpoint for anthropic routes
FammasMaz Dec 16, 2025
beed0bc
Revert "feat(openai): auto-enable full thinking budget for Opus"
FammasMaz Dec 19, 2025
2c93a68
Revert "fix(anthropic): always set custom_reasoning_budget when think…
FammasMaz Dec 19, 2025
b19526c
refactor: Move Anthropic translation layer to rotator_library
FammasMaz Dec 20, 2025
d91f98b
fix(anthropic): improve model detection and document thinking budget
FammasMaz Dec 20, 2025
16c889f
fix(anthropic): handle images in tool results for Claude Code
FammasMaz Dec 22, 2025
181 changes: 180 additions & 1 deletion src/proxy_app/main.py
@@ -1,4 +1,5 @@
import time
import uuid

# Phase 1: Minimal imports for arg parsing and TUI
import asyncio
@@ -99,7 +100,7 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.security import APIKeyHeader

print(" → Loading core dependencies...")
@@ -214,6 +215,13 @@ class EnrichedModelList(BaseModel):
data: List[EnrichedModelCard]


# --- Anthropic API Models (imported from library) ---
from rotator_library.anthropic_compat import (
AnthropicMessagesRequest,
AnthropicCountTokensRequest,
)


# Calculate total loading time
_elapsed = time.time() - _start_time
print(
@@ -665,6 +673,27 @@ async def verify_api_key(auth: str = Depends(api_key_header)):
return auth


# --- Anthropic API Key Header ---
anthropic_api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)


async def verify_anthropic_api_key(
x_api_key: str = Depends(anthropic_api_key_header),
auth: str = Depends(api_key_header),
):
"""
Dependency to verify API key for Anthropic endpoints.
Accepts either x-api-key header (Anthropic style) or Authorization Bearer (OpenAI style).
"""
# Check x-api-key first (Anthropic style)
if x_api_key and x_api_key == PROXY_API_KEY:
return x_api_key
# Fall back to Bearer token (OpenAI style)
if auth and auth == f"Bearer {PROXY_API_KEY}":
return auth
raise HTTPException(status_code=401, detail="Invalid or missing API Key")
Comment on lines +688 to +694

Copilot AI Dec 19, 2025

When PROXY_API_KEY is not set or empty (open access mode), this function will always raise an HTTPException because neither condition will match. This is inconsistent with verify_api_key at line 794 which allows access when PROXY_API_KEY is not set. Consider adding a check similar to line 794 to allow open access mode.
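A minimal sketch of the suggested fix, reusing the same PROXY_API_KEY global that verify_api_key checks (illustrative adaptation, not part of this PR):

```python
async def verify_anthropic_api_key(
    x_api_key: str = Depends(anthropic_api_key_header),
    auth: str = Depends(api_key_header),
):
    """
    Dependency to verify API key for Anthropic endpoints.
    Allows open access when PROXY_API_KEY is unset, matching verify_api_key.
    """
    # Open access mode: no key configured means no auth required
    if not PROXY_API_KEY:
        return None
    # Check x-api-key first (Anthropic style)
    if x_api_key and x_api_key == PROXY_API_KEY:
        return x_api_key
    # Fall back to Bearer token (OpenAI style)
    if auth and auth == f"Bearer {PROXY_API_KEY}":
        return auth
    raise HTTPException(status_code=401, detail="Invalid or missing API Key")
```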

async def streaming_response_wrapper(
request: Request,
request_data: dict,
@@ -967,6 +996,156 @@ async def chat_completions(
raise HTTPException(status_code=500, detail=str(e))


# --- Anthropic Messages API Endpoint ---
@app.post("/v1/messages")
async def anthropic_messages(
request: Request,
body: AnthropicMessagesRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible Messages API endpoint.

Accepts requests in Anthropic's format and returns responses in Anthropic's format.
Internally translates to OpenAI format for processing via LiteLLM.

This endpoint is compatible with Claude Code and other Anthropic API clients.
"""
# Initialize logger if enabled
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None

try:
# Log the request to console
log_request_to_console(
url=str(request.url),
headers=dict(request.headers),
client_info=(
request.client.host if request.client else "unknown",
request.client.port if request.client else 0,
),
request_data=body.model_dump(exclude_none=True),
)

# Use the library method to handle the request
result = await client.anthropic_messages(body, raw_request=request)

if body.stream:
# Streaming response
return StreamingResponse(
result,
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
else:
# Non-streaming response
if logger:
logger.log_final_response(
status_code=200,
headers=None,
body=result,
)
return JSONResponse(content=result)

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except litellm.RateLimitError as e:
error_response = {
"type": "error",
"error": {"type": "rate_limit_error", "message": str(e)},
}
raise HTTPException(status_code=429, detail=error_response)
except (litellm.ServiceUnavailableError, litellm.APIConnectionError) as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=503, detail=error_response)
except litellm.Timeout as e:
error_response = {
"type": "error",
"error": {"type": "api_error", "message": f"Request timed out: {str(e)}"},
}
raise HTTPException(status_code=504, detail=error_response)
except Exception as e:
logging.error(f"Anthropic messages endpoint error: {e}")
if logger:
logger.log_final_response(
status_code=500,
headers=None,
body={"error": str(e)},
)
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)


# --- Anthropic Count Tokens Endpoint ---
@app.post("/v1/messages/count_tokens")
async def anthropic_count_tokens(
request: Request,
body: AnthropicCountTokensRequest,
client: RotatingClient = Depends(get_rotating_client),
_=Depends(verify_anthropic_api_key),
):
"""
Anthropic-compatible count_tokens endpoint.

Counts the number of tokens that would be used by a Messages API request.
This is useful for estimating costs and managing context windows.

Accepts requests in Anthropic's format and returns token count in Anthropic's format.
"""
try:
# Use the library method to handle the request
result = await client.anthropic_count_tokens(body)
return JSONResponse(content=result)

except (
litellm.InvalidRequestError,
ValueError,
litellm.ContextWindowExceededError,
) as e:
error_response = {
"type": "error",
"error": {"type": "invalid_request_error", "message": str(e)},
}
raise HTTPException(status_code=400, detail=error_response)
except litellm.AuthenticationError as e:
error_response = {
"type": "error",
"error": {"type": "authentication_error", "message": str(e)},
}
raise HTTPException(status_code=401, detail=error_response)
except Exception as e:
logging.error(f"Anthropic count_tokens endpoint error: {e}")
error_response = {
"type": "error",
"error": {"type": "api_error", "message": str(e)},
}
raise HTTPException(status_code=500, detail=error_response)


@app.post("/v1/embeddings")
async def embeddings(
request: Request,
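For context on how the new route is exercised: a hypothetical client request against the /v1/messages endpoint added above (host, port, model name, and key are all placeholders; either auth header is accepted per verify_anthropic_api_key):

```python
# Hypothetical smoke test for the new /v1/messages route; values are placeholders.
import requests

resp = requests.post(
    "http://localhost:8000/v1/messages",
    # Anthropic-style header; {"Authorization": "Bearer <key>"} works as well
    headers={"x-api-key": "<PROXY_API_KEY>"},
    json={
        "model": "claude-3-opus",  # placeholder model name
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello"}],
    },
    timeout=60,
)
print(resp.status_code, resp.json())  # Anthropic-format body translated from the OpenAI backend
```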
8 changes: 7 additions & 1 deletion src/rotator_library/__init__.py
@@ -8,18 +8,20 @@
from .providers import PROVIDER_PLUGINS
from .providers.provider_interface import ProviderInterface
from .model_info_service import ModelInfoService, ModelInfo, ModelMetadata
from . import anthropic_compat

__all__ = [
"RotatingClient",
"PROVIDER_PLUGINS",
"ModelInfoService",
"ModelInfo",
"ModelMetadata",
"anthropic_compat",
]


def __getattr__(name):
"""Lazy-load PROVIDER_PLUGINS and ModelInfoService to speed up module import."""
"""Lazy-load PROVIDER_PLUGINS, ModelInfoService, and anthropic_compat to speed up module import."""
if name == "PROVIDER_PLUGINS":
from .providers import PROVIDER_PLUGINS

@@ -36,4 +38,8 @@ def __getattr__(name):
from .model_info_service import ModelMetadata

return ModelMetadata
if name == "anthropic_compat":
from . import anthropic_compat

return anthropic_compat
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
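The module-level __getattr__ here is the PEP 562 lazy-import pattern: `import rotator_library` stays cheap, and the anthropic_compat submodule is only imported on first attribute access. A short illustration, assuming the package is installed:

```python
import rotator_library                      # fast; anthropic_compat not imported yet

compat = rotator_library.anthropic_compat  # __getattr__ fires, submodule imported now
print(compat)                               # later lookups find the cached submodule attribute
```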
67 changes: 67 additions & 0 deletions src/rotator_library/anthropic_compat/__init__.py
@@ -0,0 +1,67 @@
"""
Anthropic API compatibility module for rotator_library.

This module provides format translation between Anthropic's Messages API
and OpenAI's Chat Completions API, enabling any OpenAI-compatible provider
to work with Anthropic clients like Claude Code.

Usage:
from rotator_library.anthropic_compat import (
AnthropicMessagesRequest,
AnthropicMessagesResponse,
translate_anthropic_request,
openai_to_anthropic_response,
anthropic_streaming_wrapper,
)
"""

from .models import (
AnthropicTextBlock,
AnthropicImageSource,
AnthropicImageBlock,
AnthropicToolUseBlock,
AnthropicToolResultBlock,
AnthropicMessage,
AnthropicTool,
AnthropicThinkingConfig,
AnthropicMessagesRequest,
AnthropicUsage,
AnthropicMessagesResponse,
AnthropicCountTokensRequest,
AnthropicCountTokensResponse,
)

from .translator import (
anthropic_to_openai_messages,
anthropic_to_openai_tools,
anthropic_to_openai_tool_choice,
openai_to_anthropic_response,
translate_anthropic_request,
)

from .streaming import anthropic_streaming_wrapper

__all__ = [
# Models
"AnthropicTextBlock",
"AnthropicImageSource",
"AnthropicImageBlock",
"AnthropicToolUseBlock",
"AnthropicToolResultBlock",
"AnthropicMessage",
"AnthropicTool",
"AnthropicThinkingConfig",
"AnthropicMessagesRequest",
"AnthropicUsage",
"AnthropicMessagesResponse",
"AnthropicCountTokensRequest",
"AnthropicCountTokensResponse",
# Translator functions
"anthropic_to_openai_messages",
"anthropic_to_openai_tools",
"anthropic_to_openai_tool_choice",
"openai_to_anthropic_response",
"translate_anthropic_request",
# Streaming
"anthropic_streaming_wrapper",
]
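The exported names suggest the intended flow, though this diff doesn't show the translator signatures. A hedged sketch, assuming translate_anthropic_request maps the parsed request model to an OpenAI-style payload and openai_to_anthropic_response maps a completion dict back (both signatures are assumptions; only the names come from this PR):

```python
# Assumed round-trip through the translation layer; signatures inferred from
# the exported names above, not confirmed by this diff.
from rotator_library.anthropic_compat import (
    AnthropicMessagesRequest,
    translate_anthropic_request,
    openai_to_anthropic_response,
)

anthropic_req = AnthropicMessagesRequest(
    model="claude-3-opus",  # placeholder model name
    max_tokens=128,
    messages=[{"role": "user", "content": "Hi"}],
)

# Anthropic -> OpenAI: payload for any OpenAI-compatible backend (assumed signature)
openai_payload = translate_anthropic_request(anthropic_req)

# OpenAI -> Anthropic: translate the backend's completion back (assumed signature)
# anthropic_resp = openai_to_anthropic_response(openai_completion)
```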