diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e0238b2b..00e29bb7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,8 @@ python -m venv .venv source .venv/bin/activate # or `.venv\Scripts\activate` on Windows # Install in development mode with all dependencies -pip install -e ".[dev,relevance,proxy]" +# Include langchain so LangChain integration tests import cleanly (full pytest). +pip install -e ".[dev,relevance,proxy,langchain]" # Run tests pytest diff --git a/headroom/proxy/server.py b/headroom/proxy/server.py index a5a62c3a..d53b3471 100644 --- a/headroom/proxy/server.py +++ b/headroom/proxy/server.py @@ -139,6 +139,26 @@ def _get_image_compressor(): ) logger = logging.getLogger("headroom.proxy") + +def _optional_anthropic_client_for_token_count() -> Any | None: + """Anthropic SDK client for accurate pipeline token counts (count_tokens API). + + Returns None when the package is missing or ANTHROPIC_API_KEY is unset (OpenAI-only + proxy use). In those cases AnthropicProvider falls back to tiktoken approximation. + """ + if not os.environ.get("ANTHROPIC_API_KEY"): + return None + try: + from anthropic import Anthropic + + return Anthropic() + except ImportError: + logger.debug( + "anthropic package not installed; pipeline uses tiktoken approximation for Claude models" + ) + return None + + # Always-on file logging to ~/.headroom/logs/ for `headroom perf` analysis _HEADROOM_LOG_DIR = Path.home() / ".headroom" / "logs" @@ -1597,8 +1617,10 @@ def __init__(self, config: ProxyConfig): gurl = config.gemini_api_url.rstrip("/") HeadroomProxy.GEMINI_API_URL = gurl - # Initialize providers - self.anthropic_provider = AnthropicProvider() + # Initialize providers (SDK client enables accurate Anthropic token counting) + self.anthropic_provider = AnthropicProvider( + client=_optional_anthropic_client_for_token_count() + ) self.openai_provider = OpenAIProvider() # Initialize transforms based on routing mode @@ -2412,11 +2434,11 @@ async def handle_anthropic_messages( # Hook: pre_compress — let hooks modify messages before compression if self.config.hooks: from headroom.hooks import CompressContext - from headroom.transforms.query_echo import extract_user_query + from headroom.transforms import query_echo as _query_echo _hook_ctx = CompressContext( model=model, - user_query=extract_user_query(messages), + user_query=_query_echo.extract_user_query(messages), provider="anthropic", ) try: diff --git a/pyproject.toml b/pyproject.toml index e1723514..05a21d07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN) "tiktoken>=0.5.0", # Tokenizer for all compressors "pydantic>=2.0.0", # Config and data models - "litellm==1.82.3", # Model registry, pricing, and provider support + "litellm>=1.82.6,<2", # Model registry, pricing, and provider support "click>=8.1.0", # CLI framework "rich>=13.0.0", # Rich terminal output ] @@ -58,6 +58,7 @@ proxy = [ "fastapi>=0.100.0", "uvicorn>=0.23.0", "httpx[http2]>=0.24.0", + "anthropic>=0.18.0", # Accurate Anthropic token counting in proxy pipeline "openai>=2.14.0", # OpenAI API format support "mcp>=1.0.0", # MCP server (headroom_compress, retrieve, stats) "magika>=0.6.0", # ML content detection for ContentRouter @@ -163,7 +164,7 @@ dev = [ "pre-commit>=3.0.0", "openai>=1.0.0", "anthropic>=0.18.0", - "litellm==1.82.3", + "litellm>=1.82.6,<2", "fastapi>=0.100.0", "uvicorn>=0.23.0", "httpx[http2]>=0.24.0", diff --git a/tests/test_cli/test_mcp.py b/tests/test_cli/test_mcp.py index c65bde2f..0d48e7d5 100644 --- a/tests/test_cli/test_mcp.py +++ b/tests/test_cli/test_mcp.py @@ -53,6 +53,9 @@ def mock_claude_config_path(temp_claude_dir): def which_no_claude(cmd): if cmd == "claude": return None + if cmd == "headroom": + # Deterministic: install path uses `headroom` when on PATH; avoid env-specific python -m fallback. + return "/opt/headroom/bin/headroom" return _real_which(cmd) with patch("headroom.cli.mcp.MCP_CONFIG_PATH", config_path):