diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py index 70c4cec..e5365de 100644 --- a/skillclaw/api_server.py +++ b/skillclaw/api_server.py @@ -9,11 +9,13 @@ from __future__ import annotations import asyncio +import base64 import json import logging import os import random import re +import struct import threading import time from contextlib import asynccontextmanager @@ -28,10 +30,10 @@ from .config import SkillClawConfig from .data_formatter import ConversationSample from .prm_scorer import PRMScorer -from .skill_manager import SkillManager -from .utils import run_llm from .protocols import anthropic_messages as anthropic_protocol from .protocols import openai_responses as responses_protocol +from .skill_manager import SkillManager +from .utils import run_llm logger = logging.getLogger(__name__) @@ -1069,6 +1071,290 @@ def _anthropic_to_openai_body(body: dict[str, Any]) -> dict[str, Any]: return anthropic_protocol.to_openai_body(body) +def _anthropic_request_tool_names(body: dict[str, Any]) -> set[str]: + tool_names: set[str] = set() + tools = body.get("tools") + if not isinstance(tools, list): + return tool_names + for item in tools: + if not isinstance(item, dict): + continue + name = str(item.get("name") or "").strip() + if name: + tool_names.add(name) + return tool_names + + +_IMAGE_TOKEN_ESTIMATE = 1600 + + +def _data_url_bytes(url: str) -> bytes | None: + if not url.startswith("data:") or "," not in url: + return None + header, data = url.split(",", 1) + if ";base64" not in header: + return None + try: + return base64.b64decode(data, validate=False) + except Exception: + return None + + +def _image_dimensions_from_bytes(data: bytes) -> tuple[int, int] | None: + if data.startswith(b"\x89PNG\r\n\x1a\n") and len(data) >= 24: + width, height = struct.unpack(">II", data[16:24]) + return (width, height) if width > 0 and height > 0 else None + if data.startswith(b"GIF87a") or data.startswith(b"GIF89a"): + if len(data) >= 10: + width, height = struct.unpack(" 0 and height > 0 else None + return None + if data.startswith(b"RIFF") and len(data) >= 30 and data[8:12] == b"WEBP": + if data[12:16] == b"VP8X": + width = int.from_bytes(data[24:27], "little") + 1 + height = int.from_bytes(data[27:30], "little") + 1 + return (width, height) if width > 0 and height > 0 else None + if data[12:16] == b"VP8 " and len(data) >= 30: + width = struct.unpack(" 0 and height > 0 else None + if data.startswith(b"\xff\xd8"): + index = 2 + while index + 9 < len(data): + if data[index] != 0xFF: + index += 1 + continue + marker = data[index + 1] + index += 2 + if marker in {0xD8, 0xD9}: + continue + if index + 2 > len(data): + return None + segment_length = struct.unpack(">H", data[index:index + 2])[0] + if segment_length < 2 or index + segment_length > len(data): + return None + if marker in { + 0xC0, + 0xC1, + 0xC2, + 0xC3, + 0xC5, + 0xC6, + 0xC7, + 0xC9, + 0xCA, + 0xCB, + 0xCD, + 0xCE, + 0xCF, + }: + if segment_length >= 7: + height, width = struct.unpack(">HH", data[index + 3:index + 7]) + return (width, height) if width > 0 and height > 0 else None + return None + index += segment_length + return None + + +def _image_token_estimate_from_url(url: str) -> int: + data = _data_url_bytes(url) + if data is None: + return _IMAGE_TOKEN_ESTIMATE + dimensions = _image_dimensions_from_bytes(data) + if dimensions is None: + return _IMAGE_TOKEN_ESTIMATE + width, height = dimensions + return max(_IMAGE_TOKEN_ESTIMATE, (width * height + 749) // 750) + + +def _image_token_estimate_from_part(content: dict[str, Any]) -> int: + image_url = content.get("image_url") + url = image_url.get("url") if isinstance(image_url, dict) else image_url + if not isinstance(url, str) or not url: + source = content.get("source") if isinstance(content.get("source"), dict) else {} + if source.get("type") == "base64": + media_type = str(source.get("media_type") or "image/png") + data = str(source.get("data") or "") + url = f"data:{media_type};base64,{data}" if data else "" + else: + url = str(content.get("url") or "") + if not url: + return _IMAGE_TOKEN_ESTIMATE + return _image_token_estimate_from_url(url) + + +def _estimate_image_content_tokens(content: Any) -> int: + if isinstance(content, list): + return sum(_estimate_image_content_tokens(item) for item in content) + if isinstance(content, dict): + item_type = content.get("type") + count = _image_token_estimate_from_part(content) if item_type in {"image", "image_url", "input_image"} else 0 + if "content" in content: + count += _estimate_image_content_tokens(content.get("content")) + return count + return 0 + + +def _token_estimate_text(content: Any) -> str: + if isinstance(content, str): + return content + if isinstance(content, list): + parts: list[str] = [] + for item in content: + if not isinstance(item, dict): + if item is not None: + parts.append(str(item)) + continue + item_type = item.get("type") + if item_type in {"text", "input_text", "output_text"} and isinstance(item.get("text"), str): + parts.append(item["text"]) + elif item_type in {"image", "image_url"}: + parts.append("[image]") + elif "content" in item: + parts.append(_token_estimate_text(item.get("content"))) + return " ".join(part for part in parts if part) + if isinstance(content, dict): + return json.dumps(content, ensure_ascii=False, sort_keys=True) + return str(content) if content is not None else "" + + +def _estimate_openai_body_input_tokens(tokenizer: Any, openai_body: dict[str, Any]) -> int: + messages = list(openai_body.get("messages") or []) + tools = openai_body.get("tools") + image_tokens = sum(_estimate_image_content_tokens(msg.get("content")) for msg in messages if isinstance(msg, dict)) + if tokenizer is not None: + try: + text = tokenizer.apply_chat_template( + _normalize_messages_for_template(messages), + tools=tools if tools else None, + tokenize=False, + add_generation_prompt=True, + ) + tokenized = tokenizer(text, add_special_tokens=False) + input_ids = tokenized["input_ids"] if isinstance(tokenized, dict) else tokenized.input_ids + return max(0, len(input_ids) + image_tokens) + except Exception: + pass + + text_parts = [] + for msg in messages: + if not isinstance(msg, dict): + continue + text_parts.append(f"{msg.get('role', '')}: {_token_estimate_text(msg.get('content'))}") + if msg.get("tool_calls"): + text_parts.append(json.dumps(msg.get("tool_calls"), ensure_ascii=False, sort_keys=True)) + if tools: + text_parts.append(json.dumps(tools, ensure_ascii=False, sort_keys=True)) + text = "\n".join(part for part in text_parts if part) + return max(1, (len(text) + 3) // 4 + image_tokens) + + +def _message_identity(message: dict[str, Any]) -> str: + try: + return json.dumps(message, ensure_ascii=False, sort_keys=True, separators=(",", ":")) + except Exception: + return str(message) + + +def _split_leading_system_messages(messages: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + index = 0 + while index < len(messages): + msg = messages[index] + if not isinstance(msg, dict) or msg.get("role") != "system": + break + index += 1 + return messages[:index], messages[index:] + + +def _canonical_overlap_message(message: dict[str, Any]) -> dict[str, Any]: + normalized = dict(message) + if "content" in normalized: + normalized["content"] = _flatten_message_content(normalized.get("content")) + return normalized + + +def _merge_assistant_overlap_run(messages: list[dict[str, Any]]) -> dict[str, Any]: + content_parts: list[str] = [] + tool_calls: list[Any] = [] + for msg in messages: + content = _flatten_message_content(msg.get("content")) + if content: + content_parts.append(content) + msg_tool_calls = msg.get("tool_calls") + if isinstance(msg_tool_calls, list): + tool_calls.extend(msg_tool_calls) + + merged: dict[str, Any] = {"role": "assistant", "content": " ".join(content_parts)} + if tool_calls: + merged["tool_calls"] = tool_calls + return merged + + +def _messages_for_overlap(messages: list[dict[str, Any]]) -> list[tuple[str, int]]: + entries: list[tuple[str, int]] = [] + index = 0 + while index < len(messages): + msg = messages[index] + if not isinstance(msg, dict): + entries.append((_message_identity({"value": msg}), index + 1)) + index += 1 + continue + + if msg.get("role") == "assistant": + run = [msg] + next_index = index + 1 + has_tool_calls = isinstance(msg.get("tool_calls"), list) and bool(msg.get("tool_calls")) + while next_index < len(messages): + next_msg = messages[next_index] + if not isinstance(next_msg, dict) or next_msg.get("role") != "assistant": + break + run.append(next_msg) + has_tool_calls = has_tool_calls or ( + isinstance(next_msg.get("tool_calls"), list) and bool(next_msg.get("tool_calls")) + ) + next_index += 1 + if has_tool_calls: + entries.append((_message_identity(_merge_assistant_overlap_run(run)), next_index)) + index = next_index + continue + + entries.append((_message_identity(_canonical_overlap_message(msg)), index + 1)) + index += 1 + return entries + + +def _merge_previous_response_messages( + previous_messages: list[dict[str, Any]], + current_messages: list[dict[str, Any]], +) -> list[dict[str, Any]]: + if not previous_messages: + return current_messages + if not current_messages: + return previous_messages + + current_system_messages, current_body_messages = _split_leading_system_messages(current_messages) + if current_system_messages: + _, previous_body_messages = _split_leading_system_messages(previous_messages) + else: + previous_body_messages = previous_messages + + previous_entries = _messages_for_overlap(previous_body_messages) + current_entries = _messages_for_overlap(current_body_messages) + previous_keys = [key for key, _ in previous_entries] + current_keys = [key for key, _ in current_entries] + if current_keys[: len(previous_keys)] == previous_keys: + return current_system_messages + current_body_messages + + max_overlap = min(len(previous_keys), len(current_keys)) + overlap = 0 + for size in range(max_overlap, 0, -1): + if previous_keys[-size:] == current_keys[:size]: + overlap = size + break + current_drop_index = current_entries[overlap - 1][1] if overlap else 0 + return current_system_messages + previous_body_messages + current_body_messages[current_drop_index:] + + def _normalize_responses_content(content: Any) -> str: return responses_protocol.normalize_content_to_text(content) @@ -1092,8 +1378,12 @@ def _openai_chat_to_responses_payload(payload: dict[str, Any], model: str) -> di return responses_protocol.from_openai_chat_payload(payload, model) -def _openai_to_anthropic_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]: - return anthropic_protocol.from_openai_response(openai_resp, model) +def _openai_to_anthropic_response( + openai_resp: dict[str, Any], + model: str, + tool_names: set[str] | None = None, +) -> dict[str, Any]: + return anthropic_protocol.from_openai_response(openai_resp, model, tool_names) # ------------------------------------------------------------------ # @@ -1420,12 +1710,29 @@ async def delete_response( # forwards container Anthropic SDK calls to ANTHROPIC_BASE_URL). # ---------------------------------------------------------------- # + @app.post("/v1/messages/count_tokens") + async def anthropic_count_tokens( + request: Request, + authorization: Optional[str] = Header(default=None), + x_api_key: Optional[str] = Header(default=None, alias="x-api-key"), + ): + owner: SkillClawAPIServer = request.app.state.owner + owner._mark_request_activity() + auth_header = authorization or (f"Bearer {x_api_key}" if x_api_key else None) + await owner._check_auth(auth_header) + + raw_body = await request.json() + openai_body = _anthropic_to_openai_body(raw_body) + input_tokens = _estimate_openai_body_input_tokens(owner._tokenizer, openai_body) + return JSONResponse(content={"input_tokens": input_tokens}) + @app.post("/v1/messages") async def anthropic_messages( request: Request, authorization: Optional[str] = Header(default=None), x_api_key: Optional[str] = Header(default=None, alias="x-api-key"), x_session_id: Optional[str] = Header(default=None), + x_claude_code_session_id: Optional[str] = Header(default=None, alias="x-claude-code-session-id"), x_turn_type: Optional[str] = Header(default=None), x_session_done: Optional[str] = Header(default=None), ): @@ -1437,6 +1744,7 @@ async def anthropic_messages( raw_body = await request.json() stream = bool(raw_body.get("stream", False)) + tool_names = _anthropic_request_tool_names(raw_body) openai_body = _anthropic_to_openai_body(raw_body) model = raw_body.get("model") or owner._served_model @@ -1445,7 +1753,7 @@ async def anthropic_messages( rewritten_messages, _ = _rewrite_new_session_bootstrap_prompt(incoming_messages) openai_body["messages"] = rewritten_messages - _raw_sid = x_session_id or "" + _raw_sid = x_session_id or x_claude_code_session_id or raw_body.get("session_id") or "" if _raw_sid: session_id = _raw_sid turn_type = _resolve_turn_type(x_turn_type, raw_body.get("turn_type"), default="main") @@ -1463,10 +1771,10 @@ async def anthropic_messages( ) if stream: return StreamingResponse( - owner._stream_anthropic_response(result, model), + owner._stream_anthropic_response(result, model, tool_names), media_type="text/event-stream", ) - return JSONResponse(content=_openai_to_anthropic_response(result["response"], model)) + return JSONResponse(content=_openai_to_anthropic_response(result["response"], model, tool_names)) return app @@ -2237,7 +2545,12 @@ def _responses_native_enabled(self) -> bool: """Return whether /v1/responses should be forwarded as Responses API.""" return str(getattr(self.config, "llm_api_mode", "chat") or "chat").lower() == "responses" - def _prepare_responses_forward(self, body: dict[str, Any], *, stream: bool) -> tuple[str, dict[str, Any], dict[str, str]]: + def _prepare_responses_forward( + self, + body: dict[str, Any], + *, + stream: bool, + ) -> tuple[str, dict[str, Any], dict[str, str]]: """Build URL, body, and headers for native Responses forwarding. Native mode intentionally keeps Responses-only tools (custom, web_search, @@ -2878,9 +3191,14 @@ async def _stream_responses_response(self, response_payload: dict[str, Any]): async for chunk in responses_protocol.stream_response(response_payload): yield chunk - async def _stream_anthropic_response(self, result: dict[str, Any], model: str): + async def _stream_anthropic_response( + self, + result: dict[str, Any], + model: str, + tool_names: set[str] | None = None, + ): """Yield Anthropic-format SSE events from an internal result dict.""" - async for chunk in anthropic_protocol.stream_from_openai_result(result, model): + async for chunk in anthropic_protocol.stream_from_openai_result(result, model, tool_names): yield chunk diff --git a/skillclaw/protocols/anthropic_messages.py b/skillclaw/protocols/anthropic_messages.py index 00cef7f..1cede6b 100644 --- a/skillclaw/protocols/anthropic_messages.py +++ b/skillclaw/protocols/anthropic_messages.py @@ -14,26 +14,237 @@ "content_filter": "stop_sequence", } +_CLAUDE_TOOL_NAME_ALIASES = { + "agent": "Agent", + "ask_user_question": "AskUserQuestion", + "ask-user-question": "AskUserQuestion", + "askuserquestion": "AskUserQuestion", + "bash": "Bash", + "cron_create": "CronCreate", + "cron-create": "CronCreate", + "croncreate": "CronCreate", + "cron_delete": "CronDelete", + "cron-delete": "CronDelete", + "crondelete": "CronDelete", + "cron_list": "CronList", + "cron-list": "CronList", + "cronlist": "CronList", + "edit": "Edit", + "edit_file": "Edit", + "enter_plan_mode": "EnterPlanMode", + "enter-plan-mode": "EnterPlanMode", + "enterplanmode": "EnterPlanMode", + "enter_worktree": "EnterWorktree", + "enter-worktree": "EnterWorktree", + "enterworktree": "EnterWorktree", + "exit_plan_mode": "ExitPlanMode", + "exit-plan-mode": "ExitPlanMode", + "exitplanmode": "ExitPlanMode", + "exit_worktree": "ExitWorktree", + "exit-worktree": "ExitWorktree", + "exitworktree": "ExitWorktree", + "glob": "Glob", + "grep": "Grep", + "list": "LS", + "ls": "LS", + "multi_edit": "MultiEdit", + "multi-edit": "MultiEdit", + "multiedit": "MultiEdit", + "notebook_edit": "NotebookEdit", + "notebook-edit": "NotebookEdit", + "notebookedit": "NotebookEdit", + "notebook_read": "NotebookRead", + "notebook-read": "NotebookRead", + "notebookread": "NotebookRead", + "read": "Read", + "read_file": "Read", + "read-file": "Read", + "file_read": "Read", + "file-read": "Read", + "readfile": "Read", + "schedule_wakeup": "ScheduleWakeup", + "schedule-wakeup": "ScheduleWakeup", + "schedulewakeup": "ScheduleWakeup", + "skill": "Skill", + "task": "Task", + "task_output": "TaskOutput", + "task-output": "TaskOutput", + "taskoutput": "TaskOutput", + "task_stop": "TaskStop", + "task-stop": "TaskStop", + "taskstop": "TaskStop", + "todo_write": "TodoWrite", + "todo-write": "TodoWrite", + "todowrite": "TodoWrite", + "web_fetch": "WebFetch", + "web-fetch": "WebFetch", + "webfetch": "WebFetch", + "web_search": "WebSearch", + "web-search": "WebSearch", + "websearch": "WebSearch", + "write": "Write", + "write_file": "Write", + "write-file": "Write", + "file_write": "Write", + "file-write": "Write", + "writefile": "Write", +} +_FILE_PATH_TOOLS = {"Edit", "MultiEdit", "Read", "Write"} +_PATH_TOOLS = {"Glob", "Grep", "LS"} +_NOTEBOOK_PATH_TOOLS = {"NotebookEdit", "NotebookRead"} + + +def _normalize_tool_use_name(name: Any, tool_names: set[str] | None = None) -> str: + raw = str(name or "unknown_tool") + if tool_names is None: + return raw + if raw in tool_names: + return raw + + lower_tool_names = {tool_name.lower(): tool_name for tool_name in tool_names} + exact_case_match = lower_tool_names.get(raw.lower()) + if exact_case_match: + return exact_case_match + + alias = _CLAUDE_TOOL_NAME_ALIASES.get(raw.lower()) + if alias in tool_names: + return alias + return raw + + +def _sanitize_tool_use_input(name: str, value: Any) -> dict[str, Any]: + """Normalize OpenAI-style tool arguments back to Claude Code schemas.""" + parsed = json_loads_tool_input(value) + sanitized = dict(parsed) + if name == "Bash" and "command" not in sanitized and "cmd" in sanitized: + sanitized["command"] = sanitized.pop("cmd") + + if name in _FILE_PATH_TOOLS and "file_path" not in sanitized: + for alias in ("path", "file"): + if alias in sanitized: + sanitized["file_path"] = sanitized[alias] + break + + if name in _FILE_PATH_TOOLS and "file_path" in sanitized: + sanitized.pop("path", None) + sanitized.pop("file", None) + + if name in _PATH_TOOLS and "path" not in sanitized: + for alias in ("file_path", "file"): + if alias in sanitized: + sanitized["path"] = sanitized[alias] + break + + if name in _PATH_TOOLS and "path" in sanitized: + sanitized.pop("file_path", None) + sanitized.pop("file", None) + + if name in _NOTEBOOK_PATH_TOOLS and "notebook_path" not in sanitized: + for alias in ("path", "file_path", "file"): + if alias in sanitized: + sanitized["notebook_path"] = sanitized[alias] + break + + if name in _NOTEBOOK_PATH_TOOLS and "notebook_path" in sanitized: + sanitized.pop("path", None) + sanitized.pop("file_path", None) + sanitized.pop("file", None) + + if name == "Edit": + for src, dst in (("oldString", "old_string"), ("newString", "new_string"), ("replaceAll", "replace_all")): + if src in sanitized and dst not in sanitized: + sanitized[dst] = sanitized.pop(src) + elif name == "MultiEdit" and isinstance(sanitized.get("edits"), list): + edits = [] + for edit in sanitized["edits"]: + if not isinstance(edit, dict): + edits.append(edit) + continue + item = dict(edit) + for src, dst in (("oldString", "old_string"), ("newString", "new_string"), ("replaceAll", "replace_all")): + if src in item and dst not in item: + item[dst] = item.pop(src) + edits.append(item) + sanitized["edits"] = edits + + if name == "Read" and sanitized.get("pages") == "": + sanitized.pop("pages", None) + + return sanitized + -def _flatten_tool_result_content(content: Any) -> str: +def _flatten_openai_message_content(content: Any) -> str: if isinstance(content, str): return content if isinstance(content, list): parts: list[str] = [] for item in content: if isinstance(item, dict): - if item.get("type") in {"text", "input_text", "output_text"}: - text = item.get("text") - if isinstance(text, str): - parts.append(text) - elif "content" in item: - parts.append(_flatten_tool_result_content(item.get("content"))) + text = item.get("text") + if isinstance(text, str): + parts.append(text) + elif isinstance(item.get("content"), str): + parts.append(item["content"]) elif item is not None: parts.append(str(item)) return " ".join(part for part in parts if part) return str(content) if content is not None else "" +def _anthropic_usage_from_openai_usage(usage: dict[str, Any]) -> dict[str, int]: + prompt_tokens = int(usage.get("prompt_tokens") or usage.get("input_tokens") or 0) + completion_tokens = int(usage.get("completion_tokens") or usage.get("output_tokens") or 0) + details = usage.get("prompt_tokens_details") or usage.get("input_tokens_details") or {} + cached_tokens = 0 + if isinstance(details, dict): + cached_tokens = int(details.get("cached_tokens") or 0) + input_tokens = max(0, prompt_tokens - cached_tokens) + out = {"input_tokens": input_tokens, "output_tokens": completion_tokens} + if cached_tokens: + out["cache_read_input_tokens"] = cached_tokens + return out + + +def _tool_result_to_openai_content(block: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]: + text_parts: list[str] = [] + image_parts: list[dict[str, Any]] = [] + + def collect(value: Any) -> None: + if isinstance(value, str): + if value: + text_parts.append(value) + return + if isinstance(value, list): + for item in value: + collect(item) + return + if isinstance(value, dict): + value_type = value.get("type") + if value_type in {"text", "input_text", "output_text"}: + text = value.get("text") + if isinstance(text, str) and text: + text_parts.append(text) + return + if value_type == "image": + image_part = _image_block_to_openai_part(value) + if image_part: + image_parts.append(image_part) + return + if "content" in value: + collect(value.get("content")) + return + if value is not None: + text_parts.append(str(value)) + + collect(block.get("content")) + text = " ".join(part for part in text_parts if part).strip() + if block.get("is_error") is True: + text = f"Tool error: {text}" if text else "Tool error" + elif not text: + text = "(image result attached)" if image_parts else "(empty)" + return text, image_parts + + def _image_block_to_openai_part(block: dict[str, Any]) -> dict[str, Any] | None: source = block.get("source") if isinstance(block.get("source"), dict) else {} if source.get("type") == "base64": @@ -145,13 +356,15 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]: } ) elif block_type == "tool_result": + tool_text, tool_images = _tool_result_to_openai_content(block) tool_results.append( { "role": "tool", "tool_call_id": str(block.get("tool_use_id") or ""), - "content": _flatten_tool_result_content(block.get("content")), + "content": tool_text, } ) + content_parts.extend(tool_images) text = " ".join(text_parts).strip() has_image = any(part.get("type") == "image_url" for part in content_parts) @@ -164,8 +377,8 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]: continue if tool_results: normalized.extend(tool_results) - if text: - normalized.append({**msg, "content": text}) + if text or content_parts: + normalized.append({**msg, "content": openai_content}) continue normalized.append({**msg, "content": openai_content}) @@ -186,11 +399,15 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]: return openai_body -def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]: +def from_openai_response( + openai_resp: dict[str, Any], + model: str, + tool_names: set[str] | None = None, +) -> dict[str, Any]: """Convert an OpenAI chat completion response to Anthropic /v1/messages format.""" choice = openai_resp.get("choices", [{}])[0] message = choice.get("message", {}) if isinstance(choice.get("message"), dict) else {} - content_text = message.get("content") or "" + content_text = _flatten_openai_message_content(message.get("content")) raw_tool_calls = message.get("tool_calls") tool_calls = raw_tool_calls if isinstance(raw_tool_calls, list) else [] finish_reason = choice.get("finish_reason", "stop") @@ -203,18 +420,20 @@ def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, A if not isinstance(tool_call, dict): continue function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {} + tool_name = _normalize_tool_use_name(function.get("name"), tool_names) content_blocks.append( { "type": "tool_use", "id": str(tool_call.get("id") or f"call_{idx}"), - "name": str(function.get("name") or "unknown_tool"), - "input": json_loads_tool_input(function.get("arguments")), + "name": tool_name, + "input": _sanitize_tool_use_input(tool_name, function.get("arguments")), } ) if not content_blocks: content_blocks.append({"type": "text", "text": ""}) usage = openai_resp.get("usage", {}) + usage = usage if isinstance(usage, dict) else {} return { "id": openai_resp.get("id", "msg_skillclaw"), "type": "message", @@ -223,21 +442,23 @@ def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, A "content": content_blocks, "stop_reason": stop_reason, "stop_sequence": None, - "usage": { - "input_tokens": usage.get("prompt_tokens", 0), - "output_tokens": usage.get("completion_tokens", 0), - }, + "usage": _anthropic_usage_from_openai_usage(usage), } -async def stream_from_openai_result(result: dict[str, Any], model: str) -> AsyncIterator[str]: +async def stream_from_openai_result( + result: dict[str, Any], + model: str, + tool_names: set[str] | None = None, +) -> AsyncIterator[str]: """Yield Anthropic-format SSE events from an internal OpenAI chat result.""" payload = result["response"] - choice = payload.get("choices", [{}])[0] - anthropic_payload = from_openai_response(payload, model) + anthropic_payload = from_openai_response(payload, model, tool_names) content_blocks = anthropic_payload.get("content", []) stop_reason = anthropic_payload.get("stop_reason") or "end_turn" usage = payload.get("usage", {}) + usage = usage if isinstance(usage, dict) else {} + anthropic_usage = _anthropic_usage_from_openai_usage(usage) msg_id = payload.get("id", "msg_skillclaw") def sse(event: str, data: dict[str, Any]) -> str: @@ -255,7 +476,7 @@ def sse(event: str, data: dict[str, Any]) -> str: "model": model, "stop_reason": None, "stop_sequence": None, - "usage": {"input_tokens": usage.get("prompt_tokens", 0), "output_tokens": 0}, + "usage": {**anthropic_usage, "output_tokens": 0}, }, }, ) @@ -316,7 +537,7 @@ def sse(event: str, data: dict[str, Any]) -> str: { "type": "message_delta", "delta": {"stop_reason": stop_reason, "stop_sequence": None}, - "usage": {"output_tokens": usage.get("completion_tokens", 0)}, + "usage": {"output_tokens": anthropic_usage.get("output_tokens", 0)}, }, ) yield sse("message_stop", {"type": "message_stop"}) diff --git a/tests/test_anthropic_messages.py b/tests/test_anthropic_messages.py index 19a2576..c1974f3 100644 --- a/tests/test_anthropic_messages.py +++ b/tests/test_anthropic_messages.py @@ -38,6 +38,73 @@ def test_anthropic_tool_result_blocks_convert_to_openai_tool_messages(): ] +def test_anthropic_tool_result_error_flag_is_preserved_in_tool_content(): + body = { + "model": "claude-code-test", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_1", + "content": "Permission to read file is required.", + "is_error": True, + } + ], + }, + ], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["messages"] == [ + { + "role": "tool", + "tool_call_id": "toolu_1", + "content": "Tool error: Permission to read file is required.", + } + ] + + +def test_anthropic_tool_result_images_are_preserved_as_followup_user_content(): + body = { + "model": "claude-code-test", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_1", + "content": [ + {"type": "text", "text": "screenshot"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "AAAA", + }, + }, + ], + } + ], + }, + ], + } + + converted = anthropic_messages.to_openai_body(body) + + assert converted["messages"] == [ + {"role": "tool", "tool_call_id": "toolu_1", "content": "screenshot"}, + { + "role": "user", + "content": [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}], + }, + ] + + def test_openai_tool_calls_convert_to_anthropic_tool_use_blocks(): openai_resp = { "id": "chatcmpl_1", @@ -68,6 +135,263 @@ def test_openai_tool_calls_convert_to_anthropic_tool_use_blocks(): ] +def test_openai_read_tool_call_normalizes_to_claude_code_schema(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_read", + "type": "function", + "function": { + "name": "read", + "arguments": '{"path":"/tmp/demo.py","limit":2000,"offset":0,"pages":""}', + }, + } + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"Read"}) + + assert converted["content"] == [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"file_path": "/tmp/demo.py", "limit": 2000, "offset": 0}, + } + ] + + +def test_openai_custom_tool_name_overlapping_claude_alias_is_preserved(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_read", + "type": "function", + "function": { + "name": "read", + "arguments": '{"path":"/tmp/demo.py","mode":"raw"}', + }, + } + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"read"}) + + assert converted["content"] == [ + { + "type": "tool_use", + "id": "call_read", + "name": "read", + "input": {"path": "/tmp/demo.py", "mode": "raw"}, + } + ] + + +def test_openai_common_claude_code_tool_names_are_restored(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_bash", + "type": "function", + "function": {"name": "bash", "arguments": '{"command":"pwd"}'}, + }, + { + "id": "call_edit", + "type": "function", + "function": { + "name": "multiedit", + "arguments": '{"path":"/tmp/demo.py","edits":[]}', + }, + }, + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"Bash", "MultiEdit"}) + + assert converted["content"] == [ + {"type": "tool_use", "id": "call_bash", "name": "Bash", "input": {"command": "pwd"}}, + { + "type": "tool_use", + "id": "call_edit", + "name": "MultiEdit", + "input": {"file_path": "/tmp/demo.py", "edits": []}, + }, + ] + + +def test_openai_claude_code_tool_arguments_are_sanitized_beyond_read(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_bash", + "type": "function", + "function": {"name": "bash", "arguments": '{"cmd":"pwd"}'}, + }, + { + "id": "call_ls", + "type": "function", + "function": {"name": "ls", "arguments": '{"file_path":"/tmp"}'}, + }, + { + "id": "call_notebook", + "type": "function", + "function": {"name": "notebook_read", "arguments": '{"path":"/tmp/demo.ipynb"}'}, + }, + { + "id": "call_edit", + "type": "function", + "function": { + "name": "edit_file", + "arguments": '{"path":"/tmp/demo.py","oldString":"a","newString":"b"}', + }, + }, + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response( + openai_resp, + "claude-code-test", + {"Bash", "LS", "NotebookRead", "Edit"}, + ) + + assert converted["content"] == [ + {"type": "tool_use", "id": "call_bash", "name": "Bash", "input": {"command": "pwd"}}, + {"type": "tool_use", "id": "call_ls", "name": "LS", "input": {"path": "/tmp"}}, + { + "type": "tool_use", + "id": "call_notebook", + "name": "NotebookRead", + "input": {"notebook_path": "/tmp/demo.ipynb"}, + }, + { + "type": "tool_use", + "id": "call_edit", + "name": "Edit", + "input": {"file_path": "/tmp/demo.py", "old_string": "a", "new_string": "b"}, + }, + ] + + +def test_openai_current_claude_code_tool_aliases_are_restored(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_agent", + "type": "function", + "function": {"name": "agent", "arguments": '{"prompt":"inspect"}'}, + }, + { + "id": "call_question", + "type": "function", + "function": {"name": "ask_user_question", "arguments": '{"question":"Proceed?"}'}, + }, + { + "id": "call_plan", + "type": "function", + "function": {"name": "enter_plan_mode", "arguments": "{}"}, + }, + { + "id": "call_schedule", + "type": "function", + "function": {"name": "schedule_wakeup", "arguments": '{"delay_seconds":60}'}, + }, + ], + }, + } + ], + } + + converted = anthropic_messages.from_openai_response( + openai_resp, + "claude-code-test", + {"Agent", "AskUserQuestion", "EnterPlanMode", "ScheduleWakeup"}, + ) + + assert converted["content"] == [ + {"type": "tool_use", "id": "call_agent", "name": "Agent", "input": {"prompt": "inspect"}}, + { + "type": "tool_use", + "id": "call_question", + "name": "AskUserQuestion", + "input": {"question": "Proceed?"}, + }, + {"type": "tool_use", "id": "call_plan", "name": "EnterPlanMode", "input": {}}, + { + "type": "tool_use", + "id": "call_schedule", + "name": "ScheduleWakeup", + "input": {"delay_seconds": 60}, + }, + ] + + +def test_openai_cached_prompt_usage_maps_to_anthropic_cache_usage(): + openai_resp = { + "id": "chatcmpl_1", + "choices": [{"finish_reason": "stop", "message": {"role": "assistant", "content": "ok"}}], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 2, + "prompt_tokens_details": {"cached_tokens": 4}, + }, + } + + converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test") + + assert converted["usage"] == { + "input_tokens": 6, + "output_tokens": 2, + "cache_read_input_tokens": 4, + } + + async def _collect_stream_events(result, model): events = [] async for chunk in anthropic_messages.stream_from_openai_result(result, model): @@ -119,6 +443,50 @@ def test_streaming_openai_tool_calls_emit_anthropic_tool_use_events(): ) +def test_streaming_read_tool_call_emits_sanitized_claude_code_arguments(): + import asyncio + import json + + result = { + "response": { + "id": "chatcmpl_1", + "choices": [ + { + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_read", + "type": "function", + "function": { + "name": "Read", + "arguments": '{"path":"/tmp/demo.py","pages":""}', + }, + } + ], + }, + } + ], + } + } + + events = asyncio.run(_collect_stream_events(result, "claude-code-test")) + parsed = [(name, json.loads(data)) for name, data in events] + + assert any( + name == "content_block_start" + and payload["content_block"] == {"type": "tool_use", "id": "call_read", "name": "Read", "input": {}} + for name, payload in parsed + ) + assert any( + name == "content_block_delta" + and payload["delta"] == {"type": "input_json_delta", "partial_json": '{"file_path":"/tmp/demo.py"}'} + for name, payload in parsed + ) + + def test_streaming_openai_tool_calls_use_tool_use_stop_reason_even_if_finish_reason_is_stop(): import asyncio diff --git a/tests/test_anthropic_messages_api.py b/tests/test_anthropic_messages_api.py new file mode 100644 index 0000000..bdeb4b2 --- /dev/null +++ b/tests/test_anthropic_messages_api.py @@ -0,0 +1,188 @@ +import base64 +import struct + +import httpx +import pytest + +from skillclaw.api_server import SkillClawAPIServer +from skillclaw.config import SkillClawConfig + + +@pytest.fixture +def anthropic_server(monkeypatch, tmp_path): + monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) + return SkillClawAPIServer( + SkillClawConfig( + proxy_api_key="skillclaw", + record_enabled=False, + record_dir=str(tmp_path), + claw_type="nanoclaw", + ) + ) + + +@pytest.mark.asyncio +async def test_anthropic_count_tokens_endpoint_returns_local_estimate(anthropic_server): + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test") + try: + response = await client.post( + "/v1/messages/count_tokens", + headers={"x-api-key": "skillclaw"}, + json={ + "model": "claude-code-test", + "messages": [{"role": "user", "content": [{"type": "text", "text": "hello"}]}], + "tools": [{"name": "Read", "description": "read", "input_schema": {"type": "object"}}], + }, + ) + finally: + await client.aclose() + + assert response.status_code == 200 + assert response.json()["input_tokens"] > 0 + + +@pytest.mark.asyncio +async def test_anthropic_count_tokens_accounts_for_image_content(anthropic_server): + class FakeTokenizer: + def apply_chat_template(self, messages, tools=None, tokenize=False, add_generation_prompt=False): + return "user: screenshot" + + def __call__(self, text, add_special_tokens=False): + return {"input_ids": [1, 2, 3]} + + anthropic_server._tokenizer = FakeTokenizer() + png_header = ( + b"\x89PNG\r\n\x1a\n" + + struct.pack(">I", 13) + + b"IHDR" + + struct.pack(">II", 2000, 2000) + + b"\x08\x02\x00\x00\x00" + ) + image_data = base64.b64encode(png_header).decode("ascii") + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test") + try: + response = await client.post( + "/v1/messages/count_tokens", + headers={"x-api-key": "skillclaw"}, + json={ + "model": "claude-code-test", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "look"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": image_data, + }, + }, + ], + } + ], + }, + ) + finally: + await client.aclose() + + assert response.status_code == 200 + assert response.json()["input_tokens"] >= 5000 + + +@pytest.mark.asyncio +async def test_anthropic_messages_uses_claude_code_session_header(anthropic_server): + seen = {} + + async def fake_handle_request(body, session_id, turn_type, session_done): + seen["body"] = body + seen["session_id"] = session_id + seen["turn_type"] = turn_type + seen["session_done"] = session_done + return { + "response": { + "id": "chatcmpl_1", + "choices": [{"message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}], + "usage": {"prompt_tokens": 1, "completion_tokens": 1}, + } + } + + anthropic_server._handle_request = fake_handle_request + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test") + try: + response = await client.post( + "/v1/messages", + headers={"x-api-key": "skillclaw", "x-claude-code-session-id": "claude-session-1"}, + json={ + "model": "claude-code-test", + "messages": [{"role": "user", "content": "hi"}], + }, + ) + finally: + await client.aclose() + + assert response.status_code == 200 + assert response.json()["content"] == [{"type": "text", "text": "ok"}] + assert seen["session_id"] == "claude-session-1" + + +@pytest.mark.asyncio +async def test_anthropic_messages_preserves_registered_custom_tool_name(anthropic_server): + async def fake_handle_request(body, session_id, turn_type, session_done): + return { + "response": { + "id": "chatcmpl_1", + "choices": [ + { + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_read", + "type": "function", + "function": { + "name": "read", + "arguments": '{"path":"/tmp/demo.py","mode":"raw"}', + }, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1}, + } + } + + anthropic_server._handle_request = fake_handle_request + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test") + try: + response = await client.post( + "/v1/messages", + headers={"x-api-key": "skillclaw"}, + json={ + "model": "claude-code-test", + "messages": [{"role": "user", "content": "use custom read"}], + "tools": [ + { + "name": "read", + "description": "custom read tool", + "input_schema": {"type": "object"}, + } + ], + }, + ) + finally: + await client.aclose() + + assert response.status_code == 200 + assert response.json()["content"] == [ + { + "type": "tool_use", + "id": "call_read", + "name": "read", + "input": {"path": "/tmp/demo.py", "mode": "raw"}, + } + ] diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py index ea5212e..2370422 100644 --- a/tests/test_responses_native.py +++ b/tests/test_responses_native.py @@ -222,6 +222,221 @@ async def fake_stream(body): assert response.text == 'data: {"type":"response.created","upstream":true}\n\ndata: [DONE]\n\n' +@pytest.mark.asyncio +async def test_responses_chat_bridge_merges_previous_response_history(monkeypatch, tmp_path): + monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) + server = SkillClawAPIServer( + SkillClawConfig( + proxy_api_key="skillclaw", + record_enabled=False, + record_dir=str(tmp_path), + claw_type="nanoclaw", + ) + ) + calls = [] + + async def fake_handle_request(body, session_id, turn_type, session_done): + calls.append(body) + idx = len(calls) + return { + "response": { + "id": f"chatcmpl_{idx}", + "created": 0, + "choices": [ + {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"} + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + } + } + + server._handle_request = fake_handle_request + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test") + try: + first = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={"model": "skillclaw-model", "input": "first", "store": True}, + ) + second = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={ + "model": "skillclaw-model", + "input": "second", + "previous_response_id": first.json()["id"], + "store": True, + }, + ) + finally: + await client.aclose() + + assert first.status_code == 200 + assert second.status_code == 200 + assert calls[1]["messages"] == [ + {"role": "user", "content": "first"}, + {"role": "assistant", "content": "ok 1"}, + {"role": "user", "content": "second"}, + ] + + +@pytest.mark.asyncio +async def test_responses_continuation_keeps_new_instructions_first(monkeypatch, tmp_path): + monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) + server = SkillClawAPIServer( + SkillClawConfig( + proxy_api_key="skillclaw", + record_enabled=False, + record_dir=str(tmp_path), + claw_type="nanoclaw", + ) + ) + calls = [] + + async def fake_handle_request(body, session_id, turn_type, session_done): + calls.append(body) + idx = len(calls) + return { + "response": { + "id": f"chatcmpl_{idx}", + "created": 0, + "choices": [ + {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"} + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + } + } + + server._handle_request = fake_handle_request + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test") + try: + first = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={"model": "skillclaw-model", "input": "first", "store": True}, + ) + second = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={ + "model": "skillclaw-model", + "instructions": "new system instructions", + "input": "second", + "previous_response_id": first.json()["id"], + "store": True, + }, + ) + finally: + await client.aclose() + + assert first.status_code == 200 + assert second.status_code == 200 + assert calls[1]["messages"] == [ + {"role": "system", "content": "new system instructions"}, + {"role": "user", "content": "first"}, + {"role": "assistant", "content": "ok 1"}, + {"role": "user", "content": "second"}, + ] + + +@pytest.mark.asyncio +async def test_responses_continuation_deduplicates_replayed_output_items(monkeypatch, tmp_path): + monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None) + server = SkillClawAPIServer( + SkillClawConfig( + proxy_api_key="skillclaw", + record_enabled=False, + record_dir=str(tmp_path), + claw_type="nanoclaw", + ) + ) + calls = [] + + async def fake_handle_request(body, session_id, turn_type, session_done): + calls.append(body) + idx = len(calls) + if idx == 1: + return { + "response": { + "id": "chatcmpl_1", + "created": 0, + "choices": [ + { + "message": { + "role": "assistant", + "content": "need tool", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"debug"}'}, + } + ], + }, + "finish_reason": "tool_calls", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + } + } + return { + "response": { + "id": f"chatcmpl_{idx}", + "created": 0, + "choices": [ + {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"} + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + } + } + + server._handle_request = fake_handle_request + client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test") + try: + first = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={"model": "skillclaw-model", "input": "first", "store": True}, + ) + first_payload = first.json() + second = await client.post( + "/v1/responses", + headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"}, + json={ + "model": "skillclaw-model", + "input": [ + *first_payload["output"], + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "second"}], + }, + ], + "previous_response_id": first_payload["id"], + "store": True, + }, + ) + finally: + await client.aclose() + + assert first.status_code == 200 + assert second.status_code == 200 + assert calls[1]["messages"] == [ + {"role": "user", "content": "first"}, + { + "role": "assistant", + "content": "need tool", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "Skill", "arguments": '{"name":"debug"}'}, + } + ], + }, + {"role": "user", "content": "second"}, + ] + + def test_prepare_responses_forward_keeps_native_codex_items_out_of_chat_conversion(): server = object.__new__(SkillClawAPIServer) server.config = SkillClawConfig(