diff --git a/skillclaw/api_server.py b/skillclaw/api_server.py
index 70c4cec..e5365de 100644
--- a/skillclaw/api_server.py
+++ b/skillclaw/api_server.py
@@ -9,11 +9,13 @@
 from __future__ import annotations
 
 import asyncio
+import base64
 import json
 import logging
 import os
 import random
 import re
+import struct
 import threading
 import time
 from contextlib import asynccontextmanager
@@ -28,10 +30,10 @@
 from .config import SkillClawConfig
 from .data_formatter import ConversationSample
 from .prm_scorer import PRMScorer
-from .skill_manager import SkillManager
-from .utils import run_llm
 from .protocols import anthropic_messages as anthropic_protocol
 from .protocols import openai_responses as responses_protocol
+from .skill_manager import SkillManager
+from .utils import run_llm
 
 logger = logging.getLogger(__name__)
 
@@ -1069,6 +1071,290 @@ def _anthropic_to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
     return anthropic_protocol.to_openai_body(body)
 
 
+def _anthropic_request_tool_names(body: dict[str, Any]) -> set[str]:
+    tool_names: set[str] = set()
+    tools = body.get("tools")
+    if not isinstance(tools, list):
+        return tool_names
+    for item in tools:
+        if not isinstance(item, dict):
+            continue
+        name = str(item.get("name") or "").strip()
+        if name:
+            tool_names.add(name)
+    return tool_names
+
+
+_IMAGE_TOKEN_ESTIMATE = 1600
+
+
+def _data_url_bytes(url: str) -> bytes | None:
+    if not url.startswith("data:") or "," not in url:
+        return None
+    header, data = url.split(",", 1)
+    if ";base64" not in header:
+        return None
+    try:
+        return base64.b64decode(data, validate=False)
+    except Exception:
+        return None
+
+
+def _image_dimensions_from_bytes(data: bytes) -> tuple[int, int] | None:
+    if data.startswith(b"\x89PNG\r\n\x1a\n") and len(data) >= 24:
+        width, height = struct.unpack(">II", data[16:24])
+        return (width, height) if width > 0 and height > 0 else None
+    if data.startswith(b"GIF87a") or data.startswith(b"GIF89a"):
+        if len(data) >= 10:
+            width, height = struct.unpack("<HH", data[6:10])
+            return (width, height) if width > 0 and height > 0 else None
+        return None
+    if data.startswith(b"RIFF") and len(data) >= 30 and data[8:12] == b"WEBP":
+        if data[12:16] == b"VP8X":
+            width = int.from_bytes(data[24:27], "little") + 1
+            height = int.from_bytes(data[27:30], "little") + 1
+            return (width, height) if width > 0 and height > 0 else None
+        if data[12:16] == b"VP8 " and len(data) >= 30:
+            width = struct.unpack("<H", data[26:28])[0] & 0x3FFF
+            height = struct.unpack("<H", data[28:30])[0] & 0x3FFF
+            return (width, height) if width > 0 and height > 0 else None
+    if data.startswith(b"\xff\xd8"):
+        index = 2
+        while index + 9 < len(data):
+            if data[index] != 0xFF:
+                index += 1
+                continue
+            marker = data[index + 1]
+            index += 2
+            if marker in {0xD8, 0xD9}:
+                continue
+            if index + 2 > len(data):
+                return None
+            segment_length = struct.unpack(">H", data[index:index + 2])[0]
+            if segment_length < 2 or index + segment_length > len(data):
+                return None
+            if marker in {
+                0xC0,
+                0xC1,
+                0xC2,
+                0xC3,
+                0xC5,
+                0xC6,
+                0xC7,
+                0xC9,
+                0xCA,
+                0xCB,
+                0xCD,
+                0xCE,
+                0xCF,
+            }:
+                if segment_length >= 7:
+                    height, width = struct.unpack(">HH", data[index + 3:index + 7])
+                    return (width, height) if width > 0 and height > 0 else None
+                return None
+            index += segment_length
+    return None
+
+
+def _image_token_estimate_from_url(url: str) -> int:
+    data = _data_url_bytes(url)
+    if data is None:
+        return _IMAGE_TOKEN_ESTIMATE
+    dimensions = _image_dimensions_from_bytes(data)
+    if dimensions is None:
+        return _IMAGE_TOKEN_ESTIMATE
+    width, height = dimensions
+    return max(_IMAGE_TOKEN_ESTIMATE, (width * height + 749) // 750)
+
+
+def _image_token_estimate_from_part(content: dict[str, Any]) -> int:
+    image_url = content.get("image_url")
+    url = image_url.get("url") if isinstance(image_url, dict) else image_url
+    if not isinstance(url, str) or not url:
+        source = content.get("source") if isinstance(content.get("source"), dict) else {}
+        if source.get("type") == "base64":
+            media_type = str(source.get("media_type") or "image/png")
+            data = str(source.get("data") or "")
+            url = f"data:{media_type};base64,{data}" if data else ""
+        else:
+            url = str(content.get("url") or "")
+    if not url:
+        return _IMAGE_TOKEN_ESTIMATE
+    return _image_token_estimate_from_url(url)
+
+
+def _estimate_image_content_tokens(content: Any) -> int:
+    if isinstance(content, list):
+        return sum(_estimate_image_content_tokens(item) for item in content)
+    if isinstance(content, dict):
+        item_type = content.get("type")
+        count = _image_token_estimate_from_part(content) if item_type in {"image", "image_url", "input_image"} else 0
+        if "content" in content:
+            count += _estimate_image_content_tokens(content.get("content"))
+        return count
+    return 0
+
+
+def _token_estimate_text(content: Any) -> str:
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if not isinstance(item, dict):
+                if item is not None:
+                    parts.append(str(item))
+                continue
+            item_type = item.get("type")
+            if item_type in {"text", "input_text", "output_text"} and isinstance(item.get("text"), str):
+                parts.append(item["text"])
+            elif item_type in {"image", "image_url"}:
+                parts.append("[image]")
+            elif "content" in item:
+                parts.append(_token_estimate_text(item.get("content")))
+        return " ".join(part for part in parts if part)
+    if isinstance(content, dict):
+        return json.dumps(content, ensure_ascii=False, sort_keys=True)
+    return str(content) if content is not None else ""
+
+
+def _estimate_openai_body_input_tokens(tokenizer: Any, openai_body: dict[str, Any]) -> int:
+    messages = list(openai_body.get("messages") or [])
+    tools = openai_body.get("tools")
+    image_tokens = sum(_estimate_image_content_tokens(msg.get("content")) for msg in messages if isinstance(msg, dict))
+    if tokenizer is not None:
+        try:
+            text = tokenizer.apply_chat_template(
+                _normalize_messages_for_template(messages),
+                tools=tools if tools else None,
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+            tokenized = tokenizer(text, add_special_tokens=False)
+            input_ids = tokenized["input_ids"] if isinstance(tokenized, dict) else tokenized.input_ids
+            return max(0, len(input_ids) + image_tokens)
+        except Exception:
+            pass
+
+    text_parts = []
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        text_parts.append(f"{msg.get('role', '')}: {_token_estimate_text(msg.get('content'))}")
+        if msg.get("tool_calls"):
+            text_parts.append(json.dumps(msg.get("tool_calls"), ensure_ascii=False, sort_keys=True))
+    if tools:
+        text_parts.append(json.dumps(tools, ensure_ascii=False, sort_keys=True))
+    text = "\n".join(part for part in text_parts if part)
+    return max(1, (len(text) + 3) // 4 + image_tokens)
+
+
+def _message_identity(message: dict[str, Any]) -> str:
+    try:
+        return json.dumps(message, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
+    except Exception:
+        return str(message)
+
+
+def _split_leading_system_messages(messages: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    index = 0
+    while index < len(messages):
+        msg = messages[index]
+        if not isinstance(msg, dict) or msg.get("role") != "system":
+            break
+        index += 1
+    return messages[:index], messages[index:]
+
+
+def _canonical_overlap_message(message: dict[str, Any]) -> dict[str, Any]:
+    normalized = dict(message)
+    if "content" in normalized:
+        normalized["content"] = _flatten_message_content(normalized.get("content"))
+    return normalized
+
+
+def _merge_assistant_overlap_run(messages: list[dict[str, Any]]) -> dict[str, Any]:
+    content_parts: list[str] = []
+    tool_calls: list[Any] = []
+    for msg in messages:
+        content = _flatten_message_content(msg.get("content"))
+        if content:
+            content_parts.append(content)
+        msg_tool_calls = msg.get("tool_calls")
+        if isinstance(msg_tool_calls, list):
+            tool_calls.extend(msg_tool_calls)
+
+    merged: dict[str, Any] = {"role": "assistant", "content": " ".join(content_parts)}
+    if tool_calls:
+        merged["tool_calls"] = tool_calls
+    return merged
+
+
+def _messages_for_overlap(messages: list[dict[str, Any]]) -> list[tuple[str, int]]:
+    entries: list[tuple[str, int]] = []
+    index = 0
+    while index < len(messages):
+        msg = messages[index]
+        if not isinstance(msg, dict):
+            entries.append((_message_identity({"value": msg}), index + 1))
+            index += 1
+            continue
+
+        if msg.get("role") == "assistant":
+            run = [msg]
+            next_index = index + 1
+            has_tool_calls = isinstance(msg.get("tool_calls"), list) and bool(msg.get("tool_calls"))
+            while next_index < len(messages):
+                next_msg = messages[next_index]
+                if not isinstance(next_msg, dict) or next_msg.get("role") != "assistant":
+                    break
+                run.append(next_msg)
+                has_tool_calls = has_tool_calls or (
+                    isinstance(next_msg.get("tool_calls"), list) and bool(next_msg.get("tool_calls"))
+                )
+                next_index += 1
+            if has_tool_calls:
+                entries.append((_message_identity(_merge_assistant_overlap_run(run)), next_index))
+                index = next_index
+                continue
+
+        entries.append((_message_identity(_canonical_overlap_message(msg)), index + 1))
+        index += 1
+    return entries
+
+
+def _merge_previous_response_messages(
+    previous_messages: list[dict[str, Any]],
+    current_messages: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    if not previous_messages:
+        return current_messages
+    if not current_messages:
+        return previous_messages
+
+    current_system_messages, current_body_messages = _split_leading_system_messages(current_messages)
+    if current_system_messages:
+        _, previous_body_messages = _split_leading_system_messages(previous_messages)
+    else:
+        previous_body_messages = previous_messages
+
+    previous_entries = _messages_for_overlap(previous_body_messages)
+    current_entries = _messages_for_overlap(current_body_messages)
+    previous_keys = [key for key, _ in previous_entries]
+    current_keys = [key for key, _ in current_entries]
+    if current_keys[: len(previous_keys)] == previous_keys:
+        return current_system_messages + current_body_messages
+
+    max_overlap = min(len(previous_keys), len(current_keys))
+    overlap = 0
+    for size in range(max_overlap, 0, -1):
+        if previous_keys[-size:] == current_keys[:size]:
+            overlap = size
+            break
+    current_drop_index = current_entries[overlap - 1][1] if overlap else 0
+    return current_system_messages + previous_body_messages + current_body_messages[current_drop_index:]
+
+
 def _normalize_responses_content(content: Any) -> str:
     return responses_protocol.normalize_content_to_text(content)
 
@@ -1092,8 +1378,12 @@ def _openai_chat_to_responses_payload(payload: dict[str, Any], model: str) -> di
     return responses_protocol.from_openai_chat_payload(payload, model)
 
 
-def _openai_to_anthropic_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]:
-    return anthropic_protocol.from_openai_response(openai_resp, model)
+def _openai_to_anthropic_response(
+    openai_resp: dict[str, Any],
+    model: str,
+    tool_names: set[str] | None = None,
+) -> dict[str, Any]:
+    return anthropic_protocol.from_openai_response(openai_resp, model, tool_names)
 
 
 # ------------------------------------------------------------------ #
@@ -1420,12 +1710,29 @@ async def delete_response(
         # forwards container Anthropic SDK calls to ANTHROPIC_BASE_URL).
         # ---------------------------------------------------------------- #
 
+        @app.post("/v1/messages/count_tokens")
+        async def anthropic_count_tokens(
+            request: Request,
+            authorization: Optional[str] = Header(default=None),
+            x_api_key: Optional[str] = Header(default=None, alias="x-api-key"),
+        ):
+            owner: SkillClawAPIServer = request.app.state.owner
+            owner._mark_request_activity()
+            auth_header = authorization or (f"Bearer {x_api_key}" if x_api_key else None)
+            await owner._check_auth(auth_header)
+
+            raw_body = await request.json()
+            openai_body = _anthropic_to_openai_body(raw_body)
+            input_tokens = _estimate_openai_body_input_tokens(owner._tokenizer, openai_body)
+            return JSONResponse(content={"input_tokens": input_tokens})
+
         @app.post("/v1/messages")
         async def anthropic_messages(
             request: Request,
             authorization: Optional[str] = Header(default=None),
             x_api_key: Optional[str] = Header(default=None, alias="x-api-key"),
             x_session_id: Optional[str] = Header(default=None),
+            x_claude_code_session_id: Optional[str] = Header(default=None, alias="x-claude-code-session-id"),
             x_turn_type: Optional[str] = Header(default=None),
             x_session_done: Optional[str] = Header(default=None),
         ):
@@ -1437,6 +1744,7 @@ async def anthropic_messages(
 
             raw_body = await request.json()
             stream = bool(raw_body.get("stream", False))
+            tool_names = _anthropic_request_tool_names(raw_body)
             openai_body = _anthropic_to_openai_body(raw_body)
             model = raw_body.get("model") or owner._served_model
 
@@ -1445,7 +1753,7 @@ async def anthropic_messages(
                 rewritten_messages, _ = _rewrite_new_session_bootstrap_prompt(incoming_messages)
                 openai_body["messages"] = rewritten_messages
 
-            _raw_sid = x_session_id or ""
+            _raw_sid = x_session_id or x_claude_code_session_id or raw_body.get("session_id") or ""
             if _raw_sid:
                 session_id = _raw_sid
                 turn_type = _resolve_turn_type(x_turn_type, raw_body.get("turn_type"), default="main")
@@ -1463,10 +1771,10 @@ async def anthropic_messages(
             )
             if stream:
                 return StreamingResponse(
-                    owner._stream_anthropic_response(result, model),
+                    owner._stream_anthropic_response(result, model, tool_names),
                     media_type="text/event-stream",
                 )
-            return JSONResponse(content=_openai_to_anthropic_response(result["response"], model))
+            return JSONResponse(content=_openai_to_anthropic_response(result["response"], model, tool_names))
 
         return app
 
@@ -2237,7 +2545,12 @@ def _responses_native_enabled(self) -> bool:
         """Return whether /v1/responses should be forwarded as Responses API."""
         return str(getattr(self.config, "llm_api_mode", "chat") or "chat").lower() == "responses"
 
-    def _prepare_responses_forward(self, body: dict[str, Any], *, stream: bool) -> tuple[str, dict[str, Any], dict[str, str]]:
+    def _prepare_responses_forward(
+        self,
+        body: dict[str, Any],
+        *,
+        stream: bool,
+    ) -> tuple[str, dict[str, Any], dict[str, str]]:
         """Build URL, body, and headers for native Responses forwarding.
 
         Native mode intentionally keeps Responses-only tools (custom, web_search,
@@ -2878,9 +3191,14 @@ async def _stream_responses_response(self, response_payload: dict[str, Any]):
         async for chunk in responses_protocol.stream_response(response_payload):
             yield chunk
 
-    async def _stream_anthropic_response(self, result: dict[str, Any], model: str):
+    async def _stream_anthropic_response(
+        self,
+        result: dict[str, Any],
+        model: str,
+        tool_names: set[str] | None = None,
+    ):
         """Yield Anthropic-format SSE events from an internal result dict."""
-        async for chunk in anthropic_protocol.stream_from_openai_result(result, model):
+        async for chunk in anthropic_protocol.stream_from_openai_result(result, model, tool_names):
             yield chunk
 
 
diff --git a/skillclaw/protocols/anthropic_messages.py b/skillclaw/protocols/anthropic_messages.py
index 00cef7f..1cede6b 100644
--- a/skillclaw/protocols/anthropic_messages.py
+++ b/skillclaw/protocols/anthropic_messages.py
@@ -14,26 +14,237 @@
     "content_filter": "stop_sequence",
 }
 
+_CLAUDE_TOOL_NAME_ALIASES = {
+    "agent": "Agent",
+    "ask_user_question": "AskUserQuestion",
+    "ask-user-question": "AskUserQuestion",
+    "askuserquestion": "AskUserQuestion",
+    "bash": "Bash",
+    "cron_create": "CronCreate",
+    "cron-create": "CronCreate",
+    "croncreate": "CronCreate",
+    "cron_delete": "CronDelete",
+    "cron-delete": "CronDelete",
+    "crondelete": "CronDelete",
+    "cron_list": "CronList",
+    "cron-list": "CronList",
+    "cronlist": "CronList",
+    "edit": "Edit",
+    "edit_file": "Edit",
+    "enter_plan_mode": "EnterPlanMode",
+    "enter-plan-mode": "EnterPlanMode",
+    "enterplanmode": "EnterPlanMode",
+    "enter_worktree": "EnterWorktree",
+    "enter-worktree": "EnterWorktree",
+    "enterworktree": "EnterWorktree",
+    "exit_plan_mode": "ExitPlanMode",
+    "exit-plan-mode": "ExitPlanMode",
+    "exitplanmode": "ExitPlanMode",
+    "exit_worktree": "ExitWorktree",
+    "exit-worktree": "ExitWorktree",
+    "exitworktree": "ExitWorktree",
+    "glob": "Glob",
+    "grep": "Grep",
+    "list": "LS",
+    "ls": "LS",
+    "multi_edit": "MultiEdit",
+    "multi-edit": "MultiEdit",
+    "multiedit": "MultiEdit",
+    "notebook_edit": "NotebookEdit",
+    "notebook-edit": "NotebookEdit",
+    "notebookedit": "NotebookEdit",
+    "notebook_read": "NotebookRead",
+    "notebook-read": "NotebookRead",
+    "notebookread": "NotebookRead",
+    "read": "Read",
+    "read_file": "Read",
+    "read-file": "Read",
+    "file_read": "Read",
+    "file-read": "Read",
+    "readfile": "Read",
+    "schedule_wakeup": "ScheduleWakeup",
+    "schedule-wakeup": "ScheduleWakeup",
+    "schedulewakeup": "ScheduleWakeup",
+    "skill": "Skill",
+    "task": "Task",
+    "task_output": "TaskOutput",
+    "task-output": "TaskOutput",
+    "taskoutput": "TaskOutput",
+    "task_stop": "TaskStop",
+    "task-stop": "TaskStop",
+    "taskstop": "TaskStop",
+    "todo_write": "TodoWrite",
+    "todo-write": "TodoWrite",
+    "todowrite": "TodoWrite",
+    "web_fetch": "WebFetch",
+    "web-fetch": "WebFetch",
+    "webfetch": "WebFetch",
+    "web_search": "WebSearch",
+    "web-search": "WebSearch",
+    "websearch": "WebSearch",
+    "write": "Write",
+    "write_file": "Write",
+    "write-file": "Write",
+    "file_write": "Write",
+    "file-write": "Write",
+    "writefile": "Write",
+}
+_FILE_PATH_TOOLS = {"Edit", "MultiEdit", "Read", "Write"}
+_PATH_TOOLS = {"Glob", "Grep", "LS"}
+_NOTEBOOK_PATH_TOOLS = {"NotebookEdit", "NotebookRead"}
+
+
+def _normalize_tool_use_name(name: Any, tool_names: set[str] | None = None) -> str:
+    raw = str(name or "unknown_tool")
+    if tool_names is None:
+        return raw
+    if raw in tool_names:
+        return raw
+
+    lower_tool_names = {tool_name.lower(): tool_name for tool_name in tool_names}
+    exact_case_match = lower_tool_names.get(raw.lower())
+    if exact_case_match:
+        return exact_case_match
+
+    alias = _CLAUDE_TOOL_NAME_ALIASES.get(raw.lower())
+    if alias in tool_names:
+        return alias
+    return raw
+
+
+def _sanitize_tool_use_input(name: str, value: Any) -> dict[str, Any]:
+    """Normalize OpenAI-style tool arguments back to Claude Code schemas."""
+    parsed = json_loads_tool_input(value)
+    sanitized = dict(parsed)
+    if name == "Bash" and "command" not in sanitized and "cmd" in sanitized:
+        sanitized["command"] = sanitized.pop("cmd")
+
+    if name in _FILE_PATH_TOOLS and "file_path" not in sanitized:
+        for alias in ("path", "file"):
+            if alias in sanitized:
+                sanitized["file_path"] = sanitized[alias]
+                break
+
+    if name in _FILE_PATH_TOOLS and "file_path" in sanitized:
+        sanitized.pop("path", None)
+        sanitized.pop("file", None)
+
+    if name in _PATH_TOOLS and "path" not in sanitized:
+        for alias in ("file_path", "file"):
+            if alias in sanitized:
+                sanitized["path"] = sanitized[alias]
+                break
+
+    if name in _PATH_TOOLS and "path" in sanitized:
+        sanitized.pop("file_path", None)
+        sanitized.pop("file", None)
+
+    if name in _NOTEBOOK_PATH_TOOLS and "notebook_path" not in sanitized:
+        for alias in ("path", "file_path", "file"):
+            if alias in sanitized:
+                sanitized["notebook_path"] = sanitized[alias]
+                break
+
+    if name in _NOTEBOOK_PATH_TOOLS and "notebook_path" in sanitized:
+        sanitized.pop("path", None)
+        sanitized.pop("file_path", None)
+        sanitized.pop("file", None)
+
+    if name == "Edit":
+        for src, dst in (("oldString", "old_string"), ("newString", "new_string"), ("replaceAll", "replace_all")):
+            if src in sanitized and dst not in sanitized:
+                sanitized[dst] = sanitized.pop(src)
+    elif name == "MultiEdit" and isinstance(sanitized.get("edits"), list):
+        edits = []
+        for edit in sanitized["edits"]:
+            if not isinstance(edit, dict):
+                edits.append(edit)
+                continue
+            item = dict(edit)
+            for src, dst in (("oldString", "old_string"), ("newString", "new_string"), ("replaceAll", "replace_all")):
+                if src in item and dst not in item:
+                    item[dst] = item.pop(src)
+            edits.append(item)
+        sanitized["edits"] = edits
+
+    if name == "Read" and sanitized.get("pages") == "":
+        sanitized.pop("pages", None)
+
+    return sanitized
+
 
-def _flatten_tool_result_content(content: Any) -> str:
+def _flatten_openai_message_content(content: Any) -> str:
     if isinstance(content, str):
         return content
     if isinstance(content, list):
         parts: list[str] = []
         for item in content:
             if isinstance(item, dict):
-                if item.get("type") in {"text", "input_text", "output_text"}:
-                    text = item.get("text")
-                    if isinstance(text, str):
-                        parts.append(text)
-                elif "content" in item:
-                    parts.append(_flatten_tool_result_content(item.get("content")))
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+                elif isinstance(item.get("content"), str):
+                    parts.append(item["content"])
             elif item is not None:
                 parts.append(str(item))
         return " ".join(part for part in parts if part)
     return str(content) if content is not None else ""
 
 
+def _anthropic_usage_from_openai_usage(usage: dict[str, Any]) -> dict[str, int]:
+    prompt_tokens = int(usage.get("prompt_tokens") or usage.get("input_tokens") or 0)
+    completion_tokens = int(usage.get("completion_tokens") or usage.get("output_tokens") or 0)
+    details = usage.get("prompt_tokens_details") or usage.get("input_tokens_details") or {}
+    cached_tokens = 0
+    if isinstance(details, dict):
+        cached_tokens = int(details.get("cached_tokens") or 0)
+    input_tokens = max(0, prompt_tokens - cached_tokens)
+    out = {"input_tokens": input_tokens, "output_tokens": completion_tokens}
+    if cached_tokens:
+        out["cache_read_input_tokens"] = cached_tokens
+    return out
+
+
+def _tool_result_to_openai_content(block: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
+    text_parts: list[str] = []
+    image_parts: list[dict[str, Any]] = []
+
+    def collect(value: Any) -> None:
+        if isinstance(value, str):
+            if value:
+                text_parts.append(value)
+            return
+        if isinstance(value, list):
+            for item in value:
+                collect(item)
+            return
+        if isinstance(value, dict):
+            value_type = value.get("type")
+            if value_type in {"text", "input_text", "output_text"}:
+                text = value.get("text")
+                if isinstance(text, str) and text:
+                    text_parts.append(text)
+                return
+            if value_type == "image":
+                image_part = _image_block_to_openai_part(value)
+                if image_part:
+                    image_parts.append(image_part)
+                return
+            if "content" in value:
+                collect(value.get("content"))
+            return
+        if value is not None:
+            text_parts.append(str(value))
+
+    collect(block.get("content"))
+    text = " ".join(part for part in text_parts if part).strip()
+    if block.get("is_error") is True:
+        text = f"Tool error: {text}" if text else "Tool error"
+    elif not text:
+        text = "(image result attached)" if image_parts else "(empty)"
+    return text, image_parts
+
+
 def _image_block_to_openai_part(block: dict[str, Any]) -> dict[str, Any] | None:
     source = block.get("source") if isinstance(block.get("source"), dict) else {}
     if source.get("type") == "base64":
@@ -145,13 +356,15 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
                     }
                 )
             elif block_type == "tool_result":
+                tool_text, tool_images = _tool_result_to_openai_content(block)
                 tool_results.append(
                     {
                         "role": "tool",
                         "tool_call_id": str(block.get("tool_use_id") or ""),
-                        "content": _flatten_tool_result_content(block.get("content")),
+                        "content": tool_text,
                     }
                 )
+                content_parts.extend(tool_images)
 
         text = " ".join(text_parts).strip()
         has_image = any(part.get("type") == "image_url" for part in content_parts)
@@ -164,8 +377,8 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
             continue
         if tool_results:
             normalized.extend(tool_results)
-            if text:
-                normalized.append({**msg, "content": text})
+            if text or content_parts:
+                normalized.append({**msg, "content": openai_content})
             continue
         normalized.append({**msg, "content": openai_content})
 
@@ -186,11 +399,15 @@ def to_openai_body(body: dict[str, Any]) -> dict[str, Any]:
     return openai_body
 
 
-def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, Any]:
+def from_openai_response(
+    openai_resp: dict[str, Any],
+    model: str,
+    tool_names: set[str] | None = None,
+) -> dict[str, Any]:
     """Convert an OpenAI chat completion response to Anthropic /v1/messages format."""
     choice = openai_resp.get("choices", [{}])[0]
     message = choice.get("message", {}) if isinstance(choice.get("message"), dict) else {}
-    content_text = message.get("content") or ""
+    content_text = _flatten_openai_message_content(message.get("content"))
     raw_tool_calls = message.get("tool_calls")
     tool_calls = raw_tool_calls if isinstance(raw_tool_calls, list) else []
     finish_reason = choice.get("finish_reason", "stop")
@@ -203,18 +420,20 @@ def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, A
         if not isinstance(tool_call, dict):
             continue
         function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
+        tool_name = _normalize_tool_use_name(function.get("name"), tool_names)
         content_blocks.append(
             {
                 "type": "tool_use",
                 "id": str(tool_call.get("id") or f"call_{idx}"),
-                "name": str(function.get("name") or "unknown_tool"),
-                "input": json_loads_tool_input(function.get("arguments")),
+                "name": tool_name,
+                "input": _sanitize_tool_use_input(tool_name, function.get("arguments")),
             }
         )
     if not content_blocks:
         content_blocks.append({"type": "text", "text": ""})
 
     usage = openai_resp.get("usage", {})
+    usage = usage if isinstance(usage, dict) else {}
     return {
         "id": openai_resp.get("id", "msg_skillclaw"),
         "type": "message",
@@ -223,21 +442,23 @@ def from_openai_response(openai_resp: dict[str, Any], model: str) -> dict[str, A
         "content": content_blocks,
         "stop_reason": stop_reason,
         "stop_sequence": None,
-        "usage": {
-            "input_tokens": usage.get("prompt_tokens", 0),
-            "output_tokens": usage.get("completion_tokens", 0),
-        },
+        "usage": _anthropic_usage_from_openai_usage(usage),
     }
 
 
-async def stream_from_openai_result(result: dict[str, Any], model: str) -> AsyncIterator[str]:
+async def stream_from_openai_result(
+    result: dict[str, Any],
+    model: str,
+    tool_names: set[str] | None = None,
+) -> AsyncIterator[str]:
     """Yield Anthropic-format SSE events from an internal OpenAI chat result."""
     payload = result["response"]
-    choice = payload.get("choices", [{}])[0]
-    anthropic_payload = from_openai_response(payload, model)
+    anthropic_payload = from_openai_response(payload, model, tool_names)
     content_blocks = anthropic_payload.get("content", [])
     stop_reason = anthropic_payload.get("stop_reason") or "end_turn"
     usage = payload.get("usage", {})
+    usage = usage if isinstance(usage, dict) else {}
+    anthropic_usage = _anthropic_usage_from_openai_usage(usage)
     msg_id = payload.get("id", "msg_skillclaw")
 
     def sse(event: str, data: dict[str, Any]) -> str:
@@ -255,7 +476,7 @@ def sse(event: str, data: dict[str, Any]) -> str:
                 "model": model,
                 "stop_reason": None,
                 "stop_sequence": None,
-                "usage": {"input_tokens": usage.get("prompt_tokens", 0), "output_tokens": 0},
+                "usage": {**anthropic_usage, "output_tokens": 0},
             },
         },
     )
@@ -316,7 +537,7 @@ def sse(event: str, data: dict[str, Any]) -> str:
         {
             "type": "message_delta",
             "delta": {"stop_reason": stop_reason, "stop_sequence": None},
-            "usage": {"output_tokens": usage.get("completion_tokens", 0)},
+            "usage": {"output_tokens": anthropic_usage.get("output_tokens", 0)},
         },
     )
     yield sse("message_stop", {"type": "message_stop"})
diff --git a/tests/test_anthropic_messages.py b/tests/test_anthropic_messages.py
index 19a2576..c1974f3 100644
--- a/tests/test_anthropic_messages.py
+++ b/tests/test_anthropic_messages.py
@@ -38,6 +38,73 @@ def test_anthropic_tool_result_blocks_convert_to_openai_tool_messages():
     ]
 
 
+def test_anthropic_tool_result_error_flag_is_preserved_in_tool_content():
+    body = {
+        "model": "claude-code-test",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "toolu_1",
+                        "content": "Permission to read file is required.",
+                        "is_error": True,
+                    }
+                ],
+            },
+        ],
+    }
+
+    converted = anthropic_messages.to_openai_body(body)
+
+    assert converted["messages"] == [
+        {
+            "role": "tool",
+            "tool_call_id": "toolu_1",
+            "content": "Tool error: Permission to read file is required.",
+        }
+    ]
+
+
+def test_anthropic_tool_result_images_are_preserved_as_followup_user_content():
+    body = {
+        "model": "claude-code-test",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "toolu_1",
+                        "content": [
+                            {"type": "text", "text": "screenshot"},
+                            {
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": "image/png",
+                                    "data": "AAAA",
+                                },
+                            },
+                        ],
+                    }
+                ],
+            },
+        ],
+    }
+
+    converted = anthropic_messages.to_openai_body(body)
+
+    assert converted["messages"] == [
+        {"role": "tool", "tool_call_id": "toolu_1", "content": "screenshot"},
+        {
+            "role": "user",
+            "content": [{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}],
+        },
+    ]
+
+
 def test_openai_tool_calls_convert_to_anthropic_tool_use_blocks():
     openai_resp = {
         "id": "chatcmpl_1",
@@ -68,6 +135,263 @@ def test_openai_tool_calls_convert_to_anthropic_tool_use_blocks():
     ]
 
 
+def test_openai_read_tool_call_normalizes_to_claude_code_schema():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_read",
+                            "type": "function",
+                            "function": {
+                                "name": "read",
+                                "arguments": '{"path":"/tmp/demo.py","limit":2000,"offset":0,"pages":""}',
+                            },
+                        }
+                    ],
+                },
+            }
+        ],
+    }
+
+    converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"Read"})
+
+    assert converted["content"] == [
+        {
+            "type": "tool_use",
+            "id": "call_read",
+            "name": "Read",
+            "input": {"file_path": "/tmp/demo.py", "limit": 2000, "offset": 0},
+        }
+    ]
+
+
+def test_openai_custom_tool_name_overlapping_claude_alias_is_preserved():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_read",
+                            "type": "function",
+                            "function": {
+                                "name": "read",
+                                "arguments": '{"path":"/tmp/demo.py","mode":"raw"}',
+                            },
+                        }
+                    ],
+                },
+            }
+        ],
+    }
+
+    converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"read"})
+
+    assert converted["content"] == [
+        {
+            "type": "tool_use",
+            "id": "call_read",
+            "name": "read",
+            "input": {"path": "/tmp/demo.py", "mode": "raw"},
+        }
+    ]
+
+
+def test_openai_common_claude_code_tool_names_are_restored():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_bash",
+                            "type": "function",
+                            "function": {"name": "bash", "arguments": '{"command":"pwd"}'},
+                        },
+                        {
+                            "id": "call_edit",
+                            "type": "function",
+                            "function": {
+                                "name": "multiedit",
+                                "arguments": '{"path":"/tmp/demo.py","edits":[]}',
+                            },
+                        },
+                    ],
+                },
+            }
+        ],
+    }
+
+    converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test", {"Bash", "MultiEdit"})
+
+    assert converted["content"] == [
+        {"type": "tool_use", "id": "call_bash", "name": "Bash", "input": {"command": "pwd"}},
+        {
+            "type": "tool_use",
+            "id": "call_edit",
+            "name": "MultiEdit",
+            "input": {"file_path": "/tmp/demo.py", "edits": []},
+        },
+    ]
+
+
+def test_openai_claude_code_tool_arguments_are_sanitized_beyond_read():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_bash",
+                            "type": "function",
+                            "function": {"name": "bash", "arguments": '{"cmd":"pwd"}'},
+                        },
+                        {
+                            "id": "call_ls",
+                            "type": "function",
+                            "function": {"name": "ls", "arguments": '{"file_path":"/tmp"}'},
+                        },
+                        {
+                            "id": "call_notebook",
+                            "type": "function",
+                            "function": {"name": "notebook_read", "arguments": '{"path":"/tmp/demo.ipynb"}'},
+                        },
+                        {
+                            "id": "call_edit",
+                            "type": "function",
+                            "function": {
+                                "name": "edit_file",
+                                "arguments": '{"path":"/tmp/demo.py","oldString":"a","newString":"b"}',
+                            },
+                        },
+                    ],
+                },
+            }
+        ],
+    }
+
+    converted = anthropic_messages.from_openai_response(
+        openai_resp,
+        "claude-code-test",
+        {"Bash", "LS", "NotebookRead", "Edit"},
+    )
+
+    assert converted["content"] == [
+        {"type": "tool_use", "id": "call_bash", "name": "Bash", "input": {"command": "pwd"}},
+        {"type": "tool_use", "id": "call_ls", "name": "LS", "input": {"path": "/tmp"}},
+        {
+            "type": "tool_use",
+            "id": "call_notebook",
+            "name": "NotebookRead",
+            "input": {"notebook_path": "/tmp/demo.ipynb"},
+        },
+        {
+            "type": "tool_use",
+            "id": "call_edit",
+            "name": "Edit",
+            "input": {"file_path": "/tmp/demo.py", "old_string": "a", "new_string": "b"},
+        },
+    ]
+
+
+def test_openai_current_claude_code_tool_aliases_are_restored():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_agent",
+                            "type": "function",
+                            "function": {"name": "agent", "arguments": '{"prompt":"inspect"}'},
+                        },
+                        {
+                            "id": "call_question",
+                            "type": "function",
+                            "function": {"name": "ask_user_question", "arguments": '{"question":"Proceed?"}'},
+                        },
+                        {
+                            "id": "call_plan",
+                            "type": "function",
+                            "function": {"name": "enter_plan_mode", "arguments": "{}"},
+                        },
+                        {
+                            "id": "call_schedule",
+                            "type": "function",
+                            "function": {"name": "schedule_wakeup", "arguments": '{"delay_seconds":60}'},
+                        },
+                    ],
+                },
+            }
+        ],
+    }
+
+    converted = anthropic_messages.from_openai_response(
+        openai_resp,
+        "claude-code-test",
+        {"Agent", "AskUserQuestion", "EnterPlanMode", "ScheduleWakeup"},
+    )
+
+    assert converted["content"] == [
+        {"type": "tool_use", "id": "call_agent", "name": "Agent", "input": {"prompt": "inspect"}},
+        {
+            "type": "tool_use",
+            "id": "call_question",
+            "name": "AskUserQuestion",
+            "input": {"question": "Proceed?"},
+        },
+        {"type": "tool_use", "id": "call_plan", "name": "EnterPlanMode", "input": {}},
+        {
+            "type": "tool_use",
+            "id": "call_schedule",
+            "name": "ScheduleWakeup",
+            "input": {"delay_seconds": 60},
+        },
+    ]
+
+
+def test_openai_cached_prompt_usage_maps_to_anthropic_cache_usage():
+    openai_resp = {
+        "id": "chatcmpl_1",
+        "choices": [{"finish_reason": "stop", "message": {"role": "assistant", "content": "ok"}}],
+        "usage": {
+            "prompt_tokens": 10,
+            "completion_tokens": 2,
+            "prompt_tokens_details": {"cached_tokens": 4},
+        },
+    }
+
+    converted = anthropic_messages.from_openai_response(openai_resp, "claude-code-test")
+
+    assert converted["usage"] == {
+        "input_tokens": 6,
+        "output_tokens": 2,
+        "cache_read_input_tokens": 4,
+    }
+
+
 async def _collect_stream_events(result, model):
     events = []
     async for chunk in anthropic_messages.stream_from_openai_result(result, model):
@@ -119,6 +443,50 @@ def test_streaming_openai_tool_calls_emit_anthropic_tool_use_events():
     )
 
 
+def test_streaming_read_tool_call_emits_sanitized_claude_code_arguments():
+    import asyncio
+    import json
+
+    result = {
+        "response": {
+            "id": "chatcmpl_1",
+            "choices": [
+                {
+                    "finish_reason": "tool_calls",
+                    "message": {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "call_read",
+                                "type": "function",
+                                "function": {
+                                    "name": "Read",
+                                    "arguments": '{"path":"/tmp/demo.py","pages":""}',
+                                },
+                            }
+                        ],
+                    },
+                }
+            ],
+        }
+    }
+
+    events = asyncio.run(_collect_stream_events(result, "claude-code-test"))
+    parsed = [(name, json.loads(data)) for name, data in events]
+
+    assert any(
+        name == "content_block_start"
+        and payload["content_block"] == {"type": "tool_use", "id": "call_read", "name": "Read", "input": {}}
+        for name, payload in parsed
+    )
+    assert any(
+        name == "content_block_delta"
+        and payload["delta"] == {"type": "input_json_delta", "partial_json": '{"file_path":"/tmp/demo.py"}'}
+        for name, payload in parsed
+    )
+
+
 
 def test_streaming_openai_tool_calls_use_tool_use_stop_reason_even_if_finish_reason_is_stop():
     import asyncio
diff --git a/tests/test_anthropic_messages_api.py b/tests/test_anthropic_messages_api.py
new file mode 100644
index 0000000..bdeb4b2
--- /dev/null
+++ b/tests/test_anthropic_messages_api.py
@@ -0,0 +1,188 @@
+import base64
+import struct
+
+import httpx
+import pytest
+
+from skillclaw.api_server import SkillClawAPIServer
+from skillclaw.config import SkillClawConfig
+
+
+@pytest.fixture
+def anthropic_server(monkeypatch, tmp_path):
+    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+    return SkillClawAPIServer(
+        SkillClawConfig(
+            proxy_api_key="skillclaw",
+            record_enabled=False,
+            record_dir=str(tmp_path),
+            claw_type="nanoclaw",
+        )
+    )
+
+
+@pytest.mark.asyncio
+async def test_anthropic_count_tokens_endpoint_returns_local_estimate(anthropic_server):
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test")
+    try:
+        response = await client.post(
+            "/v1/messages/count_tokens",
+            headers={"x-api-key": "skillclaw"},
+            json={
+                "model": "claude-code-test",
+                "messages": [{"role": "user", "content": [{"type": "text", "text": "hello"}]}],
+                "tools": [{"name": "Read", "description": "read", "input_schema": {"type": "object"}}],
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert response.status_code == 200
+    assert response.json()["input_tokens"] > 0
+
+
+@pytest.mark.asyncio
+async def test_anthropic_count_tokens_accounts_for_image_content(anthropic_server):
+    class FakeTokenizer:
+        def apply_chat_template(self, messages, tools=None, tokenize=False, add_generation_prompt=False):
+            return "user: screenshot"
+
+        def __call__(self, text, add_special_tokens=False):
+            return {"input_ids": [1, 2, 3]}
+
+    anthropic_server._tokenizer = FakeTokenizer()
+    png_header = (
+        b"\x89PNG\r\n\x1a\n"
+        + struct.pack(">I", 13)
+        + b"IHDR"
+        + struct.pack(">II", 2000, 2000)
+        + b"\x08\x02\x00\x00\x00"
+    )
+    image_data = base64.b64encode(png_header).decode("ascii")
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test")
+    try:
+        response = await client.post(
+            "/v1/messages/count_tokens",
+            headers={"x-api-key": "skillclaw"},
+            json={
+                "model": "claude-code-test",
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "look"},
+                            {
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": "image/png",
+                                    "data": image_data,
+                                },
+                            },
+                        ],
+                    }
+                ],
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert response.status_code == 200
+    assert response.json()["input_tokens"] >= 5000
+
+
+@pytest.mark.asyncio
+async def test_anthropic_messages_uses_claude_code_session_header(anthropic_server):
+    seen = {}
+
+    async def fake_handle_request(body, session_id, turn_type, session_done):
+        seen["body"] = body
+        seen["session_id"] = session_id
+        seen["turn_type"] = turn_type
+        seen["session_done"] = session_done
+        return {
+            "response": {
+                "id": "chatcmpl_1",
+                "choices": [{"message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+            }
+        }
+
+    anthropic_server._handle_request = fake_handle_request
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test")
+    try:
+        response = await client.post(
+            "/v1/messages",
+            headers={"x-api-key": "skillclaw", "x-claude-code-session-id": "claude-session-1"},
+            json={
+                "model": "claude-code-test",
+                "messages": [{"role": "user", "content": "hi"}],
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert response.status_code == 200
+    assert response.json()["content"] == [{"type": "text", "text": "ok"}]
+    assert seen["session_id"] == "claude-session-1"
+
+
+@pytest.mark.asyncio
+async def test_anthropic_messages_preserves_registered_custom_tool_name(anthropic_server):
+    async def fake_handle_request(body, session_id, turn_type, session_done):
+        return {
+            "response": {
+                "id": "chatcmpl_1",
+                "choices": [
+                    {
+                        "message": {
+                            "role": "assistant",
+                            "content": "",
+                            "tool_calls": [
+                                {
+                                    "id": "call_read",
+                                    "type": "function",
+                                    "function": {
+                                        "name": "read",
+                                        "arguments": '{"path":"/tmp/demo.py","mode":"raw"}',
+                                    },
+                                }
+                            ],
+                        },
+                        "finish_reason": "tool_calls",
+                    }
+                ],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1},
+            }
+        }
+
+    anthropic_server._handle_request = fake_handle_request
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=anthropic_server.app), base_url="http://test")
+    try:
+        response = await client.post(
+            "/v1/messages",
+            headers={"x-api-key": "skillclaw"},
+            json={
+                "model": "claude-code-test",
+                "messages": [{"role": "user", "content": "use custom read"}],
+                "tools": [
+                    {
+                        "name": "read",
+                        "description": "custom read tool",
+                        "input_schema": {"type": "object"},
+                    }
+                ],
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert response.status_code == 200
+    assert response.json()["content"] == [
+        {
+            "type": "tool_use",
+            "id": "call_read",
+            "name": "read",
+            "input": {"path": "/tmp/demo.py", "mode": "raw"},
+        }
+    ]
diff --git a/tests/test_responses_native.py b/tests/test_responses_native.py
index ea5212e..2370422 100644
--- a/tests/test_responses_native.py
+++ b/tests/test_responses_native.py
@@ -222,6 +222,221 @@ async def fake_stream(body):
     assert response.text == 'data: {"type":"response.created","upstream":true}\n\ndata: [DONE]\n\n'
 
 
+@pytest.mark.asyncio
+async def test_responses_chat_bridge_merges_previous_response_history(monkeypatch, tmp_path):
+    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+    server = SkillClawAPIServer(
+        SkillClawConfig(
+            proxy_api_key="skillclaw",
+            record_enabled=False,
+            record_dir=str(tmp_path),
+            claw_type="nanoclaw",
+        )
+    )
+    calls = []
+
+    async def fake_handle_request(body, session_id, turn_type, session_done):
+        calls.append(body)
+        idx = len(calls)
+        return {
+            "response": {
+                "id": f"chatcmpl_{idx}",
+                "created": 0,
+                "choices": [
+                    {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"}
+                ],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+            }
+        }
+
+    server._handle_request = fake_handle_request
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test")
+    try:
+        first = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={"model": "skillclaw-model", "input": "first", "store": True},
+        )
+        second = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={
+                "model": "skillclaw-model",
+                "input": "second",
+                "previous_response_id": first.json()["id"],
+                "store": True,
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert first.status_code == 200
+    assert second.status_code == 200
+    assert calls[1]["messages"] == [
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "ok 1"},
+        {"role": "user", "content": "second"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_responses_continuation_keeps_new_instructions_first(monkeypatch, tmp_path):
+    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+    server = SkillClawAPIServer(
+        SkillClawConfig(
+            proxy_api_key="skillclaw",
+            record_enabled=False,
+            record_dir=str(tmp_path),
+            claw_type="nanoclaw",
+        )
+    )
+    calls = []
+
+    async def fake_handle_request(body, session_id, turn_type, session_done):
+        calls.append(body)
+        idx = len(calls)
+        return {
+            "response": {
+                "id": f"chatcmpl_{idx}",
+                "created": 0,
+                "choices": [
+                    {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"}
+                ],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+            }
+        }
+
+    server._handle_request = fake_handle_request
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test")
+    try:
+        first = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={"model": "skillclaw-model", "input": "first", "store": True},
+        )
+        second = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={
+                "model": "skillclaw-model",
+                "instructions": "new system instructions",
+                "input": "second",
+                "previous_response_id": first.json()["id"],
+                "store": True,
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert first.status_code == 200
+    assert second.status_code == 200
+    assert calls[1]["messages"] == [
+        {"role": "system", "content": "new system instructions"},
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "ok 1"},
+        {"role": "user", "content": "second"},
+    ]
+
+
+@pytest.mark.asyncio
+async def test_responses_continuation_deduplicates_replayed_output_items(monkeypatch, tmp_path):
+    monkeypatch.setattr(SkillClawAPIServer, "_load_tokenizer", lambda self: None)
+    server = SkillClawAPIServer(
+        SkillClawConfig(
+            proxy_api_key="skillclaw",
+            record_enabled=False,
+            record_dir=str(tmp_path),
+            claw_type="nanoclaw",
+        )
+    )
+    calls = []
+
+    async def fake_handle_request(body, session_id, turn_type, session_done):
+        calls.append(body)
+        idx = len(calls)
+        if idx == 1:
+            return {
+                "response": {
+                    "id": "chatcmpl_1",
+                    "created": 0,
+                    "choices": [
+                        {
+                            "message": {
+                                "role": "assistant",
+                                "content": "need tool",
+                                "tool_calls": [
+                                    {
+                                        "id": "call_1",
+                                        "type": "function",
+                                        "function": {"name": "Skill", "arguments": '{"name":"debug"}'},
+                                    }
+                                ],
+                            },
+                            "finish_reason": "tool_calls",
+                        }
+                    ],
+                    "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+                }
+            }
+        return {
+            "response": {
+                "id": f"chatcmpl_{idx}",
+                "created": 0,
+                "choices": [
+                    {"message": {"role": "assistant", "content": f"ok {idx}"}, "finish_reason": "stop"}
+                ],
+                "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+            }
+        }
+
+    server._handle_request = fake_handle_request
+    client = httpx.AsyncClient(transport=httpx.ASGITransport(app=server.app), base_url="http://test")
+    try:
+        first = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={"model": "skillclaw-model", "input": "first", "store": True},
+        )
+        first_payload = first.json()
+        second = await client.post(
+            "/v1/responses",
+            headers={"Authorization": "Bearer skillclaw", "Session_id": "codex-session-1"},
+            json={
+                "model": "skillclaw-model",
+                "input": [
+                    *first_payload["output"],
+                    {
+                        "type": "message",
+                        "role": "user",
+                        "content": [{"type": "input_text", "text": "second"}],
+                    },
+                ],
+                "previous_response_id": first_payload["id"],
+                "store": True,
+            },
+        )
+    finally:
+        await client.aclose()
+
+    assert first.status_code == 200
+    assert second.status_code == 200
+    assert calls[1]["messages"] == [
+        {"role": "user", "content": "first"},
+        {
+            "role": "assistant",
+            "content": "need tool",
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {"name": "Skill", "arguments": '{"name":"debug"}'},
+                }
+            ],
+        },
+        {"role": "user", "content": "second"},
+    ]
+
+
 def test_prepare_responses_forward_keeps_native_codex_items_out_of_chat_conversion():
     server = object.__new__(SkillClawAPIServer)
     server.config = SkillClawConfig(