diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..51b8b4b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Run tests + run: python tests/test_flush.py + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Check syntax + run: | + python -m py_compile hooks/core.py + python -m py_compile hooks/adapters/cursor.py + python -m py_compile hooks/adapters/claude_code.py + python -m py_compile hooks/flush.py + + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: ShellCheck + uses: ludeeus/action-shellcheck@master + with: + scandir: hooks + additional_files: install.sh uninstall.sh diff --git a/README.md b/README.md index 82393c7..3f7dfec 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,21 @@ # coding-agent-insights +[![CI](https://github.com/mazzucci/coding-agent-insights/actions/workflows/ci.yml/badge.svg)](https://github.com/mazzucci/coding-agent-insights/actions/workflows/ci.yml) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) +[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) + Session tracing and observability for AI coding agents, powered by [Phoenix](https://github.com/Arize-ai/phoenix). -Every agent interaction — prompts, tool calls, file edits, shell commands, thinking steps — is captured automatically and sent to Phoenix for search, replay, and analysis. Currently supports Cursor IDE, with plans to expand to other coding agents. +Every agent interaction — prompts, tool calls, file edits, shell commands, thinking steps — is captured automatically and sent to Phoenix for search, replay, and analysis. + +## Supported agents + +| Agent | Mechanism | Status | +|---|---|---| +| **Cursor** | Hook events → JSONL buffer → flush on session end | ✅ Stable | +| **Claude Code** | Stop hook → JSONL transcript parser → flush | ✅ New | + +Both agents produce **identical span structures** in Phoenix — same trace hierarchy, same attributes, same OpenInference conventions. You can compare sessions across agents in a single Phoenix project. ## How it works @@ -11,21 +24,41 @@ flowchart LR subgraph cursor [Cursor IDE] A[Hook event] -->|stdin JSON| B[trace-hook.sh] end + subgraph claude [Claude Code] + G[Stop hook] -->|transcript path| H[claude-code-trace-hook.sh] + end B -->|append| C["/tmp/cursor-traces.jsonl"] - B -->|"on stop/sessionEnd"| D["flush.py (uv run)"] - D -->|read & transform| C - D -->|Phoenix SDK| E[Phoenix] - E --> F[Traces & Sessions UI] + B -->|"on stop/sessionEnd"| D["flush.py → CursorAdapter"] + H -->|read| I["~/.claude/projects/.../session.jsonl"] + H -->|parse| J["flush.py → ClaudeCodeAdapter"] + D --> E[core.py] + J --> E + E -->|Phoenix SDK| F[Phoenix] + F --> K[Traces & Sessions UI] ``` -**Hot path (~5 ms):** Every Cursor hook event is piped to `trace-hook.sh`, a bash script that appends the raw JSON to a local buffer file. 
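+Both paths converge on the same normalized representation before anything is sent to Phoenix. A rough sketch, using the field names from the `NormalizedEvent` dataclass in `hooks/core.py` below (the literal values here are invented):
+
+```python
+# Sketch only: values are illustrative; field names match hooks/core.py.
+from hooks.core import NormalizedEvent
+
+prompt = NormalizedEvent(
+    event_type="prompt",               # a prompt starts a new turn (one trace per turn)
+    conversation_id="tab-or-session",  # groups turns into a Phoenix session
+    timestamp=1700000000.0,            # epoch seconds
+    input_value="Refactor the flush script",
+    agent_type="cursor",               # a Claude Code prompt has the same shape
+)
+tool = NormalizedEvent(
+    event_type="tool_use",
+    conversation_id="tab-or-session",
+    timestamp=1700000003.2,
+    name="tool:read_file",             # hypothetical tool name
+    input_value='{"path": "hooks/core.py"}',
+    input_mime_type="application/json",
+    agent_type="claude_code",
+)
+```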
+### Cursor + +**Hot path (~5 ms):** Every Cursor hook event is piped to `trace-hook.sh`, which appends the raw JSON to a local buffer file. + +**Flush (on session end):** When a session ends, `flush.py` runs in the background. The Cursor adapter reads the buffer, normalises events, and hands them to the core engine for span construction and Phoenix export. + +### Claude Code -**Flush (on session end):** When a session ends, `flush.py` runs in the background via `uv run`. It reads the buffer, groups events into per-turn traces with proper parent-child relationships, maps them to [OpenInference](https://github.com/Arize-ai/openinference) semantic conventions, and sends them to Phoenix. +**Stop hook:** Claude Code's `Stop` hook fires after each agent turn. The hook script extracts the `transcript_path` from the hook context and passes it to `flush.py`. -**Result:** Each conversation turn becomes a separate trace in Phoenix. All turns from the same Cursor tab are grouped into a Phoenix session, giving you a full conversational thread view. +**Transcript parsing:** The Claude Code adapter reads the JSONL transcript, parses user messages, assistant content blocks (text, thinking, tool_use), and tool results into normalised events. Tool use/result pairs are linked automatically. + +### Shared core + +Both adapters produce `NormalizedEvent` objects that the core engine processes uniformly: turn assignment, span building with monotonic timestamps, parent-child relationships, and Phoenix export using [OpenInference](https://github.com/Arize-ai/openinference) semantic conventions. + +**Result:** Each conversation turn becomes a separate trace in Phoenix. All turns from the same session are grouped together, giving you a full conversational thread view — regardless of which agent generated them. ## What gets captured +### Cursor + | Hook event | Span name | Content | |---|---|---| | `sessionStart` | `session` | Composer mode, background agent flag | @@ -41,6 +74,16 @@ flowchart LR | `subagentStop` | `subagent:` | Task, summary, tool count | | `stop` / `sessionEnd` | `session.end` | Status, reason, duration | +### Claude Code + +| Content block | Span name | Content | +|---|---|---| +| User message | *(first 120 chars of prompt)* | Full prompt text | +| `thinking` | `thinking` | Model reasoning text | +| `tool_use` + `tool_result` | `tool:` | Tool input, output, duration | +| `text` (assistant) | `response` | Final response text | +| Summary | `session.end` | Session summary | + ## Quick start ### Prerequisites @@ -61,102 +104,128 @@ bash install.sh The installer will: 1. Install `uv` if needed -2. Copy hook scripts to `~/.cursor/hooks/` -3. Merge hook config into `~/.cursor/hooks.json` +2. **Auto-detect** Cursor (`~/.cursor/`) and/or Claude Code (`~/.claude/`) +3. Copy hook scripts and configure each detected agent 4. Ask how you want to connect to Phoenix: - - **Local Docker** — spins up Phoenix v13.15.0 + - **Local Docker** — spins up Phoenix v13.15.0 (with gRPC on port 4317) - **Existing URL** — connects to your Phoenix instance - **Skip** — configure later -5. Ask for a Phoenix project name (default: `cursor`) +5. Ask for a Phoenix project name (default: `coding-agent-insights`) -After install, Cursor will trace all agent sessions automatically. +After install, both agents will trace sessions automatically. ### Verify -Open Phoenix at [http://localhost:6006](http://localhost:6006) (or your custom URL), start a Cursor agent conversation, and watch traces appear in the project. 
+Open Phoenix at [http://localhost:6006](http://localhost:6006), start an agent conversation, and watch traces appear in the project. ## Configuration -All settings are in `~/.cursor/hooks/.coding-agent-insights.env`: +Settings are in `.coding-agent-insights.env` in each agent's hooks directory: ```bash PHOENIX_HOST="http://localhost:6006" -PHOENIX_PROJECT="cursor" -# CURSOR_TRACES_DEBUG="true" -# CURSOR_TRACES_SKIP="field1,field2" -# CURSOR_TRACES_BUFFER="/tmp/cursor-traces.jsonl" +PHOENIX_PROJECT="coding-agent-insights" +AGENT_TYPE="cursor" # or "claude_code" +# TRACES_DEBUG="true" +# TRACES_SKIP="field1,field2" +# TRACES_LOG="/tmp/coding-agent-insights.log" ``` | Variable | Default | Purpose | |---|---|---| | `PHOENIX_HOST` | `http://localhost:6006` | Phoenix server URL | -| `PHOENIX_PROJECT` | `cursor` | Phoenix project name | -| `CURSOR_TRACES_DEBUG` | *(unset)* | Set to `true` for debug logging to `/tmp/cursor-traces.log` | -| `CURSOR_TRACES_SKIP` | *(unset)* | Comma-separated field names to redact from traces | -| `CURSOR_TRACES_BUFFER` | `/tmp/cursor-traces.jsonl` | Path to the event buffer file | +| `PHOENIX_PROJECT` | `coding-agent-insights` | Phoenix project name | +| `AGENT_TYPE` | `cursor` | Agent type (cursor / claude_code) | +| `TRACES_DEBUG` | *(unset)* | Set to `true` for debug logging | +| `TRACES_SKIP` | *(unset)* | Comma-separated field names to redact | +| `TRACES_LOG` | `/tmp/coding-agent-insights.log` | Debug log file path | + +Legacy `CURSOR_TRACES_*` env vars are still supported for backward compatibility. ## Manual flush -Traces flush automatically when a session ends. To flush manually: +Traces flush automatically. To flush manually: ```bash -uv run ~/.cursor/hooks/flush.py -``` - -With debug output: +# Cursor +uv run hooks/flush.py --agent cursor -```bash -CURSOR_TRACES_DEBUG=true uv run ~/.cursor/hooks/flush.py +# Claude Code +uv run hooks/flush.py --agent claude_code --transcript ~/.claude/projects/.../session.jsonl ``` -Check buffer size: +With debug output: ```bash -wc -l /tmp/cursor-traces.jsonl +TRACES_DEBUG=true uv run hooks/flush.py --agent cursor ``` ## Phoenix features ### Traces -Each user turn (prompt + agent response cycle) becomes a trace. Tool calls, file edits, and shell executions appear as child spans with proper input/output attribution. +Each user turn (prompt + agent response cycle) becomes a trace. Tool calls, file edits, and thinking steps appear as child spans with proper input/output attribution. ### Sessions -All turns from the same Cursor conversation are grouped into a Phoenix session. The Sessions tab shows the conversational thread with first input and last output for each turn. +All turns from the same conversation are grouped into a Phoenix session. The Sessions tab shows the conversational thread with first input and last output for each turn. + +### Cross-agent comparison + +Both Cursor and Claude Code traces appear in the same Phoenix project. Compare how different agents handle the same tasks, analyse tool usage patterns, and identify which workflows are most effective. ### Golden datasets Save exemplary traces to Phoenix datasets for future reference — proven prompt patterns, successful tool chains, or reference workflows. See the [coding-agent-insights skill](skills/insights/SKILL.md) for programmatic examples. 
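+The manual flush shown above can also be driven from Python for scripting or debugging. This is roughly what `uv run hooks/flush.py --agent cursor` does internally (a sketch based on `flush.py` and `core.py` in this repo, not a separately supported API):
+
+```python
+from hooks.adapters.cursor import CursorAdapter
+from hooks.core import process_and_send
+
+events = CursorAdapter().read_events()  # atomically drains the JSONL buffer
+if events:
+    process_and_send(events)            # redact, assign turns, build spans, post to Phoenix
+```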
-## Uninstall +## Architecture -```bash -bash uninstall.sh +``` +hooks/ +├── flush.py # Entrypoint: dispatches to adapter → core → Phoenix +├── core.py # Agent-agnostic engine: NormalizedEvent, turns, spans, posting +├── trace-hook.sh # Cursor: hot-path bash hook (~5ms) +├── claude-code-trace-hook.sh # Claude Code: Stop hook script +└── adapters/ + ├── __init__.py # Adapter registry + ├── cursor.py # Cursor: buffer I/O + event normalisation + └── claude_code.py # Claude Code: JSONL transcript parser + +tests/ +└── test_flush.py # 35 tests covering core, both adapters, cross-adapter parity + +install.sh # Multi-agent installer with auto-detection +uninstall.sh # Cleanup script +docker-compose.yml # Phoenix with HTTP (6006) + gRPC (4317) ``` -This removes hook scripts and config entries. Optionally stops the Phoenix container and removes its data volume. +### Adapter pattern -## Architecture +Each adapter implements: +- `read_events()` → `list[NormalizedEvent]` +- Agent-specific I/O and event normalisation -``` -~/.cursor/ -├── hooks.json # Cursor hook config (managed by installer) -└── hooks/ - ├── trace-hook.sh # Bash hot-path: buffers events (~5ms) - ├── flush.py # Python: transforms & sends to Phoenix - └── .coding-agent-insights.env # User settings (Phoenix URL, project, etc.) +The core engine handles everything else: turn assignment, span building with monotonic timestamps, parent-child relationships, session labels, and Phoenix export. + +**Adding a new agent:** Create `adapters/your_agent.py` with a class that implements `read_events()` returning `NormalizedEvent` objects. Register it in `adapters/__init__.py`. The core engine handles the rest. + +## Tests + +```bash +python tests/test_flush.py ``` -- **trace-hook.sh** runs for every hook event. It sources `.coding-agent-insights.env`, appends the JSON payload to the buffer, and triggers `flush.py` on `stop`/`sessionEnd`. -- **flush.py** runs via `uv run` (isolated Python with `arize-phoenix-client`). It reads the buffer, splits events into turns, builds OpenInference-compliant spans, and posts them to Phoenix. -- **Buffer file** (`/tmp/cursor-traces.jsonl`) acts as a resilient intermediary. If Phoenix is unreachable, the buffer is preserved for retry. +The test suite covers: +- **Core engine** (20 tests): turn assignment, sequencing, timestamps, parent-child relationships, redaction, edge cases +- **Cursor adapter** (6 tests): event normalisation, all hook types, atomic buffer drain +- **Claude Code adapter** (8 tests): transcript parsing, tool use/result pairing, thinking blocks, multi-turn, timestamps +- **Cross-adapter parity** (1 test): both adapters produce consistent span structures ## Contributing 1. Fork the repo 2. Make your changes -3. Test with a real Cursor session +3. Run `python tests/test_flush.py` (all 35 tests must pass) 4. 
Submit a PR ## License diff --git a/docker-compose.yml b/docker-compose.yml index 1ff4c34..1ac3f65 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,8 @@ services: image: arizephoenix/phoenix:13.15.0 container_name: coding-agent-insights-phoenix ports: - - "6006:6006" + - "6006:6006" # HTTP (UI + REST API) + - "4317:4317" # gRPC (OpenTelemetry collector) volumes: - phoenix-data:/data restart: unless-stopped diff --git a/hooks/adapters/__init__.py b/hooks/adapters/__init__.py new file mode 100644 index 0000000..c4c9e29 --- /dev/null +++ b/hooks/adapters/__init__.py @@ -0,0 +1,24 @@ +""" +coding-agent-insights — adapter registry + +Each adapter normalises agent-specific events into NormalizedEvent objects +that the core engine can process uniformly. +""" +from hooks.adapters.cursor import CursorAdapter +from hooks.adapters.claude_code import ClaudeCodeAdapter + +ADAPTERS = { + "cursor": CursorAdapter, + "claude_code": ClaudeCodeAdapter, +} + + +def get_adapter(agent_type: str): + """Return the adapter class for a given agent type.""" + adapter_cls = ADAPTERS.get(agent_type) + if adapter_cls is None: + raise ValueError( + f"Unknown agent type: {agent_type!r}. " + f"Available: {', '.join(ADAPTERS)}" + ) + return adapter_cls() diff --git a/hooks/adapters/claude_code.py b/hooks/adapters/claude_code.py new file mode 100644 index 0000000..b086844 --- /dev/null +++ b/hooks/adapters/claude_code.py @@ -0,0 +1,386 @@ +""" +coding-agent-insights — Claude Code adapter + +Parses Claude Code JSONL session transcripts and normalises them into +NormalizedEvent objects that match the same trace structure as Cursor. + +Claude Code transcript format: + ~/.claude/projects/{project-path}/{sessionId}.jsonl + Each line is a JSON object with fields: + type: "user" | "assistant" | "result" | "summary" + uuid, parentUuid, sessionId, timestamp + message: { role, content: [...] } + +Assistant content blocks have types: + "text" — plain text response + "thinking" — model reasoning + "tool_use" — tool invocation (id, name, input) + "tool_result" — comes in a subsequent "user" message (linked by tool_use_id) + +Sub-agent transcripts: agent-{shortId}.jsonl with isSidechain: true +""" +import glob +import json +import os +from datetime import datetime, timezone + +from hooks.core import NormalizedEvent, log + + +# Claude Code stores transcripts here +CLAUDE_PROJECTS_DIR = os.environ.get( + "CLAUDE_PROJECTS_DIR", + os.path.expanduser("~/.claude/projects"), +) + + +class ClaudeCodeAdapter: + """Parse Claude Code JSONL transcripts into NormalizedEvents.""" + + agent_type = "claude_code" + + def read_events(self, transcript_path: str | None = None) -> list[NormalizedEvent]: + """Read a Claude Code transcript and return NormalizedEvents. + + Args: + transcript_path: Path to a specific .jsonl transcript file. + If None, reads from CLAUDE_TRANSCRIPT_PATH env var. + """ + path = transcript_path or os.environ.get("CLAUDE_TRANSCRIPT_PATH", "") + if not path: + log("Claude Code: no transcript path provided") + return [] + + if not os.path.exists(path): + log(f"Claude Code: transcript not found: {path}") + return [] + + return self._parse_transcript(path) + + def read_session(self, session_id: str, project_path: str = "") -> list[NormalizedEvent]: + """Read all transcripts for a given session ID. + + Searches the Claude projects directory for matching session files, + including sub-agent sidechains. 
+ """ + if not project_path: + project_path = CLAUDE_PROJECTS_DIR + + events = [] + + # Find matching session files + pattern = os.path.join(project_path, "**", f"{session_id}.jsonl") + for filepath in glob.glob(pattern, recursive=True): + events.extend(self._parse_transcript(filepath)) + + # Also look for sub-agent transcripts + base_dir = os.path.dirname(pattern) if events else project_path + for filepath in glob.glob( + os.path.join(project_path, "**", f"agent-*.jsonl"), recursive=True + ): + try: + with open(filepath) as f: + first_line = f.readline() + if first_line: + first = json.loads(first_line) + if first.get("sessionId") == session_id: + events.extend( + self._parse_transcript(filepath, is_subagent=True) + ) + except (json.JSONDecodeError, OSError): + continue + + events.sort(key=lambda e: e.timestamp) + return events + + # ── Transcript parsing ──────────────────────────────────────────────────── + + def _parse_transcript( + self, path: str, is_subagent: bool = False + ) -> list[NormalizedEvent]: + """Parse a single JSONL transcript file into NormalizedEvents.""" + events: list[NormalizedEvent] = [] + pending_tool_uses: dict[str, NormalizedEvent] = {} # tool_use_id → event + session_id = "" + + try: + with open(path) as f: + lines = f.readlines() + except OSError as e: + log(f"Claude Code: failed to read {path}: {e}") + return [] + + for line_num, line in enumerate(lines): + line = line.strip() + if not line: + continue + try: + raw = json.loads(line) + except json.JSONDecodeError as e: + log(f"Claude Code: malformed line {line_num} in {path}: {e}") + continue + + if not session_id: + session_id = raw.get("sessionId", os.path.basename(path).replace(".jsonl", "")) + + msg_type = raw.get("type", "") + timestamp = self._parse_timestamp(raw.get("timestamp", "")) + + if msg_type == "user": + user_events = self._parse_user_message( + raw, session_id, timestamp, pending_tool_uses, is_subagent + ) + events.extend(user_events) + + elif msg_type == "assistant": + assistant_events = self._parse_assistant_message( + raw, session_id, timestamp, pending_tool_uses, is_subagent + ) + events.extend(assistant_events) + + elif msg_type == "summary": + # Summary messages — session end marker + events.append( + NormalizedEvent( + event_type="session_end", + conversation_id=session_id, + timestamp=timestamp, + agent_type="claude_code", + output_value=self._extract_text(raw.get("summary", "")), + attributes={"is_subagent": str(is_subagent)}, + ) + ) + + # Generate a session_start event from the first real event + if events: + events.insert( + 0, + NormalizedEvent( + event_type="session_start", + conversation_id=session_id, + timestamp=events[0].timestamp - 0.001, + agent_type="claude_code", + attributes={ + "is_subagent": str(is_subagent), + "transcript_path": path, + }, + ), + ) + + return events + + def _parse_user_message( + self, + raw: dict, + session_id: str, + timestamp: float, + pending_tool_uses: dict[str, NormalizedEvent], + is_subagent: bool, + ) -> list[NormalizedEvent]: + """Parse a 'user' type message. + + User messages can be: + 1. Actual user prompts (text content) + 2. 
Tool results (content[].type == "tool_result") + """ + events: list[NormalizedEvent] = [] + message = raw.get("message", {}) + content = message.get("content", []) + + if isinstance(content, str): + # Simple text prompt + events.append( + NormalizedEvent( + event_type="prompt", + conversation_id=session_id, + timestamp=timestamp, + agent_type="claude_code", + input_value=content, + attributes={"is_subagent": str(is_subagent)}, + ) + ) + return events + + has_tool_result = False + prompt_text = "" + + for block in content: + if not isinstance(block, dict): + continue + + block_type = block.get("type", "") + + if block_type == "tool_result": + has_tool_result = True + tool_use_id = block.get("tool_use_id", "") + result_content = block.get("content", "") + is_error = block.get("is_error", False) + + # Update the pending tool_use event with its result + if tool_use_id in pending_tool_uses: + tool_event = pending_tool_uses.pop(tool_use_id) + tool_event.output_value = self._extract_text(result_content) + tool_event.is_error = bool(is_error) + if is_error: + tool_event.error_message = tool_event.output_value[:500] + tool_event.event_type = "tool_error" + # Duration: from tool_use timestamp to this result timestamp + if timestamp > tool_event.timestamp: + tool_event.duration_ms = (timestamp - tool_event.timestamp) * 1000 + + elif block_type == "text": + text = block.get("text", "") + if text: + prompt_text += text + "\n" + + # If this user message has actual prompt text (not just tool results) + if prompt_text.strip() and not has_tool_result: + events.append( + NormalizedEvent( + event_type="prompt", + conversation_id=session_id, + timestamp=timestamp, + agent_type="claude_code", + input_value=prompt_text.strip(), + attributes={"is_subagent": str(is_subagent)}, + ) + ) + + return events + + def _parse_assistant_message( + self, + raw: dict, + session_id: str, + timestamp: float, + pending_tool_uses: dict[str, NormalizedEvent], + is_subagent: bool, + ) -> list[NormalizedEvent]: + """Parse an 'assistant' type message. 
+ + Assistant content blocks: + - text: model response text + - thinking: model reasoning + - tool_use: tool invocation (result comes later in a user message) + """ + events: list[NormalizedEvent] = [] + message = raw.get("message", {}) + content = message.get("content", []) + model = message.get("model", raw.get("model", "")) + + text_parts = [] + block_offset = 0.0 + + for block in content: + if not isinstance(block, dict): + continue + + block_type = block.get("type", "") + block_ts = timestamp + block_offset + block_offset += 0.001 # Ensure ordering within a message + + if block_type == "thinking": + events.append( + NormalizedEvent( + event_type="thinking", + conversation_id=session_id, + timestamp=block_ts, + agent_type="claude_code", + model=str(model), + output_value=block.get("thinking", ""), + attributes={"is_subagent": str(is_subagent)}, + ) + ) + + elif block_type == "tool_use": + tool_name = block.get("name", "unknown") + tool_input = block.get("input", {}) + tool_id = block.get("id", "") + + tool_event = NormalizedEvent( + event_type="tool_use", + conversation_id=session_id, + timestamp=block_ts, + agent_type="claude_code", + model=str(model), + name=f"tool:{tool_name}", + input_value=json.dumps(tool_input) if isinstance(tool_input, (dict, list)) else str(tool_input), + input_mime_type="application/json", + attributes={ + "tool.name": tool_name, + "tool_use_id": tool_id, + "is_subagent": str(is_subagent), + }, + ) + events.append(tool_event) + + # Track this tool_use so we can fill in the result later + if tool_id: + pending_tool_uses[tool_id] = tool_event + + elif block_type == "text": + text = block.get("text", "") + if text: + text_parts.append(text) + + # Combine all text blocks into a single response event + if text_parts: + events.append( + NormalizedEvent( + event_type="response", + conversation_id=session_id, + timestamp=timestamp + block_offset, + agent_type="claude_code", + model=str(model), + output_value="\n".join(text_parts), + attributes={"is_subagent": str(is_subagent)}, + ) + ) + + return events + + # ── Helpers ─────────────────────────────────────────────────────────────── + + @staticmethod + def _parse_timestamp(ts_value) -> float: + """Parse a Claude Code timestamp (ISO 8601 string or epoch ms) to epoch seconds.""" + if not ts_value: + return 0.0 + + if isinstance(ts_value, (int, float)): + # Claude Code sometimes uses millisecond epoch + if ts_value > 1e12: + return ts_value / 1000.0 + return float(ts_value) + + if isinstance(ts_value, str): + try: + # ISO 8601 format + dt = datetime.fromisoformat(ts_value.replace("Z", "+00:00")) + return dt.timestamp() + except ValueError: + pass + try: + return float(ts_value) + except ValueError: + pass + + return 0.0 + + @staticmethod + def _extract_text(content) -> str: + """Extract plain text from various content formats.""" + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for block in content: + if isinstance(block, str): + parts.append(block) + elif isinstance(block, dict): + if block.get("type") == "text": + parts.append(block.get("text", "")) + elif "text" in block: + parts.append(block["text"]) + return "\n".join(parts) + return str(content) if content else "" diff --git a/hooks/adapters/cursor.py b/hooks/adapters/cursor.py new file mode 100644 index 0000000..4decdd5 --- /dev/null +++ b/hooks/adapters/cursor.py @@ -0,0 +1,249 @@ +""" +coding-agent-insights — Cursor adapter + +Normalises raw Cursor hook events (JSON dicts from trace-hook.sh) into +NormalizedEvent 
objects for the core engine. +""" +import json +import os +import tempfile + +from hooks.core import NormalizedEvent, log + + +BUFFER_PATH = os.environ.get( + "CURSOR_TRACES_BUFFER", + os.environ.get("TRACES_BUFFER", "/tmp/cursor-traces.jsonl"), +) + +# ── Hook event → event_type mapping ────────────────────────────────────────── + +HOOK_TO_EVENT_TYPE = { + "sessionStart": "session_start", + "beforeSubmitPrompt": "prompt", + "afterAgentThought": "thinking", + "afterAgentResponse": "response", + "preCompact": "compaction", + "stop": "session_end", + "sessionEnd": "session_end", + "postToolUse": "tool_use", + "postToolUseFailure": "tool_error", + "afterShellExecution": "shell", + "afterMCPExecution": "mcp", + "afterFileEdit": "file_edit", + "subagentStop": "subagent", +} + + +class CursorAdapter: + """Read Cursor hook events from the JSONL buffer and normalise them.""" + + agent_type = "cursor" + + def read_events(self) -> list[NormalizedEvent]: + """Read and drain the Cursor buffer file, returning NormalizedEvents.""" + lines = self._read_and_drain_buffer() + if not lines: + return [] + + events = [] + for line in lines: + line = line.strip() + if not line: + continue + try: + raw = json.loads(line) + except json.JSONDecodeError as e: + log(f"Cursor: skipping malformed line: {e}") + continue + events.append(self._normalise(raw)) + return events + + # ── Buffer I/O (atomic drain) ───────────────────────────────────────────── + + def _read_and_drain_buffer(self) -> list[str]: + """Atomically read and drain the buffer file. + + Uses rename-and-read to avoid a race where events appended between + our read() and truncate() are silently lost. + """ + if not os.path.exists(BUFFER_PATH): + return [] + + buf_dir = os.path.dirname(BUFFER_PATH) or "/tmp" + drain_path = os.path.join( + buf_dir, + f".cursor-traces-drain-{os.getpid()}.jsonl", + ) + + try: + os.rename(BUFFER_PATH, drain_path) + except FileNotFoundError: + return [] + except OSError as e: + log(f"Cursor: rename failed, falling back to direct read: {e}") + try: + with open(BUFFER_PATH) as f: + lines = f.readlines() + open(BUFFER_PATH, "w").close() + return lines + except Exception as e2: + log(f"Cursor: fallback read failed: {e2}") + return [] + + try: + with open(drain_path) as f: + lines = f.readlines() + finally: + try: + os.unlink(drain_path) + except OSError: + pass + + return lines + + # ── Event normalisation ─────────────────────────────────────────────────── + + def _normalise(self, raw: dict) -> NormalizedEvent: + """Convert a raw Cursor hook event dict to a NormalizedEvent.""" + hook = raw.get("hook_event_name", "unknown") + event_type = HOOK_TO_EVENT_TYPE.get(hook, hook) + + event = NormalizedEvent( + event_type=event_type, + conversation_id=raw.get("conversation_id", ""), + timestamp=float(raw.get("_timestamp", 0)), + agent_type="cursor", + user_id=str(raw.get("user_email", "")), + model=str(raw.get("model", "")), + ) + + # Duration + dur = raw.get("duration_ms") or raw.get("duration") or 0 + try: + event.duration_ms = float(dur) + except (ValueError, TypeError): + event.duration_ms = 0 + + # Name + event.name = self._make_name(hook, raw) + + # Input / Output + self._extract_io(event, hook, raw) + + # Error + if hook == "postToolUseFailure" or raw.get("status") == "error": + event.is_error = True + event.error_message = str(raw.get("error_message", "")) + + # Extra attributes + self._extract_attrs(event, hook, raw) + + return event + + def _make_name(self, hook: str, raw: dict) -> str: + """Generate a span name from the hook 
event type.""" + if hook == "postToolUse": + return f"tool:{raw.get('tool_name', 'unknown')}" + if hook == "postToolUseFailure": + return f"tool:{raw.get('tool_name', 'unknown')}.error" + if hook == "afterShellExecution": + return "shell" + if hook == "afterMCPExecution": + return f"mcp:{raw.get('tool_name', 'unknown')}" + if hook == "afterFileEdit": + fp = raw.get("file_path", "unknown") + return f"edit:{os.path.basename(fp)}" + if hook == "subagentStop": + return f"subagent:{raw.get('subagent_type', 'unknown')}" + # For standard types, the core will use the event_type default + return "" + + def _extract_io(self, event: NormalizedEvent, hook: str, raw: dict) -> None: + """Extract input/output values from a raw event.""" + if hook == "beforeSubmitPrompt": + event.input_value = raw.get("prompt", "") + if "attachments" in raw: + event.attributes["attachments"] = raw["attachments"] + + elif hook == "afterAgentThought": + event.output_value = raw.get("text", "") + + elif hook == "afterAgentResponse": + event.output_value = raw.get("text", "") + + elif hook in ("postToolUse", "postToolUseFailure"): + if "tool_input" in raw: + event.input_value = json.dumps(raw["tool_input"]) if isinstance(raw["tool_input"], (dict, list)) else str(raw["tool_input"]) + event.input_mime_type = "application/json" + if "tool_output" in raw: + event.output_value = str(raw["tool_output"]) + + elif hook == "afterShellExecution": + event.input_value = str(raw.get("command", "")) + event.output_value = str(raw.get("output", "")) + + elif hook == "afterMCPExecution": + if "tool_input" in raw: + event.input_value = json.dumps(raw["tool_input"]) if isinstance(raw["tool_input"], (dict, list)) else str(raw["tool_input"]) + event.input_mime_type = "application/json" + if "result_json" in raw: + event.output_value = str(raw["result_json"]) + event.output_mime_type = "application/json" + + elif hook == "afterFileEdit": + event.input_value = raw.get("file_path", "") + if "edits" in raw: + event.output_value = json.dumps(raw["edits"]) + event.output_mime_type = "application/json" + + elif hook == "subagentStop": + event.input_value = str(raw.get("task", "")) + event.output_value = str(raw.get("summary", "")) + + def _extract_attrs(self, event: NormalizedEvent, hook: str, raw: dict) -> None: + """Extract extra attributes from a raw event.""" + # Common attributes + for k in ("conversation_id", "generation_id", "hook_event_name", "cursor_version"): + if k in raw and raw[k] is not None: + event.attributes[k] = str(raw[k]) + + if hook == "sessionStart": + for k in ("composer_mode", "is_background_agent"): + if k in raw: + event.attributes[k] = str(raw[k]) + + elif hook in ("postToolUse", "postToolUseFailure", "afterMCPExecution"): + if "tool_name" in raw: + event.attributes["tool.name"] = str(raw["tool_name"]) + if "duration" in raw: + event.attributes["duration"] = str(raw["duration"]) + if hook == "postToolUseFailure": + for k in ("failure_type",): + if k in raw: + event.attributes[k] = str(raw[k]) + + elif hook == "afterFileEdit": + if "file_path" in raw: + event.attributes["file_path"] = raw["file_path"] + + elif hook == "preCompact": + for k in ("context_tokens", "context_window_size", "context_usage_percent", + "message_count", "trigger"): + if k in raw: + event.attributes[k] = str(raw[k]) + + elif hook == "subagentStop": + for k in ("subagent_type", "status", "duration_ms", + "tool_call_count", "message_count"): + if k in raw: + event.attributes[k] = str(raw[k]) + + elif hook in ("stop", "sessionEnd"): + for k in ("status", 
"reason", "duration_ms"): + if k in raw: + event.attributes[k] = str(raw[k]) + + elif hook == "afterAgentThought": + if "duration_ms" in raw: + event.attributes["duration_ms"] = str(raw["duration_ms"]) diff --git a/hooks/claude-code-trace-hook.sh b/hooks/claude-code-trace-hook.sh new file mode 100755 index 0000000..30dac9e --- /dev/null +++ b/hooks/claude-code-trace-hook.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# coding-agent-insights — Claude Code hook script +# +# Fires on Claude Code's "Stop" hook (after each agent turn). +# Reads the transcript_path from the hook's JSON stdin and triggers +# flush.py to parse the JSONL transcript and send spans to Phoenix. +# +# Claude Code hooks pass a JSON object on stdin with: +# session_id, hook_event_name, transcript_path, ... +# +# Install by adding to ~/.claude/settings.json: +# { +# "hooks": { +# "Stop": [{ "command": "bash /path/to/claude-code-trace-hook.sh" }] +# } +# } + +ENV_FILE="$(dirname "$0")/.coding-agent-insights.env" +# shellcheck source=/dev/null +[ -f "$ENV_FILE" ] && . "$ENV_FILE" + +FLUSH_SCRIPT="$(dirname "$0")/flush.py" + +# Read the hook context from stdin +INPUT=$(cat) + +# Extract transcript_path from the JSON input +# Uses python for reliable JSON parsing (no jq dependency) +TRANSCRIPT_PATH=$(echo "$INPUT" | python3 -c " +import sys, json +try: + data = json.load(sys.stdin) + print(data.get('transcript_path', '')) +except Exception: + pass +" 2>/dev/null) + +if [ -z "$TRANSCRIPT_PATH" ]; then + # Fallback: try to find the session transcript from session_id + SESSION_ID=$(echo "$INPUT" | python3 -c " +import sys, json +try: + data = json.load(sys.stdin) + print(data.get('session_id', '')) +except Exception: + pass +" 2>/dev/null) + + if [ -n "$SESSION_ID" ]; then + # Search for the transcript in the default Claude projects directory + CLAUDE_DIR="${CLAUDE_PROJECTS_DIR:-$HOME/.claude/projects}" + TRANSCRIPT_PATH=$(find "$CLAUDE_DIR" -name "${SESSION_ID}.jsonl" -type f 2>/dev/null | head -1) + fi +fi + +if [ -z "$TRANSCRIPT_PATH" ] || [ ! -f "$TRANSCRIPT_PATH" ]; then + exit 0 # No transcript to process — exit silently +fi + +# Run flush.py with the Claude Code adapter in the background +uv run "$FLUSH_SCRIPT" --agent claude_code --transcript "$TRANSCRIPT_PATH" & + +exit 0 diff --git a/hooks/core.py b/hooks/core.py new file mode 100644 index 0000000..33aeb9f --- /dev/null +++ b/hooks/core.py @@ -0,0 +1,361 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = ["arize-phoenix-client>=2.0.0"] +# /// +""" +coding-agent-insights — core engine + +Agent-agnostic trace-building and Phoenix posting logic. Adapters +(Cursor, Claude Code, etc.) normalise raw events into the NormalizedEvent +format, then hand them here for span construction and export. 
+""" +import json +import os +import time +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Optional + +# ── Configuration ───────────────────────────────────────────────────────────── + +PHOENIX_HOST = os.environ.get("PHOENIX_HOST", "http://localhost:6006") +PHOENIX_PROJECT = os.environ.get("PHOENIX_PROJECT", "coding-agent-insights") +SKIP_FIELDS = set( + f.strip() + for f in os.environ.get("TRACES_SKIP", os.environ.get("CURSOR_TRACES_SKIP", "")).split(",") + if f.strip() +) +DEBUG = os.environ.get("TRACES_DEBUG", os.environ.get("CURSOR_TRACES_DEBUG", "")).lower() in ( + "1", + "true", + "yes", +) +LOG_PATH = os.environ.get("TRACES_LOG", "/tmp/coding-agent-insights.log") + + +def log(msg: str) -> None: + if not DEBUG: + return + try: + with open(LOG_PATH, "a") as f: + f.write(f"[{datetime.now(timezone.utc).isoformat()}] {msg}\n") + except Exception: + pass + + +# ── Normalised event dataclass ──────────────────────────────────────────────── + + +@dataclass +class NormalizedEvent: + """Agent-agnostic representation of a single trace event. + + Adapters populate these fields; the core engine uses them to build + OpenInference-compliant spans for Phoenix. + """ + + # Required + event_type: str # e.g. "prompt", "tool_use", "thinking", "response", "session_start", "session_end" + conversation_id: str = "" + timestamp: float = 0.0 # epoch seconds + + # Optional content + name: str = "" # span name override + input_value: str = "" + input_mime_type: str = "text/plain" + output_value: str = "" + output_mime_type: str = "text/plain" + duration_ms: float = 0.0 + + # Error info + is_error: bool = False + error_message: str = "" + + # Agent metadata + agent_type: str = "" # "cursor", "claude_code", etc. 
+ model: str = "" + user_id: str = "" + session_label: str = "" + + # Extra attributes (agent-specific) + attributes: dict = field(default_factory=dict) + + # Internal — set by assign_turns() + _turn_index: int = 0 + _event_seq: int = 0 + _span_id: str = "" + _parent_span_id: str = "" + _trace_id: str = "" + + +# ── Redaction ───────────────────────────────────────────────────────────────── + + +def redact_dict(d: dict) -> dict: + """Redact fields listed in SKIP_FIELDS from a dict (recursive).""" + if not SKIP_FIELDS: + return d + cleaned = {} + for k, v in d.items(): + if k in SKIP_FIELDS: + cleaned[k] = "[redacted]" + elif isinstance(v, dict): + cleaned[k] = redact_dict(v) + else: + cleaned[k] = v + return cleaned + + +def redact_event(event: NormalizedEvent) -> NormalizedEvent: + """Redact sensitive fields from a NormalizedEvent.""" + if not SKIP_FIELDS: + return event + for field_name in ("input_value", "output_value", "error_message", "name"): + if field_name in SKIP_FIELDS: + setattr(event, field_name, "[redacted]") + event.attributes = redact_dict(event.attributes) + return event + + +# ── ID generation ───────────────────────────────────────────────────────────── + + +def make_trace_id(conversation_id: str, turn_index: int = 0) -> str: + key = f"{conversation_id}:turn:{turn_index}" + return uuid.uuid5(uuid.NAMESPACE_URL, key).hex + + +def make_span_id() -> str: + return uuid.uuid4().hex[:16] + + +# ── Span kind / name mapping ───────────────────────────────────────────────── + +EVENT_TYPE_TO_SPAN_KIND = { + "tool_use": "TOOL", + "tool_error": "TOOL", + "shell": "TOOL", + "mcp": "TOOL", + "file_edit": "TOOL", +} + +EVENT_TYPE_TO_DEFAULT_NAME = { + "session_start": "session", + "prompt": "prompt", + "thinking": "thinking", + "response": "response", + "compaction": "compaction", + "session_end": "session.end", + "subagent": "subagent", +} + + +def event_to_span_name(event: NormalizedEvent) -> str: + if event.name: + return event.name + return EVENT_TYPE_TO_DEFAULT_NAME.get(event.event_type, event.event_type) + + +def event_to_span_kind(event: NormalizedEvent) -> str: + return EVENT_TYPE_TO_SPAN_KIND.get(event.event_type, "CHAIN") + + +# ── Turn assignment ─────────────────────────────────────────────────────────── + + +def assign_turns(events: list[NormalizedEvent]) -> None: + """Split events into turns. Each 'prompt' event starts a new turn. + Events before the first prompt go into turn 0. 
+ Each event gets _turn_index, _span_id, _parent_span_id, _trace_id, + and _event_seq (monotonic counter for stable ordering in Phoenix).""" + turn_counters: dict[str, int] = {} + turn_root_spans: dict[str, str] = {} + global_seq = 0 + + for e in events: + e._event_seq = global_seq + global_seq += 1 + + cid = e.conversation_id + if not cid: + e._span_id = make_span_id() + e._turn_index = 0 + continue + + if e.event_type == "prompt": + turn_counters[cid] = turn_counters.get(cid, -1) + 1 + turn_idx = turn_counters[cid] + root_sid = make_span_id() + turn_root_spans[f"{cid}:{turn_idx}"] = root_sid + e._span_id = root_sid + else: + turn_idx = turn_counters.get(cid, 0) + e._span_id = make_span_id() + root_key = f"{cid}:{turn_idx}" + if root_key in turn_root_spans: + e._parent_span_id = turn_root_spans[root_key] + + e._turn_index = turn_idx + e._trace_id = make_trace_id(cid, turn_idx) + + +# ── Session labels ──────────────────────────────────────────────────────────── + + +def find_session_labels(events: list[NormalizedEvent]) -> dict[str, str]: + """Extract session labels from the first prompt of each conversation.""" + labels: dict[str, str] = {} + for e in events: + cid = e.conversation_id + if cid and cid not in labels and e.event_type == "prompt": + labels[cid] = e.input_value[:120] if e.input_value else "untitled" + return labels + + +# ── Span building ───────────────────────────────────────────────────────────── + + +def _json_str(v: object) -> str: + return json.dumps(v) if isinstance(v, (dict, list)) else str(v) + + +def build_span(event: NormalizedEvent, session_label: str | None = None) -> dict: + ts = event.timestamp if event.timestamp else time.time() + + # Micro-offset for monotonic ordering in Phoenix + ts += event._event_seq * 0.0001 + + start_time = datetime.fromtimestamp(ts, tz=timezone.utc).isoformat() + + duration_ms = event.duration_ms + try: + duration_ms = float(duration_ms) + except (ValueError, TypeError): + duration_ms = 0 + end_ts = ts + (duration_ms / 1000.0) if duration_ms else ts + 0.001 + end_time = datetime.fromtimestamp(end_ts, tz=timezone.utc).isoformat() + + # Build attributes + attrs: dict = {} + + if event.conversation_id: + attrs["session.id"] = event.conversation_id + if event.user_id: + attrs["user.id"] = event.user_id + if event.model: + attrs["llm.model_name"] = event.model + if event.agent_type: + attrs["agent.type"] = event.agent_type + + if event.input_value: + attrs["input.value"] = event.input_value + attrs["input.mime_type"] = event.input_mime_type + if event.output_value: + attrs["output.value"] = event.output_value + attrs["output.mime_type"] = event.output_mime_type + + attrs["event.sequence"] = str(event._event_seq) + + if session_label: + attrs["session_label"] = session_label + + # Merge adapter-specific attributes + for k, v in event.attributes.items(): + attrs[k] = _json_str(v) if isinstance(v, (dict, list)) else str(v) + + # Span name — use prompt text for prompt events + name = event_to_span_name(event) + if event.event_type == "prompt" and event.input_value: + name = event.input_value[:120] + + trace_id = event._trace_id or make_trace_id( + event.conversation_id or "unknown", + event._turn_index, + ) + + span: dict = { + "name": name, + "context": { + "trace_id": trace_id, + "span_id": event._span_id or make_span_id(), + }, + "span_kind": event_to_span_kind(event), + "start_time": start_time, + "end_time": end_time, + "status_code": "ERROR" if event.is_error else "OK", + "status_message": event.error_message, + "attributes": attrs, + } 
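+    # assign_turns() makes each prompt event the root of its turn (no parent)
+    # and points every other event at that root, so tool, thinking, and
+    # response spans nest under their prompt in the Phoenix trace view.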
+ + if event._parent_span_id: + span["parent_id"] = event._parent_span_id + + return span + + +# ── Phoenix posting ─────────────────────────────────────────────────────────── + + +def post_to_phoenix(spans: list[dict]) -> bool: + try: + from phoenix.client import Client + + client = Client(base_url=PHOENIX_HOST) + result = client.spans.log_spans( + project_identifier=PHOENIX_PROJECT, + spans=spans, + ) + log(f"Phoenix SDK response: {result}") + return True + except Exception as e: + log(f"Phoenix SDK error: {e}") + return False + + +# ── Pipeline ────────────────────────────────────────────────────────────────── + + +def inject_output_on_prompts( + events: list[NormalizedEvent], spans: list[dict] +) -> None: + """Copy the last response text into the prompt span's output.value + so Phoenix shows input→output on the root turn span.""" + last_response_per_turn: dict[str, str] = {} + for e in events: + key = f"{e.conversation_id}:{e._turn_index}" + if e.event_type == "response" and e.output_value: + last_response_per_turn[key] = e.output_value[:200] + + for span, event in zip(spans, events): + if event.event_type == "prompt": + key = f"{event.conversation_id}:{event._turn_index}" + if key in last_response_per_turn: + span["attributes"]["output.value"] = last_response_per_turn[key] + span["attributes"]["output.mime_type"] = "text/plain" + + +def process_and_send(events: list[NormalizedEvent]) -> bool: + """Full pipeline: redact → labels → turns → spans → post.""" + if not events: + log("No events to process") + return True + + events = [redact_event(e) for e in events] + session_labels = find_session_labels(events) + assign_turns(events) + + spans = [] + for e in events: + label = session_labels.get(e.conversation_id) + spans.append(build_span(e, session_label=label)) + + inject_output_on_prompts(events, spans) + + log(f"Sending {len(spans)} spans to Phoenix at {PHOENIX_HOST}") + if post_to_phoenix(spans): + log(f"Successfully sent {len(spans)} spans") + return True + else: + log("Failed to send spans to Phoenix") + return False diff --git a/hooks/flush.py b/hooks/flush.py index dfed5ef..ed5184f 100644 --- a/hooks/flush.py +++ b/hooks/flush.py @@ -3,452 +3,91 @@ # dependencies = ["arize-phoenix-client>=2.0.0"] # /// """ -coding-agent-insights — flush script -Reads buffered hook events from a JSONL file, converts them to Phoenix spans, -sends them via the Phoenix client SDK, and truncates the buffer. +coding-agent-insights — flush entrypoint -Only runs on stop/sessionEnd events (called by trace-hook.sh). 
-""" -import json -import os -import tempfile -import time -import uuid -from datetime import datetime, timezone - -BUFFER_PATH = os.environ.get("CURSOR_TRACES_BUFFER", "/tmp/cursor-traces.jsonl") -PHOENIX_HOST = os.environ.get("PHOENIX_HOST", "http://localhost:6006") -PHOENIX_PROJECT = os.environ.get("PHOENIX_PROJECT", "cursor") -SKIP_FIELDS = set( - f.strip() - for f in os.environ.get("CURSOR_TRACES_SKIP", "").split(",") - if f.strip() -) -DEBUG = os.environ.get("CURSOR_TRACES_DEBUG", "").lower() in ("1", "true", "yes") -LOG_PATH = "/tmp/cursor-traces.log" - - -def log(msg: str) -> None: - if not DEBUG: - return - try: - with open(LOG_PATH, "a") as f: - f.write(f"[{datetime.now(timezone.utc).isoformat()}] {msg}\n") - except Exception: - pass - - -def redact(event: dict) -> dict: - if not SKIP_FIELDS: - return event - cleaned = {} - for k, v in event.items(): - if k in SKIP_FIELDS: - cleaned[k] = "[redacted]" - elif isinstance(v, dict): - cleaned[k] = redact(v) - else: - cleaned[k] = v - return cleaned - - -def make_trace_id(conversation_id: str, turn_index: int = 0) -> str: - key = f"{conversation_id}:turn:{turn_index}" - return uuid.uuid5(uuid.NAMESPACE_URL, key).hex - - -def make_span_id() -> str: - return uuid.uuid4().hex[:16] - - -def event_to_span_name(event: dict) -> str: - hook = event.get("hook_event_name", "unknown") - static = { - "sessionStart": "session", - "beforeSubmitPrompt": "prompt", - "afterAgentThought": "thinking", - "afterAgentResponse": "response", - "preCompact": "compaction", - "stop": "session.end", - "sessionEnd": "session.end", - } - if hook in static: - return static[hook] - if hook == "postToolUse": - return f"tool:{event.get('tool_name', 'unknown')}" - if hook == "postToolUseFailure": - return f"tool:{event.get('tool_name', 'unknown')}.error" - if hook == "afterShellExecution": - return "shell" - if hook == "afterMCPExecution": - return f"mcp:{event.get('tool_name', 'unknown')}" - if hook == "afterFileEdit": - fp = event.get("file_path", "unknown") - return f"edit:{os.path.basename(fp)}" - if hook == "subagentStop": - return f"subagent:{event.get('subagent_type', 'unknown')}" - return hook - - -def event_to_span_kind(event: dict) -> str: - hook = event.get("hook_event_name", "") - if hook in ("postToolUse", "postToolUseFailure", "afterShellExecution", - "afterMCPExecution", "afterFileEdit"): - return "TOOL" - return "CHAIN" - - -def _json_str(v: object) -> str: - return json.dumps(v) if isinstance(v, (dict, list)) else str(v) - - -def event_to_attributes(event: dict) -> dict: - attrs: dict = {} - hook = event.get("hook_event_name", "") - - cid = event.get("conversation_id") - if cid: - attrs["session.id"] = cid - if "user_email" in event: - attrs["user.id"] = str(event["user_email"]) - if "model" in event: - attrs["llm.model_name"] = str(event["model"]) - - for k in ("conversation_id", "generation_id", "hook_event_name", "cursor_version"): - if k in event and event[k] is not None: - attrs[k] = str(event[k]) - - if hook == "sessionStart": - for k in ("composer_mode", "is_background_agent"): - if k in event: - attrs[k] = str(event[k]) - - elif hook == "beforeSubmitPrompt": - if "prompt" in event: - attrs["input.value"] = event["prompt"] - attrs["input.mime_type"] = "text/plain" - if "attachments" in event: - attrs["attachments"] = json.dumps(event["attachments"]) - - elif hook == "afterAgentThought": - if "text" in event: - attrs["output.value"] = event["text"] - attrs["output.mime_type"] = "text/plain" - if "duration_ms" in event: - attrs["duration_ms"] = 
str(event["duration_ms"]) - - elif hook == "afterAgentResponse": - if "text" in event: - attrs["output.value"] = event["text"] - attrs["output.mime_type"] = "text/plain" - - elif hook == "postToolUse": - if "tool_name" in event: - attrs["tool.name"] = str(event["tool_name"]) - if "tool_input" in event: - attrs["input.value"] = _json_str(event["tool_input"]) - attrs["input.mime_type"] = "application/json" - if "tool_output" in event: - attrs["output.value"] = _json_str(event["tool_output"]) - attrs["output.mime_type"] = "text/plain" - if "duration" in event: - attrs["duration"] = str(event["duration"]) - - elif hook == "postToolUseFailure": - if "tool_name" in event: - attrs["tool.name"] = str(event["tool_name"]) - if "tool_input" in event: - attrs["input.value"] = _json_str(event["tool_input"]) - attrs["input.mime_type"] = "application/json" - for k in ("error_message", "failure_type", "duration"): - if k in event: - attrs[k] = _json_str(event[k]) - - elif hook == "afterShellExecution": - if "command" in event: - attrs["input.value"] = str(event["command"]) - attrs["input.mime_type"] = "text/plain" - if "output" in event: - attrs["output.value"] = str(event["output"]) - attrs["output.mime_type"] = "text/plain" - if "duration" in event: - attrs["duration"] = str(event["duration"]) - - elif hook == "afterMCPExecution": - if "tool_name" in event: - attrs["tool.name"] = str(event["tool_name"]) - if "tool_input" in event: - attrs["input.value"] = _json_str(event["tool_input"]) - attrs["input.mime_type"] = "application/json" - if "result_json" in event: - attrs["output.value"] = str(event["result_json"]) - attrs["output.mime_type"] = "application/json" - if "duration" in event: - attrs["duration"] = str(event["duration"]) - - elif hook == "afterFileEdit": - if "file_path" in event: - attrs["file_path"] = event["file_path"] - attrs["input.value"] = event["file_path"] - attrs["input.mime_type"] = "text/plain" - if "edits" in event: - attrs["output.value"] = json.dumps(event["edits"]) - attrs["output.mime_type"] = "application/json" - - elif hook == "preCompact": - for k in ("context_tokens", "context_window_size", "context_usage_percent", - "message_count", "trigger"): - if k in event: - attrs[k] = str(event[k]) - - elif hook == "subagentStop": - if "task" in event: - attrs["input.value"] = str(event["task"]) - attrs["input.mime_type"] = "text/plain" - if "summary" in event: - attrs["output.value"] = str(event["summary"]) - attrs["output.mime_type"] = "text/plain" - for k in ("subagent_type", "status", "duration_ms", - "tool_call_count", "message_count"): - if k in event: - attrs[k] = str(event[k]) - - elif hook in ("stop", "sessionEnd"): - for k in ("status", "reason", "duration_ms"): - if k in event: - attrs[k] = str(event[k]) - - return attrs +Reads buffered hook events, converts them to Phoenix spans, and sends them. +Supports multiple coding agents through the adapter pattern. 
+Usage: + # Cursor (default — reads from JSONL buffer) + uv run flush.py -def find_session_labels(events: list[dict]) -> dict[str, str]: - labels: dict[str, str] = {} - for e in events: - cid = e.get("conversation_id") - if cid and cid not in labels and e.get("hook_event_name") == "beforeSubmitPrompt": - prompt = e.get("prompt", "") - labels[cid] = prompt[:120] if prompt else "untitled" - return labels + # Cursor (explicit) + uv run flush.py --agent cursor + # Claude Code (reads transcript from path) + uv run flush.py --agent claude_code --transcript /path/to/session.jsonl -def assign_turns(events: list[dict]) -> None: - """Split events into turns. Each beforeSubmitPrompt starts a new turn. - Events before the first prompt (like sessionStart) go into turn 0. - Each event gets _turn_index, _span_id, _parent_span_id, _trace_id, - and _event_seq (monotonic counter for stable ordering in Phoenix).""" - turn_counters: dict[str, int] = {} - turn_root_spans: dict[str, str] = {} - global_seq = 0 # monotonic counter across all events - - for e in events: - e["_event_seq"] = global_seq - global_seq += 1 - - cid = e.get("conversation_id") - if not cid: - e["_span_id"] = make_span_id() - e["_turn_index"] = 0 - continue - - hook = e.get("hook_event_name", "") - - if hook == "beforeSubmitPrompt": - turn_counters[cid] = turn_counters.get(cid, -1) + 1 - turn_idx = turn_counters[cid] - root_sid = make_span_id() - turn_root_spans[f"{cid}:{turn_idx}"] = root_sid - e["_span_id"] = root_sid - else: - turn_idx = turn_counters.get(cid, 0) - e["_span_id"] = make_span_id() - root_key = f"{cid}:{turn_idx}" - if root_key in turn_root_spans: - e["_parent_span_id"] = turn_root_spans[root_key] - - e["_turn_index"] = turn_idx - e["_trace_id"] = make_trace_id(cid, turn_idx) - - -def build_span(event: dict, session_label: str | None = None) -> dict: - ts = event.get("_timestamp") - if ts is None: - ts = time.time() - else: - ts = float(ts) - - # Add a micro-offset based on event sequence to guarantee unique, - # monotonically increasing start_times even when the wall-clock - # timestamps are identical (the root cause of out-of-order spans - # in Phoenix/Arize). 
- seq = event.get("_event_seq", 0) - ts += seq * 0.0001 # 0.1 ms per event — well below human perception - - start_time = datetime.fromtimestamp(ts, tz=timezone.utc).isoformat() - - duration_ms = event.get("duration_ms") or event.get("duration") or 0 - try: - duration_ms = float(duration_ms) - except (ValueError, TypeError): - duration_ms = 0 - end_ts = ts + (duration_ms / 1000.0) if duration_ms else ts + 0.001 - end_time = datetime.fromtimestamp(end_ts, tz=timezone.utc).isoformat() + # Claude Code (reads transcript path from stdin, as used by hooks) + echo '{"transcript_path":"/path/to/session.jsonl"}' | uv run flush.py --agent claude_code +""" +import argparse +import json +import os +import sys - hook = event.get("hook_event_name", "") - is_error = hook == "postToolUseFailure" or event.get("status") == "error" +# Allow imports when run via `uv run` from the hooks directory +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - attrs = event_to_attributes(event) - if session_label: - attrs["session_label"] = session_label - if "_event_seq" in event: - attrs["event.sequence"] = str(event["_event_seq"]) +from hooks.core import log, process_and_send +from hooks.adapters.cursor import CursorAdapter +from hooks.adapters.claude_code import ClaudeCodeAdapter - name = event_to_span_name(event) - hook = event.get("hook_event_name", "") - if hook == "beforeSubmitPrompt" and attrs.get("input.value"): - name = attrs["input.value"][:120] - trace_id = event.get("_trace_id") or make_trace_id( - event.get("conversation_id", "unknown"), - event.get("_turn_index", 0), +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="coding-agent-insights — flush traces to Phoenix" ) - - span: dict = { - "name": name, - "context": { - "trace_id": trace_id, - "span_id": event.get("_span_id", make_span_id()), - }, - "span_kind": event_to_span_kind(event), - "start_time": start_time, - "end_time": end_time, - "status_code": "ERROR" if is_error else "OK", - "status_message": event.get("error_message", ""), - "attributes": attrs, - } - - parent_id = event.get("_parent_span_id") - if parent_id: - span["parent_id"] = parent_id - - return span - - -def post_to_phoenix(spans: list[dict]) -> bool: - try: - from phoenix.client import Client - client = Client(base_url=PHOENIX_HOST) - result = client.spans.log_spans( - project_identifier=PHOENIX_PROJECT, - spans=spans, - ) - log(f"Phoenix SDK response: {result}") - return True - except Exception as e: - log(f"Phoenix SDK error: {e}") - return False - - -def _read_and_drain_buffer() -> list[str]: - """Atomically read and drain the buffer file. - - Uses rename-and-read to avoid a race where events appended between - our read() and truncate() are silently lost. The renamed file is - deleted after reading; new hook events go straight to a fresh buffer. 
- """ - if not os.path.exists(BUFFER_PATH): - return [] - - buf_dir = os.path.dirname(BUFFER_PATH) or "/tmp" - drain_path = os.path.join( - buf_dir, - f".cursor-traces-drain-{os.getpid()}.jsonl", + parser.add_argument( + "--agent", + choices=["cursor", "claude_code"], + default=os.environ.get("AGENT_TYPE", "cursor"), + help="Coding agent type (default: cursor, or AGENT_TYPE env var)", ) - - try: - os.rename(BUFFER_PATH, drain_path) - except FileNotFoundError: - return [] # another flush already drained it - except OSError as e: - log(f"Rename failed, falling back to direct read: {e}") - # Fallback: read + truncate (original behaviour) - try: - with open(BUFFER_PATH) as f: - lines = f.readlines() - open(BUFFER_PATH, "w").close() - return lines - except Exception as e2: - log(f"Fallback read failed: {e2}") - return [] - - try: - with open(drain_path) as f: - lines = f.readlines() - finally: - try: - os.unlink(drain_path) - except OSError: - pass - - return lines + parser.add_argument( + "--transcript", + default=os.environ.get("CLAUDE_TRANSCRIPT_PATH", ""), + help="Path to Claude Code transcript JSONL (Claude Code only)", + ) + return parser.parse_args() def main() -> None: - lines = _read_and_drain_buffer() - - if not lines: - log("Buffer is empty or not found") + args = parse_args() + + if args.agent == "cursor": + adapter = CursorAdapter() + events = adapter.read_events() + elif args.agent == "claude_code": + adapter = ClaudeCodeAdapter() + transcript_path = args.transcript + + # If no transcript path provided, try reading from stdin + # (Claude Code hooks pass context as JSON on stdin) + if not transcript_path and not sys.stdin.isatty(): + try: + stdin_data = sys.stdin.read() + if stdin_data.strip(): + hook_context = json.loads(stdin_data) + transcript_path = hook_context.get("transcript_path", "") + except (json.JSONDecodeError, Exception) as e: + log(f"Failed to read transcript path from stdin: {e}") + + if not transcript_path: + log("Claude Code: no transcript path — nothing to flush") + return + + events = adapter.read_events(transcript_path=transcript_path) + else: + log(f"Unknown agent type: {args.agent}") return - events: list[dict] = [] - for line in lines: - line = line.strip() - if not line: - continue - try: - events.append(json.loads(line)) - except json.JSONDecodeError as e: - log(f"Skipping malformed line: {e}") - if not events: - try: - open(BUFFER_PATH, "w").close() - except Exception: - pass + log(f"No events from {args.agent} adapter") return - log(f"Flushing {len(events)} events to Phoenix at {PHOENIX_HOST}") - - events = [redact(e) for e in events] - session_labels = find_session_labels(events) - assign_turns(events) - - last_response_per_turn: dict[str, str] = {} - for e in events: - cid = e.get("conversation_id", "") - turn = e.get("_turn_index", 0) - key = f"{cid}:{turn}" - if e.get("hook_event_name") == "afterAgentResponse": - text = e.get("text", "") - if text: - last_response_per_turn[key] = text[:200] - - spans = [] - for e in events: - cid = e.get("conversation_id", "") - label = session_labels.get(cid) - span = build_span(e, session_label=label) - hook = e.get("hook_event_name", "") - turn = e.get("_turn_index", 0) - turn_key = f"{cid}:{turn}" - if hook == "beforeSubmitPrompt" and turn_key in last_response_per_turn: - span["attributes"]["output.value"] = last_response_per_turn[turn_key] - span["attributes"]["output.mime_type"] = "text/plain" - spans.append(span) - - if post_to_phoenix(spans): - log(f"Successfully flushed {len(spans)} spans to Phoenix") - 
else: - log("Flush failed — events were already drained from buffer") + log(f"Flushing {len(events)} events from {args.agent} to Phoenix") + process_and_send(events) if __name__ == "__main__": diff --git a/hooks/trace-hook.sh b/hooks/trace-hook.sh index d88a7bb..29e4c28 100644 --- a/hooks/trace-hook.sh +++ b/hooks/trace-hook.sh @@ -5,6 +5,7 @@ # the heavy lifting of converting events to Phoenix spans. ENV_FILE="$(dirname "$0")/.coding-agent-insights.env" +# shellcheck source=/dev/null [ -f "$ENV_FILE" ] && . "$ENV_FILE" BUFFER="${CURSOR_TRACES_BUFFER:-/tmp/cursor-traces.jsonl}" @@ -27,7 +28,7 @@ fi # Append the timestamp field to the JSON object before the closing brace. # This avoids pulling in jq as a dependency. -TAGGED=$(echo "$INPUT" | sed "s/}$/,\"_timestamp\":$TS}/") +TAGGED="${INPUT%\}},\"_timestamp\":$TS}" echo "$TAGGED" >> "$BUFFER" diff --git a/install.sh b/install.sh index 8e1f580..8bddabb 100755 --- a/install.sh +++ b/install.sh @@ -2,17 +2,15 @@ set -euo pipefail # coding-agent-insights installer -# Copies hook scripts, merges hooks.json, and optionally sets up Phoenix. +# Auto-detects Cursor and/or Claude Code, installs hooks, and sets up Phoenix. SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -HOOKS_DIR="$HOME/.cursor/hooks" -HOOKS_JSON="$HOME/.cursor/hooks.json" -ENV_FILE="$HOOKS_DIR/.coding-agent-insights.env" GREEN='\033[0;32m' YELLOW='\033[1;33m' RED='\033[0;31m' BOLD='\033[1m' +DIM='\033[2m' NC='\033[0m' info() { echo -e "${GREEN}✓${NC} $*"; } @@ -20,7 +18,11 @@ warn() { echo -e "${YELLOW}!${NC} $*"; } err() { echo -e "${RED}✗${NC} $*"; } header(){ echo -e "\n${BOLD}$*${NC}"; } -# ── Prerequisites ────────────────────────────────────────────────────────────── +# Track which agents were installed +INSTALLED_CURSOR=false +INSTALLED_CLAUDE_CODE=false + +# ── Prerequisites ───────────────────────────────────────────────────────────── header "Checking prerequisites…" @@ -53,21 +55,82 @@ else warn "docker not found (only needed for local Phoenix)" fi -# ── Copy hook scripts ────────────────────────────────────────────────────────── +# ── Detect coding agents ───────────────────────────────────────────────────── + +header "Detecting coding agents…" + +HAS_CURSOR=false +HAS_CLAUDE_CODE=false + +if [ -d "$HOME/.cursor" ]; then + HAS_CURSOR=true + info "Cursor detected (~/.cursor/)" +fi + +if [ -d "$HOME/.claude" ]; then + HAS_CLAUDE_CODE=true + info "Claude Code detected (~/.claude/)" +fi + +if [ "$HAS_CURSOR" = false ] && [ "$HAS_CLAUDE_CODE" = false ]; then + warn "Neither Cursor nor Claude Code detected." + echo "" + echo " Which agent(s) would you like to install hooks for?" + echo "" + echo " 1) Cursor" + echo " 2) Claude Code" + echo " 3) Both" + echo "" + read -rp " Choose [1/2/3]: " agent_choice + case "$agent_choice" in + 1) HAS_CURSOR=true ;; + 2) HAS_CLAUDE_CODE=true ;; + 3) HAS_CURSOR=true; HAS_CLAUDE_CODE=true ;; + *) err "Invalid choice."; exit 1 ;; + esac +fi + +# If both are detected, ask which to install +if [ "$HAS_CURSOR" = true ] && [ "$HAS_CLAUDE_CODE" = true ]; then + echo "" + echo " Both Cursor and Claude Code detected. 
Install hooks for:" + echo "" + echo " 1) Both ${DIM}(recommended)${NC}" + echo " 2) Cursor only" + echo " 3) Claude Code only" + echo "" + read -rp " Choose [1/2/3] (default: 1): " both_choice + both_choice="${both_choice:-1}" + case "$both_choice" in + 1) ;; # keep both true + 2) HAS_CLAUDE_CODE=false ;; + 3) HAS_CURSOR=false ;; + *) err "Invalid choice."; exit 1 ;; + esac +fi -header "Installing hook scripts…" +# ── Install Cursor hooks ───────────────────────────────────────────────────── -mkdir -p "$HOOKS_DIR" -cp "$SCRIPT_DIR/hooks/trace-hook.sh" "$HOOKS_DIR/trace-hook.sh" -chmod +x "$HOOKS_DIR/trace-hook.sh" -cp "$SCRIPT_DIR/hooks/flush.py" "$HOOKS_DIR/flush.py" -info "Copied trace-hook.sh and flush.py to $HOOKS_DIR" +if [ "$HAS_CURSOR" = true ]; then + header "Installing Cursor hooks…" -# ── Merge hooks.json ────────────────────────────────────────────────────────── + CURSOR_HOOKS_DIR="$HOME/.cursor/hooks" + CURSOR_HOOKS_JSON="$HOME/.cursor/hooks.json" + CURSOR_ENV_FILE="$CURSOR_HOOKS_DIR/.coding-agent-insights.env" -header "Configuring Cursor hooks…" + mkdir -p "$CURSOR_HOOKS_DIR" + cp "$SCRIPT_DIR/hooks/trace-hook.sh" "$CURSOR_HOOKS_DIR/trace-hook.sh" + chmod +x "$CURSOR_HOOKS_DIR/trace-hook.sh" + cp "$SCRIPT_DIR/hooks/flush.py" "$CURSOR_HOOKS_DIR/flush.py" + cp "$SCRIPT_DIR/hooks/core.py" "$CURSOR_HOOKS_DIR/core.py" + mkdir -p "$CURSOR_HOOKS_DIR/adapters" + cp "$SCRIPT_DIR/hooks/adapters/__init__.py" "$CURSOR_HOOKS_DIR/adapters/__init__.py" + cp "$SCRIPT_DIR/hooks/adapters/cursor.py" "$CURSOR_HOOKS_DIR/adapters/cursor.py" + cp "$SCRIPT_DIR/hooks/adapters/claude_code.py" "$CURSOR_HOOKS_DIR/adapters/claude_code.py" + info "Copied hook scripts to $CURSOR_HOOKS_DIR" -python3 -c " + # Merge hooks.json + python3 -c " import json, sys, os template_path = sys.argv[1] @@ -91,11 +154,62 @@ for event, commands in template.get('hooks', {}).items(): with open(target_path, 'w') as f: json.dump(target, f, indent=2) f.write('\n') -" "$SCRIPT_DIR/hooks/hooks.json" "$HOOKS_JSON" +" "$SCRIPT_DIR/hooks/hooks.json" "$CURSOR_HOOKS_JSON" + + info "Merged hook entries into $CURSOR_HOOKS_JSON" + INSTALLED_CURSOR=true +fi + +# ── Install Claude Code hooks ──────────────────────────────────────────────── -info "Merged hook entries into $HOOKS_JSON" +if [ "$HAS_CLAUDE_CODE" = true ]; then + header "Installing Claude Code hooks…" -# ── Phoenix setup ────────────────────────────────────────────────────────────── + CLAUDE_HOOKS_DIR="$HOME/.claude/hooks" + CLAUDE_SETTINGS="$HOME/.claude/settings.json" + CLAUDE_ENV_FILE="$CLAUDE_HOOKS_DIR/.coding-agent-insights.env" + + mkdir -p "$CLAUDE_HOOKS_DIR" + cp "$SCRIPT_DIR/hooks/claude-code-trace-hook.sh" "$CLAUDE_HOOKS_DIR/claude-code-trace-hook.sh" + chmod +x "$CLAUDE_HOOKS_DIR/claude-code-trace-hook.sh" + cp "$SCRIPT_DIR/hooks/flush.py" "$CLAUDE_HOOKS_DIR/flush.py" + cp "$SCRIPT_DIR/hooks/core.py" "$CLAUDE_HOOKS_DIR/core.py" + mkdir -p "$CLAUDE_HOOKS_DIR/adapters" + cp "$SCRIPT_DIR/hooks/adapters/__init__.py" "$CLAUDE_HOOKS_DIR/adapters/__init__.py" + cp "$SCRIPT_DIR/hooks/adapters/cursor.py" "$CLAUDE_HOOKS_DIR/adapters/cursor.py" + cp "$SCRIPT_DIR/hooks/adapters/claude_code.py" "$CLAUDE_HOOKS_DIR/adapters/claude_code.py" + info "Copied hook scripts to $CLAUDE_HOOKS_DIR" + + # Merge Claude Code settings.json + python3 -c " +import json, sys, os + +target_path = sys.argv[1] +hook_command = sys.argv[2] + +settings = {} +if os.path.exists(target_path): + with open(target_path) as f: + settings = json.load(f) + +hooks = settings.setdefault('hooks', {}) +stop_hooks = 
hooks.setdefault('Stop', []) + +# Check if our hook is already registered +existing_cmds = {h.get('command', '') for h in stop_hooks if isinstance(h, dict)} +if hook_command not in existing_cmds: + stop_hooks.append({'command': hook_command}) + +with open(target_path, 'w') as f: + json.dump(settings, f, indent=2) + f.write('\n') +" "$CLAUDE_SETTINGS" "bash $CLAUDE_HOOKS_DIR/claude-code-trace-hook.sh" + + info "Registered Stop hook in $CLAUDE_SETTINGS" + INSTALLED_CLAUDE_CODE=true +fi + +# ── Phoenix setup ───────────────────────────────────────────────────────────── header "Phoenix setup" echo "" @@ -135,7 +249,7 @@ case "$phoenix_choice" in ;; 3) PHOENIX_SETUP="skip" - warn "Skipping Phoenix setup. Set PHOENIX_HOST in $ENV_FILE later." + warn "Skipping Phoenix setup. Set PHOENIX_HOST in the env file later." ;; *) err "Invalid choice. Please run the installer again." @@ -143,34 +257,45 @@ case "$phoenix_choice" in ;; esac -# ── Project name ─────────────────────────────────────────────────────────────── +# ── Project name ────────────────────────────────────────────────────────────── -PHOENIX_PROJECT="cursor" -read -rp " Phoenix project name (default: cursor): " user_project -PHOENIX_PROJECT="${user_project:-cursor}" +PHOENIX_PROJECT="coding-agent-insights" +read -rp " Phoenix project name (default: coding-agent-insights): " user_project +PHOENIX_PROJECT="${user_project:-coding-agent-insights}" -# ── Write env file ───────────────────────────────────────────────────────────── +# ── Write env files ─────────────────────────────────────────────────────────── -cat > "$ENV_FILE" < "$env_path" </dev/null 2>&1; then info "Phoenix is running at $PHOENIX_HOST" @@ -183,7 +308,7 @@ if [ "$PHOENIX_SETUP" = "docker" ]; then done fi -# ── Validate ─────────────────────────────────────────────────────────────────── +# ── Validate ────────────────────────────────────────────────────────────────── if [ "$PHOENIX_SETUP" != "skip" ]; then if curl -sf "$PHOENIX_HOST" >/dev/null 2>&1; then @@ -193,19 +318,44 @@ if [ "$PHOENIX_SETUP" != "skip" ]; then fi fi -# ── Done ─────────────────────────────────────────────────────────────────────── +# ── Done ────────────────────────────────────────────────────────────────────── header "Installation complete!" echo "" -echo " Hook scripts: $HOOKS_DIR/trace-hook.sh" -echo " Flush script: $HOOKS_DIR/flush.py" -echo " Hooks config: $HOOKS_JSON" -echo " Settings: $ENV_FILE" + +if [ "$INSTALLED_CURSOR" = true ]; then + echo " Cursor:" + echo " Hook scripts: $CURSOR_HOOKS_DIR/" + echo " Hooks config: $CURSOR_HOOKS_JSON" + echo " Settings: $CURSOR_ENV_FILE" + echo "" +fi + +if [ "$INSTALLED_CLAUDE_CODE" = true ]; then + echo " Claude Code:" + echo " Hook scripts: $CLAUDE_HOOKS_DIR/" + echo " Settings file: $CLAUDE_SETTINGS" + echo " Env config: $CLAUDE_ENV_FILE" + echo "" +fi + if [ "$PHOENIX_SETUP" = "docker" ]; then echo " Phoenix UI: $PHOENIX_HOST" + echo "" +fi + +if [ "$INSTALLED_CURSOR" = true ]; then + echo " Cursor will now trace agent sessions automatically." +fi +if [ "$INSTALLED_CLAUDE_CODE" = true ]; then + echo " Claude Code will now trace agent sessions on each Stop hook." fi echo "" -echo " Cursor will now trace agent sessions automatically." 
-echo " Traces flush to Phoenix on session end, or manually:" -echo " uv run $HOOKS_DIR/flush.py" +echo " Traces flush to Phoenix automatically, or manually:" +if [ "$INSTALLED_CURSOR" = true ]; then + echo " uv run $CURSOR_HOOKS_DIR/flush.py --agent cursor" +fi +if [ "$INSTALLED_CLAUDE_CODE" = true ]; then + echo " uv run $CLAUDE_HOOKS_DIR/flush.py --agent claude_code --transcript " +fi echo "" diff --git a/tests/test_flush.py b/tests/test_flush.py index 02f99e9..a51ef40 100644 --- a/tests/test_flush.py +++ b/tests/test_flush.py @@ -1,399 +1,377 @@ """ -Tests for coding-agent-insights flush.py +Tests for coding-agent-insights Validates: -1. Turn ordering / assignment correctness -2. Monotonic sequence numbers and micro-offset timestamps -3. Span parent-child relationships -4. Timestamp ordering within and across turns -5. Edge cases (missing fields, concurrent sessions, rapid events) -6. Atomic buffer drain (race condition fix) -7. Redaction -8. Trace ID determinism + Core engine: + 1. Turn ordering / assignment correctness + 2. Monotonic sequence numbers and micro-offset timestamps + 3. Span parent-child relationships + 4. Timestamp ordering within and across turns + 5. Edge cases (missing fields, concurrent sessions, rapid events) + 6. Redaction + 7. Trace ID determinism + 8. Session labels + 9. Full pipeline (process_and_send) + + Cursor adapter: + 10. Event normalisation (all hook types) + 11. Atomic buffer drain (race condition fix) + + Claude Code adapter: + 12. JSONL transcript parsing + 13. Tool use / tool result pairing + 14. Thinking blocks + 15. Multi-turn conversations + 16. Sub-agent transcripts + 17. Timestamp parsing (ISO 8601 + epoch) + 18. End-to-end: transcript → NormalizedEvents → spans + + Cross-adapter consistency: + 19. Both adapters produce consistent span structures """ import json import os import sys +import tempfile import time import uuid from datetime import datetime, timezone -# Add hooks directory to path so we can import flush -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'hooks')) -import flush +# Add project root to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +from hooks.core import ( + NormalizedEvent, + assign_turns, + build_span, + find_session_labels, + inject_output_on_prompts, + make_span_id, + make_trace_id, + process_and_send, + redact_dict, + redact_event, + SKIP_FIELDS, +) +from hooks.adapters.cursor import CursorAdapter +from hooks.adapters.claude_code import ClaudeCodeAdapter # ═══════════════════════════════════════════════════════════════════════════════ -# Helper: build synthetic Cursor hook events +# Helper: build synthetic NormalizedEvents # ═══════════════════════════════════════════════════════════════════════════════ -def make_event(hook_event_name, conversation_id="conv-1", **kwargs): - """Create a synthetic hook event with auto-incrementing timestamp.""" - event = { - "hook_event_name": hook_event_name, - "conversation_id": conversation_id, - "_timestamp": kwargs.pop("_timestamp", time.time()), - } - event.update(kwargs) - return event - - -def make_session(conversation_id="conv-1", num_turns=3, events_per_turn=4): - """ - Generate a realistic multi-turn session: - sessionStart - for each turn: - beforeSubmitPrompt - afterAgentThought - postToolUse (1-2x) - afterAgentResponse - stop - """ +def make_event(event_type, conversation_id="conv-1", **kwargs): + """Create a synthetic NormalizedEvent.""" + return NormalizedEvent( + event_type=event_type, + conversation_id=conversation_id, + 
timestamp=kwargs.pop("timestamp", time.time()), + agent_type=kwargs.pop("agent_type", "cursor"), + **kwargs, + ) + + +def make_session(conversation_id="conv-1", num_turns=3, agent_type="cursor"): + """Generate a realistic multi-turn session as NormalizedEvents.""" events = [] - ts = 1700000000.0 # fixed base timestamp + ts = 1700000000.0 - # sessionStart - events.append(make_event("sessionStart", conversation_id, - _timestamp=ts, composer_mode="agent", is_background_agent=False)) + events.append(make_event( + "session_start", conversation_id, timestamp=ts, agent_type=agent_type, + attributes={"composer_mode": "agent"}, + )) ts += 0.1 for turn in range(num_turns): - # User prompt - events.append(make_event("beforeSubmitPrompt", conversation_id, - _timestamp=ts, prompt=f"Turn {turn}: do something")) + events.append(make_event( + "prompt", conversation_id, timestamp=ts, agent_type=agent_type, + input_value=f"Turn {turn}: do something", + )) ts += 0.5 - # Agent thinking - events.append(make_event("afterAgentThought", conversation_id, - _timestamp=ts, text=f"Thinking about turn {turn}...", - duration_ms=200)) + events.append(make_event( + "thinking", conversation_id, timestamp=ts, agent_type=agent_type, + output_value=f"Thinking about turn {turn}...", duration_ms=200, + )) ts += 0.3 - # Tool usage(s) - for i in range(min(events_per_turn - 2, 2)): - events.append(make_event("postToolUse", conversation_id, - _timestamp=ts, tool_name=f"tool_{i}", - tool_input={"file": f"test_{i}.py"}, - tool_output="ok", duration=150)) + for i in range(2): + events.append(make_event( + "tool_use", conversation_id, timestamp=ts, agent_type=agent_type, + name=f"tool:tool_{i}", + input_value=json.dumps({"file": f"test_{i}.py"}), + input_mime_type="application/json", + output_value="ok", duration_ms=150, + attributes={"tool.name": f"tool_{i}"}, + )) ts += 0.2 - # Agent response - events.append(make_event("afterAgentResponse", conversation_id, - _timestamp=ts, text=f"Done with turn {turn}")) + events.append(make_event( + "response", conversation_id, timestamp=ts, agent_type=agent_type, + output_value=f"Done with turn {turn}", + )) ts += 0.5 - # Session end - events.append(make_event("stop", conversation_id, - _timestamp=ts, status="completed", reason="normal", - duration_ms=int((ts - 1700000000.0) * 1000))) + events.append(make_event( + "session_end", conversation_id, timestamp=ts, agent_type=agent_type, + attributes={"status": "completed", "reason": "normal"}, + )) return events # ═══════════════════════════════════════════════════════════════════════════════ -# Test 1: Turn assignment correctness +# Core Engine Tests # ═══════════════════════════════════════════════════════════════════════════════ def test_turn_assignment_basic(): - """Each beforeSubmitPrompt should start a new turn; child events stay in that turn.""" + """Each prompt should start a new turn; child events stay in that turn.""" events = make_session(num_turns=3) - flush.assign_turns(events) + assign_turns(events) - assert events[0]["_turn_index"] == 0, f"sessionStart should be turn 0" + assert events[0]._turn_index == 0, "session_start should be turn 0" - prompt_indices = [i for i, e in enumerate(events) if e["hook_event_name"] == "beforeSubmitPrompt"] - assert len(prompt_indices) == 3, f"Expected 3 prompts, got {len(prompt_indices)}" + prompt_indices = [i for i, e in enumerate(events) if e.event_type == "prompt"] + assert len(prompt_indices) == 3 for i, pi in enumerate(prompt_indices): - assert events[pi]["_turn_index"] == i, \ - f"Prompt at index 
{pi} should be turn {i}, got {events[pi]['_turn_index']}" + assert events[pi]._turn_index == i for idx in range(len(events)): e = events[idx] - if e["hook_event_name"] not in ("beforeSubmitPrompt", "sessionStart", "stop"): + if e.event_type not in ("prompt", "session_start", "session_end"): belonging_prompt_idx = None for pi in reversed(prompt_indices): if pi < idx: belonging_prompt_idx = pi break if belonging_prompt_idx is not None: - expected_turn = events[belonging_prompt_idx]["_turn_index"] - assert e["_turn_index"] == expected_turn, \ - f"Event {e['hook_event_name']} at idx {idx} should be turn {expected_turn}" + expected_turn = events[belonging_prompt_idx]._turn_index + assert e._turn_index == expected_turn print("✓ test_turn_assignment_basic PASSED") def test_turn_assignment_stop_event(): - """The stop event should belong to the LAST turn, not turn 0.""" + """The session_end event should belong to the LAST turn.""" events = make_session(num_turns=2) - flush.assign_turns(events) + assign_turns(events) - stop_event = [e for e in events if e["hook_event_name"] == "stop"][0] - assert stop_event["_turn_index"] == 1, \ - f"stop event should be in turn 1, got {stop_event['_turn_index']}" + end_event = [e for e in events if e.event_type == "session_end"][0] + assert end_event._turn_index == 1 print("✓ test_turn_assignment_stop_event PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 2: Monotonic sequence numbers -# ═══════════════════════════════════════════════════════════════════════════════ - def test_event_sequence_numbers(): """Every event should receive a monotonically increasing _event_seq.""" events = make_session(num_turns=3) - flush.assign_turns(events) + assign_turns(events) - seqs = [e["_event_seq"] for e in events] - assert seqs == list(range(len(events))), \ - f"Sequences should be 0..{len(events)-1}, got {seqs}" + seqs = [e._event_seq for e in events] + assert seqs == list(range(len(events))) print("✓ test_event_sequence_numbers PASSED") def test_event_sequence_attribute_in_span(): """Built spans should contain the event.sequence attribute.""" events = make_session(num_turns=1) - flush.assign_turns(events) - spans = [flush.build_span(e) for e in events] + assign_turns(events) + spans = [build_span(e) for e in events] for span in spans: - assert "event.sequence" in span["attributes"], \ - f"Span '{span['name']}' missing event.sequence attribute" + assert "event.sequence" in span["attributes"] seq_values = [int(s["attributes"]["event.sequence"]) for s in spans] assert seq_values == list(range(len(spans))) print("✓ test_event_sequence_attribute_in_span PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 3: Micro-offset timestamps fix ordering -# ═══════════════════════════════════════════════════════════════════════════════ - def test_micro_offset_guarantees_unique_start_times(): - """Even when all events have the SAME wall-clock timestamp, each span - should get a unique, monotonically increasing start_time.""" + """Even when all events have the SAME timestamp, each span gets a unique start_time.""" ts = 1700000000.0 events = [ - make_event("beforeSubmitPrompt", "conv-1", _timestamp=ts, prompt="test"), - make_event("afterAgentThought", "conv-1", _timestamp=ts, text="thinking"), - make_event("postToolUse", "conv-1", _timestamp=ts, tool_name="edit"), - make_event("postToolUse", "conv-1", _timestamp=ts, tool_name="read"), - make_event("afterAgentResponse", "conv-1", _timestamp=ts, 
text="done"), + make_event("prompt", "conv-1", timestamp=ts, input_value="test"), + make_event("thinking", "conv-1", timestamp=ts, output_value="thinking"), + make_event("tool_use", "conv-1", timestamp=ts, name="tool:edit"), + make_event("tool_use", "conv-1", timestamp=ts, name="tool:read"), + make_event("response", "conv-1", timestamp=ts, output_value="done"), ] - flush.assign_turns(events) - spans = [flush.build_span(e) for e in events] + assign_turns(events) + spans = [build_span(e) for e in events] start_times = [s["start_time"] for s in spans] - assert len(set(start_times)) == len(start_times), \ - f"All start_times should be unique, got duplicates: {start_times}" + assert len(set(start_times)) == len(start_times), "All start_times should be unique" - # Verify they are in order for i in range(1, len(start_times)): - assert start_times[i] > start_times[i - 1], \ - f"start_time[{i}] should be > start_time[{i-1}]" + assert start_times[i] > start_times[i - 1] print("✓ test_micro_offset_guarantees_unique_start_times PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 4: Parent-child span relationships -# ═══════════════════════════════════════════════════════════════════════════════ - def test_parent_child_relationships(): """Child events should have _parent_span_id pointing to the turn's root span.""" events = make_session(num_turns=2) - flush.assign_turns(events) + assign_turns(events) - prompts = [e for e in events if e["hook_event_name"] == "beforeSubmitPrompt"] + prompts = [e for e in events if e.event_type == "prompt"] for p in prompts: - assert "_parent_span_id" not in p, "Prompt span should not have a parent" + assert p._parent_span_id == "", "Prompt span should not have a parent" for e in events: - if e["hook_event_name"] in ("beforeSubmitPrompt", "sessionStart"): + if e.event_type in ("prompt", "session_start"): continue - if "_parent_span_id" in e: - turn = e["_turn_index"] - root_prompt = [p for p in prompts if p["_turn_index"] == turn] + if e._parent_span_id: + turn = e._turn_index + root_prompt = [p for p in prompts if p._turn_index == turn] if root_prompt: - assert e["_parent_span_id"] == root_prompt[0]["_span_id"] + assert e._parent_span_id == root_prompt[0]._span_id print("✓ test_parent_child_relationships PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 5: Timestamp ordering across turns -# ═══════════════════════════════════════════════════════════════════════════════ - def test_timestamp_ordering_across_turns(): """Verify turns are ordered chronologically.""" events = make_session(num_turns=3) - flush.assign_turns(events) - spans = [flush.build_span(e) for e in events] + assign_turns(events) + spans = [build_span(e) for e in events] prompts = [(s, events[i]) for i, s in enumerate(spans) - if events[i]["hook_event_name"] == "beforeSubmitPrompt"] + if events[i].event_type == "prompt"] for i in range(1, len(prompts)): prev_time = prompts[i-1][0]["start_time"] curr_time = prompts[i][0]["start_time"] - assert curr_time > prev_time, \ - f"Turn {i} start_time ({curr_time}) should be after turn {i-1} ({prev_time})" + assert curr_time > prev_time print("✓ test_timestamp_ordering_across_turns PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 6: Concurrent / interleaved sessions -# ═══════════════════════════════════════════════════════════════════════════════ - def test_interleaved_sessions(): """Two conversations interleaving events 
should produce correct independent turn indexing.""" ts = 1700000000.0 events = [ - make_event("sessionStart", "conv-A", _timestamp=ts), - make_event("sessionStart", "conv-B", _timestamp=ts + 0.01), - make_event("beforeSubmitPrompt", "conv-A", _timestamp=ts + 0.1, prompt="A turn 0"), - make_event("beforeSubmitPrompt", "conv-B", _timestamp=ts + 0.15, prompt="B turn 0"), - make_event("afterAgentThought", "conv-A", _timestamp=ts + 0.2, text="A thinking"), - make_event("afterAgentThought", "conv-B", _timestamp=ts + 0.25, text="B thinking"), - make_event("afterAgentResponse", "conv-A", _timestamp=ts + 0.3, text="A done"), - make_event("beforeSubmitPrompt", "conv-A", _timestamp=ts + 0.5, prompt="A turn 1"), - make_event("afterAgentResponse", "conv-B", _timestamp=ts + 0.55, text="B done"), - make_event("afterAgentResponse", "conv-A", _timestamp=ts + 0.7, text="A turn 1 done"), + make_event("session_start", "conv-A", timestamp=ts), + make_event("session_start", "conv-B", timestamp=ts + 0.01), + make_event("prompt", "conv-A", timestamp=ts + 0.1, input_value="A turn 0"), + make_event("prompt", "conv-B", timestamp=ts + 0.15, input_value="B turn 0"), + make_event("thinking", "conv-A", timestamp=ts + 0.2, output_value="A thinking"), + make_event("thinking", "conv-B", timestamp=ts + 0.25, output_value="B thinking"), + make_event("response", "conv-A", timestamp=ts + 0.3, output_value="A done"), + make_event("prompt", "conv-A", timestamp=ts + 0.5, input_value="A turn 1"), + make_event("response", "conv-B", timestamp=ts + 0.55, output_value="B done"), + make_event("response", "conv-A", timestamp=ts + 0.7, output_value="A turn 1 done"), ] - flush.assign_turns(events) + assign_turns(events) - a_prompts = [e for e in events if e.get("conversation_id") == "conv-A" - and e["hook_event_name"] == "beforeSubmitPrompt"] - assert [e["_turn_index"] for e in a_prompts] == [0, 1] + a_prompts = [e for e in events if e.conversation_id == "conv-A" and e.event_type == "prompt"] + assert [e._turn_index for e in a_prompts] == [0, 1] - b_prompts = [e for e in events if e.get("conversation_id") == "conv-B" - and e["hook_event_name"] == "beforeSubmitPrompt"] - assert [e["_turn_index"] for e in b_prompts] == [0] + b_prompts = [e for e in events if e.conversation_id == "conv-B" and e.event_type == "prompt"] + assert [e._turn_index for e in b_prompts] == [0] - a_trace_ids = set(e.get("_trace_id") for e in events if e.get("conversation_id") == "conv-A" - and e.get("_trace_id")) - b_trace_ids = set(e.get("_trace_id") for e in events if e.get("conversation_id") == "conv-B" - and e.get("_trace_id")) - assert not a_trace_ids & b_trace_ids, "Trace IDs should not overlap between sessions" + a_trace_ids = set(e._trace_id for e in events if e.conversation_id == "conv-A" and e._trace_id) + b_trace_ids = set(e._trace_id for e in events if e.conversation_id == "conv-B" and e._trace_id) + assert not a_trace_ids & b_trace_ids print("✓ test_interleaved_sessions PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 7: Edge cases -# ═══════════════════════════════════════════════════════════════════════════════ - def test_events_before_first_prompt(): - """Events before any beforeSubmitPrompt should go to turn 0.""" + """Events before any prompt should go to turn 0.""" events = [ - make_event("sessionStart", "conv-1", _timestamp=1700000000.0), - make_event("afterAgentThought", "conv-1", _timestamp=1700000000.1, text="pre-prompt thought"), - make_event("beforeSubmitPrompt", "conv-1", 
_timestamp=1700000000.5, prompt="first"), + make_event("session_start", "conv-1", timestamp=1700000000.0), + make_event("thinking", "conv-1", timestamp=1700000000.1, output_value="pre-prompt"), + make_event("prompt", "conv-1", timestamp=1700000000.5, input_value="first"), ] - flush.assign_turns(events) + assign_turns(events) - assert events[0]["_turn_index"] == 0 - assert events[1]["_turn_index"] == 0 - assert events[2]["_turn_index"] == 0 + assert events[0]._turn_index == 0 + assert events[1]._turn_index == 0 + assert events[2]._turn_index == 0 print("✓ test_events_before_first_prompt PASSED") def test_missing_conversation_id(): """Events without conversation_id should still get span_id and turn 0.""" - events = [ - {"hook_event_name": "sessionStart", "_timestamp": 1700000000.0}, - ] - flush.assign_turns(events) + events = [NormalizedEvent(event_type="session_start", timestamp=1700000000.0)] + assign_turns(events) - assert events[0]["_turn_index"] == 0 - assert "_span_id" in events[0] - assert "_event_seq" in events[0] + assert events[0]._turn_index == 0 + assert events[0]._span_id != "" print("✓ test_missing_conversation_id PASSED") def test_missing_timestamp(): - """Events without _timestamp should fall back to current time.""" - events = [ - {"hook_event_name": "sessionStart", "conversation_id": "conv-1"}, - ] - flush.assign_turns(events) - span = flush.build_span(events[0]) + """Events without timestamp should fall back to current time.""" + events = [NormalizedEvent(event_type="session_start", conversation_id="conv-1")] + assign_turns(events) + span = build_span(events[0]) assert span["start_time"] is not None print("✓ test_missing_timestamp PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 8: Redaction -# ═══════════════════════════════════════════════════════════════════════════════ - def test_redaction(): - """SKIP_FIELDS should redact top-level and nested fields.""" - original_skip = flush.SKIP_FIELDS + """SKIP_FIELDS should redact fields.""" + import hooks.core as core_mod + original_skip = core_mod.SKIP_FIELDS try: - flush.SKIP_FIELDS = {"prompt", "secret_key"} - event = { - "hook_event_name": "beforeSubmitPrompt", - "prompt": "sensitive prompt text", - "conversation_id": "conv-1", - "nested": {"secret_key": "api-key-123", "safe": "ok"}, - } - redacted = flush.redact(event) - assert redacted["prompt"] == "[redacted]" - assert redacted["nested"]["secret_key"] == "[redacted]" - assert redacted["nested"]["safe"] == "ok" - assert redacted["conversation_id"] == "conv-1" + core_mod.SKIP_FIELDS = {"input_value", "secret_key"} + + event = NormalizedEvent( + event_type="prompt", + conversation_id="conv-1", + input_value="sensitive prompt text", + attributes={"secret_key": "api-key-123", "safe": "ok"}, + ) + redacted = redact_event(event) + assert redacted.input_value == "[redacted]" + assert redacted.attributes["secret_key"] == "[redacted]" + assert redacted.attributes["safe"] == "ok" print("✓ test_redaction PASSED") finally: - flush.SKIP_FIELDS = original_skip + core_mod.SKIP_FIELDS = original_skip -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 9: Trace ID determinism -# ═══════════════════════════════════════════════════════════════════════════════ - def test_trace_id_determinism(): """Same conversation_id + turn_index should always produce the same trace_id.""" - id1 = flush.make_trace_id("conv-abc", 0) - id2 = flush.make_trace_id("conv-abc", 0) - id3 = flush.make_trace_id("conv-abc", 1) + 
id1 = make_trace_id("conv-abc", 0) + id2 = make_trace_id("conv-abc", 0) + id3 = make_trace_id("conv-abc", 1) - assert id1 == id2, "Same inputs should produce same trace_id" - assert id1 != id3, "Different turn_index should produce different trace_id" + assert id1 == id2 + assert id1 != id3 print("✓ test_trace_id_determinism PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 10: Span construction -# ═══════════════════════════════════════════════════════════════════════════════ - def test_span_construction_prompt(): """Prompt spans should use the prompt text (truncated) as name.""" - events = [ - make_event("beforeSubmitPrompt", "conv-1", _timestamp=1700000000.0, - prompt="Fix the login page authentication bug that causes 500 errors"), - ] - flush.assign_turns(events) - span = flush.build_span(events[0]) + events = [make_event( + "prompt", "conv-1", timestamp=1700000000.0, + input_value="Fix the login page authentication bug that causes 500 errors", + )] + assign_turns(events) + span = build_span(events[0]) assert span["name"].startswith("Fix the login page") assert len(span["name"]) <= 120 assert span["span_kind"] == "CHAIN" assert span["status_code"] == "OK" - assert span["attributes"]["input.value"] == events[0]["prompt"] + assert span["attributes"]["input.value"] == events[0].input_value print("✓ test_span_construction_prompt PASSED") def test_span_construction_tool_error(): """Error spans should have ERROR status.""" - events = [ - make_event("postToolUseFailure", "conv-1", _timestamp=1700000000.0, - tool_name="file_write", error_message="Permission denied", - failure_type="os_error"), - ] - flush.assign_turns(events) - span = flush.build_span(events[0]) + events = [make_event( + "tool_error", "conv-1", timestamp=1700000000.0, + name="tool:file_write.error", + is_error=True, error_message="Permission denied", + attributes={"tool.name": "file_write", "failure_type": "os_error"}, + )] + assign_turns(events) + span = build_span(events[0]) assert span["name"] == "tool:file_write.error" assert span["status_code"] == "ERROR" @@ -404,187 +382,720 @@ def test_span_construction_tool_error(): def test_span_duration_calculation(): """Span end_time should be start_time + duration_ms.""" - events = [ - make_event("afterAgentThought", "conv-1", _timestamp=1700000000.0, - text="thinking", duration_ms=500), - ] - flush.assign_turns(events) - span = flush.build_span(events[0]) + events = [make_event( + "thinking", "conv-1", timestamp=1700000000.0, + output_value="thinking", duration_ms=500, + )] + assign_turns(events) + span = build_span(events[0]) start = datetime.fromisoformat(span["start_time"]) end = datetime.fromisoformat(span["end_time"]) delta_ms = (end - start).total_seconds() * 1000 - assert abs(delta_ms - 500) < 1, f"Duration should be ~500ms, got {delta_ms}ms" + assert abs(delta_ms - 500) < 1 print("✓ test_span_duration_calculation PASSED") -# ═══════════════════════════════════════════════════════════════════════════════ -# Test 11: Session labels -# ═══════════════════════════════════════════════════════════════════════════════ - def test_session_labels(): """Session label should come from the first prompt of each conversation.""" events = make_session("conv-1", num_turns=3) - labels = flush.find_session_labels(events) + labels = find_session_labels(events) assert "conv-1" in labels assert labels["conv-1"] == "Turn 0: do something" print("✓ test_session_labels PASSED") -# 
═══════════════════════════════════════════════════════════════════════════════ -# Test 12: Full end-to-end span generation -# ═══════════════════════════════════════════════════════════════════════════════ - def test_end_to_end_span_generation(): """Full pipeline: events -> assign_turns -> build_span.""" events = make_session("conv-1", num_turns=2) - events = [flush.redact(e) for e in events] - session_labels = flush.find_session_labels(events) - flush.assign_turns(events) + session_labels = find_session_labels(events) + assign_turns(events) spans = [] for e in events: - cid = e.get("conversation_id", "") - label = session_labels.get(cid) - spans.append(flush.build_span(e, session_label=label)) + label = session_labels.get(e.conversation_id) + spans.append(build_span(e, session_label=label)) - # sessionStart(1) + per_turn(prompt + thought + 2 tools + response = 5) * 2 + stop(1) + # session_start(1) + per_turn(prompt + thinking + 2 tools + response = 5) * 2 + session_end(1) expected = 1 + 5 * 2 + 1 assert len(spans) == expected, f"Expected {expected} spans, got {len(spans)}" required = {"name", "context", "span_kind", "start_time", "end_time", "status_code", "attributes"} for span in spans: missing = required - set(span.keys()) - assert not missing, f"Span '{span['name']}' missing fields: {missing}" - - # All spans in a turn should share the same trace_id - turn_traces = {} - for span, event in zip(spans, events): - tid = span["context"]["trace_id"] - turn = event.get("_turn_index", -1) - cid = event.get("conversation_id", "") - key = f"{cid}:{turn}" - if key not in turn_traces: - turn_traces[key] = tid - else: - assert turn_traces[key] == tid - - # All start_times should be unique (micro-offset fix) + assert not missing + start_times = [s["start_time"] for s in spans] - assert len(set(start_times)) == len(start_times), "All start_times should be unique" + assert len(set(start_times)) == len(start_times) print("✓ test_end_to_end_span_generation PASSED") +def test_prompt_output_value_injection(): + """The last response text should be injected as output.value on the prompt span.""" + events = make_session("conv-1", num_turns=1) + session_labels = find_session_labels(events) + assign_turns(events) + + spans = [build_span(e, session_label=session_labels.get(e.conversation_id)) for e in events] + inject_output_on_prompts(events, spans) + + prompt_span = [s for s, e in zip(spans, events) if e.event_type == "prompt"][0] + assert "output.value" in prompt_span["attributes"] + assert prompt_span["attributes"]["output.value"] == "Done with turn 0" + print("✓ test_prompt_output_value_injection PASSED") + + +def test_agent_type_in_span(): + """Spans should include agent.type attribute.""" + events = [make_event("prompt", "conv-1", timestamp=1700000000.0, + input_value="test", agent_type="cursor")] + assign_turns(events) + span = build_span(events[0]) + assert span["attributes"]["agent.type"] == "cursor" + print("✓ test_agent_type_in_span PASSED") + + # ═══════════════════════════════════════════════════════════════════════════════ -# Test 13: Atomic buffer drain +# Cursor Adapter Tests # ═══════════════════════════════════════════════════════════════════════════════ -def test_atomic_buffer_drain(): - """_read_and_drain_buffer should atomically rename the buffer file so - events arriving during processing are not lost.""" - import tempfile +def test_cursor_normalise_prompt(): + """Cursor adapter should normalise beforeSubmitPrompt events.""" + adapter = CursorAdapter() + raw = { + "hook_event_name": 
"beforeSubmitPrompt", + "conversation_id": "conv-1", + "_timestamp": 1700000000.0, + "prompt": "fix the bug", + "user_email": "user@example.com", + "model": "gpt-4", + } + event = adapter._normalise(raw) + + assert event.event_type == "prompt" + assert event.conversation_id == "conv-1" + assert event.input_value == "fix the bug" + assert event.agent_type == "cursor" + assert event.user_id == "user@example.com" + assert event.model == "gpt-4" + print("✓ test_cursor_normalise_prompt PASSED") + + +def test_cursor_normalise_tool_use(): + """Cursor adapter should normalise postToolUse events.""" + adapter = CursorAdapter() + raw = { + "hook_event_name": "postToolUse", + "conversation_id": "conv-1", + "_timestamp": 1700000000.0, + "tool_name": "file_read", + "tool_input": {"path": "test.py"}, + "tool_output": "content here", + "duration": 150, + } + event = adapter._normalise(raw) + + assert event.event_type == "tool_use" + assert event.name == "tool:file_read" + assert "test.py" in event.input_value + assert event.output_value == "content here" + assert event.attributes["tool.name"] == "file_read" + print("✓ test_cursor_normalise_tool_use PASSED") + + +def test_cursor_normalise_tool_failure(): + """Cursor adapter should normalise postToolUseFailure events with error status.""" + adapter = CursorAdapter() + raw = { + "hook_event_name": "postToolUseFailure", + "conversation_id": "conv-1", + "_timestamp": 1700000000.0, + "tool_name": "file_write", + "error_message": "Permission denied", + "failure_type": "os_error", + } + event = adapter._normalise(raw) + + assert event.event_type == "tool_error" + assert event.is_error is True + assert event.error_message == "Permission denied" + assert event.name == "tool:file_write.error" + print("✓ test_cursor_normalise_tool_failure PASSED") + + +def test_cursor_normalise_all_hook_types(): + """Cursor adapter should handle all known hook event types.""" + adapter = CursorAdapter() + hook_types = { + "sessionStart": "session_start", + "beforeSubmitPrompt": "prompt", + "afterAgentThought": "thinking", + "afterAgentResponse": "response", + "preCompact": "compaction", + "stop": "session_end", + "sessionEnd": "session_end", + "postToolUse": "tool_use", + "postToolUseFailure": "tool_error", + "afterShellExecution": "shell", + "afterMCPExecution": "mcp", + "afterFileEdit": "file_edit", + "subagentStop": "subagent", + } + for hook, expected_type in hook_types.items(): + raw = { + "hook_event_name": hook, + "conversation_id": "conv-1", + "_timestamp": 1700000000.0, + } + event = adapter._normalise(raw) + assert event.event_type == expected_type, f"{hook} → {event.event_type}, expected {expected_type}" + + print("✓ test_cursor_normalise_all_hook_types PASSED") + + +def test_cursor_atomic_buffer_drain(): + """Cursor adapter should atomically rename the buffer file.""" + adapter = CursorAdapter() buf = tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) buf_path = buf.name - for i in range(5): - buf.write(json.dumps({"hook_event_name": "test", "seq": i}) + "\n") + buf.write(json.dumps({"hook_event_name": "test", "conversation_id": "c1", "seq": i}) + "\n") buf.flush() buf.close() - original_buffer = flush.BUFFER_PATH + import hooks.adapters.cursor as cursor_mod + original_buffer = cursor_mod.BUFFER_PATH try: - flush.BUFFER_PATH = buf_path - lines = flush._read_and_drain_buffer() + cursor_mod.BUFFER_PATH = buf_path + lines = adapter._read_and_drain_buffer() finally: - flush.BUFFER_PATH = original_buffer - - assert len(lines) == 5, f"Should have read 5 
lines, got {len(lines)}" - assert not os.path.exists(buf_path), "Buffer file should be gone after drain" + cursor_mod.BUFFER_PATH = original_buffer - print("✓ test_atomic_buffer_drain PASSED") + assert len(lines) == 5 + assert not os.path.exists(buf_path) + print("✓ test_cursor_atomic_buffer_drain PASSED") -def test_atomic_drain_does_not_lose_late_events(): - """Events appended after rename should survive in the new buffer file.""" - import tempfile +def test_cursor_atomic_drain_late_events(): + """Events appended after rename should survive for the next drain.""" + adapter = CursorAdapter() buf = tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) buf_path = buf.name - for i in range(5): - buf.write(json.dumps({"hook_event_name": "test", "seq": i}) + "\n") + buf.write(json.dumps({"hook_event_name": "test", "conversation_id": "c1", "seq": i}) + "\n") buf.flush() buf.close() - original_buffer = flush.BUFFER_PATH + import hooks.adapters.cursor as cursor_mod + original_buffer = cursor_mod.BUFFER_PATH try: - flush.BUFFER_PATH = buf_path - - # Drain (renames the file away) - lines = flush._read_and_drain_buffer() + cursor_mod.BUFFER_PATH = buf_path + lines = adapter._read_and_drain_buffer() assert len(lines) == 5 - # Simulate a late event arriving into the ORIGINAL path - # (since the hook script always appends to BUFFER_PATH) with open(buf_path, "a") as f: - f.write(json.dumps({"hook_event_name": "late_arrival", "seq": 99}) + "\n") + f.write(json.dumps({"hook_event_name": "late_arrival", "conversation_id": "c1", "seq": 99}) + "\n") - # The late event should be in the buffer for the next drain - lines2 = flush._read_and_drain_buffer() - assert len(lines2) == 1, f"Late event should survive, got {len(lines2)} lines" + lines2 = adapter._read_and_drain_buffer() + assert len(lines2) == 1 assert json.loads(lines2[0].strip())["seq"] == 99 finally: - flush.BUFFER_PATH = original_buffer + cursor_mod.BUFFER_PATH = original_buffer if os.path.exists(buf_path): os.unlink(buf_path) - print("✓ test_atomic_drain_does_not_lose_late_events PASSED") + print("✓ test_cursor_atomic_drain_late_events PASSED") # ═══════════════════════════════════════════════════════════════════════════════ -# Test 14: Output.value on root prompt span +# Claude Code Adapter Tests # ═══════════════════════════════════════════════════════════════════════════════ -def test_prompt_output_value_injection(): - """The last afterAgentResponse text should be injected as output.value on the prompt span.""" - events = make_session("conv-1", num_turns=1) - events = [flush.redact(e) for e in events] - session_labels = flush.find_session_labels(events) - flush.assign_turns(events) +def _write_transcript(lines: list[dict]) -> str: + """Write a list of dicts as JSONL to a temp file and return the path.""" + f = tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) + for line in lines: + f.write(json.dumps(line) + "\n") + f.flush() + f.close() + return f.name + + +def test_claude_code_simple_conversation(): + """Claude Code adapter should parse a simple user→assistant conversation.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Fix the login bug"}, + }, + { + "type": "assistant", + "uuid": "a1", + "parentUuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:05Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + {"type": "text", "text": "I'll 
fix the login bug for you."}, + ], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) + + # Should have: session_start, prompt, response + assert len(events) == 3, f"Expected 3 events, got {len(events)}" + assert events[0].event_type == "session_start" + assert events[1].event_type == "prompt" + assert events[1].input_value == "Fix the login bug" + assert events[2].event_type == "response" + assert "fix the login bug" in events[2].output_value.lower() + assert events[2].model == "claude-sonnet-4-20250514" + assert all(e.agent_type == "claude_code" for e in events) + finally: + os.unlink(path) + + print("✓ test_claude_code_simple_conversation PASSED") + + +def test_claude_code_tool_use_and_result(): + """Claude Code adapter should pair tool_use with tool_result.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Read test.py"}, + }, + { + "type": "assistant", + "uuid": "a1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:02Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + { + "type": "tool_use", + "id": "tool-123", + "name": "Read", + "input": {"file_path": "test.py"}, + }, + ], + }, + }, + { + "type": "user", + "uuid": "u2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:04Z", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tool-123", + "content": "def test_hello():\n pass", + }, + ], + }, + }, + { + "type": "assistant", + "uuid": "a2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:06Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + {"type": "text", "text": "I see the test file."}, + ], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) + + tool_events = [e for e in events if e.event_type == "tool_use"] + assert len(tool_events) == 1 + assert tool_events[0].name == "tool:Read" + assert tool_events[0].attributes["tool.name"] == "Read" + assert "test.py" in tool_events[0].input_value + # Tool result should be paired + assert "def test_hello" in tool_events[0].output_value + # Duration should be computed from tool_use to tool_result timestamp + assert tool_events[0].duration_ms > 0 + finally: + os.unlink(path) + + print("✓ test_claude_code_tool_use_and_result PASSED") + + +def test_claude_code_tool_error(): + """Claude Code adapter should handle tool errors.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Write to /etc/test"}, + }, + { + "type": "assistant", + "uuid": "a1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:02Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + { + "type": "tool_use", + "id": "tool-456", + "name": "Write", + "input": {"file_path": "/etc/test", "content": "hello"}, + }, + ], + }, + }, + { + "type": "user", + "uuid": "u2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:03Z", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tool-456", + "content": "Permission denied", + "is_error": True, + }, + ], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = 
ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) + + tool_events = [e for e in events if e.event_type in ("tool_use", "tool_error")] + assert len(tool_events) == 1 + assert tool_events[0].is_error is True + assert tool_events[0].event_type == "tool_error" + assert "Permission denied" in tool_events[0].error_message + finally: + os.unlink(path) + + print("✓ test_claude_code_tool_error PASSED") + + +def test_claude_code_thinking_blocks(): + """Claude Code adapter should extract thinking blocks.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Explain recursion"}, + }, + { + "type": "assistant", + "uuid": "a1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:03Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + {"type": "thinking", "thinking": "I should explain recursion with an example..."}, + {"type": "text", "text": "Recursion is when a function calls itself."}, + ], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) - last_response_per_turn = {} - for e in events: - cid = e.get("conversation_id", "") - turn = e.get("_turn_index", 0) - key = f"{cid}:{turn}" - if e.get("hook_event_name") == "afterAgentResponse": - text = e.get("text", "") - if text: - last_response_per_turn[key] = text[:200] + thinking = [e for e in events if e.event_type == "thinking"] + assert len(thinking) == 1 + assert "example" in thinking[0].output_value - spans = [] - for e in events: - cid = e.get("conversation_id", "") - label = session_labels.get(cid) - span = flush.build_span(e, session_label=label) - hook = e.get("hook_event_name", "") - turn = e.get("_turn_index", 0) - turn_key = f"{cid}:{turn}" - if hook == "beforeSubmitPrompt" and turn_key in last_response_per_turn: - span["attributes"]["output.value"] = last_response_per_turn[turn_key] - span["attributes"]["output.mime_type"] = "text/plain" - spans.append(span) - - prompt_span = [s for s, e in zip(spans, events) - if e["hook_event_name"] == "beforeSubmitPrompt"][0] + responses = [e for e in events if e.event_type == "response"] + assert len(responses) == 1 + assert "Recursion" in responses[0].output_value + finally: + os.unlink(path) + + print("✓ test_claude_code_thinking_blocks PASSED") + + +def test_claude_code_multi_turn(): + """Claude Code adapter should handle multi-turn conversations.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Turn 1 prompt"}, + }, + { + "type": "assistant", + "uuid": "a1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:05Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [{"type": "text", "text": "Turn 1 response"}], + }, + }, + { + "type": "user", + "uuid": "u2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:01:00Z", + "message": {"role": "user", "content": "Turn 2 prompt"}, + }, + { + "type": "assistant", + "uuid": "a2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:01:05Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [{"type": "text", "text": "Turn 2 response"}], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) + + 
prompts = [e for e in events if e.event_type == "prompt"] + assert len(prompts) == 2 + assert prompts[0].input_value == "Turn 1 prompt" + assert prompts[1].input_value == "Turn 2 prompt" + + # After assign_turns, they should be in different turns + assign_turns(events) + assert prompts[0]._turn_index == 0 + assert prompts[1]._turn_index == 1 + finally: + os.unlink(path) - assert "output.value" in prompt_span["attributes"] - assert prompt_span["attributes"]["output.value"] == "Done with turn 0" - print("✓ test_prompt_output_value_injection PASSED") + print("✓ test_claude_code_multi_turn PASSED") + + +def test_claude_code_timestamp_parsing(): + """Claude Code adapter should parse various timestamp formats.""" + adapter = ClaudeCodeAdapter() + + # ISO 8601 + ts1 = adapter._parse_timestamp("2025-01-15T10:00:00Z") + assert ts1 > 0 + + # ISO 8601 with timezone offset + ts2 = adapter._parse_timestamp("2025-01-15T10:00:00+00:00") + assert abs(ts1 - ts2) < 1 + + # Epoch milliseconds + ts3 = adapter._parse_timestamp(1705312800000) + assert ts3 > 1e9 # Should be in seconds, not milliseconds + + # Epoch seconds + ts4 = adapter._parse_timestamp(1705312800) + assert abs(ts3 - ts4) < 1 + + # Empty / None + assert adapter._parse_timestamp("") == 0.0 + assert adapter._parse_timestamp(None) == 0.0 + + print("✓ test_claude_code_timestamp_parsing PASSED") + + +def test_claude_code_empty_transcript(): + """Claude Code adapter should handle empty/missing transcripts gracefully.""" + adapter = ClaudeCodeAdapter() + + # Non-existent file + events = adapter.read_events(transcript_path="/nonexistent/path.jsonl") + assert events == [] + + # Empty file + path = _write_transcript([]) + try: + events = adapter.read_events(transcript_path=path) + assert events == [] + finally: + os.unlink(path) + + print("✓ test_claude_code_empty_transcript PASSED") + + +def test_claude_code_end_to_end_spans(): + """Full pipeline: Claude Code transcript → NormalizedEvents → spans.""" + transcript = [ + { + "type": "user", + "uuid": "u1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:00Z", + "message": {"role": "user", "content": "Fix the bug in auth.py"}, + }, + { + "type": "assistant", + "uuid": "a1", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:02Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + {"type": "thinking", "thinking": "Let me look at auth.py first"}, + { + "type": "tool_use", + "id": "tool-1", + "name": "Read", + "input": {"file_path": "auth.py"}, + }, + ], + }, + }, + { + "type": "user", + "uuid": "u2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:04Z", + "message": { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tool-1", + "content": "def login():\n return None # BUG", + }, + ], + }, + }, + { + "type": "assistant", + "uuid": "a2", + "sessionId": "sess-1", + "timestamp": "2025-01-15T10:00:06Z", + "message": { + "role": "assistant", + "model": "claude-sonnet-4-20250514", + "content": [ + {"type": "text", "text": "I found and fixed the bug."}, + ], + }, + }, + ] + path = _write_transcript(transcript) + try: + adapter = ClaudeCodeAdapter() + events = adapter.read_events(transcript_path=path) + assert len(events) > 0 + + # Verify all events have agent_type + for e in events: + assert e.agent_type == "claude_code" + + # Run through full pipeline + session_labels = find_session_labels(events) + assign_turns(events) + spans = [build_span(e, session_label=session_labels.get(e.conversation_id)) for e in events] + 
inject_output_on_prompts(events, spans) + + # All spans should have required fields + required = {"name", "context", "span_kind", "start_time", "end_time", "status_code", "attributes"} + for span in spans: + missing = required - set(span.keys()) + assert not missing, f"Span '{span['name']}' missing: {missing}" + + # All spans in the same turn should share a trace_id + trace_ids = set(s["context"]["trace_id"] for s in spans) + # One turn = one trace_id (plus possibly session_start in turn 0) + assert len(trace_ids) <= 2 + + # Start times should be unique + start_times = [s["start_time"] for s in spans] + assert len(set(start_times)) == len(start_times) + + # The prompt span should have agent.type = claude_code + prompt_spans = [s for s in spans if "Fix the bug" in s["name"]] + assert len(prompt_spans) == 1 + assert prompt_spans[0]["attributes"]["agent.type"] == "claude_code" + finally: + os.unlink(path) + + print("✓ test_claude_code_end_to_end_spans PASSED") + + +# ═══════════════════════════════════════════════════════════════════════════════ +# Cross-adapter consistency tests +# ═══════════════════════════════════════════════════════════════════════════════ + +def test_cursor_and_claude_code_span_parity(): + """Both adapters should produce spans with the same structure.""" + # Cursor-style events + cursor_events = make_session("conv-cursor", num_turns=1, agent_type="cursor") + assign_turns(cursor_events) + cursor_spans = [build_span(e) for e in cursor_events] + + # Claude Code-style events (manually constructed to match) + cc_events = make_session("conv-claude", num_turns=1, agent_type="claude_code") + assign_turns(cc_events) + cc_spans = [build_span(e) for e in cc_events] + + # Both should produce spans with identical required fields + required = {"name", "context", "span_kind", "start_time", "end_time", "status_code", "attributes"} + for spans_set, label in [(cursor_spans, "cursor"), (cc_spans, "claude_code")]: + for span in spans_set: + missing = required - set(span.keys()) + assert not missing, f"{label} span '{span['name']}' missing: {missing}" + + # Context should have trace_id and span_id + for spans_set in [cursor_spans, cc_spans]: + for span in spans_set: + assert "trace_id" in span["context"] + assert "span_id" in span["context"] + + # Agent type should be preserved + for span in cursor_spans: + assert span["attributes"].get("agent.type") == "cursor" + for span in cc_spans: + assert span["attributes"].get("agent.type") == "claude_code" + + print("✓ test_cursor_and_claude_code_span_parity PASSED") # ═══════════════════════════════════════════════════════════════════════════════ @@ -593,11 +1104,12 @@ def test_prompt_output_value_injection(): if __name__ == "__main__": print("=" * 70) - print("coding-agent-insights — flush.py test suite") + print("coding-agent-insights — test suite") print("=" * 70) print() tests = [ + # Core engine test_turn_assignment_basic, test_turn_assignment_stop_event, test_event_sequence_numbers, @@ -616,9 +1128,26 @@ def test_prompt_output_value_injection(): test_span_duration_calculation, test_session_labels, test_end_to_end_span_generation, - test_atomic_buffer_drain, - test_atomic_drain_does_not_lose_late_events, test_prompt_output_value_injection, + test_agent_type_in_span, + # Cursor adapter + test_cursor_normalise_prompt, + test_cursor_normalise_tool_use, + test_cursor_normalise_tool_failure, + test_cursor_normalise_all_hook_types, + test_cursor_atomic_buffer_drain, + test_cursor_atomic_drain_late_events, + # Claude Code adapter + 
test_claude_code_simple_conversation, + test_claude_code_tool_use_and_result, + test_claude_code_tool_error, + test_claude_code_thinking_blocks, + test_claude_code_multi_turn, + test_claude_code_timestamp_parsing, + test_claude_code_empty_transcript, + test_claude_code_end_to_end_spans, + # Cross-adapter + test_cursor_and_claude_code_span_parity, ] passed = 0