diff --git a/README.md b/README.md index 4747877..2607192 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ structured chat data and Codex sessions in the browser. | :-------------------------- | :------------------------------------------------------------------------------------------------------------------------- | | Harmony conversation viewer | Renders Harmony conversations with support for different message types and metadata. | | Codex session viewer | Detects Codex session JSONL files, converts them into a conversation, and renders them in the same viewer. | +| Codex sessions browser | Lists local Codex sessions with pagination, prompt/response filters, fuzzy search, and full-text search over indexed turns. | | Flexible loading | Loads data from the clipboard, local `.json` or `.jsonl` files, or public HTTP(S) JSON/JSONL URLs. | | Markdown and HTML rendering | Renders markdown in message content, including formulas and optional HTML blocks. | | Translation | Translates non-English text into English in normal mode or frontend-only mode with a user-provided OpenAI API key. | @@ -58,6 +59,33 @@ There are two ways you can use Euphony. 3. If the conversation is stored at some top-level field → Euphony renders all conversations and treat other top-level fields as each conversation’s metadata 4. Else → Euphony renders the data as raw JSON objects +### Browse Local Codex Sessions + +When you run the local backend, Euphony also exposes a dedicated sessions page +at `/sessions.html`. + +The sessions page can: + +- paginate through local session logs under `~/.codex/sessions` or + `CODEX_HOME/sessions` +- show each session's first prompt, last prompt, last response preview, and + timestamps +- open any session directly in the main Euphony viewer +- filter by: + - first prompt + - last prompt + - last response +- search across sessions with three modes: + - `Exact`: keyword matching on session summary fields such as prompts, + response preview, `cwd`, session id, and file path + - `Fuzzy`: typo-tolerant matching on those same summary fields + - `Full text`: indexed search across multi-turn user and assistant text from + the whole session + +The full-text index is built lazily the first time you run a full-text search +and is stored locally at `~/.codex/euphony-codex-sessions.sqlite3` or inside +`CODEX_HOME` when that environment variable is set. + ### Integrate Euphony into My Web App #### Web Component API @@ -119,13 +147,73 @@ The current backend includes a remote URL fetch path for loading JSON and JSONL To develop Euphony locally, install Node.js and a package manager such as [pnpm](https://pnpm.io/). +For the optional FastAPI backend, use Python 3.9 or newer and install the +Python dependencies from `pyproject.toml`. Euphony currently pins +`openai-harmony==0.0.4` because newer releases may require a local Rust +toolchain during installation on some environments. + +After installing dependencies, the easiest way to run Euphony locally is with +`./start.sh`. + +Start in the default production-style local mode: + +```bash +python3.9 -m pip install -e . +pnpm install +pnpm run build +./start.sh +``` + +This mode starts only the FastAPI server on `http://127.0.0.1:8020` and serves +the prebuilt frontend from `dist/`, including `http://127.0.0.1:8020/sessions.html`. + +Use this mode when you mainly want to open the local app and the sessions page +without running a separate Vite dev server. 
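+
+The sessions page is backed by a small JSON API on the same backend, so you can
+also script against your local Codex history directly. A minimal sketch,
+assuming the backend started by `./start.sh` is listening on the default
+`127.0.0.1:8020` and that "dashboard" is just an example query term:
+
+```bash
+# List up to five sessions whose indexed turns mention "dashboard".
+curl "http://127.0.0.1:8020/codex-sessions/?limit=5&searchQuery=dashboard&searchMode=full_text"
+```
+
+The response mirrors what the page shows: a `data` array of session summaries
+plus `offset`, `limit`, `total`, and `matchedCount` fields.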
+ +Start in explicit development mode: + +```bash +./start.sh dev +``` + +Development mode starts both: + +- the backend with `uvicorn --reload` +- the Vite frontend dev server with hot module reload + +Use this mode when you are changing frontend or backend code and want immediate +reloads while developing. + +You can override ports in either mode: + +```bash +BACKEND_PORT=8025 FRONTEND_PORT=3005 ./start.sh dev +``` + +`./start.sh` also accepts `MODE=prod` or `MODE=dev` if you prefer environment +variables over positional arguments. + +Common local URLs: + +- backend-served app: `http://127.0.0.1:8020/` +- backend-served sessions page: `http://127.0.0.1:8020/sessions.html` +- frontend dev server in `./start.sh dev`: `http://127.0.0.1:3000/` + +If you prefer to launch the pieces manually, use the commands below. + Start the backend server: ```bash +python3.9 -m pip install -e . pnpm install uvicorn fastapi-main:app --app-dir server --host 127.0.0.1 --port 8020 --reload ``` +The first backend Harmony render may download `o200k_base.tiktoken` from +`https://openaipublic.blob.core.windows.net/encodings/`. If your environment +cannot reach that URL, set `TIKTOKEN_ENCODINGS_BASE` to a local directory that +already contains the tokenizer file. + Start the frontend development server: ```bash diff --git a/server/fastapi-main.py b/server/fastapi-main.py index 1c37921..9ec7d53 100644 --- a/server/fastapi-main.py +++ b/server/fastapi-main.py @@ -3,11 +3,16 @@ import json import logging import os +import re +import sqlite3 import urllib.error import urllib.parse import urllib.request +from dataclasses import dataclass +from difflib import SequenceMatcher +from functools import lru_cache from pathlib import Path -from typing import Any +from typing import Any, Literal, Optional, Union import jmespath from async_lru import alru_cache @@ -15,18 +20,6 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from openai import AsyncOpenAI -from openai_harmony import ( - Author as HarmonyAuthor, - Conversation as HarmonyConversation, - DeveloperContent as HarmonyDeveloperContent, - HarmonyEncodingName, - Message as HarmonyMessage, - RenderConversationConfig, - Role as HarmonyRole, - SystemContent as HarmonySystemContent, - TextContent as HarmonyTextContent, - load_harmony_encoding, -) from pydantic import BaseModel logger = logging.getLogger(__name__) @@ -34,8 +27,8 @@ DIST_DIR = Path(__file__).resolve().parents[1] / "dist" HARMONY_RENDERER_NAME = "o200k_harmony" -HARMONY_RENDERING_ENCODING = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) -HARMONY_RENDER_CONFIG = RenderConversationConfig(auto_drop_analysis=False) +CODEX_SESSION_FILE_MEDIA_TYPE = "application/x-ndjson" +CODEX_SESSION_SEARCH_INDEX_FILE_NAME = "euphony-codex-sessions.sqlite3" MAX_PUBLIC_JSON_BYTES = 25 * 1024 * 1024 TRANSLATION_MAX_CONCURRENCY = 1024 TRANSLATION_SEMAPHORE_ACQUIRE_TIMEOUT_S = 60 @@ -45,6 +38,83 @@ _inflight_translations: dict[str, asyncio.Task["TranslationResult"]] = {} +@dataclass(frozen=True) +class HarmonyRuntime: + # Keep the imported Harmony classes together so the server can still boot + # when the optional backend renderer is unavailable. 
+ author_cls: Any + conversation_cls: Any + developer_content_cls: Any + message_cls: Any + render_config: Any + role_cls: Any + system_content_cls: Any + text_content_cls: Any + encoding: Any + + +_harmony_runtime: Optional[HarmonyRuntime] = None + + +def _build_harmony_runtime() -> HarmonyRuntime: + try: + from openai_harmony import ( + Author as HarmonyAuthor, + Conversation as HarmonyConversation, + DeveloperContent as HarmonyDeveloperContent, + HarmonyEncodingName, + Message as HarmonyMessage, + RenderConversationConfig, + Role as HarmonyRole, + SystemContent as HarmonySystemContent, + TextContent as HarmonyTextContent, + load_harmony_encoding, + ) + except Exception as exc: + raise RuntimeError( + "openai-harmony could not be imported. Install backend dependencies " + "with Python 3.9+ before enabling backend-assisted Harmony rendering." + ) from exc + + try: + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) + except Exception as exc: + raise RuntimeError( + "openai-harmony imported, but its tokenizer could not be loaded. " + "Allow access to https://openaipublic.blob.core.windows.net/encodings/ " + "for the first download or set TIKTOKEN_ENCODINGS_BASE to a directory " + "that already contains o200k_base.tiktoken." + ) from exc + + return HarmonyRuntime( + author_cls=HarmonyAuthor, + conversation_cls=HarmonyConversation, + developer_content_cls=HarmonyDeveloperContent, + message_cls=HarmonyMessage, + render_config=RenderConversationConfig(auto_drop_analysis=False), + role_cls=HarmonyRole, + system_content_cls=HarmonySystemContent, + text_content_cls=HarmonyTextContent, + encoding=encoding, + ) + + +def _get_harmony_runtime() -> HarmonyRuntime: + global _harmony_runtime + + if _harmony_runtime is None: + try: + _harmony_runtime = _build_harmony_runtime() + except Exception as exc: + logger.warning("Harmony runtime unavailable: %s", exc) + raise HTTPException( + status_code=503, + detail=f"Harmony rendering unavailable: {exc}", + ) from exc + + return _harmony_runtime + + class TranslationRequestBody(BaseModel): source: str @@ -57,7 +127,7 @@ class TranslationResult(BaseModel): class BlobJSONLResponse(BaseModel): - data: list[dict[str, Any]] | list[str] | list[Any] + data: Union[list[dict[str, Any]], list[str], list[Any]] offset: int limit: int total: int @@ -82,6 +152,38 @@ class HarmonyRenderResult(BaseModel): partial_success_error_messages: list[str] +class CodexSessionSummary(BaseModel): + relative_path: str + file_name: str + session_id: Optional[str] + cwd: Optional[str] + started_at: Optional[str] + first_prompt: Optional[str] + first_prompt_time: Optional[str] + last_prompt: Optional[str] + last_prompt_time: Optional[str] + last_response: Optional[str] + last_response_time: Optional[str] + + +class CodexSessionListResponse(BaseModel): + data: list[CodexSessionSummary] + offset: int + limit: int + total: int + matchedCount: int + + +@dataclass(frozen=True) +class CodexSessionSearchDocument: + # Keep the display summary separate from the full searchable text so the + # sessions page can stay lightweight while the index still sees the whole + # conversation text emitted by Codex. 
+ summary: CodexSessionSummary + full_user_text: str + full_assistant_text: str + + def _resolve_frontend_path(path_fragment: str) -> Path: candidate = (DIST_DIR / path_fragment).resolve() try: @@ -91,12 +193,747 @@ def _resolve_frontend_path(path_fragment: str) -> Path: return candidate -def normalize_harmony_content(raw_content: Any, role: HarmonyRole) -> list[Any]: +def _get_codex_home() -> Path: + # Respect CODEX_HOME when users keep sessions outside the default + # ~/.codex directory. Falling back to ~/.codex keeps the feature aligned + # with the default Codex CLI installation layout. + codex_home = os.environ.get("CODEX_HOME") + if codex_home: + return Path(codex_home).expanduser() + return Path.home() / ".codex" + + +def _get_codex_sessions_root() -> Path: + return _get_codex_home() / "sessions" + + +def _resolve_codex_session_path(relative_path: str) -> Path: + sessions_root = _get_codex_sessions_root().resolve() + candidate = (sessions_root / relative_path).resolve() + try: + candidate.relative_to(sessions_root) + except ValueError as exc: + raise HTTPException(status_code=404, detail="Session not found") from exc + return candidate + + +def _get_codex_session_search_index_path() -> Path: + # Store the SQLite index alongside other Codex-local state so searches + # persist across server restarts without touching checked-in project files. + return _get_codex_home() / CODEX_SESSION_SEARCH_INDEX_FILE_NAME + + +def _iter_codex_session_files(sessions_root: Path) -> list[Path]: + if not sessions_root.exists(): + return [] + + session_files = [ + session_file + for session_file in sessions_root.rglob("*.jsonl") + if session_file.is_file() + ] + session_files.sort() + return session_files + + +def _normalize_prompt_text(text: str) -> Optional[str]: + normalized = re.sub(r"\s+", " ", text).strip() + return normalized or None + + +def _build_preview_text(text: str, max_length: int = 280) -> Optional[str]: + # Session cards need compact single-paragraph previews so they remain + # skimmable even when the underlying prompt or response spans many lines. + normalized = _normalize_prompt_text(text) + if normalized is None: + return None + if len(normalized) <= max_length: + return normalized + return normalized[: max_length - 3].rstrip() + "..." + + +def _extract_text_from_content_item(value: Any) -> str: + # Codex session payloads vary between simple strings and nested lists of + # content parts. This helper walks the common text-bearing keys so the + # session summary can surface a readable assistant preview without needing + # the full Euphony conversation parser. + if isinstance(value, str): + return value + + if isinstance(value, list): + text_parts = [_extract_text_from_content_item(item) for item in value] + return "\n".join(part for part in text_parts if part) + + if isinstance(value, dict): + for key in ("text", "message", "content", "output", "summary", "value"): + if key in value: + return _extract_text_from_content_item(value[key]) + + return "" + + +def _extract_user_prompt_from_response_item(payload: dict[str, Any]) -> Optional[str]: + content_items = payload.get("content") + if not isinstance(content_items, list): + return None + + prompt_parts: list[str] = [] + for item in content_items: + if not isinstance(item, dict): + continue + if item.get("type") != "input_text": + continue + + raw_text = item.get("text") + if not isinstance(raw_text, str): + continue + + stripped_text = raw_text.strip() + # Codex records AGENTS and environment metadata as synthetic user + # messages. 
Skip those wrappers so the session list surfaces the real + # prompt text the user entered at the terminal. + if stripped_text.startswith("# AGENTS.md instructions for "): + continue + if stripped_text.startswith(""): + continue + + prompt_parts.append(stripped_text) + + if not prompt_parts: + return None + return _normalize_prompt_text("\n\n".join(prompt_parts)) + + +def _extract_assistant_response_from_response_item( + payload: dict[str, Any], + *, + preview_length: Optional[int] = 280, +) -> Optional[str]: + content_items = payload.get("content") + if isinstance(content_items, list): + text_parts: list[str] = [] + for item in content_items: + extracted_text = _extract_text_from_content_item(item) + if extracted_text: + text_parts.append(extracted_text) + + if text_parts: + normalized_text = _normalize_prompt_text("\n\n".join(text_parts)) + if normalized_text is None: + return None + if preview_length is None: + return normalized_text + return _build_preview_text(normalized_text, preview_length) + + summary_items = payload.get("summary") + if isinstance(summary_items, list): + summary_text = _extract_text_from_content_item(summary_items) + if summary_text: + normalized_text = _normalize_prompt_text(summary_text) + if normalized_text is None: + return None + if preview_length is None: + return normalized_text + return _build_preview_text(normalized_text, preview_length) + + return None + + +def _tokenize_search_text(text: str) -> list[str]: + normalized_text = _normalize_prompt_text(text) + if normalized_text is None: + return [] + return re.findall(r"\w+", normalized_text.casefold(), flags=re.UNICODE) + + +def _iter_summary_search_fields(summary: CodexSessionSummary) -> list[str]: + return [ + summary.session_id or "", + summary.file_name or "", + summary.relative_path or "", + summary.cwd or "", + summary.first_prompt or "", + summary.last_prompt or "", + summary.last_response or "", + ] + + +def _build_summary_search_blob(summary: CodexSessionSummary) -> str: + return " ".join(field for field in _iter_summary_search_fields(summary) if field) + + +def _score_fuzzy_query_against_summary( + summary: CodexSessionSummary, + normalized_query: str, + query_tokens: list[str], +) -> float: + field_texts: list[str] = [] + for field in _iter_summary_search_fields(summary): + normalized_field = _normalize_prompt_text(field) + if normalized_field is None: + continue + field_texts.append(normalized_field.casefold()) + + if not field_texts: + return 0.0 + + if any(normalized_query in field_text for field_text in field_texts): + return 1.0 + + best_field_ratio = max( + SequenceMatcher(None, normalized_query, field_text).ratio() + for field_text in field_texts + ) + candidate_tokens = [ + token for field_text in field_texts for token in _tokenize_search_text(field_text) + ] + if not candidate_tokens or not query_tokens: + return best_field_ratio + + token_scores: list[float] = [] + for query_token in query_tokens: + best_token_score = 0.0 + for candidate_token in candidate_tokens: + if query_token == candidate_token: + best_token_score = 1.0 + break + + if query_token in candidate_token or candidate_token in query_token: + overlap_ratio = min(len(query_token), len(candidate_token)) / max( + len(query_token), len(candidate_token) + ) + best_token_score = max(best_token_score, 0.82 + (0.18 * overlap_ratio)) + continue + + best_token_score = max( + best_token_score, + SequenceMatcher(None, query_token, candidate_token).ratio(), + ) + + token_scores.append(best_token_score) + + token_average = 
sum(token_scores) / len(token_scores) + if token_scores and min(token_scores) >= 0.9: + token_average = min(1.0, token_average + 0.08) + + return (0.65 * token_average) + (0.35 * best_field_ratio) + + +@lru_cache(maxsize=4096) +def _summarize_codex_session_file_cached( + file_path_str: str, + sessions_root_str: str, + file_size: int, + modified_time_ns: int, +) -> Optional[CodexSessionSummary]: + del file_size + del modified_time_ns + + session_file = Path(file_path_str) + sessions_root = Path(sessions_root_str) + + document = _extract_codex_session_document( + session_file, sessions_root, include_full_text=False + ) + if document is None: + return None + return document.summary + + +def _extract_codex_session_document( + session_file: Path, + sessions_root: Path, + *, + include_full_text: bool, +) -> Optional[CodexSessionSearchDocument]: + # Parse the JSONL log once and populate both the lightweight card summary + # and, when requested, the larger user/assistant corpora that power the + # persistent full-text index. + relative_path = session_file.relative_to(sessions_root).as_posix() + + session_id: Optional[str] = None + cwd: Optional[str] = None + started_at: Optional[str] = None + first_prompt: Optional[str] = None + first_prompt_time: Optional[str] = None + last_prompt: Optional[str] = None + last_prompt_time: Optional[str] = None + last_response: Optional[str] = None + last_response_time: Optional[str] = None + full_user_text_parts: list[str] = [] + full_assistant_text_parts: list[str] = [] + + try: + with session_file.open("r", encoding="utf-8-sig") as handle: + for raw_line in handle: + line = raw_line.strip() + if not line: + continue + + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + + if not isinstance(event, dict): + continue + + event_type = event.get("type") + event_timestamp = event.get("timestamp") + if not isinstance(event_type, str): + continue + + payload = event.get("payload") + if not isinstance(payload, dict): + payload = {} + + if event_type == "session_meta": + payload_session_id = payload.get("id") + if isinstance(payload_session_id, str): + session_id = payload_session_id + + payload_cwd = payload.get("cwd") + if isinstance(payload_cwd, str): + cwd = payload_cwd + + payload_started_at = payload.get("timestamp") + if isinstance(payload_started_at, str): + started_at = payload_started_at + elif isinstance(event_timestamp, str): + started_at = event_timestamp + continue + + if event_type == "event_msg": + payload_type = payload.get("type") + if payload_type == "user_message": + payload_message = payload.get("message") + if isinstance(payload_message, str): + prompt_text = _normalize_prompt_text(payload_message) + if prompt_text: + if first_prompt is None: + first_prompt = prompt_text + if isinstance(event_timestamp, str): + first_prompt_time = event_timestamp + last_prompt = prompt_text + if isinstance(event_timestamp, str): + last_prompt_time = event_timestamp + if include_full_text: + full_user_text_parts.append(prompt_text) + elif payload_type == "agent_message": + payload_message = payload.get("message") + if isinstance(payload_message, str): + normalized_response = _normalize_prompt_text(payload_message) + if normalized_response: + last_response = _build_preview_text(normalized_response) + if include_full_text: + full_assistant_text_parts.append(normalized_response) + if isinstance(event_timestamp, str): + last_response_time = event_timestamp + continue + + if event_type != "response_item": + continue + + payload_type = 
payload.get("type") + payload_role = payload.get("role") + + if ( + payload_type == "message" + and payload_role == "assistant" + ): + assistant_response = _extract_assistant_response_from_response_item( + payload + ) + if assistant_response: + last_response = assistant_response + if include_full_text: + full_assistant_response = ( + _extract_assistant_response_from_response_item( + payload, preview_length=None + ) + ) + if full_assistant_response: + full_assistant_text_parts.append(full_assistant_response) + if isinstance(event_timestamp, str): + last_response_time = event_timestamp + continue + + if payload_type != "message" or payload_role != "user": + continue + + fallback_prompt = _extract_user_prompt_from_response_item(payload) + if not fallback_prompt: + continue + + if first_prompt is None: + first_prompt = fallback_prompt + if isinstance(event_timestamp, str): + first_prompt_time = event_timestamp + last_prompt = fallback_prompt + if isinstance(event_timestamp, str): + last_prompt_time = event_timestamp + if include_full_text: + full_user_text_parts.append(fallback_prompt) + except OSError as exc: + logger.warning("Failed to read Codex session file %s: %s", session_file, exc) + return None + + return CodexSessionSearchDocument( + summary=CodexSessionSummary( + relative_path=relative_path, + file_name=session_file.name, + session_id=session_id, + cwd=cwd, + started_at=started_at, + first_prompt=first_prompt, + first_prompt_time=first_prompt_time, + last_prompt=last_prompt, + last_prompt_time=last_prompt_time, + last_response=last_response, + last_response_time=last_response_time, + ), + full_user_text="\n".join(full_user_text_parts), + full_assistant_text="\n".join(full_assistant_text_parts), + ) + + +def _summarize_codex_session_file(session_file: Path) -> Optional[CodexSessionSummary]: + file_stat = session_file.stat() + sessions_root = _get_codex_sessions_root().resolve() + # The cache key includes size and mtime so summary extraction is recomputed + # automatically when Codex appends new events to an existing session file. + return _summarize_codex_session_file_cached( + str(session_file.resolve()), + str(sessions_root), + file_stat.st_size, + file_stat.st_mtime_ns, + ) + + +def _build_codex_session_search_document( + session_file: Path, +) -> Optional[CodexSessionSearchDocument]: + sessions_root = _get_codex_sessions_root().resolve() + return _extract_codex_session_document( + session_file, sessions_root, include_full_text=True + ) + + +def _list_codex_session_summaries() -> list[CodexSessionSummary]: + sessions_root = _get_codex_sessions_root() + summaries: list[CodexSessionSummary] = [] + for session_file in _iter_codex_session_files(sessions_root): + summary = _summarize_codex_session_file(session_file) + if summary is not None: + summaries.append(summary) + + # Sessions are most useful when the newest activity is shown first. Sorting + # by the last observed prompt/response time keeps active sessions near the + # top even if their original start time is older. 
+ summaries.sort( + key=lambda summary: ( + summary.last_prompt_time + or summary.last_response_time + or summary.started_at + or summary.relative_path + ), + reverse=True, + ) + return summaries + + +def _normalize_search_mode(raw_mode: str) -> Literal["exact", "fuzzy", "full_text"]: + normalized_mode = raw_mode.strip().casefold() + if normalized_mode in {"", "exact"}: + return "exact" + if normalized_mode == "fuzzy": + return "fuzzy" + if normalized_mode in {"full_text", "fulltext", "fts"}: + return "full_text" + raise HTTPException( + status_code=400, + detail="searchMode must be one of: exact, fuzzy, full_text", + ) + + +def _filter_summaries_by_exact_search( + summaries: list[CodexSessionSummary], normalized_query: str +) -> list[CodexSessionSummary]: + return [ + summary + for summary in summaries + if normalized_query in _build_summary_search_blob(summary).casefold() + ] + + +def _filter_summaries_by_fuzzy_search( + summaries: list[CodexSessionSummary], normalized_query: str +) -> list[CodexSessionSummary]: + query_tokens = _tokenize_search_text(normalized_query) + if not query_tokens: + return summaries + + scored_summaries: list[tuple[float, int, CodexSessionSummary]] = [] + for original_index, summary in enumerate(summaries): + score = _score_fuzzy_query_against_summary( + summary, normalized_query, query_tokens + ) + if score < 0.72: + continue + scored_summaries.append((score, original_index, summary)) + + scored_summaries.sort( + key=lambda item: ( + item[0], + -item[1], + ), + reverse=True, + ) + return [summary for _, _, summary in scored_summaries] + + +def _open_codex_session_search_index() -> sqlite3.Connection: + index_path = _get_codex_session_search_index_path() + index_path.parent.mkdir(parents=True, exist_ok=True) + connection = sqlite3.connect(index_path) + connection.row_factory = sqlite3.Row + return connection + + +def _ensure_codex_session_search_index(connection: sqlite3.Connection) -> None: + # Maintain a small metadata table for incremental refresh bookkeeping and a + # separate FTS5 table for efficient full-session lookup across user and + # assistant text. 
+ connection.execute( + """ + CREATE TABLE IF NOT EXISTS codex_session_index_metadata ( + relative_path TEXT PRIMARY KEY, + file_name TEXT NOT NULL, + session_id TEXT, + cwd TEXT, + started_at TEXT, + first_prompt TEXT, + first_prompt_time TEXT, + last_prompt TEXT, + last_prompt_time TEXT, + last_response TEXT, + last_response_time TEXT, + file_size INTEGER NOT NULL, + modified_time_ns INTEGER NOT NULL + ) + """ + ) + connection.execute( + """ + CREATE VIRTUAL TABLE IF NOT EXISTS codex_session_search_fts USING fts5( + relative_path UNINDEXED, + file_name, + session_id, + cwd, + first_prompt, + last_prompt, + last_response, + user_text, + assistant_text, + tokenize = 'unicode61 remove_diacritics 2' + ) + """ + ) + + +def _refresh_codex_session_search_index(connection: sqlite3.Connection) -> None: + sessions_root = _get_codex_sessions_root() + _ensure_codex_session_search_index(connection) + + indexed_rows = { + row["relative_path"]: (row["file_size"], row["modified_time_ns"]) + for row in connection.execute( + """ + SELECT relative_path, file_size, modified_time_ns + FROM codex_session_index_metadata + """ + ).fetchall() + } + + current_files: dict[str, tuple[Path, int, int]] = {} + for session_file in _iter_codex_session_files(sessions_root): + file_stat = session_file.stat() + relative_path = session_file.relative_to(sessions_root).as_posix() + current_files[relative_path] = ( + session_file, + file_stat.st_size, + file_stat.st_mtime_ns, + ) + + previous_index_state = indexed_rows.get(relative_path) + if previous_index_state == (file_stat.st_size, file_stat.st_mtime_ns): + continue + + document = _build_codex_session_search_document(session_file) + if document is None: + continue + + summary = document.summary + connection.execute( + """ + INSERT INTO codex_session_index_metadata ( + relative_path, + file_name, + session_id, + cwd, + started_at, + first_prompt, + first_prompt_time, + last_prompt, + last_prompt_time, + last_response, + last_response_time, + file_size, + modified_time_ns + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(relative_path) DO UPDATE SET + file_name = excluded.file_name, + session_id = excluded.session_id, + cwd = excluded.cwd, + started_at = excluded.started_at, + first_prompt = excluded.first_prompt, + first_prompt_time = excluded.first_prompt_time, + last_prompt = excluded.last_prompt, + last_prompt_time = excluded.last_prompt_time, + last_response = excluded.last_response, + last_response_time = excluded.last_response_time, + file_size = excluded.file_size, + modified_time_ns = excluded.modified_time_ns + """, + ( + summary.relative_path, + summary.file_name, + summary.session_id, + summary.cwd, + summary.started_at, + summary.first_prompt, + summary.first_prompt_time, + summary.last_prompt, + summary.last_prompt_time, + summary.last_response, + summary.last_response_time, + file_stat.st_size, + file_stat.st_mtime_ns, + ), + ) + connection.execute( + "DELETE FROM codex_session_search_fts WHERE relative_path = ?", + (summary.relative_path,), + ) + connection.execute( + """ + INSERT INTO codex_session_search_fts ( + relative_path, + file_name, + session_id, + cwd, + first_prompt, + last_prompt, + last_response, + user_text, + assistant_text + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + summary.relative_path, + summary.file_name, + summary.session_id or "", + summary.cwd or "", + summary.first_prompt or "", + summary.last_prompt or "", + summary.last_response or "", + document.full_user_text, + document.full_assistant_text, + ), + ) + + stale_relative_paths = set(indexed_rows) - set(current_files) + for relative_path in stale_relative_paths: + connection.execute( + "DELETE FROM codex_session_index_metadata WHERE relative_path = ?", + (relative_path,), + ) + connection.execute( + "DELETE FROM codex_session_search_fts WHERE relative_path = ?", + (relative_path,), + ) + + connection.commit() + + +def _build_fts_query(search_query: str) -> Optional[str]: + query_tokens = _tokenize_search_text(search_query) + if not query_tokens: + return None + + # Prefix-match each normalized token and require all of them so the search + # remains flexible about word order while still narrowing down noise. + return " AND ".join(f"{token}*" for token in query_tokens) + + +def _filter_summaries_by_full_text_search( + summaries: list[CodexSessionSummary], search_query: str +) -> list[CodexSessionSummary]: + if not summaries: + return [] + + allowed_paths = {summary.relative_path for summary in summaries} + summary_lookup = {summary.relative_path: summary for summary in summaries} + fts_query = _build_fts_query(search_query) + if fts_query is None: + return summaries + + try: + with _open_codex_session_search_index() as connection: + _refresh_codex_session_search_index(connection) + ranked_rows = connection.execute( + """ + SELECT + relative_path, + bm25( + codex_session_search_fts, + 0.4, + 0.2, + 0.1, + 1.5, + 1.5, + 1.2, + 1.0, + 1.0 + ) AS rank_score + FROM codex_session_search_fts + WHERE codex_session_search_fts MATCH ? + ORDER BY rank_score ASC + """, + (fts_query,), + ).fetchall() + except sqlite3.OperationalError as exc: + logger.warning("SQLite full-text search unavailable, falling back to exact search: %s", exc) + return _filter_summaries_by_exact_search( + summaries, search_query.strip().casefold() + ) + + ordered_paths = [ + row["relative_path"] + for row in ranked_rows + if row["relative_path"] in allowed_paths + ] + return [summary_lookup[path] for path in ordered_paths] + + +def normalize_harmony_content( + raw_content: Any, role: Any, runtime: HarmonyRuntime +) -> list[Any]: if raw_content is None: - return [HarmonyTextContent(text="")] + return [runtime.text_content_cls(text="")] if isinstance(raw_content, str): - return [HarmonyTextContent(text=raw_content)] + return [runtime.text_content_cls(text=raw_content)] if isinstance(raw_content, dict): if isinstance(raw_content.get("parts"), list): @@ -106,28 +943,28 @@ def normalize_harmony_content(raw_content: Any, role: HarmonyRole) -> list[Any]: elif isinstance(raw_content, list): raw_items = raw_content else: - return [HarmonyTextContent(text=json.dumps(raw_content, default=str))] + return [runtime.text_content_cls(text=json.dumps(raw_content, default=str))] contents: list[Any] = [] for item in raw_items: if not isinstance(item, dict): - contents.append(HarmonyTextContent(text=str(item))) + contents.append(runtime.text_content_cls(text=str(item))) continue content_type = item.get("content_type") or item.get("type") if content_type == "text" or "text" in item: - contents.append(HarmonyTextContent(text=str(item.get("text", "")))) + contents.append(runtime.text_content_cls(text=str(item.get("text", "")))) continue if ( content_type in {"system", "system_content"} - or role == HarmonyRole.SYSTEM + or role == 
runtime.role_cls.SYSTEM or "model_identity" in item ): try: contents.append( - HarmonySystemContent.from_dict( + runtime.system_content_cls.from_dict( { key: value for key, value in item.items() @@ -136,17 +973,19 @@ def normalize_harmony_content(raw_content: Any, role: HarmonyRole) -> list[Any]: ) ) except Exception: - contents.append(HarmonyTextContent(text=json.dumps(item, default=str))) + contents.append( + runtime.text_content_cls(text=json.dumps(item, default=str)) + ) continue if ( content_type in {"developer_content", "developer"} - or role == HarmonyRole.DEVELOPER + or role == runtime.role_cls.DEVELOPER or "instructions" in item ): try: contents.append( - HarmonyDeveloperContent.from_dict( + runtime.developer_content_cls.from_dict( { key: value for key, value in item.items() @@ -155,18 +994,22 @@ def normalize_harmony_content(raw_content: Any, role: HarmonyRole) -> list[Any]: ) ) except Exception: - contents.append(HarmonyTextContent(text=json.dumps(item, default=str))) + contents.append( + runtime.text_content_cls(text=json.dumps(item, default=str)) + ) continue - contents.append(HarmonyTextContent(text=json.dumps(item, default=str))) + contents.append(runtime.text_content_cls(text=json.dumps(item, default=str))) - return contents or [HarmonyTextContent(text="")] + return contents or [runtime.text_content_cls(text="")] -def normalize_harmony_conversation(conversation_payload: str) -> HarmonyConversation: +def normalize_harmony_conversation( + conversation_payload: str, runtime: HarmonyRuntime +) -> Any: raw_conversation = json.loads(conversation_payload) raw_messages = raw_conversation.get("messages", []) - messages: list[HarmonyMessage] = [] + messages: list[Any] = [] for raw_message in raw_messages: if not isinstance(raw_message, dict): @@ -179,23 +1022,23 @@ def normalize_harmony_conversation(conversation_payload: str) -> HarmonyConversa raw_role = "user" try: - role = HarmonyRole(raw_role) + role = runtime.role_cls(raw_role) except ValueError: - role = HarmonyRole.USER + role = runtime.role_cls.USER name = raw_message.get("name") if name is None and isinstance(raw_message.get("author"), dict): name = raw_message["author"].get("name") - message = HarmonyMessage( - author=HarmonyAuthor(role=role, name=name), - content=normalize_harmony_content(raw_message.get("content"), role), + message = runtime.message_cls( + author=runtime.author_cls(role=role, name=name), + content=normalize_harmony_content(raw_message.get("content"), role, runtime), channel=raw_message.get("channel"), recipient=raw_message.get("recipient"), ) messages.append(message) - return HarmonyConversation(messages=messages) + return runtime.conversation_cls(messages=messages) async def _call_openai_translate(source_text: str) -> TranslationResult: @@ -302,6 +1145,82 @@ async def ping() -> dict[str, str]: return {"status": "ok"} +@fastapi_app.get("/codex-sessions/", response_model=CodexSessionListResponse) +async def get_codex_sessions( + offset: int = Query(0, ge=0), + limit: int = Query(25, ge=1, le=200), + firstPromptFilter: str = Query(""), + lastPromptFilter: str = Query(""), + responseFilter: str = Query(""), + searchQuery: str = Query(""), + searchMode: str = Query("exact"), +) -> CodexSessionListResponse: + all_summaries = _list_codex_session_summaries() + total = len(all_summaries) + + normalized_first_filter = firstPromptFilter.strip().casefold() + normalized_last_filter = lastPromptFilter.strip().casefold() + normalized_response_filter = responseFilter.strip().casefold() + normalized_search_query = 
searchQuery.strip().casefold() + normalized_search_mode = _normalize_search_mode(searchMode) + + filtered_summaries = all_summaries + if normalized_first_filter: + filtered_summaries = [ + summary + for summary in filtered_summaries + if normalized_first_filter in (summary.first_prompt or "").casefold() + ] + if normalized_last_filter: + filtered_summaries = [ + summary + for summary in filtered_summaries + if normalized_last_filter in (summary.last_prompt or "").casefold() + ] + if normalized_response_filter: + # Filter against the cached preview text so response searching stays + # fast even when the sessions directory contains many large JSONL logs. + filtered_summaries = [ + summary + for summary in filtered_summaries + if normalized_response_filter in (summary.last_response or "").casefold() + ] + if normalized_search_query: + if normalized_search_mode == "exact": + filtered_summaries = _filter_summaries_by_exact_search( + filtered_summaries, normalized_search_query + ) + elif normalized_search_mode == "fuzzy": + filtered_summaries = _filter_summaries_by_fuzzy_search( + filtered_summaries, normalized_search_query + ) + else: + filtered_summaries = _filter_summaries_by_full_text_search( + filtered_summaries, searchQuery + ) + + return CodexSessionListResponse( + data=filtered_summaries[offset : offset + limit], + offset=offset, + limit=limit, + total=total, + matchedCount=len(filtered_summaries), + ) + + +@fastapi_app.get("/codex-session-file/") +async def get_codex_session_file(relative_path: str = Query(...)) -> Response: + session_file = _resolve_codex_session_path(relative_path) + if not session_file.is_file(): + raise HTTPException(status_code=404, detail="Session not found") + + return FileResponse( + session_file, + media_type=CODEX_SESSION_FILE_MEDIA_TYPE, + filename=session_file.name, + ) + + @fastapi_app.get("/blob-jsonl/", response_model=BlobJSONLResponse) async def get_blob_jsonl( blobURL: str = Query(...), @@ -423,12 +1342,21 @@ async def translate_text( @fastapi_app.get("/harmony-renderer-list/") async def get_harmony_renderer_list() -> HarmonyRendererListResult: + try: + _get_harmony_runtime() + except HTTPException: + # Returning an empty renderer list keeps the rest of the app usable when + # the optional backend Harmony runtime is not configured yet. 
+ return HarmonyRendererListResult(renderers=[]) + return HarmonyRendererListResult(renderers=[HARMONY_RENDERER_NAME]) @fastapi_app.post("/harmony-render/") async def harmony_render(request_body: HarmonyRenderRequestBody) -> HarmonyRenderResult: try: + runtime = _get_harmony_runtime() + if request_body.renderer_name != HARMONY_RENDERER_NAME: raise HTTPException( status_code=400, @@ -438,15 +1366,15 @@ async def harmony_render(request_body: HarmonyRenderRequestBody) -> HarmonyRende ), ) - conversation = normalize_harmony_conversation(request_body.conversation) - tokens = HARMONY_RENDERING_ENCODING.render_conversation( + conversation = normalize_harmony_conversation( + request_body.conversation, runtime + ) + tokens = runtime.encoding.render_conversation( conversation, - config=HARMONY_RENDER_CONFIG, + config=runtime.render_config, ) - display_string = HARMONY_RENDERING_ENCODING.decode_utf8(tokens) - decoded_tokens = [ - HARMONY_RENDERING_ENCODING.decode([token]) for token in tokens - ] + display_string = runtime.encoding.decode_utf8(tokens) + decoded_tokens = [runtime.encoding.decode([token]) for token in tokens] return HarmonyRenderResult( tokens=tokens, decoded_tokens=decoded_tokens, diff --git a/sessions.html b/sessions.html new file mode 100644 index 0000000..1d71ac9 --- /dev/null +++ b/sessions.html @@ -0,0 +1,44 @@ + + + + + + + + + + + + + Euphony: Codex Sessions + + + + + + + + + + + + + + diff --git a/src/components/app/app.css b/src/components/app/app.css index 27d817b..b9e7a08 100644 --- a/src/components/app/app.css +++ b/src/components/app/app.css @@ -78,6 +78,23 @@ font-weight: 700; } + .header-link { + display: inline-flex; + justify-content: center; + align-items: center; + min-height: 34px; + padding: 0 12px; + border: 1px solid var(--gray-300); + border-radius: 999px; + background-color: rgba(255, 255, 255, 0.9); + font-size: var(--font-d2); + font-weight: 600; + + &:hover { + background-color: var(--gray-100); + } + } + & a { color: inherit; text-decoration: none; diff --git a/src/components/app/app.ts b/src/components/app/app.ts index 58fcc75..23ed245 100644 --- a/src/components/app/app.ts +++ b/src/components/app/app.ts @@ -2310,6 +2310,9 @@ export class EuphonyApp extends LitElement { ${this.isEditorMode ? 
'Euphony Editor' : 'Euphony'} + Sessions 0 && this.curPage > totalPageNum) { + this.curPage = totalPageNum; + this.updateURL(); + await this.loadSessions(); + return; + } + + this.sessions = payload.data; + this.totalSessions = payload.total; + this.matchedSessions = payload.matchedCount; + this.updateURL(); + } catch (error) { + this.sessions = []; + this.totalSessions = 0; + this.matchedSessions = 0; + this.errorMessage = String(error); + } finally { + this.isLoading = false; + } + } + + private buildSessionFileURL(session: CodexSessionSummary) { + const query = new URLSearchParams({ + relative_path: session.relative_path + }); + return new URL( + `${EUPHONY_API_URL}codex-session-file/?${query.toString()}`, + window.location.href + ).toString(); + } + + private buildViewerURL(session: CodexSessionSummary) { + const viewerURL = new URL('./', window.location.href); + viewerURL.searchParams.set('path', this.buildSessionFileURL(session)); + return viewerURL.toString(); + } + + private formatTimestamp(timestamp: string | null) { + if (!timestamp) { + return 'Not available'; + } + + const parsed = new Date(timestamp); + if (Number.isNaN(parsed.getTime())) { + return timestamp; + } + + return new Intl.DateTimeFormat(undefined, { + dateStyle: 'medium', + timeStyle: 'short' + }).format(parsed); + } + + private renderTimestamp(timestamp: string | null) { + return html` + + `; + } + + private renderPromptSummary( + label: string, + timestamp: string | null, + prompt: string | null, + emptyState = 'No user prompt captured.' + ) { + return html` +
+
${label}
+
${this.renderTimestamp(timestamp)}
+
${prompt ?? emptyState}
+
+ `; + } + + private renderSummaryInfoIcon() { + return html` + + i + + `; + } + + private applyFilters(event: Event) { + event.preventDefault(); + this.appliedFirstPromptFilter = this.firstPromptFilterInput.trim(); + this.appliedLastPromptFilter = this.lastPromptFilterInput.trim(); + this.appliedResponseFilter = this.responseFilterInput.trim(); + this.appliedSearchQuery = this.searchQueryInput.trim(); + this.appliedSearchMode = this.searchModeInput; + this.curPage = 1; + void this.loadSessions(); + } + + private resetFilters() { + this.firstPromptFilterInput = ''; + this.lastPromptFilterInput = ''; + this.responseFilterInput = ''; + this.searchQueryInput = ''; + this.searchModeInput = DEFAULT_SEARCH_MODE; + this.appliedFirstPromptFilter = ''; + this.appliedLastPromptFilter = ''; + this.appliedResponseFilter = ''; + this.appliedSearchQuery = ''; + this.appliedSearchMode = DEFAULT_SEARCH_MODE; + this.curPage = 1; + void this.loadSessions(); + } + + private pageClicked(event: CustomEvent) { + this.curPage = event.detail; + void this.loadSessions(); + } + + private itemsPerPageChanged(event: CustomEvent) { + this.itemsPerPage = event.detail; + this.curPage = 1; + void this.loadSessions(); + } + + render() { + return html` +
+
+
+
+

Local Codex Activity

+

Codex Sessions

+

+ Browse JSONL session logs from your local Codex history, filter + by prompts or responses, run fuzzy or full-text searches across + sessions, and open any match in the main Euphony viewer. +

+
+ +
+ Open Euphony Viewer +

+ Source: ~/.codex/sessions or CODEX_HOME + when it is configured. +

+
+
+ +
+
this.applyFilters(event)}> + + + + + + +
+ Field filters only search their own visible card fields and + always use exact keyword matching. +
+ +
+ + + + +
+ + +
+
+ +
+ ${this.hasSearchQueryInput || this.hasAppliedSearchQuery + ? html` + Search all sessions is active. Full text + scans indexed multi-turn user and assistant text, + Fuzzy tolerates typos across summary + ${this.renderSummaryInfoIcon()} fields, and + Exact matches summary + ${this.renderSummaryInfoIcon()} fields directly. + ` + : html` + Search mode only applies to + Search all sessions. Leave that box empty to + use only the field-specific filters above. + `} +
+ +
+ + +
+
+ +
+
+ ${this.matchedSessions.toLocaleString()} matched session${this + .matchedSessions === 1 + ? '' + : 's'} + ${this.totalSessions !== this.matchedSessions + ? html` + of ${this.totalSessions.toLocaleString()} total + ` + : ''} +
+
+
+ + ${this.isLoading + ? html` +
+
Loading sessions
+
+ Reading Codex session metadata from your local machine. +
+
+ ` + : ''} + + ${!this.isLoading && this.errorMessage + ? html` +
+
Could not load sessions
+
${this.errorMessage}
+
+ ` + : ''} + + ${!this.isLoading && + !this.errorMessage && + this.matchedSessions === 0 + ? html` +
+
No matching sessions
+
+ Try clearing the filters or search query, or create a new + Codex session and reload this page. +
+
+ ` + : ''} + + ${!this.isLoading && + !this.errorMessage && + this.matchedSessions > 0 + ? html` +
+ ${this.sessions.map( + session => html` +
+
+
+ + ${session.session_id ?? session.file_name} + +
+ ${session.relative_path} + ${session.cwd + ? html`| + ${session.cwd}` + : ''} +
+
+ +
+
Started
+
+ ${this.renderTimestamp(session.started_at)} +
+
+
+ +
+ ${this.renderPromptSummary( + 'First user prompt', + session.first_prompt_time, + session.first_prompt + )} + + ${this.renderPromptSummary( + 'Last user prompt', + session.last_prompt_time, + session.last_prompt + )} + + ${this.renderPromptSummary( + 'Last response', + session.last_response_time, + session.last_response, + 'No assistant response captured.' + )} +
+
+ ` + )} +
+ ` + : ''} + + ${!this.isLoading && !this.errorMessage && this.matchedSessions > 0 + ? html` + + ` + : ''} +
+
+ `; + } + + static styles = [ + css` + ${unsafeCSS(componentCSS)} + ` + ]; +} + +declare global { + interface HTMLElementTagNameMap { + 'euphony-codex-sessions-page': EuphonyCodexSessionsPage; + } +} diff --git a/src/utils/api-manager.ts b/src/utils/api-manager.ts index 97884bd..0de1457 100644 --- a/src/utils/api-manager.ts +++ b/src/utils/api-manager.ts @@ -6,16 +6,22 @@ import type { RefreshRendererListResponse } from '../types/common-types'; import type { Conversation } from '../types/harmony-types'; +import { isCodexSessionJSONL } from './codex-session'; import { HARMONY_RENDERER_NAME, renderHarmonyConversationInBrowser } from './harmony-render'; -export let EUPHONY_API_URL = - (import.meta.env.VITE_EUPHONY_API_URL as string) || '/'; +// Let local dev override the backend origin when the helper script launches +// Vite against a non-default API port, while still falling back to the +// existing relative URL in built production assets. +const configuredAPIURL = + (import.meta.env.VITE_EUPHONY_API_URL as string | undefined) || ''; + +export let EUPHONY_API_URL = configuredAPIURL || '/'; if (import.meta.env.DEV) { - EUPHONY_API_URL = 'http://localhost:8020/'; + EUPHONY_API_URL = configuredAPIURL || 'http://localhost:8020/'; } // The maximum number of lines in a JSONL file to read in frontend-only mode @@ -154,26 +160,80 @@ export const extractConversationFromJSONL = ( return null; }; -const isJsonLikePath = (path: string) => - path.split('.').at(-1) === 'json' || path.split('.').at(-2) === 'json'; - -const parseJsonFileOrJsonlText = (text: string): unknown[] => { +const isJsonLikePath = (path: string) => { try { - return [JSON.parse(text)]; - } catch (_) { - const lines = text - .split(/\r?\n/) - .filter(l => l.length > 0) - .slice(0, FRONTEND_ONLY_MODE_MAX_LINES); - const results: unknown[] = []; - for (const line of lines) { + const parsedURL = new URL(path, window.location.href); + const extension = parsedURL.pathname.split('.').at(-1)?.toLowerCase(); + return ( + extension === 'json' || extension === 'jsonl' || extension === 'ndjson' + ); + } catch (_error) { + const extension = path.split('.').at(-1)?.toLowerCase(); + return ( + extension === 'json' || extension === 'jsonl' || extension === 'ndjson' + ); + } +}; + +const isJSONLikeContentType = (contentType: string | null) => { + if (!contentType) { + return false; + } + + const normalized = contentType.toLowerCase(); + return ( + normalized.includes('application/json') || + normalized.includes('application/x-ndjson') || + normalized.includes('application/ndjson') + ); +}; + +const parseJSONLText = (text: string, maxLines: number | null): unknown[] => { + const results: unknown[] = []; + let lineStart = 0; + + // Parse JSONL incrementally so preview reads do not allocate a giant array of + // every line in the file when we only need the first N lines to classify the + // payload type. + while (lineStart <= text.length) { + if (maxLines !== null && results.length >= maxLines) { + break; + } + + const nextLineBreak = text.indexOf('\n', lineStart); + const lineEnd = nextLineBreak === -1 ? text.length : nextLineBreak; + + let line = text.slice(lineStart, lineEnd); + if (line.endsWith('\r')) { + line = line.slice(0, -1); + } + + if (line.length > 0) { try { results.push(JSON.parse(line)); } catch (_) { - // pass + // Ignore malformed lines and keep parsing the rest of the JSONL file. 
} } - return results; + + if (nextLineBreak === -1) { + break; + } + + lineStart = nextLineBreak + 1; + } + + return results; +}; + +const parseJsonFileOrJsonlText = ( + text: string, + maxLines: number | null = FRONTEND_ONLY_MODE_MAX_LINES +): unknown[] => { + try { + return [JSON.parse(text)]; + } catch (_) { + return parseJSONLText(text, maxLines); } }; @@ -342,9 +402,24 @@ export class BrowserAPIManager { throw new Error(`HTTP error! status: ${response.status}`); } - if (isJsonLikePath(blobURL)) { + if ( + isJsonLikePath(blobURL) || + isJSONLikeContentType(response.headers.get('content-type')) + ) { const text = await response.text(); - const results = parseJsonFileOrJsonlText(text); + let results = parseJsonFileOrJsonlText(text); + + // Codex session detail pages need the complete event stream so later + // assistant turns are not dropped. We still keep the preview cap for + // generic JSONL datasets to avoid heavy browser-side parsing on large + // files that are not single-session logs. + if ( + results.length === FRONTEND_ONLY_MODE_MAX_LINES && + isCodexSessionJSONL(results) + ) { + results = parseJsonFileOrJsonlText(text, null); + } + let conversationData: Conversation[] | string[] | null = extractConversationFromJSONL(results); diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..b88eacb --- /dev/null +++ b/start.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Resolve the repository root from the script location so the script works even +# when it is launched from another directory. +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" + +# Resolve the requested run mode up front. The default is a production-style +# local launch that only starts the backend server against the existing `dist/` +# build. Passing `dev` as the first positional argument, or setting MODE=dev, +# switches to the full hot-reload workflow with both backend and frontend. +MODE="${MODE:-${1:-prod}}" + +# Allow callers to override the ports without editing the script. +BACKEND_HOST="${BACKEND_HOST:-127.0.0.1}" +BACKEND_PORT="${BACKEND_PORT:-8020}" +FRONTEND_HOST="${FRONTEND_HOST:-127.0.0.1}" +FRONTEND_PORT="${FRONTEND_PORT:-3000}" + +BACKEND_PID="" +FRONTEND_PID="" + +require_command() { + local command_name="$1" + if ! command -v "${command_name}" >/dev/null 2>&1; then + printf 'Missing required command: %s\n' "${command_name}" >&2 + exit 1 + fi +} + +cleanup() { + # Shut child processes down when the user presses Ctrl+C or when the script + # exits for any other reason. `kill` can fail if a process already stopped, + # so ignore those cases quietly. + if [[ -n "${FRONTEND_PID}" ]]; then + kill "${FRONTEND_PID}" >/dev/null 2>&1 || true + fi + if [[ -n "${BACKEND_PID}" ]]; then + kill "${BACKEND_PID}" >/dev/null 2>&1 || true + fi +} + +normalize_mode() { + local value="${1:-prod}" + case "${value}" in + prod|production) + printf 'prod' + ;; + dev|development) + printf 'dev' + ;; + *) + printf 'Unsupported mode: %s\nExpected one of: prod, dev\n' "${value}" >&2 + exit 1 + ;; + esac +} + +wait_for_process_exit() { + local pid="$1" + local label="$2" + local exit_code="0" + + if [[ -n "${pid}" ]] && ! kill -0 "${pid}" >/dev/null 2>&1; then + # Capture the child exit status explicitly so `set -e` does not terminate + # the script before we can print which background task stopped. + if wait "${pid}"; then + exit_code="0" + else + exit_code="$?" 
+ fi + printf '\n%s stopped with exit code %s.\n' "${label}" "${exit_code}" >&2 + exit "${exit_code}" + fi +} + +require_command uv + +trap cleanup EXIT INT TERM + +cd "${ROOT_DIR}" + +MODE="$(normalize_mode "${MODE}")" + +# Production mode serves the already-built frontend from FastAPI, so warn +# early if the caller has not built the assets yet. This keeps the default +# startup fast while still making the missing prerequisite obvious. +if [[ "${MODE}" == "prod" ]] && [[ ! -f "${ROOT_DIR}/dist/index.html" ]]; then + printf 'Missing built frontend assets in dist/. Run `npm run build` first or start with `./start.sh dev`.\n' >&2 + exit 1 +fi + +if [[ "${MODE}" == "dev" ]]; then + require_command npm +fi + +# Build the uvicorn argument list incrementally so production mode starts a +# plain backend process while development mode adds file watching cleanly. +BACKEND_COMMAND=( + uv run python -m uvicorn fastapi-main:app + --app-dir server + --host "${BACKEND_HOST}" + --port "${BACKEND_PORT}" +) + +if [[ "${MODE}" == "dev" ]]; then + BACKEND_COMMAND+=(--reload) +fi + +printf 'Starting backend on http://%s:%s\n' "${BACKEND_HOST}" "${BACKEND_PORT}" +"${BACKEND_COMMAND[@]}" & +BACKEND_PID="$!" + +if [[ "${MODE}" == "dev" ]]; then + # Pass the backend URL into Vite so the frontend dev server still talks to + # the matching API port when callers override BACKEND_PORT. + printf 'Starting frontend dev server on http://%s:%s\n' "${FRONTEND_HOST}" "${FRONTEND_PORT}" + VITE_EUPHONY_API_URL="http://${BACKEND_HOST}:${BACKEND_PORT}/" \ + npm run dev -- --host "${FRONTEND_HOST}" --port "${FRONTEND_PORT}" & + FRONTEND_PID="$!" +fi + +printf '\n' +printf 'Mode: %s\n' "${MODE}" +printf 'Euphony backend: http://%s:%s\n' "${BACKEND_HOST}" "${BACKEND_PORT}" +printf 'Codex sessions: http://%s:%s/sessions.html\n' "${BACKEND_HOST}" "${BACKEND_PORT}" +if [[ "${MODE}" == "dev" ]]; then + printf 'Euphony frontend: http://%s:%s\n' "${FRONTEND_HOST}" "${FRONTEND_PORT}" +else + printf 'Euphony frontend: served by backend from `dist/`\n' +fi +printf '\nPress Ctrl+C to stop all started processes.\n\n' + +# `wait -n` is unavailable in the older Bash 3.2 that ships with macOS, so use +# a small polling loop that exits as soon as any child process stops. The +# subsequent `wait` call still captures the real exit status of that process. 
+while true; do + wait_for_process_exit "${BACKEND_PID}" "Backend server" + wait_for_process_exit "${FRONTEND_PID}" "Frontend dev server" + + sleep 1 +done diff --git a/tests/test_codex_sessions_api.py b/tests/test_codex_sessions_api.py new file mode 100644 index 0000000..67fa375 --- /dev/null +++ b/tests/test_codex_sessions_api.py @@ -0,0 +1,241 @@ +import importlib.util +import json +import sys +import uuid +from pathlib import Path +from typing import Dict, List + +from fastapi.testclient import TestClient + + +def load_server_module(): + module_path = Path(__file__).resolve().parents[1] / "server" / "fastapi-main.py" + module_name = f"euphony_fastapi_main_{uuid.uuid4().hex}" + spec = importlib.util.spec_from_file_location(module_name, module_path) + assert spec is not None + assert spec.loader is not None + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def write_session(path: Path, events: List[Dict[str, object]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + "\n".join(json.dumps(event) for event in events) + "\n", + encoding="utf-8", + ) + + +def test_codex_sessions_endpoints_list_filter_and_serve_file( + monkeypatch, tmp_path: Path +) -> None: + codex_home = tmp_path / ".codex" + sessions_dir = codex_home / "sessions" + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + older_session_path = sessions_dir / "2026" / "04" / "23" / "older.jsonl" + newer_session_path = sessions_dir / "2026" / "04" / "24" / "newer.jsonl" + search_session_path = sessions_dir / "2026" / "04" / "25" / "searchable.jsonl" + + write_session( + older_session_path, + [ + { + "timestamp": "2026-04-23T09:00:00Z", + "type": "session_meta", + "payload": { + "id": "session-older", + "timestamp": "2026-04-23T09:00:00Z", + "cwd": "/tmp/older", + }, + }, + { + "timestamp": "2026-04-23T09:01:00Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Fix the broken dashboard layout", + }, + }, + { + "timestamp": "2026-04-23T09:02:00Z", + "type": "event_msg", + "payload": { + "type": "agent_message", + "message": "Inspecting the dashboard styles.", + }, + }, + { + "timestamp": "2026-04-23T09:05:00Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Ship the dashboard patch", + }, + }, + { + "timestamp": "2026-04-23T09:06:00Z", + "type": "event_msg", + "payload": { + "type": "agent_message", + "message": "The dashboard patch is ready. 
" + + ("A" * 400), + }, + }, + ], + ) + + write_session( + newer_session_path, + [ + { + "timestamp": "2026-04-24T12:00:00Z", + "type": "session_meta", + "payload": { + "id": "session-newer", + "timestamp": "2026-04-24T12:00:00Z", + "cwd": "/tmp/newer", + }, + }, + { + "timestamp": "2026-04-24T12:01:00Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Draft release notes for the April ship", + }, + }, + { + "timestamp": "2026-04-24T12:03:00Z", + "type": "event_msg", + "payload": { + "type": "agent_message", + "message": "Reviewing release-note highlights.", + }, + }, + ], + ) + + write_session( + search_session_path, + [ + { + "timestamp": "2026-04-25T07:00:00Z", + "type": "session_meta", + "payload": { + "id": "session-searchable", + "timestamp": "2026-04-25T07:00:00Z", + "cwd": "/tmp/searchable", + }, + }, + { + "timestamp": "2026-04-25T07:01:00Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Investigate the nightly regression", + }, + }, + { + "timestamp": "2026-04-25T07:02:00Z", + "type": "event_msg", + "payload": { + "type": "agent_message", + "message": "Triaging a telemetry anomaly in the overnight build.", + }, + }, + { + "timestamp": "2026-04-25T07:04:00Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Prepare the mitigation summary", + }, + }, + { + "timestamp": "2026-04-25T07:05:00Z", + "type": "event_msg", + "payload": { + "type": "agent_message", + "message": "Mitigation summary drafted for the nightly regression.", + }, + }, + ], + ) + + module = load_server_module() + client = TestClient(module.fastapi_app) + + response = client.get("/codex-sessions/") + assert response.status_code == 200 + + payload = response.json() + assert payload["total"] == 3 + assert payload["matchedCount"] == 3 + assert [item["session_id"] for item in payload["data"]] == [ + "session-searchable", + "session-newer", + "session-older", + ] + assert payload["data"][2]["first_prompt"] == "Fix the broken dashboard layout" + assert payload["data"][2]["last_prompt"] == "Ship the dashboard patch" + assert payload["data"][2]["last_response"].startswith( + "The dashboard patch is ready." 
+ ) + assert payload["data"][2]["last_response"].endswith("...") + assert payload["data"][2]["last_response_time"] == "2026-04-23T09:06:00Z" + + filtered_response = client.get( + "/codex-sessions/", + params={"firstPromptFilter": "release", "lastPromptFilter": "april"}, + ) + assert filtered_response.status_code == 200 + filtered_payload = filtered_response.json() + assert filtered_payload["matchedCount"] == 1 + assert filtered_payload["data"][0]["session_id"] == "session-newer" + + response_filtered_response = client.get( + "/codex-sessions/", + params={"responseFilter": "highlights"}, + ) + assert response_filtered_response.status_code == 200 + response_filtered_payload = response_filtered_response.json() + assert response_filtered_payload["matchedCount"] == 1 + assert response_filtered_payload["data"][0]["session_id"] == "session-newer" + + exact_search_response = client.get( + "/codex-sessions/", + params={"searchQuery": "mitigation summary", "searchMode": "exact"}, + ) + assert exact_search_response.status_code == 200 + exact_search_payload = exact_search_response.json() + assert exact_search_payload["matchedCount"] == 1 + assert exact_search_payload["data"][0]["session_id"] == "session-searchable" + + fuzzy_search_response = client.get( + "/codex-sessions/", + params={"searchQuery": "relese", "searchMode": "fuzzy"}, + ) + assert fuzzy_search_response.status_code == 200 + fuzzy_search_payload = fuzzy_search_response.json() + assert fuzzy_search_payload["matchedCount"] == 1 + assert fuzzy_search_payload["data"][0]["session_id"] == "session-newer" + + full_text_search_response = client.get( + "/codex-sessions/", + params={"searchQuery": "telemetry anomaly", "searchMode": "full_text"}, + ) + assert full_text_search_response.status_code == 200 + full_text_search_payload = full_text_search_response.json() + assert full_text_search_payload["matchedCount"] == 1 + assert full_text_search_payload["data"][0]["session_id"] == "session-searchable" + + file_response = client.get( + "/codex-session-file/", + params={"relative_path": "2026/04/23/older.jsonl"}, + ) + assert file_response.status_code == 200 + assert "session-older" in file_response.text diff --git a/vite.config.ts b/vite.config.ts index 8b939e1..3b09bb3 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -48,6 +48,13 @@ async function getLibraryEntryPoints() { return files.filter(file => !file.includes('app')); } +function getFrontendHTMLInputs() { + return { + main: resolve(__dirname, 'index.html'), + sessions: resolve(__dirname, 'sessions.html') + }; +} + export default defineConfig(async ({ command, mode }) => { if (command === 'serve') { // Development @@ -69,9 +76,7 @@ export default defineConfig(async ({ command, mode }) => { build: { outDir: 'dist', rollupOptions: { - input: { - main: resolve(__dirname, 'index.html') - } + input: getFrontendHTMLInputs() } } }; @@ -84,9 +89,7 @@ export default defineConfig(async ({ command, mode }) => { build: { outDir: 'dist', rollupOptions: { - input: { - main: resolve(__dirname, 'index.html') - } + input: getFrontendHTMLInputs() } }, plugins: []