diff --git a/integrations/chrome-capture-extension/.gitignore b/integrations/chrome-capture-extension/.gitignore new file mode 100644 index 00000000..2792f605 --- /dev/null +++ b/integrations/chrome-capture-extension/.gitignore @@ -0,0 +1,20 @@ +# Runtime captures and local dev artifacts +data/captures/ +data/logs/ + +# Chrome Web Store build artifacts +*.zip +*.crx +*.pem +dist/ +build/ + +# Editor and OS noise +.DS_Store +Thumbs.db +.vscode/ +.idea/ + +# Secrets — never commit +.env +.env.local diff --git a/integrations/chrome-capture-extension/README.md b/integrations/chrome-capture-extension/README.md new file mode 100644 index 00000000..e3a0d860 --- /dev/null +++ b/integrations/chrome-capture-extension/README.md @@ -0,0 +1,225 @@ +# Chrome Capture Extension + +> Chrome MV3 extension that captures conversations from Claude, ChatGPT, and Gemini into your Open Brain via the REST API gateway. + +## What It Does + +A client-side Chrome (or Chromium-based browser) extension that sits on top of Claude.ai, chatgpt.com, and gemini.google.com. When you finish an interesting exchange, click the extension icon and the extension extracts the latest user + assistant turn from the page DOM, runs local sensitivity and duplicate filters, and POSTs the result to your Open Brain REST API gateway. It also supports bulk backfill from Claude and ChatGPT using their internal conversation APIs so you can import your existing chat history in one pass. + +This is a **client-side** integration — unlike the other integrations in this repo (Slack, Discord, email capture) which deploy as Supabase Edge Functions, a Chrome extension runs entirely in the user's browser. It does **not** register as an MCP server. All it does is call the REST API gateway's `/ingest` endpoint with standard `x-brain-key` auth. Every user installs it locally against their own Open Brain. + +## Screenshots + +Placeholder. See [`docs/screenshots/README.md`](docs/screenshots/README.md) for the expected filenames. 
The four targets are: + +- First-run Configure screen (URL + API key entry) +- Popup on a Claude tab with Capture Current Response visible +- Activity log showing a successful capture plus a duplicate/skipped one +- Sync tab with Claude full/incremental sync controls + +## Prerequisites + +- Working Open Brain setup ([guide](../../docs/01-getting-started.md)) +- The [REST API gateway integration (PR #201)](https://github.com/NateBJones-Projects/OB1/pull/201) deployed and reachable — the extension POSTs to `/open-brain-rest/ingest` and pings `/open-brain-rest/health` +- An `MCP_ACCESS_KEY` (or equivalent `x-brain-key` token) issued by your Open Brain for this device +- Chrome 120+, or any Chromium-based browser that supports MV3 (Edge 120+, Brave, Arc, Opera) + +## Credential Tracker + +Copy this block into a text editor and fill it in as you go. + +```text +CHROME CAPTURE EXTENSION -- CREDENTIAL TRACKER +-------------------------------------- + +FROM YOUR OPEN BRAIN SETUP + REST API base URL: ____________ + (Supabase example: https://YOUR_PROJECT_REF.supabase.co/functions/v1 + Self-hosted example: https://brain.example.com) + x-brain-key API key: ____________ + +BROWSER INFO + Browser + version: ____________ + Extension ID (after install): ____________ + +-------------------------------------- +``` + +## Installation + +1. Download or clone this repository to your machine +2. Open your Chromium-based browser and go to `chrome://extensions` +3. Toggle **Developer mode** on (top-right) +4. Click **Load unpacked** and pick the `integrations/chrome-capture-extension/` folder +5. Pin the extension icon to the toolbar so you can reach it quickly +6. A new tab opens automatically on first install — the Configure Open Brain screen (see below) + +## First-Run Config + +The extension ships with **no hardcoded server URLs**. On first install it opens `popup/config.html` and asks for two things: + +1. **Open Brain REST API URL** — the base URL of your REST API gateway. 
Examples: + - Supabase-hosted: `https://your-project-ref.supabase.co/functions/v1` + - Self-hosted: `https://brain.example.com` +2. **API Key** — the `x-brain-key` (`MCP_ACCESS_KEY`) you configured when deploying the REST API integration + +When you click **Save & Grant Permission**, Chrome shows a native permission prompt asking whether the extension may access the specific origin you entered. Approve it. This is a one-time grant — Chrome remembers it and the extension can now talk to your Open Brain without asking again. You can revoke the grant any time from `chrome://extensions → Open Brain Capture → Details → Site access`. + +**Storage details:** +- API key → `chrome.storage.local` (per-device only, **never** synced across Chrome profiles) +- API URL (`apiEndpoint`) → `chrome.storage.local` (per-device only). Rationale: the URL alone isn't a secret, but combining it with your Google-account-wide synced profiles would let anyone signed into the same Google account on a shared or loaner laptop see a pre-filled target for your Open Brain. Treating the endpoint as per-device avoids that surface, and also sidesteps `chrome.storage.sync`'s 8KB-per-item quota, which could silently reject saves for very long URLs. +- Platform toggles (ChatGPT / Claude / Gemini) → `chrome.storage.sync` (follows your Google account across devices). If `chrome.storage.sync` is unavailable (policy-managed profile, sync disabled, or quota exceeded) the extension transparently falls back to `chrome.storage.local` so saves never silently fail. + +## Usage + +**Manual capture (primary workflow):** + +1. Open a conversation on Claude.ai, chatgpt.com, or gemini.google.com +2. Click the extension icon in the toolbar +3. Click **Capture Current Response** +4. Watch the Activity log on the Overview tab — you should see `captured` and the sent counter tick up +5. 
Confirm the thought arrived in your Open Brain (query `search_thoughts` or peek at your database's `thoughts` table) + +**Bulk backfill (Claude, ChatGPT, and Gemini):** + +Switch to the Sync tab and click **Sync All** under the platform you want to import. For Claude and ChatGPT the extension walks each platform's internal conversation API using your existing logged-in session; for Gemini it uses a `chrome.debugger`-based history capture (see "Gemini bulk history sync (Phase B/C)" below). Every path funnels through the same ingest pipeline, and dedup is handled via SHA-256 content fingerprints — running Sync All twice is safe. Incremental **Sync New** imports only conversations not yet captured. Optionally turn on **Auto-sync** to keep new conversations flowing in hands-free (15 min cadence for Claude/ChatGPT, 4 h for Gemini). + +## Supported Sites + +| Site | Manual capture | Bulk sync | Notes | +|------|---------------|-----------|-------| +| `claude.ai` | Yes | Yes | Uses Claude's internal `/api/organizations/.../chat_conversations` endpoint for bulk sync. DOM extractor walks open shadow roots to survive UI refactors. | +| `chatgpt.com`, `chat.openai.com` | Yes | Yes | Uses ChatGPT's `/backend-api/conversations` for bulk sync and `data-message-author-role` selectors for manual capture. | +| `gemini.google.com` | Yes (best-effort) | Yes (debugger-based) | Google exposes no public conversation API, so bulk sync uses `chrome.debugger` to observe Gemini's internal `batchexecute` history-load RPC (`rpcids=hNvQHb`). The "Debugging this browser" banner appears while syncing — see the Gemini bulk history sync section below. Manual-capture selectors target `<user-query>` and `<model-response>` Web Components and may drift with Google UI refreshes. 
| + +## Architecture + +``` +┌──────────────────────────┐ +│ claude.ai / chatgpt.com │ +│ / gemini.google.com tab │ +└──────────┬───────────────┘ + │ content script (bridge.js + extractor-<platform>.js) + │ extracts last user+assistant turn from DOM + ▼ +┌──────────────────────────┐ +│ background/service- │ +│ worker.js │ +│ - sensitivity filter │ +│ - SHA-256 fingerprint │ +│ - retry queue (5 tries, │ +│ exponential backoff) │ +└──────────┬───────────────┘ + │ fetch() with x-brain-key header + ▼ +┌──────────────────────────┐ +│ Open Brain REST API │ +│ /open-brain-rest/ingest │ +│ (Supabase Edge Function) │ +└──────────────────────────┘ +``` + +The service worker is the only network caller. Content scripts never touch the network — they only extract DOM text and hand it over via `chrome.runtime.sendMessage`. This keeps the API key out of every page's origin and makes the permission model reviewable. + +## Gemini bulk history sync (Phase B/C) + +Google does not expose a public conversation API for Gemini, so bulk backfill uses a two-part flow that observes Gemini's own internal traffic instead of scraping the DOM. + +**Phase B — chrome.debugger history capture.** When a Gemini tab is open, the extension attaches the MV3 debugger protocol (`chrome.debugger.attach`) and watches `Network.requestWillBeSent`/`loadingFinished` for exactly one URL pattern: `batchexecute` requests with `rpcids=hNvQHb` (Gemini's history-load RPC). Other batchexecute rpcids (`MaZiqc`, `ESY5D`, `L5adhe`, and so on — sidebar, settings, status) are ignored. On `loadingFinished` the service worker fetches the response body via `Network.getResponseBody`, parses the framed positional JSON, and funnels every user+assistant turn in the conversation through the existing capture pipeline (retry queue, sensitivity filter, fingerprint dedup, session metrics). No DOM scraping, no parallel `/ingest` path. 
+ +**Phase C — Sync All orchestrator.** The Sync tab exposes three Gemini controls: + +- **Sync All History** — enumerates every conversation link in your Gemini sidebar (scrolling to load the full list), opens a dedicated background tab, and drives it through each conversation one at a time. Phase B observes the history-load RPC that Gemini fires on page load and resolves a per-conversation waiter. Fingerprint dedup guarantees that re-running Sync All is safe — already-captured turns return `duplicate_fingerprint` / `existing`. +- **Sync New** — same enumeration, but filters against a lifetime list of synced conversation IDs so only conversations you've never captured get navigated. Safe for scheduled use. +- **Auto-sync every 4 hours** — optional. When on, a `chrome.alarms`-driven 4h cadence calls Sync New (capped at 20 conversations per cycle) so new Gemini conversations land in your Open Brain hands-free. Off by default. + +A per-conversation jittered throttle (4–12 s plus a longer "reading pause" every 10 conversations) keeps cadence off Google's bot-detection radar. If Gemini does redirect the sync tab to a CAPTCHA/login page mid-run, the orchestrator detects the unhealthy tab, transitions to a `canceled` paused state, and the Sync All button relabels itself to **Resume Sync**. Solve the challenge in the Gemini tab, then click Resume to pick up where the run left off. + +**What this requires at install time:** + +- Extra manifest permissions: `debugger` (to attach to Gemini tabs) and `scripting` (to run the sidebar-enumeration helper). Chrome shows a combined permission prompt on install / update — "Read and change your data on gemini.google.com" plus "Debug" language. That is expected. +- A visible banner while syncing: Chrome shows "Open Brain Capture started debugging this browser" along the top of Chrome whenever `chrome.debugger` is attached. 
This is mandatory platform UX — dismissing it cancels the debugger session and the extension will flip to the paused state. Leave it open while Sync All is running. +- No external telemetry, no third-party hosts. Every request that leaves your browser still goes only to your configured Open Brain REST API URL. + +**Why use `chrome.debugger` instead of a content script.** Content scripts can't observe cross-origin response bodies. The Gemini history-load payload is a framed positional-array blob that mixes anti-XSSI prefixes with length-prefixed JSON chunks — parsing it from a `fetch()` interceptor in page context would be fragile and require re-implementing half of Google's `batchexecute` protocol in the page. The debugger path gets the raw response bytes exactly as Gemini's own JS receives them. + +**Turn off:** set the Gemini toggle to off in Settings, and the debugger detaches from every open Gemini tab immediately. Uninstalling the extension clears all persisted state (sync state, fingerprint cache, retry queue) with it. + +## Host Permissions Approach + +This extension uses **`optional_host_permissions` + runtime `chrome.permissions.request()`**, not `<all_urls>` at install time. Trade-off analysis: + +| Approach | Pros | Cons | +|----------|------|------| +| `host_permissions: ["<all_urls>"]` | One-line manifest, no prompt flow | Chrome Web Store flags it as a high-risk permission, install-time prompt scares users, extension can hit any site | +| `optional_host_permissions` + runtime request (chosen) | Minimum-viable permissions, user sees exactly which origin they're granting, survives Chrome Web Store review | Requires a Configure screen + one extra click during setup | + +The extension declares `optional_host_permissions: ["https://*/*", "http://localhost/*", "http://127.0.0.1/*"]` in the manifest — the HTTPS wildcard covers public deployments, and the two loopback HTTP entries exist so local dev setups (e.g. 
`http://localhost:54321`) work without dropping TLS requirements for everyone else. On the Configure screen the extension parses the user's URL, derives an origin pattern like `https://your-project-ref.supabase.co/*`, and calls `chrome.permissions.request({ origins: [origin] })`. The user approves once; Chrome persists the grant; the service worker can now `fetch()` that origin. Nothing else. + +The `content_scripts` entries for `claude.ai`, `chatgpt.com`, and `gemini.google.com` remain as normal `host_permissions` because the content scripts inject at `document_idle` on page load — they can't wait for a runtime prompt. Those three origins are scoped narrowly and visible in the install dialog. + +## Security + +- **API key storage.** The `x-brain-key` lives in `chrome.storage.local`. Chrome encrypts local storage on disk with OS-level keys, and the key is **never** written to `chrome.storage.sync` — meaning it does not propagate to your other Chrome profiles on the same Google account. Rotate by reopening the Configure screen and saving a new value. Uninstalling the extension removes the key along with it. +- **API URL storage.** The Open Brain API URL (`apiEndpoint`) also lives in `chrome.storage.local` only, alongside the key. The URL itself isn't a secret, but sync-replicating it would leak your brain's location to any Chrome profile signed into the same Google account (shared laptops, family devices, loaner Chromebooks). Keeping the endpoint per-device avoids that pre-fill attack surface. +- **Transport security.** The Configure screen rejects any API URL that isn't `https://…` or `http://localhost` / `http://127.0.0.1` (with optional port). The manifest's `optional_host_permissions` reflects the same policy: `https://*/*` plus narrow loopback exceptions only. Plaintext `http://` endpoints over the public internet are not accepted — the `x-brain-key` header and captured conversation text would travel in the clear. 
+- **Client-side sensitivity filtering.** `data/sensitivity-patterns.json` holds regex patterns for SSNs, passports, bank accounts, API keys, credit cards, passwords-in-URLs, and medical/financial markers. Anything matching a `restricted` pattern is blocked locally before the request is even built — the text never leaves the browser, and the activity log shows a `restricted_blocked` entry. `personal` matches pass through silently and are NOT logged — the intent is to capture them alongside the rest of the conversation, not to separately surface them. Patterns compile once per session and are tested with `String.prototype.match` regex semantics. +- **Outbound requests.** Only the service worker calls `fetch()`, and only to the user-configured origin. No telemetry, no analytics, no third-party hosts. +- **Retry queue integrity.** Failed captures live in `chrome.storage.local` with the full payload and a `nextRetryAt` timestamp. Retries honour exponential backoff (1, 2, 4, 8, 16 minutes, capped at 60), max 5 attempts, then a dead-letter entry in the activity log. Fingerprints live across retries so a retry-then-manual-retry doesn't produce duplicates in Open Brain. +- **CSP.** Manifest V3 service workers run under a strict CSP that forbids `eval` and remote script loading. The lib scripts are all local. + +## Publishing to Chrome Web Store + +**Status: future work.** This contribution is currently distributed as an unpacked/developer-mode install. To publish to the Chrome Web Store, a maintainer will need to: + +1. Provide a 1.0.0-ready icon set (16/32/48/128 PNGs — see [`icons/README.md`](icons/README.md)) +2. Fill in the store listing: description, category (Productivity), screenshots, privacy policy URL +3. Draft the **permission justifications** — the store review team requires a paragraph per declared permission. Suggested text: + - `storage` — "Persists user-supplied Open Brain API URL, API key, and per-platform capture toggles." 
+ - `alarms` — "Scheduled retry of failed ingests and optional auto-sync from Claude/ChatGPT (15 min) and Gemini (4 hours)." + - `activeTab`, `tabs` — "Resolves the active conversation tab when the user clicks Capture and creates a transient background tab to drive Gemini bulk sync." + - `cookies` — "Reads the `lastActiveOrg` cookie on claude.ai and the session cookie on chatgpt.com to bulk-fetch conversations via each platform's internal API using the user's own session." + - `debugger` — "Attaches the debugger protocol to gemini.google.com tabs only, and only to observe the one internal history-load RPC (`batchexecute` with `rpcids=hNvQHb`) that Gemini itself calls to load conversation turns. No injected code, no DOM modification, no other origins." + - `scripting` — "Runs a single sidebar-enumeration helper in the Gemini tab to collect conversation IDs for bulk sync. The helper only reads `a[href*=\"/app/\"]` anchors; it does not mutate the page." + - Host permissions for `claude.ai`, `chatgpt.com`, `chat.openai.com`, `gemini.google.com` — "Content scripts extract the latest conversation turn from the page DOM when the user clicks Capture." + - `optional_host_permissions` — "Runtime-granted by the user to reach their specific Open Brain API URL." +4. Pay the $5 one-time developer registration fee +5. Submit for review (typically 3–7 business days) + +Alternatively, host the packed `.crx` on a maintainer-owned update URL and let users sideload without going through the store at all. + +## Known Limitations + +- **ChatGPT and Gemini extractors are best-effort and unverified against live pages.** The ChatGPT and Gemini DOM extractors were written from public selector knowledge (`[data-message-author-role]`, `<user-query>` / `<model-response>` Web Components, aria-label fallbacks) and have not been exhaustively verified on a live logged-in session at merge time. They may break with any vendor UI refresh — OpenAI and Google both ship ChatGPT/Gemini UI changes on a short cadence. 
When they break, manual capture on those platforms will return "No conversation turns found" until a maintainer updates the selectors. The Claude manual-capture extractor walks open shadow roots and has been exercised against live claude.ai; it is more resilient. Bulk sync (Claude + ChatGPT) uses internal JSON APIs and is far less fragile than any DOM path. +- **Bulk sync depends on vendor-internal APIs that are not publicly supported.** Anthropic's `/api/organizations/.../chat_conversations` and OpenAI's `/backend-api/conversations` endpoints are undocumented and subject to change without notice. Expect periodic maintenance PRs. If you rely on auto-sync, monitor the Sync Log tab for sustained errors. +- **DOM extraction is fragile.** Claude, ChatGPT, and Gemini all ship UI rewrites without notice. When a platform shuffles its selectors, manual capture returns "No conversation turns found" until the extractor is updated. The Gemini extractor is especially exposed — Google ships new Gemini UIs every few months. Expect occasional maintenance PRs. Bulk sync (Claude + ChatGPT) uses stable internal JSON APIs and is far less fragile than DOM extraction. +- **No passive/ambient capture.** The extension only captures when the user explicitly clicks Capture or runs Sync. A previous "observe every turn" design was retired because keeping up with selector churn on every render was not sustainable. The Settings panel has no Auto/Manual capture-mode toggle — that UI was dropped in the initial public release because it controlled only the ambient path. If ambient capture ever ships, the toggle comes back with it. +- **Gemini bulk sync relies on the debugger protocol.** Google does not expose a public conversation history API. 
The extension observes Gemini's own internal `batchexecute` history-load RPC via `chrome.debugger`, which requires Chrome to show the "Open Brain Capture started debugging this browser" banner while a run is live — dismissing the banner detaches the debugger and pauses the sync. See "Gemini bulk history sync (Phase B/C)" for the full flow. +- **Large conversations.** The REST API `/ingest` endpoint accepts a single payload per request. A 400-turn Claude thread becomes one very large POST. If your gateway has a request size cap (Supabase default is 10MB), Sync All may dead-letter the longest conversations. Check the activity log and trim in your dashboard if that happens. +- **Sensitivity filter is regex-only.** It's deliberately conservative — false negatives are possible. Treat it as a guardrail, not a vault. For truly sensitive content, don't paste it into an AI chat in the first place. + +## Troubleshooting + +**Issue: Extension icon has a yellow `!` badge and captures fail** +Solution: The extension is not configured. Click the icon, then click **Open Configure screen** in the yellow banner, and supply your Open Brain REST API URL + API key. + +**Issue: "Missing x-brain-key API key" error when I click Capture** +Solution: Either the API key was never saved, or Chrome's local storage got cleared (this can happen after a browser profile reset). Open the Settings tab → **Reconfigure API URL & Key** and re-enter. + +**Issue: "Cannot reach the page" error when capturing** +Solution: The content script isn't loaded on this tab. Refresh the tab and retry. If the page is still on the same URL family that the manifest declares (`claude.ai/*`, `chatgpt.com/*`, etc.), the refresh will re-inject the script. If the error persists, disable and re-enable the extension from `chrome://extensions`. + +**Issue: "No conversation turns found" on Claude / ChatGPT / Gemini** +Solution: The site DOM has changed and the extractor selectors are stale. 
Check the repo for a newer version of the extension; if there isn't one yet, open an issue with a sample of the current DOM and the `chrome://extensions → errors` output. + +**Issue: Sync All reports every conversation as `existing` but your Open Brain is empty** +Solution: The SHA-256 fingerprint cache is populated but the ingest POSTs are silently rejected. Open the Activity log on the Overview tab and look for `queued_retry` or `dead_letter` entries — those will show the actual API error. Common cause: the REST API gateway is deployed but `MCP_ACCESS_KEY` was rotated and you didn't update the extension. + +**Issue: I configured the extension but Test Connection says "fetch failed"** +Solution: Your browser doesn't have host permission for that origin. Open the Configure screen and save again — Chrome will re-prompt. If it still fails, verify the URL is reachable from your browser (paste it directly into the address bar, expect a 401 or similar from the gateway). + +## Tool Surface Area + +This integration is a **capture source**, not an MCP server — it doesn't expose any tools to your AI. It only writes into Open Brain. The AI-facing tool count of your setup is unchanged by installing this extension. + +If you're weighing whether to add more MCP-exposing extensions on top, see the [MCP Tool Audit & Optimization Guide](../../docs/05-tool-audit.md) for how to keep your tool count manageable as your Open Brain grows. 
diff --git a/integrations/chrome-capture-extension/background/gemini-debugger.js b/integrations/chrome-capture-extension/background/gemini-debugger.js new file mode 100644 index 00000000..60a4f141 --- /dev/null +++ b/integrations/chrome-capture-extension/background/gemini-debugger.js @@ -0,0 +1,566 @@ +/** + * Open Brain Capture — Gemini durable history capture via chrome.debugger + * + * Phase B: attach chrome.debugger to https://gemini.google.com/* tabs, watch + * for the batchexecute `rpcids=hNvQHb` request/response (Gemini's internal + * conversation-history loader), pair the request+response so MV3 service- + * worker suspensions don't lose state mid-response, fetch the response body + * on loadingFinished, and funnel extracted turns through + * `processCaptureRequest`. + * + * What this file does NOT do: + * - It does NOT observe StreamGenerate (the live per-turn stream). That + * path would be ambient capture, which the extension deliberately + * dropped in the initial public release (see service-worker.js notes). + * Only Phase B's history-load path ships here, and it only fires when + * the user (or the Sync All orchestrator on their behalf) opens a + * conversation. + * + * Coordination with the sync orchestrator: + * - When Phase C's gemini-sync.js drives a bulk backfill it navigates a + * hidden tab to `/app/` and waits on a per-conversation + * waiter. The page loads the conversation by firing the hNvQHb RPC; we + * observe the response here, funnel every turn through the capture + * pipeline (retry queue, sensitivity filter, fingerprint dedup), and + * then ping `OBGeminiSync.notifyHistoryCaptured(conversationId, totals)` + * so the orchestrator's waiter resolves and it can drive the next + * conversation. + * + * Respects the user's Gemini toggle: if the user disables Gemini capture in + * the popup settings, this module detaches from all tabs and stops listening + * until re-enabled. No probes, no telemetry, no third-party hosts. 
+ */ + +/* global chrome, OBConfig */ + +(function () { + 'use strict'; + + // Phase B: conversation history is loaded via a batchexecute RPC. + // `rpcids=hNvQHb` is the history-load variant, confirmed via the Gemini + // network research referenced in the README. Other batchexecute rpcids + // (MaZiqc, ESY5D, L5adhe, etc.) handle sidebar/settings/status and are + // ignored by the URL guard below. + const BATCHEXECUTE_PATH = 'batchexecute'; + const HISTORY_RPCID = 'hNvQHb'; + const DEBUGGER_PROTOCOL_VERSION = '1.3'; + const REQUEST_STASH_TTL_MS = 120 * 1000; + const GEMINI_URL_PATTERN = 'https://gemini.google.com/'; + + // chrome.storage.session key prefix for the pending-request stash. + // Full key: `${STASH_KEY_PREFIX}${tabId}:${requestId}`. + const STASH_KEY_PREFIX = 'ob_gemini_stash_'; + + // chrome.storage.local key the popup reads to show the paused indicator. + const PAUSED_STATE_KEY = 'ob_gemini_paused'; + + // Hard cap on batchexecute response-body size before we even try to parse. + // Gemini's hNvQHb payload is dominated by the conversation transcript plus + // candidate metadata; in practice the largest payloads we've seen in + // research fixtures clock in under 2 MB. 8 MB gives us ~4x headroom for + // long-thread outliers while protecting the SW from a pathological body + // (parser bug, wrong url match, Google format change) OOM'ing the worker. + const MAX_RESPONSE_BODY_BYTES = 8 * 1024 * 1024; + + // In-memory mirror of the persisted stash for speed. Canonical copy lives + // in chrome.storage.session; this map is always re-derivable from there. 
const pendingRequests = new Map();

// Tab ids that currently have our debugger session attached AND the Network
// domain enabled. Debugger events for tabs outside this set are ignored.
const attachedTabs = new Set();
// Mirrors the PAUSED_STATE_KEY flag in chrome.storage.local (see
// setPausedByUser); cleared again by the next successful attach.
let capturePausedByUser = false;
// Cached value of the user's Gemini platform toggle (see applyEnabledState).
let geminiEnabled = true;
// NOTE(review): not referenced in this section — presumably guards one-time
// initialisation further down the file; confirm before removing.
let initialized = false;

const LOG = (msg, ...rest) => console.log(`[OB Gemini] ${msg}`, ...rest);
const ERR = (msg, ...rest) => console.error(`[OB Gemini] ${msg}`, ...rest);

/**
 * Returns true when `url` is a string containing both the batchexecute path
 * marker and the history-load `rpcids=` marker.
 *
 * @param {unknown} url - Request URL reported by the debugger.
 * @returns {boolean}
 */
function isHistoryUrl(url) {
  return typeof url === 'string'
    && url.includes(BATCHEXECUTE_PATH)
    && url.includes(`rpcids=${HISTORY_RPCID}`);
}

// ---------------------------------------------------------------------------
// Stash — chrome.storage.session-backed, in-memory mirrored
// ---------------------------------------------------------------------------

/**
 * Builds the storage/map key for one pending request.
 *
 * @param {number} tabId
 * @param {string} requestId
 * @returns {string} `${STASH_KEY_PREFIX}${tabId}:${requestId}`
 */
function stashKey(tabId, requestId) {
  return `${STASH_KEY_PREFIX}${tabId}:${requestId}`;
}

/**
 * Records a pending request in the in-memory map and in
 * chrome.storage.session. The in-memory write happens synchronously; a
 * storage failure is logged and otherwise ignored (best effort).
 *
 * @param {number} tabId
 * @param {string} requestId
 * @param {object} entry - Stash record; must carry a numeric `startedAt`.
 * @returns {Promise<void>}
 */
async function stashSet(tabId, requestId, entry) {
  const key = stashKey(tabId, requestId);
  pendingRequests.set(key, entry);
  try {
    await chrome.storage.session.set({ [key]: entry });
  } catch (err) {
    ERR(`stashSet failed key=${key}:`, err?.message || err);
  }
}

/**
 * Removes a pending request from the in-memory map (synchronously) and from
 * chrome.storage.session. Storage failures are logged, never thrown.
 *
 * @param {number} tabId
 * @param {string} requestId
 * @returns {Promise<void>}
 */
async function stashDelete(tabId, requestId) {
  const key = stashKey(tabId, requestId);
  pendingRequests.delete(key);
  try {
    await chrome.storage.session.remove(key);
  } catch (err) {
    ERR(`stashDelete failed key=${key}:`, err?.message || err);
  }
}

/**
 * Looks up a pending request in the in-memory map.
 *
 * An entry older than REQUEST_STASH_TTL_MS is treated as missing and is
 * evicted from both the map and session storage, so stale records do not
 * linger until the tab closes or the stash is next rehydrated.
 *
 * @param {number} tabId
 * @param {string} requestId
 * @returns {object|null} The live entry, or null when absent or expired.
 */
function stashGet(tabId, requestId) {
  const entry = pendingRequests.get(stashKey(tabId, requestId));
  if (!entry) return null;
  if (Date.now() - entry.startedAt > REQUEST_STASH_TTL_MS) {
    // Intentionally not awaited: stashDelete drops the in-memory entry
    // synchronously and logs (never rejects on) storage failures.
    void stashDelete(tabId, requestId);
    return null;
  }
  return entry;
}

/**
 * Reloads the in-memory map from chrome.storage.session. Keys outside the
 * stash prefix are ignored; malformed or expired records are removed from
 * session storage. Failures are logged, never thrown.
 *
 * @returns {Promise<void>}
 */
async function stashRehydrate() {
  try {
    const all = await chrome.storage.session.get(null);
    const now = Date.now();
    const expired = [];
    let live = 0;
    for (const [key, value] of Object.entries(all)) {
      if (!key.startsWith(STASH_KEY_PREFIX)) continue;
      if (!value || typeof value !== 'object' || typeof value.startedAt !== 'number') {
        expired.push(key);
        continue;
      }
      if (now - value.startedAt > REQUEST_STASH_TTL_MS) {
        expired.push(key);
        continue;
      }
      pendingRequests.set(key, value);
      live += 1;
    }
    if (expired.length) {
      await chrome.storage.session.remove(expired);
    }
    LOG(`stash rehydrate live=${live} expired=${expired.length}`);
  } catch (err) {
    ERR('stashRehydrate failed:', err?.message || err);
  }
}

/**
 * Drops every pending request that belongs to `tabId` from the in-memory map
 * and from session storage. Only keys present in the in-memory map are
 * considered.
 *
 * @param {number} tabId
 * @returns {Promise<void>}
 */
async function stashDropForTab(tabId) {
  const prefix = `${STASH_KEY_PREFIX}${tabId}:`;
  const keys = [];
  for (const key of pendingRequests.keys()) {
    if (key.startsWith(prefix)) keys.push(key);
  }
  if (!keys.length) return;
  for (const key of keys) pendingRequests.delete(key);
  try {
    await chrome.storage.session.remove(keys);
  } catch (err) {
    ERR(`stashDropForTab failed tab=${tabId}:`, err?.message || err);
  }
}

// ---------------------------------------------------------------------------
// Paused-state flag — persisted for the popup
// ---------------------------------------------------------------------------

/**
 * Updates the paused flag in memory and persists it under PAUSED_STATE_KEY
 * in chrome.storage.local. Storage failures are logged, never thrown.
 *
 * @param {unknown} paused - Coerced to boolean.
 * @returns {Promise<void>}
 */
async function setPausedByUser(paused) {
  capturePausedByUser = Boolean(paused);
  try {
    await chrome.storage.local.set({ [PAUSED_STATE_KEY]: capturePausedByUser });
  } catch (err) {
    ERR('setPausedByUser failed:', err?.message || err);
  }
}

/** @returns {boolean} Current in-memory paused flag. */
function isCapturePausedByUser() {
  return capturePausedByUser;
}

// ---------------------------------------------------------------------------
// Settings — read Gemini toggle from user config
// ---------------------------------------------------------------------------

/**
 * Reads the Gemini platform toggle from OBConfig. Anything other than an
 * explicit `false` counts as enabled, and a read failure also defaults to
 * enabled.
 *
 * @returns {Promise<boolean>}
 */
async function readGeminiEnabled() {
  try {
    const config = await OBConfig.getConfig();
    return config?.enabledPlatforms?.gemini !== false;
  } catch (err) {
    ERR('readGeminiEnabled failed — defaulting to enabled:', err?.message || err);
    return true;
  }
}

/**
 * Applies a new value of the Gemini toggle. Attaches to open Gemini tabs on
 * an off→on transition and detaches from all tabs on an on→off transition;
 * does nothing when the value is unchanged.
 *
 * @param {unknown} nextEnabled - Coerced to boolean.
 * @returns {Promise<void>}
 */
async function applyEnabledState(nextEnabled) {
  const prevEnabled = geminiEnabled;
  geminiEnabled = Boolean(nextEnabled);

  if (geminiEnabled && !prevEnabled) {
    LOG('gemini capture enabled — attaching to open tabs');
    await attachToOpenGeminiTabs();
  } else if (!geminiEnabled && prevEnabled) {
    LOG('gemini capture disabled — detaching all tabs');
    await detachFromAllTabs();
  }
}

// ---------------------------------------------------------------------------
// Attach lifecycle
// ---------------------------------------------------------------------------

/**
 * Attaches the debugger to a tab and enables the Network domain. No-op when
 * Gemini capture is disabled or the tab is already tracked as attached.
 *
 * If the attach succeeds but a later step fails (typically `Network.enable`),
 * the session is detached again. Otherwise the tab would keep a live debugger
 * session that this module neither listens to nor ever detaches, because the
 * tab never made it into `attachedTabs`.
 *
 * Failures are logged, never thrown.
 *
 * @param {number} tabId
 * @returns {Promise<void>}
 */
async function attachToGeminiTab(tabId) {
  if (!geminiEnabled) return;
  if (attachedTabs.has(tabId)) return;

  let debuggerAttached = false;
  try {
    await chrome.debugger.attach({ tabId }, DEBUGGER_PROTOCOL_VERSION);
    debuggerAttached = true;
    await chrome.debugger.sendCommand({ tabId }, 'Network.enable', {});
    attachedTabs.add(tabId);
    LOG(`attached tab=${tabId}`);
    // A successful attach clears any prior "user canceled" paused state.
    if (capturePausedByUser) await setPausedByUser(false);
  } catch (err) {
    ERR(`attach failed tab=${tabId}:`, err?.message || String(err));
    if (debuggerAttached && !attachedTabs.has(tabId)) {
      // Roll back the half-open session so it is not leaked.
      try {
        await chrome.debugger.detach({ tabId });
      } catch (detachErr) {
        ERR(`detach failed tab=${tabId}:`, detachErr?.message || String(detachErr));
      }
    }
  }
}

/**
 * Detaches the debugger from a tab (when tracked as attached) and drops the
 * tab's pending-request stash. Detach errors are logged and ignored.
 *
 * @param {number} tabId
 * @returns {Promise<void>}
 */
async function detachFromTab(tabId) {
  if (!attachedTabs.has(tabId)) {
    await stashDropForTab(tabId);
    return;
  }
  try {
    await chrome.debugger.detach({ tabId });
    LOG(`detached tab=${tabId}`);
  } catch (err) {
    // detach often fails if the tab is already closed; not fatal
    ERR(`detach failed tab=${tabId}:`, err?.message || String(err));
  }
  attachedTabs.delete(tabId);
  await stashDropForTab(tabId);
}

/**
 * Queries for open Gemini tabs and attaches to each one in turn. Failures
 * are logged, never thrown.
 *
 * @returns {Promise<void>}
 */
async function attachToOpenGeminiTabs() {
  try {
    const tabs = await chrome.tabs.query({ url: 'https://gemini.google.com/*' });
    LOG(`startup scan: ${tabs.length} Gemini tab(s) open`);
    for (const tab of tabs) {
      if (typeof tab.id === 'number') await attachToGeminiTab(tab.id);
    }
  } catch (err) {
    ERR('attachToOpenGeminiTabs failed:', err?.message || err);
  }
}

/**
 * Detaches from every tracked tab. Iterates a snapshot because
 * detachFromTab mutates `attachedTabs`.
 *
 * @returns {Promise<void>}
 */
async function detachFromAllTabs() {
  const snapshot = Array.from(attachedTabs);
  for (const tabId of snapshot) await detachFromTab(tabId);
}

// ---------------------------------------------------------------------------
// Event wiring
//
/**
 * Subscribe to tab navigation and tab close events.
 *
 * - URL change to a Gemini URL: attach to the tab.
 * - URL change away from Gemini on a tracked tab: detach.
 * - Tab closed: detach if tracked, otherwise just drop its stash entries.
 */
function wireTabListeners() {
  const onTabUpdated = async (tabId, changeInfo) => {
    const nextUrl = changeInfo.url;
    // Updates that do not carry a URL change are ignored.
    if (typeof nextUrl !== 'string') return;

    if (nextUrl.startsWith(GEMINI_URL_PATTERN)) {
      await attachToGeminiTab(tabId);
      return;
    }
    if (attachedTabs.has(tabId)) await detachFromTab(tabId);
  };

  const onTabRemoved = async (tabId) => {
    if (attachedTabs.has(tabId)) {
      await detachFromTab(tabId);
      return;
    }
    await stashDropForTab(tabId);
  };

  chrome.tabs.onUpdated.addListener(onTabUpdated);
  chrome.tabs.onRemoved.addListener(onTabRemoved);
}

/**
 * Subscribe to debugger detach notifications and debugger protocol events.
 *
 * A detach untracks the tab and drops its stash; the `canceled_by_user`
 * reason additionally sets the paused flag. Protocol events are routed only
 * for tracked tabs, and handler rejections are logged, never rethrown.
 */
function wireDebuggerListeners() {
  const onDebuggerDetached = async (source, reason) => {
    const { tabId } = source;
    if (typeof tabId !== 'number') return;

    LOG(`onDetach tab=${tabId} reason=${reason}`);
    attachedTabs.delete(tabId);
    await stashDropForTab(tabId);
    if (reason === 'canceled_by_user') await setPausedByUser(true);
  };

  const onDebuggerEvent = (source, method, params) => {
    const { tabId } = source;
    if (typeof tabId !== 'number') return;
    if (!attachedTabs.has(tabId)) return;

    switch (method) {
      case 'Network.requestWillBeSent':
        handleRequestWillBeSent(tabId, params).catch((err) =>
          ERR(`requestWillBeSent handler failed tab=${tabId}:`, err?.message || err)
        );
        break;
      case 'Network.loadingFinished':
        handleLoadingFinished(tabId, params).catch((err) =>
          ERR(`loadingFinished handler failed tab=${tabId}:`, err?.message || err)
        );
        break;
      default:
        // Any other protocol event is ignored.
        break;
    }
  };

  chrome.debugger.onDetach.addListener(onDebuggerDetached);
  chrome.debugger.onEvent.addListener(onDebuggerEvent);
}

/**
 * Re-evaluate the Gemini toggle whenever the settings blob changes.
 *
 * OBConfig stores non-secret platform toggles in chrome.storage.sync under
 * STORAGE_KEYS.settings (falling back to chrome.storage.local when sync is
 * unavailable), so both areas are watched. Changes in other areas, or changes
 * that do not touch the settings key, are ignored.
 */
function wireSettingsListener() {
  const settingsKey = OBConfig.STORAGE_KEYS.settings;
  const watchedAreas = ['sync', 'local'];

  chrome.storage.onChanged.addListener(async (changes, areaName) => {
    if (!watchedAreas.includes(areaName)) return;
    if (!(settingsKey in changes)) return;

    const enabledNow = await readGeminiEnabled();
    await applyEnabledState(enabledNow);
  });
}
// ---------------------------------------------------------------------------
// Request/response handlers
// ---------------------------------------------------------------------------

/**
 * Stash a marker for every outgoing history request so the matching
 * `Network.loadingFinished` event can be recognised later.
 *
 * Only the request id, URL and start time are recorded — the request body is
 * not needed because the conversation turns are read from the response.
 * Requests whose URL is not a history URL are ignored.
 *
 * @param {number} tabId - Tab the request originated from.
 * @param {object} params - `Network.requestWillBeSent` event payload.
 * @returns {Promise<void>}
 */
async function handleRequestWillBeSent(tabId, params) {
  const requestUrl = params?.request?.url ?? '';
  const { requestId } = params;

  // Not a URL we care about.
  if (!isHistoryUrl(requestUrl)) return;

  await stashSet(tabId, requestId, {
    tabId,
    requestId,
    url: requestUrl,
    kind: 'history',
    startedAt: Date.now()
  });
  LOG(`requestWillBeSent tab=${tabId} requestId=${requestId} kind=history`);
}

/**
 * Fetch the response body of a previously stashed request and hand it to the
 * history capture pipeline.
 *
 * The stash entry is always removed before returning, whatever the outcome.
 * The body is dropped (with an error log) when it is base64-encoded, when its
 * length exceeds MAX_RESPONSE_BODY_BYTES, or when it cannot be fetched.
 *
 * @param {number} tabId - Tab the response belongs to.
 * @param {object} params - `Network.loadingFinished` event payload.
 * @returns {Promise<void>}
 */
async function handleLoadingFinished(tabId, params) {
  const { requestId } = params;
  const stashed = stashGet(tabId, requestId);
  if (!stashed) return;

  const elapsedMs = Date.now() - stashed.startedAt;
  LOG(`loadingFinished tab=${tabId} requestId=${requestId} kind=${stashed.kind || 'unknown'} elapsed=${elapsedMs}ms`);

  const forget = () => stashDelete(tabId, requestId);

  let responseText = null;
  try {
    const reply = await chrome.debugger.sendCommand(
      { tabId },
      'Network.getResponseBody',
      { requestId }
    );
    const text = typeof reply?.body === 'string' ? reply.body : null;
    const size = text ? text.length : 0;

    // A history response is expected to be plain text, so a base64 body means
    // the content type changed or this is a different request; the size cap
    // keeps parsing bounded. Either way the entry is dropped, not parsed.
    let rejection = null;
    if (Boolean(reply?.base64Encoded)) {
      rejection = `unexpected base64Encoded body tab=${tabId} requestId=${requestId} length=${size} — dropping`;
    } else if (size > MAX_RESPONSE_BODY_BYTES) {
      rejection = `response body exceeds cap tab=${tabId} requestId=${requestId} length=${size} cap=${MAX_RESPONSE_BODY_BYTES} — dropping`;
    }
    if (rejection !== null) {
      ERR(rejection);
      await forget();
      return;
    }

    responseText = text;
    LOG(`body received tab=${tabId} length=${size} base64=false`);
  } catch (err) {
    ERR(`getResponseBody failed tab=${tabId} requestId=${requestId}:`, err?.message || err);
    await forget();
    return;
  }

  // 'history' is the only request kind handled; anything else is dropped.
  if (stashed.kind !== 'history') {
    await forget();
    return;
  }

  try {
    await routeHistoryThroughCapturePipeline({ tabId, requestId, responseBody: responseText });
  } finally {
    await forget();
  }
}
/**
 * Turn one history response into capture requests, one per conversation turn,
 * and report the outcome to the sync orchestrator when it is loaded.
 *
 * Turns are submitted serially so the ingest pipeline's retry queue,
 * sensitivity filter and fingerprint dedup see them one at a time. A turn
 * that throws is counted under `other` and does not stop the loop.
 *
 * @param {object} args
 * @param {number} args.tabId - Tab the response came from (used for logging).
 * @param {string} args.requestId - Debugger request id (used for logging).
 * @param {string|null} args.responseBody - Raw response text for the extractor.
 * @returns {Promise<void>}
 */
async function routeHistoryThroughCapturePipeline({ tabId, requestId, responseBody }) {
  const historyExtractor = self.OBGeminiHistoryExtractor;
  if (typeof historyExtractor?.extractGeminiHistory !== 'function') {
    ERR(`OBGeminiHistoryExtractor unavailable — dropping tab=${tabId} requestId=${requestId}`);
    return;
  }

  const turns = historyExtractor.extractGeminiHistory({ responseBody });
  if (!Array.isArray(turns) || turns.length === 0) {
    LOG(`history extractor returned empty tab=${tabId} requestId=${requestId} — dropping`);
    // Deliberately no notifyHistoryCaptured call here. Reporting zero totals
    // would mark the conversation as completed-with-zero-turns and skip it on
    // future incremental syncs even if this was only a transient parse
    // failure. Letting the orchestrator's capture timeout fire instead lands
    // the conversation in failedIds, where a later "Sync All" can retry it.
    return;
  }

  const submitCapture = self.processCaptureRequest;
  if (typeof submitCapture !== 'function') {
    ERR(`processCaptureRequest unavailable in SW scope — dropping tab=${tabId} requestId=${requestId}`);
    return;
  }

  LOG(`history load tab=${tabId} requestId=${requestId} turns=${turns.length}`);

  const duplicateStatuses = new Set(['duplicate_fingerprint', 'existing']);
  const successStatuses = new Set(['complete', 'captured', 'inserted']);
  const tally = { captured: 0, skippedDup: 0, other: 0 };

  // Serial on purpose: fingerprint dedup makes re-opening a conversation
  // harmless — each turn either ingests new or comes back as a duplicate.
  for (const turn of turns) {
    const combinedText = `User: ${turn.userPrompt}\n\nAssistant: ${turn.assistantText}`;
    try {
      const outcome = await submitCapture({
        platform: 'gemini',
        captureMode: 'sync',
        text: combinedText,
        sourceMetadata: {
          gemini_conversation_id: turn.conversationId,
          gemini_response_id: turn.responseId,
          gemini_candidate_id: turn.candidateId,
          gemini_language: turn.language,
          gemini_model: turn.model,
          gemini_user_prompt: turn.userPrompt,
          gemini_assistant_text: turn.assistantText,
          gemini_captured_at: turn.capturedAt,
          gemini_history_order: turn.historyOrder,
          gemini_capture_kind: 'history'
        },
        assistantLength: turn.assistantText.length,
        preview: turn.assistantText
      });

      const status = outcome?.status || 'unknown';
      if (duplicateStatuses.has(status)) {
        tally.skippedDup += 1;
      } else if (successStatuses.has(status)) {
        tally.captured += 1;
      } else {
        tally.other += 1;
        LOG(`history turn[${turn.historyOrder}] tab=${tabId} status=${status}`);
      }
    } catch (err) {
      tally.other += 1;
      ERR(`history turn[${turn.historyOrder}] threw tab=${tabId}:`, err?.message || err);
    }
  }

  LOG(`history captured tab=${tabId} requestId=${requestId} captured=${tally.captured} dedup=${tally.skippedDup} other=${tally.other} total=${turns.length}`);

  // Notify the sync orchestrator (if present) so it can un-block its
  // per-conversation waiter. The first turn's conversation id is used; all
  // turns of one history response are expected to share it.
  //
  // The orchestrator keys its waiters by the BARE conversation hash, while the
  // extractor returns the PREFIXED form ("c_" + hash). The prefix is stripped
  // only here, at the notify boundary; the metadata stored on each thought
  // above keeps the prefixed form.
  const rawConversationId = turns[0]?.conversationId;
  let waiterId = rawConversationId;
  if (typeof rawConversationId === 'string' && rawConversationId.startsWith('c_')) {
    waiterId = rawConversationId.slice(2);
  }
  if (typeof waiterId !== 'string' || !waiterId) return;

  // Silently no-ops when the orchestrator is not loaded.
  const orchestrator = self.OBGeminiSync;
  if (!orchestrator || typeof orchestrator.notifyHistoryCaptured !== 'function') return;

  try {
    orchestrator.notifyHistoryCaptured(waiterId, {
      captured: tally.captured,
      skippedDup: tally.skippedDup,
      other: tally.other,
      total: turns.length
    });
  } catch (err) {
    ERR(`notifyHistoryCaptured threw tab=${tabId}:`, err?.message || err);
  }
}
// ---------------------------------------------------------------------------
// Init
// ---------------------------------------------------------------------------

/**
 * One-time start-up: read the Gemini toggle, rehydrate the request stash,
 * wire every listener, and attach to already-open Gemini tabs when enabled.
 * Later calls return immediately because `initialized` is set on the first.
 *
 * NOTE(review): the listeners are registered only after two awaits. Chrome's
 * extension documentation recommends registering service-worker listeners
 * synchronously at the top level — confirm that no event which wakes the
 * worker can be missed with this ordering.
 *
 * @returns {Promise<void>}
 */
async function initGeminiDebugger() {
  if (initialized) return;
  initialized = true;

  LOG('init');

  geminiEnabled = await readGeminiEnabled();
  LOG(`gemini capture enabled=${geminiEnabled}`);

  await stashRehydrate();

  // Same order as always: debugger, then tabs, then settings.
  for (const wire of [wireDebuggerListeners, wireTabListeners, wireSettingsListener]) {
    wire();
  }

  if (geminiEnabled) await attachToOpenGeminiTabs();

  LOG('event listeners wired');
}

// Kick off initialisation as soon as the script is evaluated; a failure is
// logged instead of surfacing as an unhandled rejection.
initGeminiDebugger().catch((err) => ERR('init failed:', err?.message || err));

// Publish the module on the service-worker global so scripts loaded through
// importScripts can reach it.
self.OBGeminiDebugger = {
  initGeminiDebugger,
  attachToGeminiTab,
  detachFromTab,
  detachFromAllTabs,
  isCapturePausedByUser,
  // Constants for tests and later wiring.
  DEBUGGER_PROTOCOL_VERSION,
  REQUEST_STASH_TTL_MS,
  GEMINI_URL_PATTERN,
  STASH_KEY_PREFIX,
  PAUSED_STATE_KEY,
  // Live references to internal state — treat as read-only.
  _attachedTabs: attachedTabs,
  _pendingRequests: pendingRequests
};
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

// Persisted state key. The whole sync record is stored as a single object
// under chrome.storage.local so rehydrating on SW wake is a single read.
const STATE_STORAGE_KEY = 'ob_gemini_sync_state';

// Base URL of the Gemini web app.
const GEMINI_APP_URL = 'https://gemini.google.com/app';

// Hard ceiling to avoid runaway iteration on pathological sidebar DOMs.
const DEFAULT_CAP = 2000;

// Gentler cap for auto/incremental runs. Keeps total navigations per
// scheduled cycle low so we don't tempt Google's bot detector. If there
// are more than this many new conversations since the last run, the
// remainder waits for the next alarm.
const DEFAULT_AUTO_CAP = 20;

// Max time (ms) to wait between navigating the sync tab and Phase B firing
// the capture callback. A typical hNvQHb round-trip is 0.5s-3s; 15s absorbs
// slow networks without pinning the orchestrator forever.
const CAPTURE_WAIT_TIMEOUT_MS = 15000;

// Max time (ms) to wait for Phase B's debugger to re-attach to the sync tab
// after we navigate. If we don't see OBGeminiDebugger._attachedTabs list
// our tab within this window, we proceed anyway — capture will either
// happen or time out via CAPTURE_WAIT_TIMEOUT_MS.
const ATTACH_WAIT_TIMEOUT_MS = 2000;
// Interval (ms) between checks while waiting for that re-attach.
const ATTACH_POLL_INTERVAL_MS = 100;

// Sidebar enumeration: how many scrolls to perform before giving up, and
// how long (ms) to pause between them.
const ENUMERATE_SCROLL_STEPS = 60;
const ENUMERATE_SCROLL_PAUSE_MS = 250;

// Heartbeat stale threshold (5 minutes, in ms) — if we see a record in
// state=syncing whose heartbeat is older than this, we assume the previous
// SW died mid-conversation and the user may want to resume manually.
const STALE_HEARTBEAT_MS = 5 * 60 * 1000;

// Anti-bot throttle. An earlier experiment with a uniform 4s cadence
// triggered Google's bot challenge around conversation 21. Mitigations:
//   - Longer base interval (8s average)
//   - Randomized jitter (4-12s range) with full-float precision so delays
//     never cluster on whole-second ticks (a classic bot signature)
//   - Periodic "reading pauses" every N conversations to break cadence
// All values below are milliseconds except READING_PAUSE_EVERY_N, which is a
// conversation count.
const THROTTLE_MIN_MS = 4000;
const THROTTLE_MAX_MS = 12000;
const READING_PAUSE_EVERY_N = 10;
const READING_PAUSE_MIN_MS = 20000;
const READING_PAUSE_MAX_MS = 35000;

// Console helpers that tag every message with this module's prefix.
const LOG = (msg, ...rest) => console.log(`[OB Gemini SYNC] ${msg}`, ...rest);
const ERR = (msg, ...rest) => console.error(`[OB Gemini SYNC] ${msg}`, ...rest);

// Live waiter registry for notifyHistoryCaptured. Created lazily because
// OBGeminiSyncState may not yet be on the global when this IIFE runs;
// the registry itself is obtained through `getWaiters()` below.
let waiters = null;

// In-memory flag to short-circuit the main loop when cancel was requested.
// Also mirrored into persisted state for resume-after-wake behavior.
let cancelRequested = false;

// Guards against concurrent startSync invocations from the popup. This is
// set synchronously by every entry point (startSync, syncIncremental,
// resumeSync, resumeIfInterrupted) BEFORE any await, so a second entry
// that lands during the first entry's first microtask turn still observes
// the lock. Cleared in the finally block of each entry point.
// Re-entrancy lock for the sync entry points.
let syncInFlight = false;

// ---------------------------------------------------------------------------
// Lazy accessor for the state helper module
// ---------------------------------------------------------------------------

/**
 * Look up the state helper on the service-worker global.
 *
 * @returns {object} `self.OBGeminiSyncState`.
 * @throws {Error} When the helper has not been loaded.
 */
function getStateModule() {
  const stateModule = self.OBGeminiSyncState;
  if (stateModule) return stateModule;
  throw new Error('OBGeminiSyncState module not loaded');
}

/**
 * Return the shared waiter registry, creating it on first use.
 *
 * @returns {object} The registry built by `createWaiterRegistry()`.
 */
function getWaiters() {
  if (!waiters) {
    waiters = getStateModule().createWaiterRegistry();
  }
  return waiters;
}

// ---------------------------------------------------------------------------
// Persistence
// ---------------------------------------------------------------------------

/**
 * Read the persisted sync record from chrome.storage.local.
 *
 * The stored record is layered over a fresh initial state so fields missing
 * from a record written by an older extension version get defaults. `totals`
 * is merged key by key, and the three id lists fall back to empty arrays when
 * the stored value is not an array. A missing or non-object record, or any
 * failure while reading, yields a plain initial state.
 *
 * @returns {Promise<object>} The merged state record.
 */
async function loadState() {
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);

  try {
    const { [STATE_STORAGE_KEY]: persisted } = await chrome.storage.local.get({
      [STATE_STORAGE_KEY]: null
    });
    if (!persisted || typeof persisted !== 'object') {
      return getStateModule().createInitialState();
    }

    const defaults = getStateModule().createInitialState();
    return {
      ...defaults,
      ...persisted,
      totals: { ...defaults.totals, ...(persisted.totals || {}) },
      pendingIds: listOrEmpty(persisted.pendingIds),
      completedIds: listOrEmpty(persisted.completedIds),
      failedIds: listOrEmpty(persisted.failedIds)
    };
  } catch (err) {
    ERR('loadState failed — returning initial:', err?.message || err);
    return getStateModule().createInitialState();
  }
}
/**
 * Persist the sync record under STATE_STORAGE_KEY.
 * A failed write is logged and swallowed — callers are not notified.
 *
 * @param {object} record - State record to store.
 * @returns {Promise<void>}
 */
async function saveState(record) {
  const payload = { [STATE_STORAGE_KEY]: record };
  try {
    await chrome.storage.local.set(payload);
  } catch (err) {
    ERR('saveState failed:', err?.message || err);
  }
}

/**
 * Load the record, let `mutator` change it, and persist the result.
 *
 * The mutator may either return a replacement record or mutate the loaded one
 * in place and return nothing (any falsy return keeps the loaded record).
 *
 * @param {function(object): (object|void)} mutator - Receives the loaded record.
 * @returns {Promise<object>} The record that was handed to `saveState`.
 */
async function updateState(mutator) {
  const current = await loadState();
  const replacement = mutator(current);
  const next = replacement ? replacement : current;
  await saveState(next);
  return next;
}