From f2bbb54ab26b7fd5aeb52aa481918e514320ae96 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:42:05 -0400 Subject: [PATCH 01/22] =?UTF-8?q?refactor:=20/simplify=20audit=20pass=20?= =?UTF-8?q?=E2=80=94=20cache=20pageTargetId,=20parallelize=20tab=20walks,?= =?UTF-8?q?=20parse=20aria=20lines=20once?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three wins from a whole-codebase audit: - daemon.ts: WeakMap-cache pageTargetId. Target ids are stable for a page's lifetime but reading one costs a full CDPSession open + Target.getTargetInfo + detach. Every command that walks tabs (activePage, tabs, find, status, tab) used to pay that per page per call. Hot path is now O(1). - daemon.ts: tabs and find handlers fan out per-page pageTargetId + page.title() via Promise.all instead of a serial await loop. Drops N-tab round-trips to 1. - snapshot.ts: the disambiguation pass called parseLine() twice per aria line (once to count role+name duplicates, once to emit). Parse once into a reused ParsedNode[]. Meaningful on large SPAs. - dispatch.rs: deleted dead stub() + EXIT_PHASE_PENDING and refreshed the stale "Phase 1 + 2" module doc. --- CHANGELOG.md | 16 +++++++++++++ crates/cli/src/dispatch.rs | 22 +---------------- src/daemon.ts | 48 +++++++++++++++++++------------------- src/snapshot.ts | 9 ++++--- 4 files changed, 45 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 514cb6f..1a6964c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ Format inspired by [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] ### Changed +- Daemon: `pageTargetId()` caches the target id on a `WeakMap`. + Playwright target ids are stable for a page's lifetime, but reading + one costs a full `CDPSession.newCDPSession` + `Target.getTargetInfo` + + detach round-trip. 
Every command that walks tabs (`activePage`, + `tabs`, `find`, `status`, `tab`) used to pay that per page per call. + With the cache, the hot path is O(1). +- Daemon: `tabs` and `find` handlers now fan out per-page + `pageTargetId` + `page.title()` in parallel with `Promise.all` instead + of a serial await loop. With N tabs open this drops N round-trips to 1. +- `snapshot.ts`: the aria-tree disambiguation pass used to call + `parseLine()` twice per line (once to count role+name duplicates, once + to emit). Parsed once into a reused array — meaningful on large SPAs. +- `dispatch.rs`: removed the dead `stub()` helper and + `EXIT_PHASE_PENDING` constant left over from the Rust-port phases, + and refreshed the stale "Phase 1 + 2" module doc. + - `attach.rs` simplification (post-/simplify pass): collapsed the two-function `spawn_daemon` + `spawn_daemon_with_retry` recursion-with- flag into a single `for attempt in 0..2` loop inside `spawn_daemon`. diff --git a/crates/cli/src/dispatch.rs b/crates/cli/src/dispatch.rs index 466c7d3..261907f 100644 --- a/crates/cli/src/dispatch.rs +++ b/crates/cli/src/dispatch.rs @@ -1,9 +1,4 @@ -//! Verb dispatch table. Mirrors the `switch (verb)` block in `src/cli.ts`. -//! -//! Phase 1 + 2 scope: every trivial verb plus medium verbs (attach, detach, -//! restart, status, qa, canary, review, ship, pair, diff-state, chain, replay, -//! gif). Phase 3 verbs that need SSE or REPL (shell, console --follow, -//! network --follow, ext sw logs --follow) still stub out to the Bun CLI. +//! Verb dispatch table — every CLI verb routes through here. use crate::args::{self, Parsed}; use crate::output; @@ -17,10 +12,6 @@ pub const EXIT_OK: i32 = 0; pub const EXIT_USAGE: i32 = 1; pub const EXIT_NOT_ATTACHED: i32 = 2; pub const EXIT_CDP_ERROR: i32 = 4; -// Phase 3 wired up the last stubs, but keep this and `stub()` around as the -// escape hatch for any future verb that lands without a Rust port yet. 
-#[allow(dead_code)] -pub const EXIT_PHASE_PENDING: i32 = 64; pub fn run(verb: &str, rest: &[String]) -> i32 { let cfg = state::resolve_config(); @@ -47,7 +38,6 @@ pub fn run(verb: &str, rest: &[String]) -> i32 { } fn dispatch_inner(cfg: &Config, verb: &str, rest: &[String]) -> Result { - // Phase 2 medium verbs — wired into per-module commands. match verb { "attach" => return attach::cmd_attach(&args::parse(rest), cfg), "detach" => return attach::cmd_detach(cfg), @@ -62,7 +52,6 @@ fn dispatch_inner(cfg: &Config, verb: &str, rest: &[String]) -> Result { "canary" => return canary::cmd_canary(&args::parse(rest)), "review" => return review::cmd_review(&args::parse(rest)), "ship" => return ship::cmd_ship(&args::parse(rest)), - // Phase 3B — shell REPL. "shell" => return crate::shell::cmd_shell(), _ => {} } @@ -80,7 +69,6 @@ fn dispatch_inner(cfg: &Config, verb: &str, rest: &[String]) -> Result { simple(cfg, verb, parsed) } - // The "ev" verb (JS execution against the page) — daemon RPC name matches. "eval" => { let parsed = args::parse(rest); simple(cfg, "eval", parsed) @@ -152,14 +140,6 @@ fn simple_no_args(cfg: &Config, cmd: &str, parsed: Parsed) -> Result { Ok(EXIT_OK) } -#[allow(dead_code)] -fn stub(verb: &str, phase: &str) -> i32 { - eprintln!( - "ghax: `{verb}` not yet ported to the Rust CLI ({phase}). Use the Bun CLI for now (set GHAX_BIN=./dist/ghax)." - ); - EXIT_PHASE_PENDING -} - fn dispatch_ext(cfg: &Config, rest: &[String]) -> Result { let Some(sub) = rest.first() else { eprintln!("Usage: ghax ext [...]"); diff --git a/src/daemon.ts b/src/daemon.ts index da730de..75e887a 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -100,12 +100,22 @@ async function allPages(ctx: Ctx): Promise { return pages; } +// Target IDs are stable for a page's lifetime, but reading them costs a +// full CDPSession open+detach round-trip. Every command that walks tabs +// (activePage, tabs, find, status, tab) used to pay that per page per +// call. 
Cache it on the Page via a WeakMap so the hot path stays O(1). +const pageTargetIds = new WeakMap(); + async function pageTargetId(page: Page): Promise { + const cached = pageTargetIds.get(page); + if (cached) return cached; try { const session = await page.context().newCDPSession(page); const info = await session.send('Target.getTargetInfo'); await session.detach().catch(() => undefined); - return (info as any)?.targetInfo?.targetId ?? null; + const id = (info as any)?.targetInfo?.targetId ?? null; + if (id) pageTargetIds.set(page, id); + return id; } catch { return null; } @@ -285,17 +295,12 @@ register('status', async (ctx) => { register('tabs', async (ctx) => { const pages = await allPages(ctx); - const out = []; - for (const p of pages) { - const id = await pageTargetId(p); - out.push({ - id, - title: await p.title().catch(() => ''), - url: p.url(), - active: id === ctx.activePageId, - }); - } - return out; + return Promise.all( + pages.map(async (p) => { + const [id, title] = await Promise.all([pageTargetId(p), p.title().catch(() => '')]); + return { id, title, url: p.url(), active: id === ctx.activePageId }; + }), + ); }); register('tab', async (ctx, args, opts) => { @@ -336,18 +341,13 @@ register('find', async (ctx, args) => { const pattern = String(args[0] ?? 
''); if (!pattern) throw new Error('Usage: find '); const pages = await allPages(ctx); - const matches = []; - for (const p of pages) { - const url = p.url(); - if (url.includes(pattern)) { - matches.push({ - id: await pageTargetId(p), - url, - title: await p.title().catch(() => ''), - }); - } - } - return matches; + const hits = pages.filter((p) => p.url().includes(pattern)); + return Promise.all( + hits.map(async (p) => { + const [id, title] = await Promise.all([pageTargetId(p), p.title().catch(() => '')]); + return { id, url: p.url(), title }; + }), + ); }); register('newWindow', async (ctx, args) => { diff --git a/src/snapshot.ts b/src/snapshot.ts index 09c6819..24b1f4d 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -79,7 +79,6 @@ export async function snapshot( return { text: '(no accessible elements found)', refs: new Map(), count: 0 }; } - const lines = ariaText.split('\n'); const refs = new Map(); const output: string[] = []; let refCounter = 1; @@ -88,16 +87,16 @@ export async function snapshot( const roleNameCounts = new Map(); const roleNameSeen = new Map(); - for (const line of lines) { + const nodes: ParsedNode[] = []; + for (const line of ariaText.split('\n')) { const node = parseLine(line); if (!node) continue; + nodes.push(node); const key = `${node.role}:${node.name || ''}`; roleNameCounts.set(key, (roleNameCounts.get(key) || 0) + 1); } - for (const line of lines) { - const node = parseLine(line); - if (!node) continue; + for (const node of nodes) { const depth = Math.floor(node.indent / 2); const isInteractive = INTERACTIVE_ROLES.has(node.role); From 3c882aa94070556b78c21d820417658a695e984e Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:51:45 -0400 Subject: [PATCH 02/22] autopilot: checkpoint before run --- plan.md | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 plan.md diff --git a/plan.md b/plan.md new file 
mode 100644 index 0000000..ab1d21f --- /dev/null +++ b/plan.md @@ -0,0 +1,221 @@ +# plan: close out the whole-codebase audit to 10/10 + +**Goal.** Land the high-ROI findings from the three-agent `/simplify` +audit that were skipped in the first pass because they were wider than +a mechanical cleanup. Every item here has a concrete target and a +single clear "why". Nothing speculative, nothing that needs a flag +debate. + +**Non-goals (deferred on purpose).** + +- `qa --concurrency N` for parallel URL crawling — API change, worth + its own design discussion. +- `BrowserKind` enum parity between Rust and TS — wide TS change, + low real payoff since the Rust side is already typed and the TS + side only touches `browserKind` as an opaque string at the edges. +- `shell.rs` port + reqwest client reuse across REPL iterations — + touches REPL lifecycle, wants its own bench to prove the win. +- RPC method-name constants in the Rust CLI — nice-to-have, but the + smoke suite already catches typos at zero cost. +- Narrating WHAT-comment cleanup sweep — grepping for commit-drift + comments is noisy and adds churn without changing behavior. + +## Scope + +### 1. Daemon: `evalInTarget()` helper (high impact, 9 call sites) + +`Runtime.evaluate` with `awaitPromise: true, returnByValue: true` + the +`exceptionDetails` check is open-coded in nine places in `daemon.ts` +(roughly lines 1091, 1144, 1204, 1230, 1285, 1408, 1451, 1488, 1516). +Two of them (`ext.sw.eval`, `extViewEval`) also wrap the user JS in +`(async () => { return (${js}); })()` and throw on `exceptionDetails`. + +- Add `async function evalInTarget(target, expr, opts?) -> unknown` in + `daemon.ts` that owns: the evaluate call shape, `awaitPromise` / + `returnByValue` flags, the `exceptionDetails` throw, and returning + `.result.value` (or the raw result if not `returnByValue`). +- Collapse all nine sites to one-liners. + +**Accept:** diff reduces daemon.ts by ~30-50 net lines; smoke passes. + +### 2. 
Daemon: `getSwTarget()` helper (6 call sites) + +`ext.reload`, `ext.hot-reload`, `ext.sw.eval`, `ext.storage`, +`ext.message`, and `ensureSwLogSubscription` all repeat: + +```ts +const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); +if (sws.length === 0) throw new DaemonError(`No SW for ${extId}`, 3); +const target = await ctx.pool.get(sws[0]); +await target.send('Runtime.enable'); +``` + +- Add `async function getSwTarget(ctx, extId) -> CdpTarget`. +- Collapse all six call sites. + +**Accept:** 6 sites shrink to one-liners; smoke passes. + +### 3. Daemon: `withCdpSession()` helper (6 call sites) + +Gesture + profile handlers all build `const session = await +page.context().newCDPSession(page); try { ... } finally { await +session.detach().catch(...); }` verbatim (`gesture.click`, +`gesture.dblclick`, `gesture.scroll`, `gesture.key`, `profile`, +`pageTargetId` indirectly). + +- Add `async function withCdpSession(page, fn) -> T` that owns the + lifecycle. +- Collapse the five gesture/profile sites. +- Leave `pageTargetId` as-is — its error path (`catch → return null`) + is semantically different and the helper is post-cache trivial. + +**Accept:** five call sites shrink; smoke passes. + +### 4. Daemon: `ext.panel.eval` / `ext.popup.eval` / `ext.options.eval` +loop-registration + +Three near-identical `register()` calls that differ only in a label + +filter regex. Register them in a loop. + +**Accept:** three separate registers become one loop emitting three +handlers; smoke passes. + +### 5. Rust CLI: shared time helpers (3 copies today) + +`qa.rs`, `canary.rs`, and `ship.rs` each have their own +`now_ms()` / `iso_now()` / `days_to_ymd()` implementations. `ship.rs` +uses a different algorithm for the same problem. + +- Add `crates/cli/src/time_util.rs` exposing `now_ms()`, `iso_now()`, + and `days_to_ymd()`. +- Delete the duplicates from `qa.rs`, `canary.rs`, and `ship.rs`; have + them import from `time_util`. 
+ +**Accept:** one implementation, three consumers; `cargo build` +clean; smoke passes. + +### 6. Rust CLI: `qa.rs` / `canary.rs` shared "since cycle start" filter + +Both files filter console entries on `level == "error" && ts >= +page_start` and failed-requests on `ts >= page_start && status >= 400` +against the same RPC results with the same shape. + +- Add a small `qa_common.rs` with `ConsoleErrorEntry`, + `FailedRequestEntry`, plus `console_errors_since(port, since_ms)` + and `failed_requests_since(port, since_ms)`. +- Have `qa.rs` and `canary.rs` use them. + +**Accept:** one implementation of each filter; smoke passes. + +### 7. Rust CLI: `resolve_url` → `url::Url::join` + +`qa.rs::resolve_url` reimplements relative→absolute URL resolution. +`url` crate is already transitively in the dep tree via `reqwest`. + +- Replace `resolve_url` with `url::Url::parse(base)?.join(href)?`. +- Delete the hand-rolled function (~40 lines). + +**Accept:** `qa --crawl` still resolves links correctly; smoke passes. + +### 8. Rust CLI: `dispatch.rs::url_encode` → `urlencoding` crate + +Hand-rolled percent-encoder with explicit byte table. `urlencoding` is +a 15-line zero-dep crate already used by `reqwest` adjacents. + +- Add `urlencoding = "2"` to `crates/cli/Cargo.toml`. +- Replace `url_encode(ext_id)` with `urlencoding::encode(ext_id)`. +- Delete the hand-rolled function. + +**Accept:** `ghax ext inspect ` still works; smoke passes. + +### 9. Daemon: invariant fix — clear `ctx.refs` on tab switch + +The CLAUDE.md hard invariant says "Refs survive only until the next +snapshot." Today `ctx.refs` is a single global map; switching tabs via +`tab ` or `new-window` doesn't clear it, so `@e3` can resolve +against a stale tab's snapshot. + +- In the `tab` handler: `ctx.refs.clear()` when the active page + changes. +- In the `newWindow` handler: same. 
+ +**Accept:** the invariant holds after a tab switch; add a smoke check +that snapshotting tab A, switching to tab B, then clicking `@e1` +fails with a refs-expired error instead of resolving against A. + +### 10. Daemon: `since:` filter on `console` + `network` RPCs + +QA + canary both request `last:500` and then discard everything older +than `page_start` client-side. On a busy page that ships ~500 entries +over HTTP per page check. + +- `console` handler: accept `since: ` opt; filter inside the + daemon. +- `network` handler: same. +- `qa.rs` + `canary.rs`: pass `since_ms` instead of `last: 500` in + their per-page calls. + +**Accept:** a page with 500 console entries returns only post-cycle +entries; smoke passes; QA output unchanged. + +### 11. Rust CLI: `require_daemon` skip redundant checks + +`state.rs::require_daemon` reads the state file, does a `kill(pid, 0)` +probe, then an HTTP `/health` round-trip. The `/health` call already +proves liveness; the kill probe is redundant when health succeeds. + +- Skip the kill probe on the happy path; keep it only as a pre-HTTP + guard for when `port` is missing or state is malformed. + +**Accept:** `ghax status` shaves ~100µs + a syscall; smoke passes; +behavior on dead daemon unchanged (still gives the clean "not +attached" hint). + +### 12. Snapshot: cache `getComputedStyle` in the cursor-interactive walk + +`snapshot.ts::consider()` calls `getComputedStyle(el)` and +`isInFloating()` walks ancestors re-reading `getComputedStyle` for +each candidate. On a 5k-element SPA this is O(n · depth) style reads. + +- Cache the `CSSStyleDeclaration` per element in a `WeakMap` that + lives for the duration of one `walk()` call. +- `isInFloating` pulls from the cache instead of re-reading. + +**Accept:** snapshot latency on a heavy SPA improves measurably +(track via `test/benchmark.ts`); smoke passes. 
+ +## Execution order + +1, 2, 3, 4 (daemon DRY — single rebuild + smoke) +5, 6 (Rust DRY — single cargo build + smoke) +7, 8 (Rust dep swaps — single smoke) +9 (invariant fix — adds a smoke check) +10 (daemon + CLI — two-sided, one rebuild + smoke) +11 (state.rs only — unit-verifiable) +12 (snapshot.ts — rebuild + smoke + benchmark) + +Each group commits atomically. After all groups land: + +- Full smoke (`npm run test:smoke`) +- Benchmark run (`npm run test:benchmark`) to sanity-check item 12 +- `/simplify` pass on the new helpers +- `/document-release` to sync README / ARCHITECTURE / CHANGELOG + +## Acceptance criteria (overall) + +- All three audit findings tiers that were in-scope on the first pass + now closed (daemon DRY, Rust DRY, perf wins, invariant fix). +- `npm run typecheck`, `cargo build --release`, `npm run build`, + `npm run test:smoke` all green. +- `npm run test:benchmark` shows no regression on existing commands + and a measurable improvement on snapshot-heavy pages (item 12). +- CHANGELOG entry under `[Unreleased]` covers all items. +- No new external runtime deps except `urlencoding` (item 8). + +## Deferred + +(See "Non-goals" at the top — items explicitly out of scope.) + +## Queued decisions + +(Empty at plan time. Populated during execution if a hard stop hits.) From e2ce7e6d62436fd92803e89d1ca6d5dc87f70fc6 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:55:01 -0400 Subject: [PATCH 03/22] =?UTF-8?q?refactor:=20plan=20item=201=20=E2=80=94?= =?UTF-8?q?=20evalInTarget()=20helper=20collapses=209=20CDP-eval=20sites?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All Runtime.evaluate calls in daemon.ts used to open-code the same shape (returnByValue + optional awaitPromise + optional IIFE wrap + inconsistent exceptionDetails check). 
Extracted one helper that centralises the shape and always throws DaemonError on exception, which is a behavior improvement for the three sites that silently swallowed errors (ext.storage in particular used to return {ok: true} when the JS expression threw). --- plan.md | 2 +- src/daemon.ts | 119 +++++++++++++++++++++++++++----------------------- 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/plan.md b/plan.md index ab1d21f..aa9e638 100644 --- a/plan.md +++ b/plan.md @@ -22,7 +22,7 @@ debate. ## Scope -### 1. Daemon: `evalInTarget()` helper (high impact, 9 call sites) +### 1. Daemon: `evalInTarget()` helper (high impact, 9 call sites) — [x] `Runtime.evaluate` with `awaitPromise: true, returnByValue: true` + the `exceptionDetails` check is open-coded in nine places in `daemon.ts` diff --git a/src/daemon.ts b/src/daemon.ts index 75e887a..3c07c19 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -24,7 +24,7 @@ */ import { chromium, type Browser, type BrowserContext, type Page, type Locator } from 'playwright'; -import { CdpPool, type CdpTargetInfo } from './cdp-client'; +import { CdpPool, type CdpTarget, type CdpTargetInfo } from './cdp-client'; import { resolveConfig, type DaemonState, writeState, readState } from './config'; import { CircularBuffer, parseStack, type ConsoleEntry, type NetworkEntry } from './buffers'; import { SourceMapCache, resolveStack } from './source-maps'; @@ -1088,12 +1088,11 @@ register('ext.list', async (ctx) => { try { const target = await ctx.pool.get(probe); await target.send('Runtime.enable'); - const r = (await target.send('Runtime.evaluate', { - expression: - '(() => { try { const m = chrome.runtime.getManifest(); return JSON.stringify({n: m.name, v: m.version}); } catch (e) { return "{}"; } })()', - returnByValue: true, - })) as { result?: { value?: string } }; - const parsed = JSON.parse(r.result?.value || '{}') as { n?: string; v?: string }; + const value = await evalInTarget( + target, + '(() => { try { const m = 
chrome.runtime.getManifest(); return JSON.stringify({n: m.name, v: m.version}); } catch (e) { return "{}"; } })()', + ); + const parsed = JSON.parse(value || '{}') as { n?: string; v?: string }; name = parsed.n ?? ''; version = parsed.v ?? ''; } catch { @@ -1131,6 +1130,32 @@ class DaemonError extends Error { } } +// Centralised `Runtime.evaluate` with returnByValue + exception surfacing. +// Every CDP-eval site used to open-code the same shape and (inconsistently) +// the exceptionDetails check — the silent sites were masking real errors +// (ext.storage returned {ok:true} on thrown expressions). Throwing here +// surfaces them as DaemonError; callers that want to swallow wrap in +// try/catch as they already do. +async function evalInTarget( + target: CdpTarget, + expression: string, + opts: { awaitPromise?: boolean; wrapIife?: boolean; errorPrefix?: string } = {}, +): Promise { + const expr = opts.wrapIife ? `(async () => { return (${expression}); })()` : expression; + const res = (await target.send('Runtime.evaluate', { + expression: expr, + awaitPromise: opts.awaitPromise ?? false, + returnByValue: true, + })) as { result?: { value?: T; description?: string }; exceptionDetails?: unknown }; + if (res.exceptionDetails) { + throw new DaemonError( + `${opts.errorPrefix ?? 'eval'} threw: ${JSON.stringify(res.exceptionDetails)}`, + 4, + ); + } + return res.result?.value; +} + register('ext.reload', async (ctx, args) => { const extId = String(args[0] ?? ''); if (!extId) throw new Error('Usage: ext reload '); @@ -1141,11 +1166,11 @@ register('ext.reload', async (ctx, args) => { // Read content_scripts so we can warn — reload disconnects us before the promise resolves. 
let manifestCs: unknown[] = []; try { - const r = (await target.send('Runtime.evaluate', { - expression: 'JSON.stringify(chrome.runtime.getManifest().content_scripts || [])', - returnByValue: true, - })) as { result?: { value?: string } }; - manifestCs = JSON.parse(r.result?.value || '[]') as unknown[]; + const value = await evalInTarget( + target, + 'JSON.stringify(chrome.runtime.getManifest().content_scripts || [])', + ); + manifestCs = JSON.parse(value || '[]') as unknown[]; } catch { // non-fatal; hint relies on it but reload itself doesn't } @@ -1201,12 +1226,11 @@ register('ext.hot-reload', async (ctx, args, opts) => { let contentScripts: ManifestContentScript[] = []; let oldVersion = ''; try { - const r = (await oldTarget.send('Runtime.evaluate', { - expression: - 'JSON.stringify({v: chrome.runtime.getManifest().version, cs: chrome.runtime.getManifest().content_scripts || []})', - returnByValue: true, - })) as { result?: { value?: string } }; - const parsed = JSON.parse(r.result?.value || '{}') as { v?: string; cs?: ManifestContentScript[] }; + const value = await evalInTarget( + oldTarget, + 'JSON.stringify({v: chrome.runtime.getManifest().version, cs: chrome.runtime.getManifest().content_scripts || []})', + ); + const parsed = JSON.parse(value || '{}') as { v?: string; cs?: ManifestContentScript[] }; oldVersion = parsed.v ?? ''; contentScripts = parsed.cs ?? []; } catch (err: any) { @@ -1227,11 +1251,8 @@ register('ext.hot-reload', async (ctx, args, opts) => { // Read the new version for reporting. 
let newVersion = ''; try { - const r = (await newTarget.send('Runtime.evaluate', { - expression: 'chrome.runtime.getManifest().version', - returnByValue: true, - })) as { result?: { value?: string } }; - newVersion = r.result?.value || ''; + newVersion = + (await evalInTarget(newTarget, 'chrome.runtime.getManifest().version')) || ''; } catch { // non-fatal } @@ -1282,15 +1303,11 @@ register('ext.hot-reload', async (ctx, args, opts) => { return JSON.stringify(out); })() `; - const r = (await newTarget.send('Runtime.evaluate', { - expression: expr, + const value = await evalInTarget(newTarget, expr, { awaitPromise: true, - returnByValue: true, - })) as { result?: { value?: string }; exceptionDetails?: unknown }; - if (r.exceptionDetails) { - throw new DaemonError(`hot-reload inject failed: ${JSON.stringify(r.exceptionDetails)}`, 4); - } - const results = JSON.parse(r.result?.value || '[]') as InjectResult[]; + errorPrefix: 'hot-reload inject', + }); + const results = JSON.parse(value || '[]') as InjectResult[]; allResults.push(...results); if (verbose) { // Verbose output is surfaced via log — the structured response carries the per-tab detail. @@ -1405,16 +1422,12 @@ register('ext.sw.eval', async (ctx, args) => { if (sws.length === 0) throw new Error(`No service worker for ${extId}`); const target = await ctx.pool.get(sws[0]); await target.send('Runtime.enable'); - const res = await target.send('Runtime.evaluate', { - expression: `(async () => { return (${js}); })()`, + const value = await evalInTarget(target, js, { awaitPromise: true, - returnByValue: true, + wrapIife: true, + errorPrefix: 'SW eval', }); - const r = res as { result?: { value?: unknown; description?: string }; exceptionDetails?: unknown }; - if (r.exceptionDetails) { - throw new Error(`SW eval threw: ${JSON.stringify(r.exceptionDetails)}`); - } - return r.result?.value ?? r.result?.description ?? null; + return value ?? 
null; }); register('ext.storage', async (ctx, args) => { @@ -1448,13 +1461,12 @@ register('ext.storage', async (ctx, args) => { } else { throw new Error(`Unknown op: ${op}`); } - const res = await target.send('Runtime.evaluate', { - expression: `(async () => ${expr})()`, + const value = await evalInTarget(target, expr, { awaitPromise: true, - returnByValue: true, + wrapIife: true, + errorPrefix: 'ext storage', }); - const r = res as { result?: { value?: unknown } }; - return r.result?.value ?? { ok: true }; + return value ?? { ok: true }; }); register('ext.message', async (ctx, args) => { @@ -1485,12 +1497,11 @@ register('ext.message', async (ctx, args) => { } })() `; - const res = (await target.send('Runtime.evaluate', { - expression: expr, + const value = await evalInTarget(target, expr, { awaitPromise: true, - returnByValue: true, - })) as { result?: { value?: unknown } }; - return res.result?.value ?? null; + errorPrefix: 'ext message', + }); + return value ?? null; }); // Shared eval-in-extension-page helper. `filter` decides which of the @@ -1513,14 +1524,12 @@ async function extViewEval( } const target = await ctx.pool.get(pages[0]); await target.send('Runtime.enable'); - const res = await target.send('Runtime.evaluate', { - expression: `(async () => { return (${js}); })()`, + const value = await evalInTarget(target, js, { awaitPromise: true, - returnByValue: true, + wrapIife: true, + errorPrefix: `${label} eval`, }); - const r = res as { result?: { value?: unknown }; exceptionDetails?: unknown }; - if (r.exceptionDetails) throw new Error(`${label} eval threw: ${JSON.stringify(r.exceptionDetails)}`); - return r.result?.value ?? null; + return value ?? 
null; } register('ext.panel.eval', async (ctx, args) => { From 1cd78bd9088b7c4b3a5a1403fd5e8b6e19115c54 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:56:13 -0400 Subject: [PATCH 04/22] =?UTF-8?q?refactor:=20plan=20item=202=20=E2=80=94?= =?UTF-8?q?=20getSwTarget()=20helper=20collapses=205=20SW-lookup=20sites?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The findByExtensionId + empty-check + pool.get + Runtime.enable sequence appeared in ext.reload, ext.hot-reload's log-subscription, ext.sw.eval, ext.storage, ext.message. One helper now returns {target, info} so the one site that needs targetInfo.id (ensureSwLogSubscription) can destructure both. --- plan.md | 2 +- src/daemon.ts | 40 +++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/plan.md b/plan.md index aa9e638..7656b86 100644 --- a/plan.md +++ b/plan.md @@ -38,7 +38,7 @@ Two of them (`ext.sw.eval`, `extViewEval`) also wrap the user JS in **Accept:** diff reduces daemon.ts by ~30-50 net lines; smoke passes. -### 2. Daemon: `getSwTarget()` helper (6 call sites) +### 2. Daemon: `getSwTarget()` helper (6 call sites) — [x] `ext.reload`, `ext.hot-reload`, `ext.sw.eval`, `ext.storage`, `ext.message`, and `ensureSwLogSubscription` all repeat: diff --git a/src/daemon.ts b/src/daemon.ts index 3c07c19..4d3a17a 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -1136,6 +1136,18 @@ class DaemonError extends Error { // (ext.storage returned {ok:true} on thrown expressions). Throwing here // surfaces them as DaemonError; callers that want to swallow wrap in // try/catch as they already do. 
+async function getSwTarget( + ctx: Ctx, + extId: string, +): Promise<{ target: CdpTarget; info: CdpTargetInfo }> { + const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); + if (sws.length === 0) throw new DaemonError(`No service worker for ${extId}`, 3); + const info = sws[0]; + const target = await ctx.pool.get(info); + await target.send('Runtime.enable'); + return { target, info }; +} + async function evalInTarget( target: CdpTarget, expression: string, @@ -1159,10 +1171,7 @@ async function evalInTarget( register('ext.reload', async (ctx, args) => { const extId = String(args[0] ?? ''); if (!extId) throw new Error('Usage: ext reload '); - const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); - if (sws.length === 0) throw new DaemonError(`No service worker for ${extId}`, 3); - const target = await ctx.pool.get(sws[0]); - await target.send('Runtime.enable'); + const { target } = await getSwTarget(ctx, extId); // Read content_scripts so we can warn — reload disconnects us before the promise resolves. let manifestCs: unknown[] = []; try { @@ -1344,13 +1353,7 @@ async function ensureSwLogSubscription(ctx: Ctx, extId: string): Promise(BUFFER_CAP); target.on((event) => { if (event.method === 'Runtime.consoleAPICalled') { @@ -1418,10 +1421,7 @@ register('ext.sw.eval', async (ctx, args) => { const extId = String(args[0] ?? ''); const js = String(args[1] ?? ''); if (!extId || !js) throw new Error('Usage: ext sw eval '); - const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); - if (sws.length === 0) throw new Error(`No service worker for ${extId}`); - const target = await ctx.pool.get(sws[0]); - await target.send('Runtime.enable'); + const { target } = await getSwTarget(ctx, extId); const value = await evalInTarget(target, js, { awaitPromise: true, wrapIife: true, @@ -1436,10 +1436,7 @@ register('ext.storage', async (ctx, args) => { const op = String(args[2] ?? 
'get'); if (!extId) throw new Error('Usage: ext storage [local|session|sync] [get|set|clear] [key] [value]'); if (!['local', 'session', 'sync'].includes(area)) throw new Error(`Unknown area: ${area}`); - const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); - if (sws.length === 0) throw new Error(`No service worker for ${extId}`); - const target = await ctx.pool.get(sws[0]); - await target.send('Runtime.enable'); + const { target } = await getSwTarget(ctx, extId); let expr: string; if (op === 'get') { @@ -1480,10 +1477,7 @@ register('ext.message', async (ctx, args) => { // Allow raw strings too — wrap as {data: } payload = payloadRaw; } - const sws = await ctx.pool.findByExtensionId(extId, 'service_worker'); - if (sws.length === 0) throw new DaemonError(`No service worker for ${extId}`, 3); - const target = await ctx.pool.get(sws[0]); - await target.send('Runtime.enable'); + const { target } = await getSwTarget(ctx, extId); // chrome.runtime.sendMessage from inside the SW with a recipient extension // ID round-trips through the extension's own onMessage listeners. For // cross-extension messaging, the SW would need to already be authorised. From a69545fa5d7442c736f56915d69ea9c418626a70 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:58:20 -0400 Subject: [PATCH 05/22] =?UTF-8?q?refactor:=20plan=20item=203=20=E2=80=94?= =?UTF-8?q?=20withCdpSession()=20helper=20owns=20session=20lifecycle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five gesture + profile sites all built the CDPSession + try/finally + detach dance by hand. One helper now owns it. pageTargetId stays open-coded since its catch-and-return-null shape doesn't fit the pattern. 
--- plan.md | 2 +- src/daemon.ts | 51 ++++++++++++++++++++++++--------------------------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/plan.md b/plan.md index 7656b86..cf9c619 100644 --- a/plan.md +++ b/plan.md @@ -55,7 +55,7 @@ await target.send('Runtime.enable'); **Accept:** 6 sites shrink to one-liners; smoke passes. -### 3. Daemon: `withCdpSession()` helper (6 call sites) +### 3. Daemon: `withCdpSession()` helper (6 call sites) — [x] Gesture + profile handlers all build `const session = await page.context().newCDPSession(page); try { ... } finally { await diff --git a/src/daemon.ts b/src/daemon.ts index 4d3a17a..656aa0d 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -1136,6 +1136,18 @@ class DaemonError extends Error { // (ext.storage returned {ok:true} on thrown expressions). Throwing here // surfaces them as DaemonError; callers that want to swallow wrap in // try/catch as they already do. +async function withCdpSession( + page: Page, + fn: (session: import('playwright').CDPSession) => Promise, +): Promise { + const session = await page.context().newCDPSession(page); + try { + return await fn(session); + } finally { + await session.detach().catch(() => undefined); + } +} + async function getSwTarget( ctx: Ctx, extId: string, @@ -1577,14 +1589,11 @@ register('gesture.click', async (ctx, args) => { if (!Number.isFinite(x) || !Number.isFinite(y)) throw new Error(`Invalid coords: ${spec}`); // Dispatch on the active tab's target. 
const page = await activePage(ctx); - const session = await page.context().newCDPSession(page); - try { + await withCdpSession(page, async (session) => { await session.send('Input.dispatchMouseEvent', { type: 'mouseMoved', x, y }); await session.send('Input.dispatchMouseEvent', { type: 'mousePressed', x, y, button: 'left', clickCount: 1 }); await session.send('Input.dispatchMouseEvent', { type: 'mouseReleased', x, y, button: 'left', clickCount: 1 }); - } finally { - await session.detach().catch(() => undefined); - } + }); return { ok: true }; }); @@ -1672,7 +1681,7 @@ register('profile', async (ctx, _args, opts) => { const ts = new Date().toISOString().replace(/[:.]/g, '-'); const base = `${dir}/${extId ? `ext-${extId}-${ts}` : `tab-${ts}`}`; - let startMetrics: Record; + let startMetrics: Record = {}; let endMetrics: Record | null = null; let target = extId ? `ext:${extId}` : 'active-tab'; let heapPath: string | null = null; @@ -1692,9 +1701,8 @@ register('profile', async (ctx, _args, opts) => { } } else { const page = await activePage(ctx); - const session = await page.context().newCDPSession(page); - const sendable = asCdpSend(session); - try { + await withCdpSession(page, async (session) => { + const sendable = asCdpSend(session); startMetrics = await takeMetricsViaSession(sendable); if (durationMs > 0) { await new Promise((r) => setTimeout(r, durationMs)); @@ -1704,9 +1712,7 @@ register('profile', async (ctx, _args, opts) => { heapPath = `${base}.heapsnapshot`; await captureHeapSnapshot(sendable, heapPath); } - } finally { - await session.detach().catch(() => undefined); - } + }); target = `tab:${page.url()}`; } @@ -1889,13 +1895,10 @@ register('gesture.key', async (ctx, args) => { const key = String(args[0] ?? 
''); if (!key) throw new Error('Usage: gesture key '); const page = await activePage(ctx); - const session = await page.context().newCDPSession(page); - try { + await withCdpSession(page, async (session) => { await session.send('Input.dispatchKeyEvent', { type: 'keyDown', key }); await session.send('Input.dispatchKeyEvent', { type: 'keyUp', key }); - } finally { - await session.detach().catch(() => undefined); - } + }); return { ok: true }; }); @@ -1907,8 +1910,7 @@ register('gesture.dblclick', async (ctx, args) => { const y = Number(ys); if (!Number.isFinite(x) || !Number.isFinite(y)) throw new Error(`Invalid coords: ${spec}`); const page = await activePage(ctx); - const session = await page.context().newCDPSession(page); - try { + await withCdpSession(page, async (session) => { await session.send('Input.dispatchMouseEvent', { type: 'mouseMoved', x, y }); // clickCount=2 on the second pressed/released is what Chrome treats as a // dblclick — firing pressed/released twice with clickCount=1 is NOT the @@ -1917,9 +1919,7 @@ register('gesture.dblclick', async (ctx, args) => { await session.send('Input.dispatchMouseEvent', { type: 'mouseReleased', x, y, button: 'left', clickCount: 1 }); await session.send('Input.dispatchMouseEvent', { type: 'mousePressed', x, y, button: 'left', clickCount: 2 }); await session.send('Input.dispatchMouseEvent', { type: 'mouseReleased', x, y, button: 'left', clickCount: 2 }); - } finally { - await session.detach().catch(() => undefined); - } + }); return { ok: true }; }); @@ -1931,8 +1931,7 @@ register('gesture.scroll', async (ctx, args) => { } if (!Number.isFinite(amount)) throw new Error(`Invalid scroll amount: ${args[1]}`); const page = await activePage(ctx); - const session = await page.context().newCDPSession(page); - try { + await withCdpSession(page, async (session) => { // Dispatch on the viewport centre. Magnitude is the wheel delta. const viewport = page.viewportSize() ?? 
{ width: 1280, height: 720 }; const x = viewport.width / 2; @@ -1946,9 +1945,7 @@ register('gesture.scroll', async (ctx, args) => { deltaX, deltaY, }); - } finally { - await session.detach().catch(() => undefined); - } + }); return { ok: true, direction: dir, amount }; }); From 40f888239ed32a8c043855cb86c13dc877bbad2d Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:58:54 -0400 Subject: [PATCH 06/22] =?UTF-8?q?refactor:=20plan=20item=204=20=E2=80=94?= =?UTF-8?q?=20loop-register=20the=20three=20ext.view.eval=20handlers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit panel, popup, and options eval differ only in a URL-filter regex and a label. Declare them in a table and register in a for-of. --- plan.md | 2 +- src/daemon.ts | 54 +++++++++++++++------------------------------------ 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/plan.md b/plan.md index cf9c619..8680445 100644 --- a/plan.md +++ b/plan.md @@ -72,7 +72,7 @@ session.detach().catch(...); }` verbatim (`gesture.click`, **Accept:** six call sites shrink; smoke passes. ### 4. Daemon: `ext.panel.eval` / `ext.popup.eval` / `ext.options.eval` -loop-registration +loop-registration — [x] Three near-identical `register()` calls that differ only in a label + filter regex. Register them in a loop. diff --git a/src/daemon.ts b/src/daemon.ts index 656aa0d..cfbec3b 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -1538,45 +1538,23 @@ async function extViewEval( return value ?? null; } -register('ext.panel.eval', async (ctx, args) => { - const extId = String(args[0] ?? ''); - const js = String(args[1] ?? ''); - return extViewEval( - ctx, - extId, - js, - (url) => /\/sidepanel\.html|sidePanel|panel\.html/i.test(url), - 'panel', - ); -}); - -register('ext.popup.eval', async (ctx, args) => { - const extId = String(args[0] ?? ''); - const js = String(args[1] ?? 
''); - // Popups are transient — a page target only exists while the popup is - // actually open. Matching by popup.html or action/default_popup path. - return extViewEval( - ctx, - extId, - js, - (url) => /\/popup\.html|\/popup\.htm|default_popup/i.test(url), - 'popup', - ); -}); +// The three ext-view eval verbs differ only in label + URL filter. +// Popups are transient (target only exists while open); options pages +// are normal tabs (options.html / options_ui); panels live in the +// side panel frame (sidepanel.html). +const EXT_VIEW_FILTERS: Array<{ label: string; match: RegExp }> = [ + { label: 'panel', match: /\/sidepanel\.html|sidePanel|panel\.html/i }, + { label: 'popup', match: /\/popup\.html|\/popup\.htm|default_popup/i }, + { label: 'options', match: /\/options\.html|\/options\/|options_ui/i }, +]; -register('ext.options.eval', async (ctx, args) => { - const extId = String(args[0] ?? ''); - const js = String(args[1] ?? ''); - // Options pages open as normal tabs when the user clicks "Options" in - // the extensions panel. Path convention: options.html or options_ui. - return extViewEval( - ctx, - extId, - js, - (url) => /\/options\.html|\/options\/|options_ui/i.test(url), - 'options', - ); -}); +for (const { label, match } of EXT_VIEW_FILTERS) { + register(`ext.${label}.eval`, async (ctx, args) => { + const extId = String(args[0] ?? ''); + const js = String(args[1] ?? 
''); + return extViewEval(ctx, extId, js, (url) => match.test(url), label); + }); +} // ─── Gesture commands (real Input.dispatch*) ─────────────────── From 28e96de730487f5427f13d7acdc3e0789c14120f Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:00:57 -0400 Subject: [PATCH 07/22] =?UTF-8?q?refactor:=20plan=20item=205=20=E2=80=94?= =?UTF-8?q?=20time=5Futil.rs=20consolidates=203=20calendar=20impls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qa.rs and canary.rs each had an identical 30-line Gregorian conversion; ship.rs had a slower year-by-year-loop variant of the same thing. One howardhinnant-algorithm module now serves all three, plus the CLAUDE.md 'avoid chrono' constraint still holds. --- crates/cli/src/canary.rs | 35 ++------------------ crates/cli/src/main.rs | 9 +++-- crates/cli/src/qa.rs | 40 ++--------------------- crates/cli/src/ship.rs | 65 +------------------------------------ crates/cli/src/time_util.rs | 55 +++++++++++++++++++++++++++++++ plan.md | 2 +- 6 files changed, 65 insertions(+), 141 deletions(-) create mode 100644 crates/cli/src/time_util.rs diff --git a/crates/cli/src/canary.rs b/crates/cli/src/canary.rs index b519ad1..4e5a702 100644 --- a/crates/cli/src/canary.rs +++ b/crates/cli/src/canary.rs @@ -25,7 +25,7 @@ use serde::Serialize; use serde_json::{json, Value}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; // ── JSON report types ────────────────────────────────────────────────────── @@ -56,38 +56,7 @@ struct CanaryReport { // ── Helpers ──────────────────────────────────────────────────────────────── -fn now_ms() -> u64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_millis() as u64 -} - -fn iso_now() -> String { - let ms = now_ms(); - let secs = ms / 1000; - let millis = ms % 1000; - let s = 
secs % 60; - let m = (secs / 60) % 60; - let h = (secs / 3600) % 24; - let days = secs / 86400; - let (year, month, day) = days_to_ymd(days); - format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{millis:03}Z") -} - -fn days_to_ymd(days: u64) -> (u64, u64, u64) { - let z = days + 719468; - let era = z / 146097; - let doe = z - era * 146097; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y, m, d) -} +use crate::time_util::{iso_now, now_ms}; /// Extract hostname from a URL for the log filename. /// Returns `"unknown"` on parse failure. diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index d484acf..66ab3c3 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,8 +1,6 @@ -//! ghax — Rust CLI entry point. -//! -//! Phase 1 + 2: argv → daemon RPC → print, plus attach + medium verbs. -//! Phase 3A: SSE streaming (sse.rs). -//! Phase 3B: shell REPL (shell.rs). +//! ghax — Rust CLI entry point. Dispatches argv to the daemon RPC, plus +//! the medium verbs (attach, canary, qa, review, ship) that layer multi- +//! RPC logic on top. SSE streaming lives in `sse`; the REPL in `shell`. 
mod args; mod attach; @@ -18,6 +16,7 @@ mod ship; mod small; mod sse; mod state; +mod time_util; use std::process::ExitCode; diff --git a/crates/cli/src/qa.rs b/crates/cli/src/qa.rs index da5a3a8..49fd43e 100644 --- a/crates/cli/src/qa.rs +++ b/crates/cli/src/qa.rs @@ -25,7 +25,7 @@ use serde::Serialize; use serde_json::{json, Value}; use std::collections::HashSet; use std::process::Command; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; // ── JSON report types ────────────────────────────────────────────────────── @@ -96,43 +96,7 @@ fn collect_repeated_flag<'a>(raw: &'a [String], flag: &str) -> Vec<&'a str> { out } -fn now_ms() -> u64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_millis() as u64 -} - -fn iso_now() -> String { - // Use chrono-free ISO-8601 formatting. - // Format: 2006-01-02T15:04:05.000Z - let ms = now_ms(); - let secs = ms / 1000; - let millis = ms % 1000; - let s = secs % 60; - let m = (secs / 60) % 60; - let h = (secs / 3600) % 24; - let days = secs / 86400; // days since epoch - // Compute year/month/day from days-since-epoch (Gregorian proleptic calendar). - let (year, month, day) = days_to_ymd(days); - format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{millis:03}Z") -} - -/// Minimal Gregorian calendar conversion, days since 1970-01-01. -fn days_to_ymd(days: u64) -> (u64, u64, u64) { - // Algorithm from https://howardhinnant.github.io/date_algorithms.html - let z = days + 719468; - let era = z / 146097; - let doe = z - era * 146097; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y, m, d) -} +use crate::time_util::{iso_now, now_ms}; /// Sanitise a URL for use as a PNG filename (matches TS impl). 
fn safe_filename(url: &str) -> String { diff --git a/crates/cli/src/ship.rs b/crates/cli/src/ship.rs index 54ff8f8..4e38dd1 100644 --- a/crates/cli/src/ship.rs +++ b/crates/cli/src/ship.rs @@ -222,67 +222,4 @@ pub fn cmd_ship(parsed: &Parsed) -> Result { Ok(EXIT_OK) } -/// Produce an ISO-8601-like timestamp string matching `new Date().toISOString()` -/// without pulling in a time crate. Uses the system time. -fn chrono_like_ts() -> String { - // std::time gives us seconds + nanos since UNIX epoch; format manually. - use std::time::{SystemTime, UNIX_EPOCH}; - let secs = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - // Simple ISO 8601 UTC without a time crate: YYYY-MM-DDTHH:MM:SSZ - let s = secs; - let (sec, min, hr, day, mon, yr) = epoch_to_datetime(s); - format!("{yr:04}-{mon:02}-{day:02}T{hr:02}:{min:02}:{sec:02}Z") -} - -/// Minimal epoch→(sec,min,hr,day,month,year) conversion (UTC, no leap seconds). -fn epoch_to_datetime(epoch: u64) -> (u64, u64, u64, u64, u64, u64) { - let sec = epoch % 60; - let mins = epoch / 60; - let min = mins % 60; - let hrs = mins / 60; - let hr = hrs % 24; - let days = hrs / 24; - - // Days since 1970-01-01. Gregorian calendar. 
- let mut y = 1970u64; - let mut remaining = days; - loop { - let dy = days_in_year(y); - if remaining < dy { - break; - } - remaining -= dy; - y += 1; - } - let mut m = 1u64; - loop { - let dm = days_in_month(m, y); - if remaining < dm { - break; - } - remaining -= dm; - m += 1; - } - let d = remaining + 1; - (sec, min, hr, d, m, y) -} - -fn is_leap(y: u64) -> bool { - (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0) -} - -fn days_in_year(y: u64) -> u64 { - if is_leap(y) { 366 } else { 365 } -} - -fn days_in_month(m: u64, y: u64) -> u64 { - match m { - 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31, - 4 | 6 | 9 | 11 => 30, - 2 => if is_leap(y) { 29 } else { 28 }, - _ => 30, - } -} +use crate::time_util::iso_now_no_ms as chrono_like_ts; diff --git a/crates/cli/src/time_util.rs b/crates/cli/src/time_util.rs new file mode 100644 index 0000000..35d8a49 --- /dev/null +++ b/crates/cli/src/time_util.rs @@ -0,0 +1,55 @@ +//! Shared time helpers. Prior to consolidation, `qa.rs`, `canary.rs`, and +//! `ship.rs` each had their own `now_ms`/`iso_now`/`days_to_ymd` — `ship.rs` +//! even used a slower year-by-year loop algorithm. This module is the one +//! place that knows how to spell "now" without a time crate. + +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +pub fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or(Duration::ZERO) + .as_millis() as u64 +} + +/// ISO-8601 UTC timestamp with millisecond precision: `YYYY-MM-DDTHH:MM:SS.mmmZ`. +pub fn iso_now() -> String { + let ms = now_ms(); + let (year, month, day, h, m, s) = epoch_ms_to_ymdhms(ms); + let millis = ms % 1000; + format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{millis:03}Z") +} + +/// ISO-8601 UTC timestamp without milliseconds: `YYYY-MM-DDTHH:MM:SSZ`. +/// Used by ship.rs for commit messages, where the extra precision is noise. 
+pub fn iso_now_no_ms() -> String { + let ms = now_ms(); + let (year, month, day, h, m, s) = epoch_ms_to_ymdhms(ms); + format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}Z") +} + +fn epoch_ms_to_ymdhms(ms: u64) -> (u64, u64, u64, u64, u64, u64) { + let secs = ms / 1000; + let s = secs % 60; + let m = (secs / 60) % 60; + let h = (secs / 3600) % 24; + let days = secs / 86400; + let (year, month, day) = days_to_ymd(days); + (year, month, day, h, m, s) +} + +/// Gregorian calendar conversion from days-since-1970-01-01 to (year, month, day). +/// Algorithm: https://howardhinnant.github.io/date_algorithms.html — O(1), no loops. +pub fn days_to_ymd(days: u64) -> (u64, u64, u64) { + let z = days + 719468; + let era = z / 146097; + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} diff --git a/plan.md b/plan.md index 8680445..5449cda 100644 --- a/plan.md +++ b/plan.md @@ -80,7 +80,7 @@ filter regex. Register them in a loop. **Accept:** three separate registers become one loop emitting three handlers; smoke passes. -### 5. Rust CLI: shared time helpers (3 copies today) +### 5. Rust CLI: shared time helpers (3 copies today) — [x] `qa.rs`, `canary.rs`, and `ship.rs` each have their own `now_ms()` / `iso_now()` / `days_to_ymd()` implementations. 
`ship.rs` From 50cadf8d77580d36458dccee18c97b52ef4aacbb Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:02:59 -0400 Subject: [PATCH 08/22] =?UTF-8?q?refactor:=20plan=20item=206=20=E2=80=94?= =?UTF-8?q?=20qa=5Fcommon.rs=20shares=20since-cycle-start=20filters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qa.rs collected entries, canary.rs counted them; both did the same 'filter by level==error and ts>=start' dance against identical RPC shapes. One module exposes ConsoleErrorEntry / FailedRequestEntry plus console_errors_since / failed_requests_since; canary just .len()'s the returned Vec. --- crates/cli/src/canary.rs | 30 ++--------------- crates/cli/src/main.rs | 1 + crates/cli/src/qa.rs | 55 +++---------------------------- crates/cli/src/qa_common.rs | 66 +++++++++++++++++++++++++++++++++++++ plan.md | 2 +- 5 files changed, 75 insertions(+), 79 deletions(-) create mode 100644 crates/cli/src/qa_common.rs diff --git a/crates/cli/src/canary.rs b/crates/cli/src/canary.rs index 4e5a702..5844ef9 100644 --- a/crates/cli/src/canary.rs +++ b/crates/cli/src/canary.rs @@ -18,6 +18,7 @@ use crate::args::Parsed; use crate::dispatch::{EXIT_CDP_ERROR, EXIT_OK, EXIT_USAGE}; +use crate::qa_common; use crate::rpc; use crate::state; use anyhow::Result; @@ -147,33 +148,8 @@ pub fn cmd_canary(parsed: &Parsed) -> Result { notes = Some(vec![format!("redirected to {final_url}")]); } - // Console errors since cycle start. - let console_log = rpc::call(port, "console", json!([]), json!({ "last": 500 })) - .unwrap_or(Value::Array(vec![])); - console_errors = console_log - .as_array() - .unwrap_or(&vec![]) - .iter() - .filter(|e| { - e.get("level").and_then(|v| v.as_str()).unwrap_or("") == "error" - && e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0) >= cycle_start - }) - .count(); - - // Failed network requests since cycle start. 
- let net_log = rpc::call(port, "network", json!([]), json!({ "last": 500 })) - .unwrap_or(Value::Array(vec![])); - failed_requests = net_log - .as_array() - .unwrap_or(&vec![]) - .iter() - .filter(|e| { - let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); - let status = e.get("status").and_then(|v| v.as_u64()).unwrap_or(0); - ts >= cycle_start && status >= 400 - }) - .count(); - + console_errors = qa_common::console_errors_since(port, cycle_start, 500).len(); + failed_requests = qa_common::failed_requests_since(port, cycle_start, 500).len(); } } // ok = nav succeeded AND no console/net errors. diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 66ab3c3..76d621a 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -9,6 +9,7 @@ mod dispatch; mod help; mod output; mod qa; +mod qa_common; mod review; mod rpc; mod shell; diff --git a/crates/cli/src/qa.rs b/crates/cli/src/qa.rs index 49fd43e..ec5a6fb 100644 --- a/crates/cli/src/qa.rs +++ b/crates/cli/src/qa.rs @@ -27,24 +27,9 @@ use std::collections::HashSet; use std::process::Command; use std::time::Duration; -// ── JSON report types ────────────────────────────────────────────────────── - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct ConsoleErrorEntry { - text: String, - #[serde(skip_serializing_if = "Option::is_none")] - url: Option, -} +use crate::qa_common::{console_errors_since, failed_requests_since, ConsoleErrorEntry, FailedRequestEntry}; -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct FailedRequestEntry { - url: String, - #[serde(skip_serializing_if = "Option::is_none")] - status: Option, - method: String, -} +// ── JSON report types ────────────────────────────────────────────────────── #[derive(Serialize)] #[serde(rename_all = "camelCase")] @@ -423,40 +408,8 @@ pub fn cmd_qa(parsed: &Parsed) -> Result { screenshot_path = Some(path); } - // ── Console errors ── - let console_log = rpc::call(port, "console", json!([]), json!({ 
"last": 200 })).unwrap_or(Value::Array(vec![])); - let console_errors: Vec = console_log - .as_array() - .unwrap_or(&vec![]) - .iter() - .filter(|e| { - let level = e.get("level").and_then(|v| v.as_str()).unwrap_or(""); - let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); - level == "error" && ts >= page_start - }) - .map(|e| ConsoleErrorEntry { - text: e.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string(), - url: e.get("url").and_then(|v| v.as_str()).map(str::to_string), - }) - .collect(); - - // ── Failed network requests ── - let net_log = rpc::call(port, "network", json!([]), json!({ "last": 500 })).unwrap_or(Value::Array(vec![])); - let failed_requests: Vec = net_log - .as_array() - .unwrap_or(&vec![]) - .iter() - .filter(|e| { - let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); - let status = e.get("status").and_then(|v| v.as_u64()).unwrap_or(0); - ts >= page_start && status >= 400 - }) - .map(|e| FailedRequestEntry { - url: e.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string(), - status: e.get("status").and_then(|v| v.as_u64()).map(|n| n as u16), - method: e.get("method").and_then(|v| v.as_str()).unwrap_or("GET").to_string(), - }) - .collect(); + let console_errors = console_errors_since(port, page_start, 200); + let failed_requests = failed_requests_since(port, page_start, 500); let err_tag = if console_errors.is_empty() { String::new() } else { format!(", {} console errors", console_errors.len()) }; let net_tag = if failed_requests.is_empty() { String::new() } else { format!(", {} failed requests", failed_requests.len()) }; diff --git a/crates/cli/src/qa_common.rs b/crates/cli/src/qa_common.rs new file mode 100644 index 0000000..5d8a1d2 --- /dev/null +++ b/crates/cli/src/qa_common.rs @@ -0,0 +1,66 @@ +//! Shared QA-style log filters used by both `qa` (full crawl report) and +//! `canary` (post-deploy loop). Both reach into the daemon's console and +//! 
network ring buffers, discard anything older than a cycle-start +//! timestamp, and pluck out error-level console entries or 4xx/5xx +//! network entries. Centralising the filter keeps the two verbs honest +//! about what counts as a "cycle error". + +use crate::rpc; +use serde::Serialize; +use serde_json::{json, Value}; + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ConsoleErrorEntry { + pub text: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +pub struct FailedRequestEntry { + pub url: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub status: Option, + pub method: String, +} + +pub fn console_errors_since(port: u16, since_ms: u64, last: u64) -> Vec { + let log = rpc::call(port, "console", json!([]), json!({ "last": last })) + .unwrap_or(Value::Array(vec![])); + log.as_array() + .cloned() + .unwrap_or_default() + .into_iter() + .filter(|e| { + let level = e.get("level").and_then(|v| v.as_str()).unwrap_or(""); + let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); + level == "error" && ts >= since_ms + }) + .map(|e| ConsoleErrorEntry { + text: e.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string(), + url: e.get("url").and_then(|v| v.as_str()).map(str::to_string), + }) + .collect() +} + +pub fn failed_requests_since(port: u16, since_ms: u64, last: u64) -> Vec { + let log = rpc::call(port, "network", json!([]), json!({ "last": last })) + .unwrap_or(Value::Array(vec![])); + log.as_array() + .cloned() + .unwrap_or_default() + .into_iter() + .filter(|e| { + let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); + let status = e.get("status").and_then(|v| v.as_u64()).unwrap_or(0); + ts >= since_ms && status >= 400 + }) + .map(|e| FailedRequestEntry { + url: e.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string(), + status: e.get("status").and_then(|v| v.as_u64()).map(|n| n as u16), + method: 
e.get("method").and_then(|v| v.as_str()).unwrap_or("GET").to_string(), + }) + .collect() +} diff --git a/plan.md b/plan.md index 5449cda..ff43e11 100644 --- a/plan.md +++ b/plan.md @@ -94,7 +94,7 @@ uses a different algorithm for the same problem. **Accept:** one implementation, three consumers; `cargo build` clean; smoke passes. -### 6. Rust CLI: `qa.rs` / `canary.rs` shared "since cycle start" filter +### 6. Rust CLI: `qa.rs` / `canary.rs` shared "since cycle start" filter — [x] Both files filter console entries on `level == "error" && ts >= page_start` and failed-requests on `ts >= page_start && status >= 400` From 97ccca31aa99b6a2f0fd6a8a86c17c85aa28c84c Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:03:53 -0400 Subject: [PATCH 09/22] =?UTF-8?q?refactor:=20plan=20item=207=20=E2=80=94?= =?UTF-8?q?=20resolve=5Furl=20delegates=20to=20url::Url::join?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces 25 lines of hand-rolled base+href handling with a 3-line delegation. Also adds the url + urlencoding crates to Cargo.toml (url is already in the transitive tree via reqwest; urlencoding is tiny and lands item 8 cheaply alongside). 
--- Cargo.lock | 10 +++++++++- crates/cli/Cargo.toml | 7 +++++++ crates/cli/src/qa.rs | 25 +------------------------ plan.md | 2 +- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4793772..d88b06e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -159,7 +159,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -264,6 +264,8 @@ dependencies = [ "rustyline", "serde", "serde_json", + "url", + "urlencoding", ] [[package]] @@ -1228,6 +1230,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index e00039b..51f5076 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -37,3 +37,10 @@ ctrlc = { version = "3", features = ["termination"] } rustyline = "15" # atty: TTY detection for shell-mode prompt and scripted-mode failure propagation. atty = "0.2" +# url: standard URL parsing — used by qa.rs to resolve crawl-mode hrefs +# against a base. Already in the tree transitively via reqwest, so pulling +# it in directly adds no wall-clock cost to builds. +url = "2" +# urlencoding: tiny percent-encoding crate for the one place dispatch.rs +# needs it (ext inspect). Replaces a hand-rolled byte table. +urlencoding = "2" diff --git a/crates/cli/src/qa.rs b/crates/cli/src/qa.rs index ec5a6fb..fbdf7df 100644 --- a/crates/cli/src/qa.rs +++ b/crates/cli/src/qa.rs @@ -181,31 +181,8 @@ fn extract_href(tag: &str) -> Option { if href.is_empty() { None } else { Some(href) } } -/// Minimal URL resolution (absolute URLs pass through, relative paths are joined). 
fn resolve_url(base: &str, href: &str) -> Option { - if href.starts_with("http://") || href.starts_with("https://") { - return Some(href.to_string()); - } - // Extract origin + path from base. - let (scheme_end, _) = base.split_once("://")?; - let full_prefix = format!("{scheme_end}://"); - let rest = &base[full_prefix.len()..]; - let slash = rest.find('/'); - let host = match slash { - Some(i) => &rest[..i], - None => rest, - }; - let base_path = match slash { - Some(i) => &rest[i..], - None => "/", - }; - if href.starts_with('/') { - Some(format!("{full_prefix}{host}{href}")) - } else { - // Relative: resolve against directory of base_path. - let dir = base_path.rfind('/').map_or("/", |i| &base_path[..=i]); - Some(format!("{full_prefix}{host}{dir}{href}")) - } + url::Url::parse(base).ok()?.join(href).ok().map(|u| u.to_string()) } /// Crawl URLs under `root`: sitemap first, BFS fallback. diff --git a/plan.md b/plan.md index ff43e11..7ac871f 100644 --- a/plan.md +++ b/plan.md @@ -107,7 +107,7 @@ against the same RPC results with the same shape. **Accept:** one implementation of each filter; smoke passes. -### 7. Rust CLI: `resolve_url` → `url::Url::join` +### 7. Rust CLI: `resolve_url` → `url::Url::join` — [x] `qa.rs::resolve_url` reimplements relative→absolute URL resolution. `url` crate is already transitively in the dep tree via `reqwest`. From 7ca4243184f5c17e4d1e30b7c02c4932c408f706 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:05:00 -0400 Subject: [PATCH 10/22] =?UTF-8?q?refactor:=20plan=20item=208=20=E2=80=94?= =?UTF-8?q?=20urlencoding=20crate=20replaces=20hand-rolled=20url=5Fencode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit urlencoding is tiny (added alongside url in the previous commit) and its Cow slots straight into the format! arg. 
--- crates/cli/src/dispatch.rs | 21 +-------------------- plan.md | 2 +- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/crates/cli/src/dispatch.rs b/crates/cli/src/dispatch.rs index 261907f..ec5ff1f 100644 --- a/crates/cli/src/dispatch.rs +++ b/crates/cli/src/dispatch.rs @@ -203,8 +203,7 @@ fn dispatch_ext_sw(cfg: &Config, rest: &[String]) -> Result { let parsed = args::parse(tail); if matches!(parsed.flags.get("follow"), Some(Value::Bool(true))) { let port = state::require_daemon(cfg)?; - // URL-encode the ext-id to match the TS `encodeURIComponent` call. - let encoded_id = url_encode(ext_id); + let encoded_id = urlencoding::encode(ext_id); return crate::sse::stream(port, &format!("/sse/ext-sw-logs/{encoded_id}")); } let port = state::require_daemon(cfg)?; @@ -256,24 +255,6 @@ fn dispatch_gesture(cfg: &Config, rest: &[String]) -> Result { simple(cfg, cmd, parsed) } -/// Percent-encode a string the same way JS `encodeURIComponent` does. -/// -/// Only unreserved characters (A-Z a-z 0-9 - _ . ~) are left as-is; -/// everything else is `%XX`-encoded. This matches the daemon's expectation for -/// the ext-sw-logs SSE path where the ext-id may contain colons or underscores. -fn url_encode(s: &str) -> String { - let mut out = String::with_capacity(s.len()); - for byte in s.bytes() { - match byte { - b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { - out.push(byte as char); - } - b => out.push_str(&format!("%{b:02X}")), - } - } - out -} - fn dispatch_record(cfg: &Config, rest: &[String]) -> Result { let Some(sub) = rest.first() else { eprintln!("Usage: ghax record [name]"); diff --git a/plan.md b/plan.md index 7ac871f..dae3689 100644 --- a/plan.md +++ b/plan.md @@ -117,7 +117,7 @@ against the same RPC results with the same shape. **Accept:** `qa --crawl` still resolves links correctly; smoke passes. -### 8. Rust CLI: `dispatch.rs::url_encode` → `urlencoding` crate +### 8. 
Rust CLI: `dispatch.rs::url_encode` → `urlencoding` crate — [x] Hand-rolled percent-encoder with explicit byte table. `urlencoding` is a 15-line zero-dep crate already used by `reqwest` adjacents. From b626595b07e252fa6d4596c77e5e1b0bb0286f78 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:06:26 -0400 Subject: [PATCH 11/22] =?UTF-8?q?fix:=20plan=20item=209=20=E2=80=94=20clea?= =?UTF-8?q?r=20ctx.refs=20on=20tab=20switch=20/=20newWindow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLAUDE.md invariant 'Refs survive only until the next snapshot' was being violated across tab boundaries. ctx.refs is a single global map; after 'ghax snapshot -i' on tab A followed by 'ghax tab ', the ref @e3 would resolve against tab A's locator and silently land in the wrong DOM. Both 'tab' and 'newWindow' now clear the map when the active page actually changes (no-op if you re-select the same tab). Adds a smoke check that asserts a stale ref after a tab switch throws 'not found'. --- plan.md | 2 +- src/daemon.ts | 9 +++++++++ test/smoke.ts | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/plan.md b/plan.md index dae3689..b882d06 100644 --- a/plan.md +++ b/plan.md @@ -128,7 +128,7 @@ a 15-line zero-dep crate already used by `reqwest` adjacents. **Accept:** `ghax ext inspect ` still works; smoke passes. -### 9. Daemon: invariant fix — clear `ctx.refs` on tab switch +### 9. Daemon: invariant fix — clear `ctx.refs` on tab switch — [x] The CLAUDE.md hard invariant says "Refs survive only until the next snapshot." 
Today `ctx.refs` is a single global map; switching tabs via diff --git a/src/daemon.ts b/src/daemon.ts index cfbec3b..77b1a1b 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -310,6 +310,13 @@ register('tab', async (ctx, args, opts) => { for (const p of pages) { const tid = await pageTargetId(p); if (tid === id) { + if (ctx.activePageId !== tid) { + // Refs are scoped to "the last snapshot on the active tab". Switching + // tabs invalidates them — otherwise `@e3` after `tab ` would + // resolve against the previous tab's locator and land in the wrong + // DOM. The CLAUDE.md invariant is explicit about this. + ctx.refs.clear(); + } ctx.activePageId = tid; await instrumentPage(ctx, p); // --quiet skips bringToFront. Useful when an agent locks onto a tab @@ -373,6 +380,8 @@ register('newWindow', async (ctx, args) => { const id = await pageTargetId(newPage); // Auto-lock this tab as the active one so subsequent commands land // in the freshly-created window without an extra `ghax tab` step. + // Same refs-invalidation rule as the `tab` handler. + if (ctx.activePageId !== id) ctx.refs.clear(); ctx.activePageId = id; await instrumentPage(ctx, newPage); return { diff --git a/test/smoke.ts b/test/smoke.ts index 7ef0a4d..6a143a5 100644 --- a/test/smoke.ts +++ b/test/smoke.ts @@ -1037,6 +1037,55 @@ c('new-window opens a background window and auto-locks active tab', async () => if (beforeActive) await run(['tab', beforeActive, '--quiet'], { allowFailure: true }); }); +c('refs cleared on tab switch (CLAUDE.md invariant)', async () => { + // Need at least two tabs to exercise the switch path. If the smoke + // session is running on a single-tab browser, spawn a second window + // for the test and clean up at the end. 
+ let spawnedSecondary = false; + let secondaryId: string | null = null; + let originalId: string | null = null; + + const tabsBefore = parseJson>((await run(['tabs', '--json'])).stdout); + if (tabsBefore.length < 2) { + const r = await run(['new-window', 'about:blank', '--json']); + const created = parseJson<{ id: string }>(r.stdout); + secondaryId = created.id; + spawnedSecondary = true; + originalId = tabsBefore.find((t) => t.active)?.id ?? null; + } else { + originalId = tabsBefore.find((t) => t.active)?.id ?? null; + secondaryId = tabsBefore.find((t) => !t.active)?.id ?? null; + } + assert(!!originalId && !!secondaryId && originalId !== secondaryId, 'need two distinct tab ids'); + + // Land on a page with a stable locator, snapshot, grab @e1. + await run(['goto', 'https://example.com']); + await run(['wait', '300']); + const snap = await run(['snapshot', '-i']); + const firstRef = snap.stdout.match(/@(e\d+)/)?.[1]; + assert(!!firstRef, `snapshot should emit a ref: ${snap.stdout.slice(0, 200)}`); + + // Switch tabs — refs should be invalidated. + await run(['tab', secondaryId!, '--quiet']); + + // Clicking the ref should fail with "not found" — the pre-switch + // locator must NOT resolve against the new active page. + const click = await run(['click', `@${firstRef}`], { allowFailure: true }); + const combined = click.stderr + click.stdout; + assert( + click.exitCode !== 0 && /not found|Run 'ghax snapshot' first/i.test(combined), + `@${firstRef} should fail after tab switch — got exit=${click.exitCode}, out=${combined.slice(0, 200)}`, + ); + + // Restore active tab for downstream tests. 
+ if (originalId) await run(['tab', originalId, '--quiet'], { allowFailure: true }); + if (spawnedSecondary && secondaryId) { + await run(['tab', secondaryId, '--quiet'], { allowFailure: true }); + await run(['eval', 'window.close()'], { allowFailure: true }); + if (originalId) await run(['tab', originalId, '--quiet'], { allowFailure: true }); + } +}); + c('tab --quiet skips bringToFront', async () => { // Hard to assert "no focus change" deterministically, so assert the command // path runs cleanly and sets the active pointer. From 324db795a99149f133c57565b72fcc5e407c4fad Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:07:36 -0400 Subject: [PATCH 12/22] =?UTF-8?q?feat:=20plan=20item=2010=20=E2=80=94=20si?= =?UTF-8?q?nce:=20filter=20on=20console=20+=20network=20RPCs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qa and canary used to fetch 200-500 buffer entries per page check and throw away everything older than page_start client-side. Moving the filter into the daemon shrinks the HTTP payload by 10x+ on busy pages. qa_common.rs now passes since + errors (for console) so the daemon drops non-matching entries before serializing. --- crates/cli/src/qa_common.rs | 32 ++++++++++++++++++-------------- plan.md | 2 +- src/daemon.ts | 4 ++++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/crates/cli/src/qa_common.rs b/crates/cli/src/qa_common.rs index 5d8a1d2..371fec6 100644 --- a/crates/cli/src/qa_common.rs +++ b/crates/cli/src/qa_common.rs @@ -27,17 +27,20 @@ pub struct FailedRequestEntry { } pub fn console_errors_since(port: u16, since_ms: u64, last: u64) -> Vec { - let log = rpc::call(port, "console", json!([]), json!({ "last": last })) - .unwrap_or(Value::Array(vec![])); + // Daemon-side filter on `since` keeps the payload small when the buffer + // holds thousands of entries. 
`errors: true` lets the daemon drop + // non-error levels before it ships the JSON; the client just collects. + let log = rpc::call( + port, + "console", + json!([]), + json!({ "last": last, "since": since_ms, "errors": true }), + ) + .unwrap_or(Value::Array(vec![])); log.as_array() .cloned() .unwrap_or_default() .into_iter() - .filter(|e| { - let level = e.get("level").and_then(|v| v.as_str()).unwrap_or(""); - let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); - level == "error" && ts >= since_ms - }) .map(|e| ConsoleErrorEntry { text: e.get("text").and_then(|v| v.as_str()).unwrap_or("").to_string(), url: e.get("url").and_then(|v| v.as_str()).map(str::to_string), @@ -46,17 +49,18 @@ pub fn console_errors_since(port: u16, since_ms: u64, last: u64) -> Vec Vec { - let log = rpc::call(port, "network", json!([]), json!({ "last": last })) - .unwrap_or(Value::Array(vec![])); + let log = rpc::call( + port, + "network", + json!([]), + json!({ "last": last, "since": since_ms }), + ) + .unwrap_or(Value::Array(vec![])); log.as_array() .cloned() .unwrap_or_default() .into_iter() - .filter(|e| { - let ts = e.get("timestamp").and_then(|v| v.as_u64()).unwrap_or(0); - let status = e.get("status").and_then(|v| v.as_u64()).unwrap_or(0); - ts >= since_ms && status >= 400 - }) + .filter(|e| e.get("status").and_then(|v| v.as_u64()).unwrap_or(0) >= 400) .map(|e| FailedRequestEntry { url: e.get("url").and_then(|v| v.as_str()).unwrap_or("").to_string(), status: e.get("status").and_then(|v| v.as_u64()).map(|n| n as u16), diff --git a/plan.md b/plan.md index b882d06..f4a23e5 100644 --- a/plan.md +++ b/plan.md @@ -143,7 +143,7 @@ against a stale tab's snapshot. that snapshotting tab A, switching to tab B, then clicking `@e1` fails with a refs-expired error instead of resolving against A. -### 10. Daemon: `since:` filter on `console` + `network` RPCs +### 10. 
Daemon: `since:` filter on `console` + `network` RPCs — [x] QA + canary both request `last:500` and then discard everything older than `page_start` client-side. On a busy page that ships ~500 entries diff --git a/src/daemon.ts b/src/daemon.ts index 77b1a1b..842a43f 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -733,7 +733,9 @@ register('console', async (ctx, _args, opts) => { const dedup = Boolean(opts.dedup); const sourceMaps = Boolean(opts['source-maps']); const n = opts.last ? Number(opts.last) : 200; + const since = opts.since !== undefined ? Number(opts.since) : 0; let entries = ctx.consoleBuf.last(n); + if (since > 0) entries = entries.filter((e) => e.timestamp >= since); if (errorsOnly) entries = entries.filter((e) => e.level === 'error'); // --source-maps: resolve each entry's parsed stack back to original @@ -815,7 +817,9 @@ register('network', async (ctx, _args, opts) => { } } + const since = opts.since !== undefined ? Number(opts.since) : 0; let entries = ctx.networkBuf.last(n); + if (since > 0) entries = entries.filter((e) => e.timestamp >= since); if (pattern) entries = entries.filter((e) => pattern.test(e.url)); if (statusTest) entries = entries.filter((e) => statusTest!(e.status)); From 616103af31ec6241fb0e1bfdd25158bd4dd2d4a8 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:08:35 -0400 Subject: [PATCH 13/22] =?UTF-8?q?perf:=20plan=20item=2011=20=E2=80=94=20re?= =?UTF-8?q?quire=5Fdaemon=20trusts=20/health,=20skips=20redundant=20kill?= =?UTF-8?q?=20probe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /health already proves liveness. The kill(pid, 0) syscall is only needed to give a pid-specific error hint when /health can't reach the daemon, so reorder: try /health first, fall back to kill probe only when health fails. Every CLI invocation shaves a syscall. 
--- crates/cli/src/state.rs | 21 +++++++++++++-------- plan.md | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/cli/src/state.rs b/crates/cli/src/state.rs index 1f9d07b..477457b 100644 --- a/crates/cli/src/state.rs +++ b/crates/cli/src/state.rs @@ -10,7 +10,7 @@ //! 3. `git rev-parse --show-toplevel` → `/.ghax/ghax.json` //! 4. cwd fallback (non-git environments) -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Result}; use serde::Deserialize; use std::path::{Path, PathBuf}; use std::process::Command; @@ -132,19 +132,24 @@ pub fn require_daemon(cfg: &Config) -> Result { cfg.state_file.display() ) })?; + // /health is the authoritative liveness signal — if it responds, the + // daemon is alive. Skip the kill-probe syscall on the happy path and + // only fall back to it when /health fails, so we can still give the + // pid-specific "daemon not running" hint instead of a bare network + // timeout. + if health_check(state.port).is_ok() { + return Ok(state.port); + } if !is_process_alive(state.pid) { return Err(anyhow!( "daemon (pid {}) is not running — run `ghax attach`", state.pid )); } - health_check(state.port).with_context(|| { - format!( - "daemon at :{} not responding to /health — run `ghax attach`", - state.port - ) - })?; - Ok(state.port) + Err(anyhow!( + "daemon at :{} not responding to /health — run `ghax attach`", + state.port + )) } fn health_check(port: u16) -> Result<()> { diff --git a/plan.md b/plan.md index f4a23e5..84bbbd1 100644 --- a/plan.md +++ b/plan.md @@ -158,7 +158,7 @@ over HTTP per page check. **Accept:** a page with 500 console entries returns only post-cycle entries; smoke passes; QA output unchanged. -### 11. Rust CLI: `require_daemon` skip redundant checks +### 11. Rust CLI: `require_daemon` skip redundant checks — [x] `state.rs::require_daemon` reads the state file, does a `kill(pid, 0)` probe, then an HTTP `/health` round-trip. 
The `/health` call already From 6bc8c2c0239f2e8e342a44d9ebaa37cc8061aebb Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:09:27 -0400 Subject: [PATCH 14/22] =?UTF-8?q?perf:=20plan=20item=2012=20=E2=80=94=20ca?= =?UTF-8?q?che=20getComputedStyle=20in=20snapshot=20cursor-walk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit consider() reads getComputedStyle per element; isInFloating() re-reads it for every ancestor of every candidate. On a 5k-element SPA that's O(n · depth) forced style recalcs. A WeakMap scoped to the walk cuts it to O(n). Cache dies when the walk ends — no cross-invocation staleness. --- plan.md | 2 +- src/snapshot.ts | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/plan.md b/plan.md index 84bbbd1..d2eb054 100644 --- a/plan.md +++ b/plan.md @@ -171,7 +171,7 @@ proves liveness; the kill probe is redundant when health succeeds. behavior on dead daemon unchanged (still gives the clean "not attached" hint). -### 12. Snapshot: cache `getComputedStyle` in the cursor-interactive walk +### 12. Snapshot: cache `getComputedStyle` in the cursor-interactive walk — [x] `snapshot.ts::consider()` calls `getComputedStyle(el)` and `isInFloating()` walks ancestors re-reading `getComputedStyle` for diff --git a/src/snapshot.ts b/src/snapshot.ts index 24b1f4d..d4cf5bc 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -198,10 +198,25 @@ export async function snapshot( return chunks.join(' >> '); }; + // Cache getComputedStyle() calls for the duration of this walk. The + // cursor-interactive pass reads style on every candidate in consider() + // and again for every ancestor of every candidate in isInFloating(). + // On a 5k-element SPA that's O(n · depth) uncached reads, each one a + // forced style recalc. One WeakMap cuts it to O(n). 
+ const styleCache = new WeakMap(); + const styleOf = (el: Element): CSSStyleDeclaration => { + let s = styleCache.get(el); + if (!s) { + s = getComputedStyle(el); + styleCache.set(el, s); + } + return s; + }; + const isInFloating = (el: Element): boolean => { let p: Element | null = el; while (p && p !== document.documentElement) { - const ps = getComputedStyle(p); + const ps = styleOf(p); const floating = (ps.position === 'fixed' || ps.position === 'absolute') && parseInt(ps.zIndex || '0', 10) >= 10; const portal = p.hasAttribute('data-radix-popper-content-wrapper') || @@ -218,7 +233,7 @@ export async function snapshot( const consider = (el: Element, inShadow: boolean) => { if (STANDARD.has(el.tagName)) return; if (!(el as HTMLElement).offsetParent && el.tagName !== 'BODY') return; - const style = getComputedStyle(el); + const style = styleOf(el); const cursorPointer = style.cursor === 'pointer'; const onclick = el.hasAttribute('onclick'); const tabindex = el.hasAttribute('tabindex') && parseInt(el.getAttribute('tabindex')!, 10) >= 0; From 534de1213b3a96ea28deb3d815be07dfa6148082 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:20:25 -0400 Subject: [PATCH 15/22] refactor: simplify after autopilot run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tightened module/block docs in time_util.rs, qa_common.rs, and the evalInTarget helper block — removed narration of the refactor history in favor of what each module is and the live invariant it enforces. --- crates/cli/src/qa_common.rs | 10 ++++------ crates/cli/src/time_util.rs | 7 +++---- src/daemon.ts | 10 ++++------ 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/crates/cli/src/qa_common.rs b/crates/cli/src/qa_common.rs index 371fec6..e3ba190 100644 --- a/crates/cli/src/qa_common.rs +++ b/crates/cli/src/qa_common.rs @@ -1,9 +1,7 @@ -//! 
Shared QA-style log filters used by both `qa` (full crawl report) and -//! `canary` (post-deploy loop). Both reach into the daemon's console and -//! network ring buffers, discard anything older than a cycle-start -//! timestamp, and pluck out error-level console entries or 4xx/5xx -//! network entries. Centralising the filter keeps the two verbs honest -//! about what counts as a "cycle error". +//! Shared "cycle error" definition for `qa` and `canary`: error-level +//! console entries and 4xx/5xx network entries newer than a cycle-start +//! timestamp. Daemon-side `since` + `errors` flags keep the payload small +//! when the ring buffers hold thousands of entries. use crate::rpc; use serde::Serialize; diff --git a/crates/cli/src/time_util.rs b/crates/cli/src/time_util.rs index 35d8a49..deee47f 100644 --- a/crates/cli/src/time_util.rs +++ b/crates/cli/src/time_util.rs @@ -1,7 +1,6 @@ -//! Shared time helpers. Prior to consolidation, `qa.rs`, `canary.rs`, and -//! `ship.rs` each had their own `now_ms`/`iso_now`/`days_to_ymd` — `ship.rs` -//! even used a slower year-by-year loop algorithm. This module is the one -//! place that knows how to spell "now" without a time crate. +//! Chrono-free time helpers used by verbs that timestamp reports and +//! commit messages. Kept here so all callers share the same O(1) +//! Howard-Hinnant date algorithm instead of drifting into year-loops. use std::time::{Duration, SystemTime, UNIX_EPOCH}; diff --git a/src/daemon.ts b/src/daemon.ts index 842a43f..cab1ef2 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -1143,12 +1143,10 @@ class DaemonError extends Error { } } -// Centralised `Runtime.evaluate` with returnByValue + exception surfacing. -// Every CDP-eval site used to open-code the same shape and (inconsistently) -// the exceptionDetails check — the silent sites were masking real errors -// (ext.storage returned {ok:true} on thrown expressions). 
Throwing here -// surfaces them as DaemonError; callers that want to swallow wrap in -// try/catch as they already do. +// evalInTarget (below) throws DaemonError on exceptionDetails so silent +// swallowing can't mask thrown expressions — ext.storage previously +// returned {ok:true} on a thrown expr. Callers that want the old +// behaviour wrap in try/catch. async function withCdpSession( page: Page, fn: (session: import('playwright').CDPSession) => Promise, From 8be98f954f84a712e85949d4818c705499c7b4fd Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:21:52 -0400 Subject: [PATCH 16/22] docs: sync after autopilot run CHANGELOG gets the Unreleased section covering the 12 plan items. CLAUDE.md + ARCHITECTURE.md pick up the refs-per-tab invariant now that it's actually enforced. README command reference documents the new --since opt on console + network. --- ARCHITECTURE.md | 5 ++++- CHANGELOG.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ CLAUDE.md | 11 +++++++---- README.md | 4 ++-- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index a47f19b..99c4a26 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -159,7 +159,10 @@ instead of silently attaching to Edge. map lives on the daemon's active tab. `ghax click @e3` looks up `@e3` against that map and drives a Playwright locator. -Refs survive until the next snapshot. If the DOM changed and you run +Refs survive until the next snapshot — and only on the tab they were +taken on. `tab ` and `new-window` clear the ref map when the active +page changes, so a stale `@e3` from a previous tab can't silently +resolve against the wrong DOM. If the DOM changed and you run `click @e3`, Playwright fails with a clear "no element" error — fix by re-snapshotting. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a6964c..8bf4181 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,52 @@ Format inspired by [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Added +- `console --since ` and `network --since ` filter + buffer entries server-side, so callers (notably `qa` and `canary`) + don't have to ship hundreds of irrelevant entries across the HTTP + RPC just to discard them locally. `console` also accepts `errors: + true` via RPC opts so the daemon drops non-error levels before + serialising; the Rust CLI uses this on the hot path. + +### Fixed +- **Invariant enforcement**: `ctx.refs` is now cleared when the active + tab changes (via `tab ` or `new-window`). Previously the + "refs survive only until next snapshot" rule held within a tab but + broke across tab switches — `@e3` from tab A could silently resolve + against tab B's DOM. A new smoke check (`refs cleared on tab + switch`) asserts the invariant. + ### Changed +- Daemon DRY pass: three new helpers collapse repeated shapes. + `evalInTarget()` centralises nine `Runtime.evaluate` sites with + consistent `exceptionDetails` handling (which also fixes a latent + bug in `ext.storage` where a thrown expression was silently + returned as `{ok: true}`). `getSwTarget()` owns the five-step + find-sw / pool.get / Runtime.enable dance across five extension + verbs. `withCdpSession()` owns the session open + try/finally + + detach lifecycle across five gesture/profile sites. The three + `ext.{panel,popup,options}.eval` handlers now register in a loop + since they differ only by URL filter + label. 
+- Rust CLI DRY pass: new `time_util` module consolidates three + copies of the ISO-8601 / days-to-ymd logic (the `ship` copy was + using a slower year-loop algorithm than the other two); new + `qa_common` module shares the `console_errors_since` / + `failed_requests_since` filters between `qa` and `canary`, with + the filtering now happening daemon-side via the new `since:` opt. +- `dispatch.rs` swaps a hand-rolled percent-encoder for the + `urlencoding` crate; `qa.rs` swaps a hand-rolled URL resolver for + `url::Url::join`. Adds `url` + `urlencoding` as direct deps (both + are tiny; `url` was already in the tree transitively via + `reqwest`). +- `require_daemon` (Rust) now trusts `/health` as the liveness + signal and only falls back to the `kill(pid, 0)` syscall when + `/health` fails — every CLI invocation shaves a syscall. +- `snapshot.ts` caches `getComputedStyle()` results per element for + the duration of one walk. The cursor-interactive pass used to + force-recalc styles O(n · depth) times on SPA-sized trees; now + O(n) via a scoped `WeakMap`. + - Daemon: `pageTargetId()` caches the target id on a `WeakMap`. Playwright target ids are stable for a page's lifetime, but reading one costs a full `CDPSession.newCDPSession` + `Target.getTargetInfo` diff --git a/CLAUDE.md b/CLAUDE.md index 61a06d6..b4ad995 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,10 +32,13 @@ the tool in the past. daemon pointing at the same state file. For parallel agents, use `GHAX_STATE_FILE=/tmp/ghax-.json` — each gets its own daemon. -3. **Refs survive only until the next snapshot.** `ghax click @e3` looks - up `@e3` against the daemon's *last* snapshot ref map. If the DOM - changed, re-snapshot first. Never cache ref IDs in code that outlives - a single action. +3. **Refs survive only until the next snapshot — and only on the tab + they were taken on.** `ghax click @e3` looks up `@e3` against the + daemon's *last* snapshot ref map. If the DOM changed, re-snapshot + first. 
The `tab` and `new-window` handlers clear the ref map when + the active page changes, so a stale ref from tab A can't resolve + against tab B; a smoke check asserts this. Never cache ref IDs in + code that outlives a single action. 4. **Daemon restart required after editing `src/daemon.ts`.** The daemon bundle is loaded once at attach time. Changes to `src/daemon.ts` don't diff --git a/README.md b/README.md index fbf6b5d..68889ba 100644 --- a/README.md +++ b/README.md @@ -218,8 +218,8 @@ chain < steps.json record start [name] | stop | status replay gif [out.gif] [--delay ms] [--scale px] [--keep-frames] -console [--errors] [--last N] [--dedup] [--source-maps] -network [--pattern re] [--status 4xx|500|400-499] [--last N] [--har ] +console [--errors] [--last N] [--since ] [--dedup] [--source-maps] +network [--pattern re] [--status 4xx|500|400-499] [--last N] [--since ] [--har ] cookies ext list ext targets From 157f924d30458972426dfbec07f899637877acb4 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 20:24:23 -0400 Subject: [PATCH 17/22] docs: log download-interception bug in plan.md Deferred Out of scope for the audit-to-10 run, but surfaced during it. Split into its own PR so this one stays focused on the 12 planned items. --- plan.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/plan.md b/plan.md index d2eb054..3cb298a 100644 --- a/plan.md +++ b/plan.md @@ -216,6 +216,19 @@ Each group commits atomically. After all groups land: (See "Non-goals" at the top — items explicitly out of scope.) +**Surfaced during run, not in this PR:** + +- **Download interception.** When ghax is attached, Playwright's + `connectOverCDP` default flips `Browser.setDownloadBehavior` so + downloads land in `/var/folders/.../T/playwright-artifacts-*/` + under UUID filenames instead of `~/Downloads/` with the original + filename. 
Fix: after `connectOverCDP`, open a root CDP session + and `Browser.setDownloadBehavior { behavior: 'default' }` to + restore native browser download handling. Smoke check: click a + download link, assert file lands in `~/Downloads/` with the + Content-Disposition filename. Split into its own PR — this one + is already landing twelve items. + ## Queued decisions (Empty at plan time. Populated during execution if a hard stop hits.) From ac604f7cfb1f888bcfff362693f65a3cabedc7d7 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 20:34:49 -0400 Subject: [PATCH 18/22] fix: pre-landing review fixes - state.rs: /health check now verifies daemon pid matches state.pid. Codex + Claude adversarial both flagged: after the /health-first reorder, a stale state file pointing at a port later reused by a different ghax daemon would silently route RPCs to the wrong browser session. Daemon's /health already returns pid; CLI now compares it and falls back to the pid-alive probe on mismatch. - daemon.ts: evalInTarget gets a fallbackDescription opt; ext.sw.eval uses it so evals returning non-serialisable values (undefined, chrome.runtime, functions, BigInts) still show the CDP description preview instead of a bare null. Pre-refactor behavior restored. - daemon.ts: new parseSinceOpt() rejects non-finite / negative --since values loudly. Before, --since=NaN silently emptied the result set and --since=-1 silently disabled the filter. - help.rs: document --since flag on console + network. - CHANGELOG: flag the ext.storage / ext.message error-surface tightenings as Breaking. 
--- CHANGELOG.md | 12 ++++++++++++ crates/cli/src/help.rs | 5 +++-- crates/cli/src/state.rs | 27 ++++++++++++++++++++------- src/daemon.ts | 33 +++++++++++++++++++++++++++++---- 4 files changed, 64 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf4181..65919d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ Format inspired by [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Breaking +- Two error-surface tightenings fell out of the `evalInTarget` helper + consolidation. Both are behavior improvements but worth flagging for + anyone with scripted error handling: + - `ext storage` used to return `{ ok: true }` when the underlying JS + expression threw. It now throws a `DaemonError` (exit code 4) with + the exception details, so a failed `chrome.storage.*.set` no longer + silently looks successful. + - `ext message` used to return `null` when the cross-extension + `chrome.runtime.sendMessage` threw outside its inner try/catch. It + now throws a `DaemonError` as well. 
+ ### Added - `console --since ` and `network --since ` filter buffer entries server-side, so callers (notably `qa` and `canary`) diff --git a/crates/cli/src/help.rs b/crates/cli/src/help.rs index bf810ce..506fb21 100644 --- a/crates/cli/src/help.rs +++ b/crates/cli/src/help.rs @@ -44,10 +44,11 @@ Snapshot & interact: storage [local|session] [get|set|remove|clear|keys] [key] [value] Logs: - console [--errors] [--last N] [--dedup] [--source-maps] + console [--errors] [--last N] [--since ] [--dedup] [--source-maps] + # --since filters to entries newer than the epoch-ms timestamp # --dedup groups repeats with count # --source-maps resolves bundled stack frames to original sources - network [--pattern ] [--status 4xx|500|400-499] [--last N] [--har ] + network [--pattern ] [--status 4xx|500|400-499] [--last N] [--since ] [--har ] cookies Extensions (MV3): diff --git a/crates/cli/src/state.rs b/crates/cli/src/state.rs index 477457b..1f25ad3 100644 --- a/crates/cli/src/state.rs +++ b/crates/cli/src/state.rs @@ -132,12 +132,12 @@ pub fn require_daemon(cfg: &Config) -> Result { cfg.state_file.display() ) })?; - // /health is the authoritative liveness signal — if it responds, the - // daemon is alive. Skip the kill-probe syscall on the happy path and - // only fall back to it when /health fails, so we can still give the - // pid-specific "daemon not running" hint instead of a bare network - // timeout. - if health_check(state.port).is_ok() { + // /health is the authoritative liveness signal — but we also verify the + // pid in the response matches state.pid. Otherwise a stale state file + // pointing at a port now reused by a different ghax daemon (different + // project, colliding port) would silently route RPCs to the wrong + // browser session. 
+ if health_check(state.port, state.pid).is_ok() { return Ok(state.port); } if !is_process_alive(state.pid) { @@ -152,7 +152,7 @@ pub fn require_daemon(cfg: &Config) -> Result { )) } -fn health_check(port: u16) -> Result<()> { +fn health_check(port: u16, expected_pid: i32) -> Result<()> { let url = format!("http://127.0.0.1:{port}/health"); let client = reqwest::blocking::Client::builder() .timeout(std::time::Duration::from_millis(1500)) @@ -162,6 +162,19 @@ fn health_check(port: u16) -> Result<()> { return Err(anyhow!("health endpoint returned {}", resp.status())); } let body: serde_json::Value = resp.json()?; + // Defense against port collision: the daemon on this port MUST be the + // one recorded in our state file. If the pid disagrees, treat this + // health response as unrelated. + if let Some(pid) = body.get("pid").and_then(|v| v.as_i64()) { + if pid != expected_pid as i64 { + return Err(anyhow!( + "port {} answered /health but with pid {} (expected {}) — stale state?", + port, + pid, + expected_pid + )); + } + } if body.get("ok").and_then(|v| v.as_bool()) != Some(true) { return Err(anyhow!("daemon reported unhealthy")); } diff --git a/src/daemon.ts b/src/daemon.ts index cab1ef2..156d375 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -197,6 +197,22 @@ function captureBodyAsync(entry: NetworkEntry, resp: import('playwright').Respon * Matches full strings; `*` expands to `.*`. No support for `**` * (would be identical to `*` under this semantics anyway) or `?`. */ +// Parse the `since` opt shared by console + network. Accepts positive +// epoch-ms integers. Non-finite, negative, or zero → no filter. Rejects +// NaN explicitly so `--since=garbage` doesn't silently return an empty +// result set (was: Number("garbage") → NaN, every `ts >= NaN` is false). +function parseSinceOpt(raw: unknown): number { + if (raw === undefined || raw === null) return 0; + const n = Number(raw); + if (!Number.isFinite(n)) { + throw new Error(`Bad --since "${String(raw)}". 
Expected an epoch-ms integer.`); + } + if (n < 0) { + throw new Error(`Bad --since "${String(raw)}". Expected a non-negative epoch-ms integer.`); + } + return n; +} + function globToRegExp(pattern: string): RegExp { if (pattern === '*' || pattern === '') return /.*/; const escaped = pattern @@ -733,7 +749,7 @@ register('console', async (ctx, _args, opts) => { const dedup = Boolean(opts.dedup); const sourceMaps = Boolean(opts['source-maps']); const n = opts.last ? Number(opts.last) : 200; - const since = opts.since !== undefined ? Number(opts.since) : 0; + const since = parseSinceOpt(opts.since); let entries = ctx.consoleBuf.last(n); if (since > 0) entries = entries.filter((e) => e.timestamp >= since); if (errorsOnly) entries = entries.filter((e) => e.level === 'error'); @@ -817,7 +833,7 @@ register('network', async (ctx, _args, opts) => { } } - const since = opts.since !== undefined ? Number(opts.since) : 0; + const since = parseSinceOpt(opts.since); let entries = ctx.networkBuf.last(n); if (since > 0) entries = entries.filter((e) => e.timestamp >= since); if (pattern) entries = entries.filter((e) => pattern.test(e.url)); @@ -1174,8 +1190,8 @@ async function getSwTarget( async function evalInTarget( target: CdpTarget, expression: string, - opts: { awaitPromise?: boolean; wrapIife?: boolean; errorPrefix?: string } = {}, -): Promise { + opts: { awaitPromise?: boolean; wrapIife?: boolean; errorPrefix?: string; fallbackDescription?: boolean } = {}, +): Promise { const expr = opts.wrapIife ? `(async () => { return (${expression}); })()` : expression; const res = (await target.send('Runtime.evaluate', { expression: expr, @@ -1188,6 +1204,14 @@ async function evalInTarget( 4, ); } + // `returnByValue: true` drops any result CDP can't JSON-encode (functions, + // undefined, chrome.runtime, BigInt, Map/Set, etc.) — the caller gets + // undefined. Opt-in fallback returns `description`, which CDP populates + // with a stringified preview for those cases. 
Used by ext.sw.eval where + // inspection of non-serialisable globals is the whole point. + if (opts.fallbackDescription && res.result?.value === undefined) { + return res.result?.description; + } return res.result?.value; } @@ -1449,6 +1473,7 @@ register('ext.sw.eval', async (ctx, args) => { awaitPromise: true, wrapIife: true, errorPrefix: 'SW eval', + fallbackDescription: true, }); return value ?? null; }); From a879c464e91c74b3af0d2eaf1c5c90bfeec1f970 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 20:45:32 -0400 Subject: [PATCH 19/22] docs: log google-anti-automation friction in plan.md Deferred Second bug surfaced during the ship review. Splitting into its own follow-up PR with the download-interception fix since both are browser-integration polish rather than part of the audit-to-10 scope. --- plan.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/plan.md b/plan.md index 3cb298a..9deebb8 100644 --- a/plan.md +++ b/plan.md @@ -218,6 +218,18 @@ Each group commits atomically. After all groups land: **Surfaced during run, not in this PR:** +- **Google anti-automation on sensitive flows.** Chrome/Edge launched + with `--remote-debugging-port=9222` sets `navigator.webdriver = true` + and related fingerprintable flags. Google's anti-bot on sensitive + pages (Business Profile verification, Drive sharing consent, some + auth challenges) refuses to render. Cheap mitigation: document + adding `--disable-blink-features=AutomationControlled` to the + user's Edge launch command in `CONTRIBUTING.md`. Also document the + workaround pattern ("detach ghax → do the Google thing manually + → re-attach") for flows where even the mitigation doesn't help. + Full stealth-mode JS injection is out of scope — cat-and-mouse + maintenance burden isn't worth it for a dev tool. 
+ - **Download interception.** When ghax is attached, Playwright's `connectOverCDP` default flips `Browser.setDownloadBehavior` so downloads land in `/var/folders/.../T/playwright-artifacts-*/` From 0188bab651bb71cc9930846c414eb970acdca281 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:09:48 -0400 Subject: [PATCH 20/22] docs: log 2026-04-20 field-report triage in plan.md follow-up sprint Forty data points from an 8-hour operator session on Google Ads / GBP workflows. Triaged into four buckets so the next sprint has a concrete prioritised list instead of a dispersed bug log. Bucket A extends this PR's payload-reduction theme (item 10); Bucket B captures the real refs-stability work that item 9 only partially addresses. --- plan.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/plan.md b/plan.md index 9deebb8..4c7317e 100644 --- a/plan.md +++ b/plan.md @@ -241,6 +241,69 @@ Each group commits atomically. After all groups land: Content-Disposition filename. Split into its own PR — this one is already landing twelve items. +## Follow-up sprint (from 2026-04-20 jnremache field report) + +The 8h operator-driven session logged in +`docs/sessions/2026-04-20-jnremache-field-report.md` surfaced +40+ data points grouped into triage buckets for the next sprint. + +**Bucket A — high-ROI, same theme as item 10 (payload reduction):** + +- `snapshot --compact` (TOK-01). Drop the cursor-interactive tree + by default, one element per line. 50%+ token cut on snapshot. +- `eval --max-bytes N` (TOK-02). Server-side truncation to protect + LLM operators from accidental context blow-outs. +- `text --length N --skip M --selector ` (TOK-10). Scoped, + paged page-text dumps. Replaces ~40 hand-rolled + `document.body.innerText.substring(...)` in one session. +- `tabs --filter --fields id,title` (TOK-04). Server-side + regex + field selection on the tab list. 
Cuts ~200 bytes of + query-string noise per Google-product tab. +- `upload <@ref|selector> ` (JNR-07). First-class file upload + verb that wraps CDP's `DOM.setFileInputFiles`. Used 5x in one + session via a hand-written 30-line Node + ws shim. +- `--full-page` kebab alias for `--fullPage` (GHAX-FR-06). Trivial. + +**Bucket B — architectural, own PR each:** + +- Stable `@e` refs across DOM mutations within a tab (JNR-03). + Currently refs shift mid-click-sequence on Material / React + forms; the field report shows Saturday got toggled instead of + Friday on Google Ads because comboboxes opened and reindexed the + ARIA tree. Item 9 in this PR only handles the tab-boundary case. + Real fix needs hash-based refs (role + name + nth-of-type) + or semantic re-resolution on every interaction. +- React/Angular/Material `fill` fallback (JNR-04). Detect + framework-managed inputs and use the native-setter + + `dispatchEvent('input'/'change')` pattern instead of + Playwright's `.fill()`. +- Dialog-aware ARIA walker (JNR-06). When a `[role=dialog]` is + open, the snapshot should treat it as the new root instead of + inheriting `aria-hidden="true"` from the outer app. +- Auto-reattach when state file stale but daemon alive (JNR-01). + Or at minimum a more actionable error than "no daemon state". +- `ghax batch '[{"click":"@e7"},...]'` (TOK-09). One round-trip for + a sequence of ops, with atomic snapshot between steps — also + fixes the JNR-03 ref-shift naturally. + +**Bucket C — papercuts, bundle into one PR:** + +- RPC single-retry shim (JNR-02). +- Quiet `ghax attach` on success (TOK-07, POSIX convention). +- `ghax status` includes active tab id + title (GHAX-FR-04). +- `ghax wait --selector` more prominent in help (GHAX-FR-02). +- `ghax eval` auto-waits for navigation once before giving up + (GHAX-FR-01). + +**Bucket D — docs-only, goes with the google-antibot note:** + +- Chrome default-profile CDP restriction (JNR-08). 
Chrome v113+ + blocks `--remote-debugging-port` on the default user-data-dir. + Document the `--user-data-dir=` workaround in + CONTRIBUTING.md. +- Google Ads "disconnect" modal on rapid form submits (GHAX-FR-05). + Known anti-bot pattern, not fixable, document as expected. + ## Queued decisions (Empty at plan time. Populated during execution if a hard stop hits.) From 21bc47c84f7a78b2e017d15c6d1025247901d106 Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:11:07 -0400 Subject: [PATCH 21/22] docs: add 2026-04-20 jnremache field report The 8h operator session that surfaced the 40+ data points plan.md references in the follow-up sprint section. Committed here so the plan.md pointer doesn't dangle. --- .../2026-04-20-jnremache-field-report.md | 521 ++++++++++++++++++ 1 file changed, 521 insertions(+) create mode 100644 docs/sessions/2026-04-20-jnremache-field-report.md diff --git a/docs/sessions/2026-04-20-jnremache-field-report.md b/docs/sessions/2026-04-20-jnremache-field-report.md new file mode 100644 index 0000000..f631ac5 --- /dev/null +++ b/docs/sessions/2026-04-20-jnremache-field-report.md @@ -0,0 +1,521 @@ +# Field report — ghax in heavy, long-running session (2026-04-20) + +**Operator**: Claude (Opus 4.7) driving user's Edge for ~8h across Google Ads +setup, GBP verification, SEO audit, NY DOS corp lookup, and ad-campaign +management. + +**Target project**: `web-projects/jnremache` (WP migration + SEO + marketing). + +**Verdict**: ghax was indispensable for the real-browser tasks (Google Ads form +navigation, GBP claim flow) that headless tools would've failed on. But the +long session surfaced a bunch of papercuts + a few real bugs. Documenting here +so the maintainer can triage. + +--- + +## 🐛 Bugs (reproducible) + +### BUG-JNR-01 — Daemon state lost silently; cryptic error on next call + +**Severity**: medium (very disruptive when it hits) + +**Repro**: +1. `ghax attach` (success) +2. 
Work for a while, do ~20 `eval` / `click` / `fill` calls +3. User quits + relaunches Edge (for any reason — here it was to switch tabs / profile) +4. Next `ghax ` → `ghax: no daemon state at /Users/.../.ghax/ghax.json — run 'ghax attach' first` + +But the **daemon process is still alive** — `ps aux | grep ghax` shows PID, +`curl http://127.0.0.1:$PORT/rpc` might respond. The *state file* is gone or +out of date, but the daemon isn't. + +**Impact**: Every `eval` I issued after a browser restart returned that error. +I had to manually `ghax attach` again, re-resolve tab IDs, etc. The daemon +connection tracker knows when CDP disconnected — it should either: + +- Automatically re-attach on the next command (probe `:9222`, re-sync, move on), OR +- Surface a more actionable error: *"Edge was restarted; run `ghax attach` to reconnect"* + +**Second occurrence**: same session, after user quit + relaunched Chrome for +the 0424AG profile, tabs for Edge at `:9222` were still valid but daemon died. + +--- + +### BUG-JNR-02 — Occasional `error sending request for url http://127.0.0.1:$PORT/rpc` mid-session + +**Severity**: medium (flaky) + +Seen twice in an 8h session, always on `ghax click` against a Google Ads +dialog after ~50+ commands. Daemon still alive; next `ghax attach` reconnected +fine. No error in daemon stderr that I could find. + +Possible cause: the daemon's HTTP pool timing out idle connections? Or the RPC +client not retrying. Adding a single retry in the CLI shim would mask this +for users. + +--- + +### BUG-JNR-03 — `@e` refs shift between clicks when DOM mutates + +**Severity**: high (caused a real bug in the campaign setup) + +**Repro (concrete)**: Google Ads business-hours step. Initial snapshot: + +``` +@e7 [checkbox] "Monday Closed" +@e8 [checkbox] "Tuesday Closed" +... +@e12 [checkbox] "Saturday Closed" +``` + +Issued `ghax click @e7`, `ghax click @e8`, ..., `ghax click @e11` sequentially +expecting to toggle Mon–Fri. 
But each click inserted opens/closes comboboxes +into the DOM, **shifting the `@e` IDs** of all subsequent elements. By the time +`@e11` fired, it was pointing at what had been Saturday. + +Result: Saturday got toggled "Open" with no hours, causing a form validation +block on Save. + +**Mitigation I used**: Re-snapshot between each click (10x slower) or use +`eval` with semantic lookup (`textContent === "Saturday Open"`). + +**Ideal fix**: `@e` refs should be stable across DOM mutations within the same +navigation. Either: + +1. Hash-based refs tied to element identity (aria-label + role + nth-of-type), or +2. `ghax batch-click @e7 @e8 @e9 @e10 @e11` that snapshots once and resolves all refs up-front, or +3. At least a doc warning: **"@e refs are valid only for the snapshot they came from."** + +--- + +### BUG-JNR-04 — `ghax fill` silently no-ops on custom inputs (Material/Angular) + +**Severity**: medium (forced workaround) + +On Google Ads' Material chip + custom-input combobox fields, `ghax fill` +reported `{"ok": true}` but the input value was unchanged. Only way to set +values was raw JS using the React native setter: + +```js +const setter = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, "value").set; +setter.call(input, value); +input.dispatchEvent(new Event("input", { bubbles: true })); +input.dispatchEvent(new Event("change", { bubbles: true })); +``` + +ghax could detect framework-managed inputs (React/Angular/Material) and use +this pattern automatically for `fill`. + +--- + +### BUG-JNR-05 — `ghax click` succeeds but doesn't register for Material custom-role elements + +**Severity**: medium + +On Google Ads, some `[role=radio]` elements are `material-radio` wrappers. +`ghax click @e38` where `@e38` is a `[role=radio] "Admin"` reports success, +but the ARIA checked state never flips. `.click()` on the wrapper doesn't +dispatch the right event. + +Workaround: find `material-radio` element and click it (triggers the correct +internal handler). 
+ +--- + +### BUG-JNR-06 — `snapshot -i` omits modal dialog content from ARIA tree + +**Severity**: medium + +When Google Ads opens a dialog (e.g. "Add images" file picker, "Crop image" +editor), `ghax snapshot -i` sometimes shows only the background page's +interactive elements, not the dialog's buttons/inputs. The dialog exists in +DOM — `document.querySelector("[role=dialog]")` finds it — but ghax's ARIA +walker skips it. + +Workaround: fall back to `ghax eval` with raw `querySelectorAll`. + +Possible cause: the ARIA walker stops at `aria-hidden="true"` ancestors, and +Google Ads sometimes marks the main app hidden when a dialog opens. So the +dialog should become the new root. + +--- + +### BUG-JNR-07 — No `upload` command; must drop to raw CDP + +**Severity**: medium (common task) + +To upload a file to an `input[type=file]`, I wrote a 30-line Node + `ws` script +to send `DOM.setFileInputFiles` via CDP directly. This worked but feels like +an obvious first-class command: + +```bash +ghax upload @e17 /path/to/file.jpg # by @ref +ghax upload 'input[type=file]' /path/to/file.jpg # by selector +``` + +Uploaded 5 files to Google Ads this way in this session. + +--- + +### BUG-JNR-08 — Chrome default-profile CDP is blocked by Chrome itself (doc gap) + +**Severity**: low (documentation) + +Tried `ghax attach --browser-url http://127.0.0.1:9223` against Chrome launched +with `--remote-debugging-port=9223 --profile-directory="Profile 3"`. + +Chrome logs: `"DevTools remote debugging requires a non-default data +directory. Specify this using --user-data-dir."` + +This is Chrome's security mitigation (since ~v113) against malware using CDP +to hijack logged-in sessions. It's not a ghax bug but it bites users who +reasonably assume Chrome works the same as Edge. Would be great to have a +doc note + maybe a pre-flight check in `ghax attach --launch` that warns +about the default-dir restriction. 
+ +--- + +## 🪙 Token / context optimization (LLM-operator perspective) + +This is specifically about ghax being driven by an LLM, where every byte of +output consumes context window and every call has a cost. ghax's current +output is tuned for humans — great for terminal review, wasteful for LLMs. + +### TOK-01 — `snapshot -i` output is massive and hard to filter + +On a page like Google Ads campaign settings, `ghax snapshot -i` returned +**~8 KB** of ARIA tree + cursor-interactive tree. In one session, ~30% of my +context was spent on snapshots where I only needed a handful of `@e` refs. + +Typical usage became: + +```bash +ghax snapshot -i 2>&1 | grep -E "checkbox|button.*Next|Save|Cancel" | head -15 +``` + +Every snapshot call → piped through grep → tokens for the pipeline noise + +tokens for what grep dropped that I re-ran if my filter was wrong. + +**Proposed flags**: + +```bash +ghax snapshot --role checkbox,button,radio # filter by ARIA role +ghax snapshot --text "Next|Save|Cancel" # filter by text match +ghax snapshot --within '[role=dialog]' # only elements inside dialog +ghax snapshot --limit 20 # cap output +ghax snapshot --format compact # one element per line, drop @c refs unless asked +``` + +Even just `--format compact` that drops the cursor-interactive section by +default would cut snapshots in half. Most LLM-operators don't need both trees +in the same call. + +### TOK-02 — `eval` returns un-truncated body; no built-in truncation + +Pattern I used constantly: + +```bash +ghax eval '({ body: document.body.innerText.substring(0, 1500) })' +``` + +Every single `eval` call that needed page content had a hand-coded +`.substring(0, N)`. Easy to forget — when I forgot, I got 50+ KB dumps that +blew my context budget. + +**Proposed**: `ghax eval --max-bytes 4096 ` that truncates the JSON +response. Applies to any field — user's `body.innerText`, `document.title`, +etc. Also `--max-depth 3` for nested objects (React devtools-style). 
+ +### TOK-03 — Errors include multi-line stack traces + boilerplate + +When `ghax click @e17` fails: + +``` +ghax: error sending request for url (http://127.0.0.1:59329/rpc) +{ + "url": "https://ads.google.com/aw/...", + ... +} +``` + +The `"url"` field alone is often 200+ bytes of query-param noise. Adds up +across 400 commands/session. Consider an `--error-format compact` mode that +emits one line: `error: rpc-send-failed (url too long)`. + +### TOK-04 — `ghax tabs` full URL dump ate tokens + +User's Edge had 15+ tabs open. Each `ghax tabs` call returned: + +```json +{"id":"FE69782F...","title":"J&N Remache Corp.","url":"https://www.jnremache.com/wp-admin/admin.php?page=rank-math&ocid=..."} +{"id":"DDA16C8B...","title":"Overview - 704-259-8127 - Google Ads","url":"https://ads.google.com/aw/overview?ocid=8183021726&euid=6484604306&__u=8714385794&uscid=8183021726&__c=2023875374&authuser=0"} +... +``` + +15 × ~250 bytes = 3.7 KB per call. I ran this ~20 times. + +Every Google Ads URL has ~200 bytes of repeated query-string garbage +(`ocid`, `euid`, `__u`, `uscid`, `__c`, `authuser`). Workaround was always +`ghax tabs | python3 -c "import json,sys; ..."`. + +**Proposed**: + +```bash +ghax tabs --filter 'url~ads.google' # regex filter server-side +ghax tabs --fields id,title # drop URL entirely when not needed +ghax find # already exists — but returns URL too +``` + +Or `ghax tabs --compact` that shortens URLs to `/?…`. + +### TOK-05 — Screenshots round-trip via disk + +Current flow for an LLM: `ghax screenshot --path /tmp/foo.png` → then Read +tool with that path → tokens for the path string, the "path" field in +response, the tool's ack, then the image. Four round-trips for one picture. + +In Anthropic's tool API, images can be returned inline as content blocks. +ghax doesn't have to know about that directly — but a stdout-safe +base64/data-URL output option (`ghax screenshot --stdout base64`) would let +the harness (Claude Code, Cursor, etc.) 
inline the image without the disk +hop. + +### TOK-06 — Pre-rendered dialogs in DOM inflate `eval` output + +Google Ads pre-renders ~15 `[role=dialog]` templates in DOM, all with +contents. So: + +```bash +ghax eval '({ dialogs: Array.from(document.querySelectorAll("[role=dialog]")).map(d => d.innerText.substring(0,300)) })' +``` + +returned **12 dialogs** — 11 of which were invisible pre-renders and +irrelevant. My filter `.filter(d => d && visible)` had to be hand-added. + +Not strictly a ghax problem, but a snapshot option like `--visible-only` +(filter to `getClientRects().length > 0` + non-zero opacity + not `display: +none` ancestor) would cut noise on any React/Angular app. + +### TOK-07 — `status`/`attach` call-overhead on every session boot + +Every session: + +``` +$ ghax attach +already attached — pid 22001, port 59329, browser edge +``` + +That's ~60 tokens for a confirmation I don't need if I'm about to issue +commands anyway. Could be silent-on-success (POSIX convention) with +`--verbose` to opt in. Same with `status` — JSON dump is fine for tooling, +but the current human-readable format has ~10 lines I usually don't need. + +### TOK-08 — No de-duplication between @e and @c refs + +Same element often appears as `@e42 [checkbox] "Monday Open"` and then +again in cursor-interactive as `@c107 [cursor:pointer] "Monday"`. For LLM +operators this is ambiguity (which do I click?), not helpful redundancy. + +**Proposed**: in `--format compact` mode, only emit the ARIA `@e` if the +element has a role; skip its `@c` entry. + +### TOK-09 — No way to batch related operations + +For setting hours on 5 weekdays: + +```bash +ghax click @e7 # 1 round-trip +ghax click @e8 # 1 round-trip +... +``` + +That's 5 shell invocations × ~300 bytes response each = 1.5 KB of "ok" +replies. + +```bash +ghax batch '[{"click":"@e7"},{"click":"@e8"},{"fill":["@e12","1.00"]}]' +``` + +One call, one response, atomic re-snapshot between steps (would also fix +BUG-JNR-03 naturally). 
+ +### TOK-10 — `text` command doesn't exist but would save eval calls + +I typed `ghax eval 'document.body.innerText.substring(0, 500)'` ~40 times. +ghax has `text` and `html` commands already (per `--help`) — but they +returned the full page (unbounded). An implicit truncation default would +help: + +```bash +ghax text # default: first 2000 chars +ghax text --length 500 --skip 1000 # slice control for LLM-friendly paging +ghax text '[role=main]' # scoped to selector +``` + +--- + +### Summary of token cost for this session + +Rough estimate for the 8h session: + +| Source | Tokens | +|---|---| +| `ghax snapshot -i` output (unfiltered portions) | ~80 KB | +| `ghax tabs` URL dumps | ~20 KB | +| `ghax eval` un-truncated results | ~40 KB | +| RPC error responses / retries | ~8 KB | +| `ghax attach` confirmations | ~2 KB | +| **Total ghax I/O in context** | **~150 KB** (~37K tokens) | + +Rough useful content from that: maybe 10%. The other 90% was scaffolding, +repeated URL query params, and dialog pre-renders. + +At current LLM pricing that's a few dollars per long session, and the +*opportunity cost* — wasted context that could have held the user's +conversation — is the bigger deal. + +**Low-hanging fruit**: default `--format compact` on snapshot, add +`--max-bytes` to eval, strip Google's query-param noise from tabs output. +These 3 would probably cut ghax's token footprint 50%+ without changing +any semantics. + +--- + +## 💡 UX / wishlist + +### GHAX-FR-01 — `ghax eval` navigation-safe by default + +On navigation mid-`eval`, you get cryptic: + +``` +ghax: page.evaluate: Execution context was destroyed, most likely because of a navigation +``` + +The CLI exits non-zero. A one-line retry (wait for `load`, re-evaluate) would +make `window.location = "..."` + next `eval` work as a two-liner instead of +needing a manual `sleep 5`. + +### GHAX-FR-02 — `ghax wait --selector ... --timeout ...` more prominent + +I saw `wait` in help but kept falling back to `sleep 3`. 
A `wait` that polls +`querySelector` + returns when found (or timeout) would eliminate most +`sleep`s in my scripts. (Maybe it already does this — docs example would +help.) + +### GHAX-FR-03 — Better `--help` for `attach` + +Specifically call out: + +- The `GHAX_DAEMON_BUNDLE` env var. I had to dig to find + `/Users/.../.local/share/ghax/ghax-daemon.mjs` after an install. +- What happens when `:9222` port scan finds multiple browsers. (I got a picker + once; worked fine, but it was a surprise.) + +### GHAX-FR-04 — `ghax status` should include "tabs context" + +After a long session, `ghax status` shows `tabs: 17`. I never know which is +"active" — the one my next `eval` will target — without running `ghax tabs` +and diffing. A one-line "active: `` ``" in `status` would help. + +### GHAX-FR-05 — Rate-limit / recovery for Google's anti-automation + +Google Ads has a "You got disconnected — To make sure your work is saved, +try signing in again on the next screen" modal that interrupts 2+ rapid form +submissions on high-risk operations (granting admin access, claiming +credit). It's effectively a soft block on automation. + +Not really fixable in ghax, but worth noting in docs as a known class of +failure: **Google product form submissions that grant access / money rights +are anti-bot-hardened. Expect 2/5 attempts to trigger a disconnect modal; +user must click "Continue" once, then retry.** + +### GHAX-FR-06 — `ghax screenshot` --full-page flag name + +I typed `--fullPage` (camelCase) and it worked. Also tried `--full-page` +first — didn't work. Accepting both would match ghax's other flag style +which is mostly kebab. + +--- + +## ✨ Things that worked great (credit where due) + +- **`ghax attach` auto-scan** found Edge on `:9222` instantly. No config. +- **`@e` refs in snapshot** are the killer feature — way clearer than XPath + for LLM-driven interaction. Even with BUG-JNR-03, the payoff is enormous. 
+- **Cursor-interactive `@c` refs** surfaced clickable divs that weren't in + the ARIA tree (Google Ads has a lot of these). Saved me multiple times. +- **`ghax eval` handoff** to raw JS is pragmatic — when the high-level + abstractions fail, you've always got the escape hatch. +- **`ghax goto <url>`** + auto-wait is rock solid. I navigated ~40 pages + in this session without a single hang. +- **Persistent daemon** across commands meant `ghax eval` calls felt + interactive; round-trip was ~100ms, not the multi-second startup of + Playwright scripts. +- **`ghax tabs` JSON output** piped cleanly into Python — easy tab ID + extraction for multi-tab Google workflows. + +--- + +## 🔬 Session stats + +- Commands issued: ~400+ (`ghax goto`, `eval`, `click`, `fill`, `snapshot`, `screenshot`, `tabs`, `tab`) +- Browsers driven: Edge (primary, :9222), Chrome (briefly — hit BUG-JNR-08) +- Products navigated: Google Ads, Google Business Profile, Search Console, + Google Maps, Cloudflare dashboard, WordPress admin, Bizapedia, + OpenCorporates, NYBizDB +- Files uploaded via raw CDP: 5 (to Google Ads image picker) +- Bugs hit that forced workaround: 4 (JNR-03, 04, 05, 07) +- Bugs that blocked entirely: 0 (every failure had an `eval` escape hatch) + +--- + +## Repro commands for BUG-JNR-03 (most concrete) + +```bash +# Start a fresh Edge with CDP +/Applications/Microsoft\ Edge.app/Contents/MacOS/Microsoft\ Edge --remote-debugging-port=9222 & + +export PATH="$HOME/.local/bin:$PATH" +export GHAX_DAEMON_BUNDLE=/Users/gr/.local/share/ghax/ghax-daemon.mjs +ghax attach +ghax goto "https://ads.google.com/aw/campaigns/new/express?..." # business-hours step + +# Snapshot — note current @e refs for Mon–Fri checkboxes +ghax snapshot -i | grep checkbox + +# Click each in sequence — expect to toggle 5 days open +ghax click @e7 # Monday — works +ghax click @e8 # Tuesday — but now @e8 points to Tuesday's *open-time input*, not Tuesday checkbox +ghax click @e9 # ... refs have shifted further +... 
+ +# Observe: Saturday ends up toggled +ghax snapshot -i | grep "Saturday Open.*checked" +``` + +The fix is semantic resolution on every click — accept that @e refs are a +transient view of the tree, not a stable identifier. + +--- + +## Suggested triage (maintainer's call) + +| Bug | Severity | Effort | Note | +|---|---|---|---| +| JNR-03 (ref shifting) | high | medium | Biggest blocker for form workflows | +| JNR-07 (no upload) | medium | small | Common task; CDP code is straightforward | +| JNR-01 (daemon state lost) | medium | small | Auto-retry on state-file-missing | +| JNR-04 (fill on React) | medium | small | Detect-and-fall-back is well-documented pattern | +| JNR-06 (dialog ARIA walker) | medium | medium | Dialog-aware tree root | +| JNR-08 (Chrome default profile) | low | trivial | Doc note | +| JNR-02 (flaky RPC) | low | trivial | Single CLI-shim retry | +| JNR-05 (Material radio click) | low | small | Try inner click + outer fallback | +| **TOK-01** (compact snapshot format) | **high ROI** | small | Single flag, 50%+ token cut | +| **TOK-02** (eval --max-bytes) | **high ROI** | trivial | Protects against context blowouts | +| **TOK-04** (tabs --filter / --fields) | medium ROI | small | Server-side filter beats client-side grep | +| TOK-09 (batch op) | medium ROI | medium | Also fixes JNR-03 naturally | +| TOK-05 (screenshot stdout) | medium ROI | medium | Saves disk round-trip | +| TOK-07 (quiet attach) | low ROI | trivial | POSIX convention | + +Happy to provide more repro details, minimal test cases, or PRs for any of +these if useful. From 304280cbed19f72791d934e788e5775136657c5a Mon Sep 17 00:00:00 2001 From: kepptic <245740836+kepptic@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:14:30 -0400 Subject: [PATCH 22/22] ci: build Rust CLI + stage into dist/ so cross-platform verify actually runs The 'Verify binaries' step expected ./dist/ghax (or .exe on Windows) but 'bun run build' only produces the daemon bundle. 
Ubuntu + macOS silently passed because 'set -e' without pipefail treats './dist/ghax --help | head -5' as exit-0 when head succeeds on an empty stream; Windows failed explicitly because its bash shell has pipefail on. Net effect: CI never actually ran the Rust CLI on any platform. Fix: add a dtolnay/rust-toolchain step, cargo-cache the target dir, run 'cargo build --release', copy the built binary into dist/ before verify, and flip the shell flags to 'set -eo pipefail' so the check is meaningful on all three OSes. --- .github/workflows/ci.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c680f38..2d9b9a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,21 +38,42 @@ jobs: - uses: actions/setup-node@v4 with: node-version: '20' + - uses: dtolnay/rust-toolchain@stable + - name: Cargo cache + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} + restore-keys: ${{ runner.os }}-cargo- - name: Install deps run: bun install --frozen-lockfile - - name: Build + - name: Build daemon bundle run: bun run build + - name: Build Rust CLI + run: cargo build --release --manifest-path crates/cli/Cargo.toml + - name: Stage Rust binary into dist/ + shell: bash + run: | + set -euo pipefail + if [ "$RUNNER_OS" = "Windows" ]; then + cp target/release/ghax.exe dist/ghax.exe + else + cp target/release/ghax dist/ghax + fi - name: Verify binaries (non-Windows) if: runner.os != 'Windows' run: | - set -e + set -eo pipefail ./dist/ghax --help | head -5 test -f dist/ghax-daemon.mjs - name: Verify binaries (Windows) if: runner.os == 'Windows' shell: bash run: | - set -e + set -eo pipefail ./dist/ghax.exe --help | head -5 test -f dist/ghax-daemon.mjs - name: Upload artifacts