From b3a7efecf30f8d70cc0ca3068f42a29033c9d871 Mon Sep 17 00:00:00 2001 From: LeonTing1010 Date: Sat, 9 May 2026 00:04:51 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(extension):=20self-heal=20=E2=80=94=20?= =?UTF-8?q?MV3=20SW=20keep-alive=20+=20chrome://=20uniform=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ADR 2026-05-08-failure-detection-phase-2 §2C: substrate-side fixes for two bugs that caused spurious classifier firings during 2026-05-08 traffic-source dogfood session. (i) MV3 SW keep-alive Cause: SW unloads ~30s idle → daemon WS dies → peer_unreachable to engine → classifyOpFailure correctly routes to reconnect_extension, but the root cause is preventable here. Fix: chrome.alarms.create('tap-keepalive', periodInMinutes: 0.4). onAlarm listener calls chrome.runtime.getPlatformInfo (no-op API call resets idle timer). 24s period < 30s idle window. (ii) chrome:// guard uniform across daemon/popup paths Cause: line 334 had `if (isInternal && !fromDaemon)` — daemon path exempted from the chrome:// safeguard. When user reloaded extension (active tab = chrome://extensions), daemon-driven navs threw tab_closed: Cannot access a chrome:// URL. Fix: drop the !fromDaemon clause. Daemon and popup paths both open a managed background tab when active is chrome:// / data://, never clobber. UX preserved for popup; daemon now correct. Static guards (ADR §3 D6): extension/test/self-heal.test.mjs (5 source-text constraints): Rule (i)/1: chrome.alarms.create('tap-keepalive', ...) present Rule (i)/2: onAlarm.addListener wired to that alarm name Rule (i)/3: periodInMinutes < 0.5 (under MV3 idle window) Rule (ii)/1: `if (isInternal && !fromDaemon)` deleted Rule (ii)/2: `if (isInternal)` opens new tab via chrome.tabs.create CDD: RED: 5/5 constraint tests fail GREEN: 5/5 pass after edits Regression: architecture (17/17) + protocol (32/32) + multi-tab (15/15) + tap-format (0/0) + wire_codes pass; no regressions. Companion to tap-core PR #52 (§2A classifyOpFailure). §2A still correctly routes peer_unreachable → reconnect_extension on the rare SW-failure cases (network drop, debugger detach); this PR removes the IDLE-CAUSED firing pattern which was 99% of dogfood instances. Real-world verification: next user dogfood session — SW should remain alive across 30s idle gaps; chrome:// active tab should no longer break daemon nav. Manifest already declares `alarms` permission. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- extension/background.js | 33 ++++++-- extension/test/self-heal.test.mjs | 134 ++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 9 deletions(-) create mode 100644 extension/test/self-heal.test.mjs diff --git a/extension/background.js b/extension/background.js index b6e14b6..74a4760 100644 --- a/extension/background.js +++ b/extension/background.js @@ -11,6 +11,23 @@ console.log('[tap] extension runtime ready') +// --- MV3 SW keep-alive (ADR 2026-05-08-failure-detection-phase-2 §2C(i)) --- +// +// MV3 unloads the SW after ~30s idle, breaking the daemon's WebSocket and +// surfacing as `peer_unreachable` to engine. classifyOpFailure routes that +// to reconnect_extension, but the root cause is fixable here: chrome.alarms +// fires even when the SW is unloaded, waking it up. Calling any chrome +// API in the listener resets the SW idle timer. +// +// periodInMinutes 0.4 = 24s, comfortably under the 30s idle window. +chrome.alarms.create('tap-keepalive', { periodInMinutes: 0.4 }) +chrome.alarms.onAlarm.addListener((alarm) => { + if (alarm.name === 'tap-keepalive') { + // No-op API call resets the idle timer. getPlatformInfo is cheapest. + chrome.runtime.getPlatformInfo(() => {}) + } +}) + // --- State --- // --- Session Manager --- @@ -331,18 +348,16 @@ async function handleMethod(method, params = {}, senderTabId = null, { fromDaemo tabId = tab.id } else { const isInternal = current.url?.startsWith('chrome://') || current.url?.startsWith('data:') - if (isInternal && !fromDaemon) { - // Popup/content-script path: the user is actively looking at a - // chrome:// or data: tab — don't clobber it, open the target in a - // new tab instead. This is UX-preserving replacement. + if (isInternal) { + // ADR 2026-05-08-failure-detection-phase-2 §2C(ii): + // chrome:// or data:// active tab → open in background tab, + // never clobber. (Daemon path was previously exempted; the + // exemption caused tab_closed errors during dogfood.) const tab = await chrome.tabs.create({ url: params.url, active: false }) tabId = tab.id } else { - // Daemon path (and regular-URL tabs): navigate in place via - // tabs.update. chrome://newtab/ is our own placeholder from - // session.create and can be navigated away from in place — the - // previous "create a replacement tab" branch leaked the original - // chrome://newtab/ on every session-based nav. + // Regular-URL tabs: navigate in place via tabs.update. + // chrome://newtab/ is handled by the isInternal branch above. await chrome.tabs.update(tabId, { url: params.url }) } } diff --git a/extension/test/self-heal.test.mjs b/extension/test/self-heal.test.mjs new file mode 100644 index 0000000..caad2cf --- /dev/null +++ b/extension/test/self-heal.test.mjs @@ -0,0 +1,134 @@ +/** + * Constraint: extension self-heals MV3 SW idle die + always uses + * managed background tabs for daemon-driven navs. + * + * Classification: safety / what — violations cause silent peer_unreachable + * spurious failures and active-tab clobbering during daemon ops. + * + * Per ADR `2026-05-08-failure-detection-phase-2.md` §2C: + * (i) chrome.alarms keep-alive prevents MV3 SW idle (~30s timeout) from + * firing peer_unreachable to engine. + * (ii) fromDaemon exemption deleted — daemon-driven navs ALWAYS open a + * managed background tab when active is chrome://, never clobber. + * + * Adversarial framing (Phase 1a): + * "If a half-implementation made this test pass, it could (a) add the + * chrome.alarms.create call but never wire onAlarm.addListener (no-op + * timer that doesn't actually wake SW) — caught by Rule (i)/2; (b) + * delete the !fromDaemon expression but introduce a different + * bypass like `if (isInternal && something_else)` that lets daemon + * navs through — caught by Rule (ii)/2 which asserts the strict + * isInternal-only guard pattern." + * + * Run: node extension/test/self-heal.test.mjs + */ + +import { strict as assert } from "node:assert"; +import { readFileSync } from "node:fs"; + +const BG_SRC = readFileSync( + new URL("../background.js", import.meta.url), + "utf-8", +); + +let passed = 0; +let failed = 0; + +function test(name, fn) { + try { + fn(); + passed++; + console.log(` \x1b[32m✓\x1b[0m ${name}`); + } catch (e) { + failed++; + console.log(` \x1b[31m✗\x1b[0m ${name}`); + console.log(` ${e.message}`); + } +} + +// ═══════════════════════════════════════════════════════════ +// Rule (i): MV3 SW keep-alive via chrome.alarms +// Why: MV3 SW unloads after ~30s of inactivity. Without a keep-alive, +// idle daemon connections produce spurious peer_unreachable on next +// op; classifyOpFailure routes those to reconnect_extension, but the +// root cause is fixable here, not at the engine layer. +// ═══════════════════════════════════════════════════════════ + +console.log("\n -- Rule (i): MV3 SW keep-alive --\n"); + +test("chrome.alarms.create with name 'tap-keepalive' exists", () => { + // The name string is part of the contract — onAlarm dispatch matches + // on it, and arch tests cite it directly. + assert( + /chrome\.alarms\.create\s*\(\s*["']tap-keepalive["']/.test(BG_SRC), + "background.js must call chrome.alarms.create with name 'tap-keepalive'", + ); +}); + +test("chrome.alarms.onAlarm.addListener wired to keepalive", () => { + assert( + /chrome\.alarms\.onAlarm\.addListener/.test(BG_SRC), + "background.js must register a chrome.alarms.onAlarm listener", + ); + // Listener body must reference the keepalive alarm name (otherwise it + // would be a no-op dispatcher matching nothing). + const listenerStart = BG_SRC.indexOf("chrome.alarms.onAlarm.addListener"); + const listenerBody = BG_SRC.slice(listenerStart, listenerStart + 600); + assert( + /tap-keepalive/.test(listenerBody), + "onAlarm listener body must dispatch on the 'tap-keepalive' alarm name", + ); +}); + +test("keepalive period is < 0.5 minutes (< 30s, MV3 idle window)", () => { + // Default MV3 SW idle is 30s; keepalive must fire faster. Accept any + // periodInMinutes literal < 0.5 (i.e. <= 0.4 typical, or 0.49). + const m = BG_SRC.match( + /chrome\.alarms\.create\s*\(\s*["']tap-keepalive["']\s*,\s*\{[^}]*periodInMinutes:\s*([\d.]+)/, + ); + assert(m, "chrome.alarms.create must specify periodInMinutes"); + const period = parseFloat(m[1]); + assert( + period < 0.5, + `periodInMinutes ${period} >= 0.5 — SW would idle-die between alarms (MV3 idle ~30s = 0.5min)`, + ); +}); + +// ═══════════════════════════════════════════════════════════ +// Rule (ii): chrome:// guard does NOT exempt fromDaemon +// Why: dogfood 2026-05-08 — when active tab was chrome://extensions +// (during reload), daemon-driven navs got `tab_closed: Cannot access +// a chrome:// URL`. The exemption was a UX-preserving heuristic for +// popup path that wrongly applied to daemon path. +// ═══════════════════════════════════════════════════════════ + +console.log("\n -- Rule (ii): chrome:// guard always open background tab --\n"); + +test("no `&& !fromDaemon` exemption in isInternal nav guard", () => { + // Strict text check: the exact stale pattern must be absent. + assert( + !/if\s*\(\s*isInternal\s*&&\s*!fromDaemon\s*\)/.test(BG_SRC), + "Stale exemption `if (isInternal && !fromDaemon)` must be deleted; " + + "daemon-driven navs always open managed background tab.", + ); +}); + +test("isInternal guard exists and opens new tab", () => { + // Positive form: must be the strict `if (isInternal) { ... chrome.tabs.create ... }` shape. + // Search for `if (isInternal)` followed by chrome.tabs.create within ~300 chars. + const idx = BG_SRC.search(/if\s*\(\s*isInternal\s*\)/); + assert( + idx !== -1, + "Must contain `if (isInternal)` guard (without && !fromDaemon)", + ); + const block = BG_SRC.slice(idx, idx + 400); + assert( + /chrome\.tabs\.create/.test(block), + "isInternal branch must call chrome.tabs.create to open a new tab", + ); +}); + +// ═══════════════════════════════════════════════════════════ + +console.log(`\n ${passed} passed, ${failed} failed`); +process.exit(failed ? 1 : 0); From 0657bbb6917952e42a799bb40c6e9b4d95b0841a Mon Sep 17 00:00:00 2001 From: LeonTing1010 Date: Sat, 9 May 2026 00:26:53 +0800 Subject: [PATCH 2/2] =?UTF-8?q?feat(extension):=20origin-mismatch=20nav=20?= =?UTF-8?q?opens=20new=20background=20tab=20=E2=80=94=20=C2=A72C(iii)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends PR #5 §2C with the third dogfood symptom not covered by §2C(i) SW keep-alive or §2C(ii) chrome:// guard: Cause (2026-05-08 dogfood): Cloudflare nav redirected through CF auth chain → tab on dash.cloudflare.com/two-factor. Subsequent juejin.cn nav called chrome.tabs.update(tabId, {url}) on same tab, but eval ran with cloudflare login page state → silent data corruption. Same root cause for parallel batch calls (multiple plans sharing one tab). Fix: Before nav, parse target URL and compare origin to current tab's origin. If different → chrome.tabs.create new background tab. Same- origin SPA-style navs continue to use cheap tabs.update. Combined guard: `if (isInternal || crossOrigin)` — chrome:// active tab (§2C(ii)) OR cross-origin nav (§2C(iii)) both → new bg tab. Static guards (3 new in self-heal.test.mjs): (iii)/1: nav handler computes target origin via `new URL(params.url)` (iii)/2: nav handler accesses `.origin` ≥ 2× (target + current) (iii)/3: origin comparison result gates a chrome.tabs.create branch (accepts both inline and via-boolean idioms) CDD: RED: 3/3 new tests fail GREEN: 8/8 self-heal tests pass after edit Regression: architecture (17/17) + protocol (32/32) + multi-tab (15/15) + tap-format + wire_codes — no regressions Companion to tap-core PR #52 §2A: classifyOpFailure correctly routes peer_unreachable / tab_closed to typed UserActionRequired, but the RIGHT fix for cross-origin tab pollution is to never let it happen in the first place — substrate-side handling. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- extension/background.js | 28 +++++++--- extension/test/self-heal.test.mjs | 86 ++++++++++++++++++++++++++++--- 2 files changed, 101 insertions(+), 13 deletions(-) diff --git a/extension/background.js b/extension/background.js index 74a4760..8dbe8bf 100644 --- a/extension/background.js +++ b/extension/background.js @@ -348,16 +348,30 @@ async function handleMethod(method, params = {}, senderTabId = null, { fromDaemo tabId = tab.id } else { const isInternal = current.url?.startsWith('chrome://') || current.url?.startsWith('data:') - if (isInternal) { - // ADR 2026-05-08-failure-detection-phase-2 §2C(ii): - // chrome:// or data:// active tab → open in background tab, - // never clobber. (Daemon path was previously exempted; the - // exemption caused tab_closed errors during dogfood.) + // ADR 2026-05-08-failure-detection-phase-2 §2C(iii) — compute + // target vs current origin to decide tabs.update vs tabs.create. + // Cross-origin nav must NOT clobber an existing tab: the previous + // page may be in a redirect chain (e.g. CF auth) whose state + // would leak into the eval that follows. Same-origin SPA navs + // remain cheap (tabs.update). + let crossOrigin = false + try { + const target = new URL(params.url) + if (current.url) { + const currentParsed = new URL(current.url) + crossOrigin = target.origin !== currentParsed.origin + } + } catch { + // Malformed URL — treat as cross-origin (safer: open new tab) + crossOrigin = true + } + if (isInternal || crossOrigin) { + // chrome:// / data:// active tab (§2C(ii)) OR cross-origin nav + // (§2C(iii)) → open new background tab, never clobber. const tab = await chrome.tabs.create({ url: params.url, active: false }) tabId = tab.id } else { - // Regular-URL tabs: navigate in place via tabs.update. - // chrome://newtab/ is handled by the isInternal branch above. + // Same-origin SPA-style nav: cheap tabs.update. await chrome.tabs.update(tabId, { url: params.url }) } } diff --git a/extension/test/self-heal.test.mjs b/extension/test/self-heal.test.mjs index caad2cf..fb70e7b 100644 --- a/extension/test/self-heal.test.mjs +++ b/extension/test/self-heal.test.mjs @@ -114,17 +114,91 @@ test("no `&& !fromDaemon` exemption in isInternal nav guard", () => { }); test("isInternal guard exists and opens new tab", () => { - // Positive form: must be the strict `if (isInternal) { ... chrome.tabs.create ... }` shape. - // Search for `if (isInternal)` followed by chrome.tabs.create within ~300 chars. - const idx = BG_SRC.search(/if\s*\(\s*isInternal\s*\)/); + // The guard may be `if (isInternal)` or `if (isInternal || )`. + // What matters: isInternal participates in a guard whose body opens + // a new background tab via chrome.tabs.create. + const idx = BG_SRC.search(/if\s*\(\s*isInternal[\s|)]/); assert( idx !== -1, - "Must contain `if (isInternal)` guard (without && !fromDaemon)", + "Must contain `if (isInternal ...)` guard (with isInternal as first condition)", ); - const block = BG_SRC.slice(idx, idx + 400); + const block = BG_SRC.slice(idx, idx + 600); assert( /chrome\.tabs\.create/.test(block), - "isInternal branch must call chrome.tabs.create to open a new tab", + "isInternal-branch must call chrome.tabs.create to open a new tab", + ); +}); + +// ═══════════════════════════════════════════════════════════ +// Rule (iii): origin-mismatch nav → new background tab +// Why: 2026-05-08 dogfood — Cloudflare nav redirected through CF +// auth chain, leaving tab on dash.cloudflare.com/two-factor. Next +// nav (juejin.cn/search) called `chrome.tabs.update(tabId, { url })` +// to navigate same tab, but the eval ran on cloudflare login page — +// silent data corruption. Same applies to parallel batch calls +// sharing a tab. Fix: when daemon-driven nav target origin differs +// from current tab origin, open a new background tab instead of +// clobbering. Same-origin navs continue to use tabs.update (cheap). +// ═══════════════════════════════════════════════════════════ + +console.log("\n -- Rule (iii): origin-mismatch nav → new background tab --\n"); + +test("nav handler computes target origin", () => { + // Source-text proxy: must call new URL(...) on params.url to extract origin. + // Pattern: `new URL(params.url)` followed by `.origin` access OR variable + // assignment that's later compared to current origin. + assert( + /new URL\(params\.url\)/.test(BG_SRC), + "nav handler must construct URL(params.url) to extract target origin", + ); +}); + +test("nav handler compares target.origin vs current.origin", () => { + // Must read .origin from both target and current to compare. + // Looser pattern: at least 2 occurrences of `.origin` near nav case + // (one for target, one for current). + const navStart = BG_SRC.indexOf("case 'nav':"); + assert(navStart !== -1, "nav case handler must exist"); + // Search a 2000-char window starting from `case 'nav':`. + const navBlock = BG_SRC.slice(navStart, navStart + 2000); + const originAccesses = navBlock.match(/\.origin\b/g) || []; + assert( + originAccesses.length >= 2, + `nav handler must access .origin on both target and current to compare; ` + + `found ${originAccesses.length} .origin access(es) in 2000-char window`, + ); +}); + +test("origin mismatch branch opens new tab via chrome.tabs.create", () => { + // Two acceptable idioms: + // (a) inline: if (target.origin !== current.origin) { chrome.tabs.create(...) } + // (b) variable: const cross = a.origin !== b.origin; if (... || cross) { chrome.tabs.create(...) } + // What matters: somewhere in the nav handler there's an `.origin !== + // .origin` comparison whose result drives a chrome.tabs.create branch. + const navStart = BG_SRC.indexOf("case 'nav':"); + const navBlock = BG_SRC.slice(navStart, navStart + 3000); + // Step 1: confirm origin-vs-origin comparison appears. + assert( + /\.origin\s*!==?\s*[a-zA-Z_$.]*\.origin/.test(navBlock), + "nav handler must compare `.origin !== .origin` (cross-origin detection)", + ); + // Step 2: confirm chrome.tabs.create appears within the same nav block. + assert( + /chrome\.tabs\.create/.test(navBlock), + "nav handler must call chrome.tabs.create somewhere", + ); + // Step 3: confirm the result of the origin comparison influences a + // boolean used in the if-guard. Look for either: + // - inline: if (...origin !==...origin...) { ... chrome.tabs.create + // - variable: crossOrigin (or similar) referenced in if + assigned from origin compare + const inlinePattern = + /if\s*\([^)]*\.origin\s*!==?[^)]*\.origin[^)]*\)\s*\{[\s\S]{0,500}chrome\.tabs\.create/; + const variablePattern = + /(\w+)\s*=\s*[^;]*\.origin\s*!==?\s*[a-zA-Z_$.]*\.origin[\s\S]{0,500}if\s*\([^)]*\1[^)]*\)\s*\{[\s\S]{0,500}chrome\.tabs\.create/; + assert( + inlinePattern.test(navBlock) || variablePattern.test(navBlock), + "nav handler must use the origin comparison (inline or via boolean " + + "variable) to gate a chrome.tabs.create branch", ); });