From 6dc16dab7457154b188b25e241c93719d98d79b5 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 14 Mar 2026 14:38:56 -0500 Subject: [PATCH 1/3] feat: screenshot element/region clipping (--clip, --viewport, CSS/@ref) Add element crop (CSS selector or @ref), region clip (--clip x,y,w,h), and viewport-only (--viewport) modes to the screenshot command. Uses Playwright's native locator.screenshot() and page.screenshot({ clip }). Full page remains the default. Includes 10 new tests covering all modes and error paths. --- SKILL.md | 13 ++++- SKILL.md.tmpl | 11 ++++ browse/SKILL.md | 2 +- browse/src/cli.ts | 3 +- browse/src/commands.ts | 2 +- browse/src/meta-commands.ts | 60 +++++++++++++++++++-- browse/test/commands.test.ts | 101 +++++++++++++++++++++++++++++++++++ 7 files changed, 184 insertions(+), 8 deletions(-) diff --git a/SKILL.md b/SKILL.md index 2ecb0be..c90218c 100644 --- a/SKILL.md +++ b/SKILL.md @@ -128,6 +128,17 @@ $B viewport 375x812 # iPhone $B screenshot /tmp/mobile.png $B viewport 1440x900 # Desktop $B screenshot /tmp/desktop.png + +# Element screenshot (crop to specific element) +$B screenshot "#hero-banner" /tmp/hero.png +$B snapshot -i +$B screenshot @e3 /tmp/button.png + +# Region crop +$B screenshot --clip 0,0,800,600 /tmp/above-fold.png + +# Viewport only (no scroll) +$B screenshot --viewport /tmp/viewport.png ``` ### Test file upload @@ -337,7 +348,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | | `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. 
| -| `screenshot [path]` | Save screenshot | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | ### Snapshot | Command | Description | diff --git a/SKILL.md.tmpl b/SKILL.md.tmpl index 5ab56a8..c827f5f 100644 --- a/SKILL.md.tmpl +++ b/SKILL.md.tmpl @@ -102,6 +102,17 @@ $B viewport 375x812 # iPhone $B screenshot /tmp/mobile.png $B viewport 1440x900 # Desktop $B screenshot /tmp/desktop.png + +# Element screenshot (crop to specific element) +$B screenshot "#hero-banner" /tmp/hero.png +$B snapshot -i +$B screenshot @e3 /tmp/button.png + +# Region crop +$B screenshot --clip 0,0,800,600 /tmp/above-fold.png + +# Viewport only (no scroll) +$B screenshot --viewport /tmp/viewport.png ``` ### Test file upload diff --git a/browse/SKILL.md b/browse/SKILL.md index 2694ac4..e383e90 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -227,7 +227,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`. | `diff ` | Text diff between pages | | `pdf [path]` | Save as PDF | | `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. 
| -| `screenshot [path]` | Save screenshot | +| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) | ### Snapshot | Command | Description | diff --git a/browse/src/cli.ts b/browse/src/cli.ts index f8b7902..7d6eacd 100644 --- a/browse/src/cli.ts +++ b/browse/src/cli.ts @@ -283,7 +283,8 @@ Inspection: js | eval | css | attrs console [--clear|--errors] | network [--clear] | dialog [--clear] cookies | storage [set ] | perf is (visible|hidden|enabled|disabled|checked|editable|focused) -Visual: screenshot [path] | pdf [path] | responsive [prefix] +Visual: screenshot [--viewport] [--clip x,y,w,h] [@ref|sel] [path] + pdf [path] | responsive [prefix] Snapshot: snapshot [-i] [-c] [-d N] [-s sel] [-D] [-a] [-o path] [-C] -D/--diff: diff against previous snapshot -a/--annotate: annotated screenshot with ref labels diff --git a/browse/src/commands.ts b/browse/src/commands.ts index 6024c4b..aa86d1f 100644 --- a/browse/src/commands.ts +++ b/browse/src/commands.ts @@ -78,7 +78,7 @@ export const COMMAND_DESCRIPTIONS: Record ' }, diff --git a/browse/src/meta-commands.ts b/browse/src/meta-commands.ts index 8d3f9eb..65608dc 100644 --- a/browse/src/meta-commands.ts +++ b/browse/src/meta-commands.ts @@ -106,11 +106,63 @@ export async function handleMetaCommand( // ─── Visual ──────────────────────────────────────── case 'screenshot': { + // Parse priority: flags (--viewport, --clip) → selector (@ref, CSS) → output path const page = bm.getPage(); - const screenshotPath = args[0] || '/tmp/browse-screenshot.png'; - validateOutputPath(screenshotPath); - await page.screenshot({ path: screenshotPath, fullPage: true }); - return `Screenshot saved: ${screenshotPath}`; + let outputPath = '/tmp/browse-screenshot.png'; + let clipRect: { x: number; y: number; width: number; height: number } | undefined; + let targetSelector: string | undefined; + let viewportOnly = false; + + const remaining: string[] = 
[]; + for (let i = 0; i < args.length; i++) { + if (args[i] === '--viewport') { + viewportOnly = true; + } else if (args[i] === '--clip') { + const coords = args[++i]; + if (!coords) throw new Error('Usage: screenshot --clip x,y,w,h [path]'); + const parts = coords.split(',').map(Number); + if (parts.length !== 4 || parts.some(isNaN)) + throw new Error('Usage: screenshot --clip x,y,width,height — all must be numbers'); + clipRect = { x: parts[0], y: parts[1], width: parts[2], height: parts[3] }; + } else if (args[i].startsWith('--')) { + throw new Error(`Unknown screenshot flag: ${args[i]}`); + } else { + remaining.push(args[i]); + } + } + + // Separate target (selector/@ref) from output path + for (const arg of remaining) { + if (arg.startsWith('@e') || arg.startsWith('@c') || arg.startsWith('.') || arg.startsWith('#') || arg.includes('[')) { + targetSelector = arg; + } else { + outputPath = arg; + } + } + + validateOutputPath(outputPath); + + if (clipRect && targetSelector) { + throw new Error('Cannot use --clip with a selector/ref — choose one'); + } + if (viewportOnly && clipRect) { + throw new Error('Cannot use --viewport with --clip — choose one'); + } + + if (targetSelector) { + const resolved = bm.resolveRef(targetSelector); + const locator = 'locator' in resolved ? resolved.locator : page.locator(resolved.selector); + await locator.screenshot({ path: outputPath, timeout: 5000 }); + return `Screenshot saved (element): ${outputPath}`; + } + + if (clipRect) { + await page.screenshot({ path: outputPath, clip: clipRect }); + return `Screenshot saved (clip ${clipRect.x},${clipRect.y},${clipRect.width},${clipRect.height}): ${outputPath}`; + } + + await page.screenshot({ path: outputPath, fullPage: !viewportOnly }); + return `Screenshot saved${viewportOnly ? 
' (viewport)' : ''}: ${outputPath}`; } case 'pdf': { diff --git a/browse/test/commands.test.ts b/browse/test/commands.test.ts index 1f6ad2f..a3e201d 100644 --- a/browse/test/commands.test.ts +++ b/browse/test/commands.test.ts @@ -315,6 +315,107 @@ describe('Visual', () => { fs.unlinkSync(screenshotPath); }); + test('screenshot --viewport saves viewport-only', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + const p = '/tmp/browse-test-viewport.png'; + const result = await handleMetaCommand('screenshot', ['--viewport', p], bm, async () => {}); + expect(result).toContain('Screenshot saved (viewport)'); + expect(fs.existsSync(p)).toBe(true); + expect(fs.statSync(p).size).toBeGreaterThan(1000); + fs.unlinkSync(p); + }); + + test('screenshot with CSS selector crops to element', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + const p = '/tmp/browse-test-element-css.png'; + const result = await handleMetaCommand('screenshot', ['#title', p], bm, async () => {}); + expect(result).toContain('Screenshot saved (element)'); + expect(fs.existsSync(p)).toBe(true); + expect(fs.statSync(p).size).toBeGreaterThan(100); + fs.unlinkSync(p); + }); + + test('screenshot with @ref crops to element', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + await handleMetaCommand('snapshot', [], bm, async () => {}); + const p = '/tmp/browse-test-element-ref.png'; + const result = await handleMetaCommand('screenshot', ['@e1', p], bm, async () => {}); + expect(result).toContain('Screenshot saved (element)'); + expect(fs.existsSync(p)).toBe(true); + expect(fs.statSync(p).size).toBeGreaterThan(100); + fs.unlinkSync(p); + }); + + test('screenshot --clip crops to region', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + const p = '/tmp/browse-test-clip.png'; + const result = await handleMetaCommand('screenshot', ['--clip', '0,0,100,100', p], bm, async () => {}); + 
expect(result).toContain('Screenshot saved (clip 0,0,100,100)'); + expect(fs.existsSync(p)).toBe(true); + expect(fs.statSync(p).size).toBeGreaterThan(100); + fs.unlinkSync(p); + }); + + test('screenshot --clip + selector throws', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['--clip', '0,0,100,100', '#title'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toContain('Cannot use --clip with a selector/ref'); + } + }); + + test('screenshot --viewport + --clip throws', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['--viewport', '--clip', '0,0,100,100'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toContain('Cannot use --viewport with --clip'); + } + }); + + test('screenshot --clip with invalid coords throws', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['--clip', 'abc'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toContain('all must be numbers'); + } + }); + + test('screenshot unknown flag throws', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['--bogus', '/tmp/foo.png'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toContain('Unknown screenshot flag'); + } + }); + + test('screenshot --viewport still validates path', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['--viewport', '/etc/evil.png'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toContain('Path must be within'); + } + }); + + test('screenshot with 
nonexistent selector throws timeout', async () => { + await handleWriteCommand('goto', [baseUrl + '/basic.html'], bm); + try { + await handleMetaCommand('screenshot', ['.nonexistent-element-xyz'], bm, async () => {}); + expect(true).toBe(false); + } catch (err: any) { + expect(err.message).toBeDefined(); + } + }, 10000); + test('responsive saves 3 screenshots', async () => { await handleWriteCommand('goto', [baseUrl + '/responsive.html'], bm); const prefix = '/tmp/browse-test-resp'; From 57975da4c2c9be5b6b61b6c755c2e7d03c6744d4 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 14 Mar 2026 14:39:02 -0500 Subject: [PATCH 2/3] chore: bump version and changelog (v0.3.7) Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 6 ++++++ VERSION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4833031..75cd404 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 0.3.7 — 2026-03-14 + +### Added +- **Screenshot element/region clipping** — `screenshot` command now supports element crop via CSS selector or @ref (`screenshot "#hero" out.png`, `screenshot @e3 out.png`), region clip (`screenshot --clip x,y,w,h out.png`), and viewport-only mode (`screenshot --viewport out.png`). Uses Playwright's native `locator.screenshot()` and `page.screenshot({ clip })`. Full page remains the default. +- 10 new tests covering all screenshot modes (viewport, CSS, @ref, clip) and error paths (unknown flag, mutual exclusion, invalid coords, path validation, nonexistent selector). 
+ ## 0.3.6 — 2026-03-14 ### Added diff --git a/VERSION b/VERSION index 449d7e7..0f82685 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.6 +0.3.7 From 0baca1a3253ae4d59e790b18936329158eb0e067 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 14 Mar 2026 14:43:01 -0500 Subject: [PATCH 3/3] docs: add screenshot modes to BROWSER.md command reference Co-Authored-By: Claude Opus 4.6 --- BROWSER.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/BROWSER.md b/BROWSER.md index 640bb65..8d0c577 100644 --- a/BROWSER.md +++ b/BROWSER.md @@ -11,7 +11,7 @@ This document covers the command reference and internals of gstack's headless br | Snapshot | `snapshot [-i] [-c] [-d N] [-s sel] [-D] [-a] [-o] [-C]` | Get refs, diff, annotate | | Interact | `click`, `fill`, `select`, `hover`, `type`, `press`, `scroll`, `wait`, `viewport`, `upload` | Use the page | | Inspect | `js`, `eval`, `css`, `attrs`, `is`, `console`, `network`, `dialog`, `cookies`, `storage`, `perf` | Debug and verify | -| Visual | `screenshot`, `pdf`, `responsive` | See what Claude sees | +| Visual | `screenshot [--viewport] [--clip x,y,w,h] [sel\|@ref] [path]`, `pdf`, `responsive` | See what Claude sees | | Compare | `diff ` | Spot differences between environments | | Dialogs | `dialog-accept [text]`, `dialog-dismiss` | Control alert/confirm/prompt handling | | Tabs | `tabs`, `tab`, `newtab`, `closetab` | Multi-page workflows | @@ -92,6 +92,21 @@ No DOM mutation. No injected scripts. Just Playwright's native accessibility API - `--annotate` (`-a`): Injects temporary overlay divs at each ref's bounding box, takes a screenshot with ref labels visible, then removes the overlays. Use `-o ` to control the output path. - `--cursor-interactive` (`-C`): Scans for non-ARIA interactive elements (divs with `cursor:pointer`, `onclick`, `tabindex>=0`) using `page.evaluate`. Assigns `@c1`, `@c2`... refs with deterministic `nth-child` CSS selectors. 
These are elements the ARIA tree misses but users can still click. +### Screenshot modes + +The `screenshot` command supports four modes: + +| Mode | Syntax | Playwright API | +|------|--------|----------------| +| Full page (default) | `screenshot [path]` | `page.screenshot({ fullPage: true })` | +| Viewport only | `screenshot --viewport [path]` | `page.screenshot({ fullPage: false })` | +| Element crop | `screenshot "#sel" [path]` or `screenshot @e3 [path]` | `locator.screenshot()` | +| Region clip | `screenshot --clip x,y,w,h [path]` | `page.screenshot({ clip })` | + +Element crop accepts CSS selectors (`.class`, `#id`, `[attr]`) or `@e`/`@c` refs from `snapshot`. Auto-detection: `@e`/`@c` prefix = ref, `.`/`#` prefix or any argument containing `[` = CSS selector, `--` prefix = flag, everything else = output path. As a consequence, a bare tag selector (e.g. `button`) is treated as the output path and a relative path starting with `.` (e.g. `./shot.png`) is treated as a selector — use an `#id`/`.class`/`[attr]` selector and an output path that neither starts with `.` nor contains `[`. + +Mutual exclusion: `--clip` + selector and `--viewport` + `--clip` both throw errors. Unknown flags (e.g. `--bogus`) also throw. + ### Authentication Each server session generates a random UUID as a bearer token. The token is written to the state file (`.gstack/browse.json`) with chmod 600. Every HTTP request must include `Authorization: Bearer <token>`. This prevents other processes on the machine from controlling the browser.