From 4f1d997a009289a92d485e68a0c964d42e7f86cf Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangxiao1098@gmail.com>
Date: Mon, 30 Mar 2026 19:33:37 +0800
Subject: [PATCH] Refine highlight pagination and enforce explicit
 confirmations

---
 AGENTS.md                                     |   3 +-
 bug_report_highlight_any_scrollable.md        |  66 ---
 extension/src/background/index.ts             |  75 ++-
 extension/src/commands/visual-highlight.ts    | 437 +++++++++---------
 extension/src/utils/collision-detection.ts    |  29 +-
 server/agent/tools/base.py                    |  15 -
 server/agent/tools/browser_executor.py        | 112 -----
 .../tests/unit/test_agent_browser_executor.py | 159 ++++---
 server/tests/unit/test_base_classes.py        |  16 +-
 9 files changed, 373 insertions(+), 539 deletions(-)
 delete mode 100644 bug_report_highlight_any_scrollable.md

diff --git a/AGENTS.md b/AGENTS.md
index 0feb22c..d9f94f2 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -169,8 +169,7 @@ OpenBrowser uses Jinja2 templates for agent prompts, enabling dynamic content in
 - Model profile is resolved from session metadata and exposed to prompt rendering as `model_profile` / `small_model`; see `server/agent/manager.py` and `server/agent/tools/prompt_context.py`
 - Tool prompt variants are split by model profile under `server/agent/prompts/small_model/` and `server/agent/prompts/big_model/`
 - Small-model browser guidance intentionally avoids `keywords` fallback and leans harder on same-mode highlight pagination when dense UI may be split across collision-aware pages
-- Observation rendering also differs by model profile: large models keep clickable highlights compact (`... and N clickable elements`), while small models include clickable element HTML in the LLM-visible observation text for extra semantic grounding
-- The small-model clickable-observation branch is implemented in `server/agent/tools/base.py`; the per-conversation `small_model` flag is attached in `server/agent/tools/browser_executor.py`
+- Observation rendering now includes clickable element HTML for all model profiles; prompt differences still vary by model profile
 
 ### Keyword Discipline
 - Highlight pagination remains the default discovery flow for controls and dense UI
diff --git a/bug_report_highlight_any_scrollable.md b/bug_report_highlight_any_scrollable.md
deleted file mode 100644
index 4c6dcc5..0000000
--- a/bug_report_highlight_any_scrollable.md
+++ /dev/null
@@ -1,66 +0,0 @@
-## Bug Report: `highlight_elements` with `type='any'` Never Returns Scrollable Elements
-
-**Date:** 2026-03-22
-**Component:** `extension/src/commands/highlight-detection.injected.js`
-**Severity:** Medium
-**Type:** Logic Error / Dead Code
-
----
-
-### Summary
-
-When calling `highlight_elements(element_type='any')`, scrollable elements are effectively **never returned** due to short-circuit logic in `resolveElementCandidate()`. The scrollable check (lines 948-957) is dead code for `type='any'`.
-
----
-
-### Root Cause
-
-In `resolveElementCandidate(el, 'any')` (lines 837-973):
-
-```javascript
-const clickableCandidate = resolveClickableCandidate(el);  // Traverses UP DOM tree
-
-if (clickableCandidate) {
-    return { type: 'clickable', ... };  // ← EARLY RETURN
-}
-
-// Lines 924-969 below are NEVER reached when clickableCandidate exists:
-if (isInputableCandidate(el))      // dead code
-if (isSelectableCandidate(el))     // dead code
-if (isScrollableCandidate(el))    // dead code ← SCROLLABLE NEVER RETURNED
-if (isHoverableCandidate(el))      // dead code
-```
-
-The function short-circuits on the first clickable ancestor found (by traversing UP the DOM tree), making all subsequent type checks dead code for `type='any'`.
-
----
-
-### Expected Behavior
-
-Per collision-aware pagination design, `type='any'` should return elements across **all types** (clickable, inputable, selectable, scrollable, hoverable), prioritized by `HIGHLIGHT_TYPE_PRIORITY`.
-
----
-
-### Actual Behavior
-
-`type='any'` effectively becomes `type='clickable'`. Elements are only returned if:
-- They themselves are clickable, OR
-- Any **ancestor** has clickable characteristics (pointer cursor + text content)
-
-This causes:
-- Scrollable divs to be misclassified as "clickable" (their ancestor)
-- Hoverable elements to be skipped entirely
-- False positives where a non-interactive ancestor shadows the actual target element
-
----
-
-### Affected Code Paths
-
-- `resolveElementCandidate()` — lines 912-922 (early return on `clickableCandidate`)
-- `resolveClickableCandidate()` — lines 632-689 (DOM tree traversal with `isTightClickableWrapper` checks)
-
----
-
-### Suggested Fix
-
-For `type='any'`, collect candidates across ALL types and use `compareCandidates()` with `HIGHLIGHT_TYPE_PRIORITY` for selection — matching the behavior of the specific-type paths (lines 840-910) rather than short-circuiting.
diff --git a/extension/src/background/index.ts b/extension/src/background/index.ts
index c725b93..ca73359 100644
--- a/extension/src/background/index.ts
+++ b/extension/src/background/index.ts
@@ -45,8 +45,7 @@ import {
 } from '../commands/label-constants';
 import { getOrCreateUUID } from '../uuid/uuidGenerator';
 import {
-  selectCollisionFreePage,
-  calculateTotalPages,
+  paginateCollisionFreeElements,
   sortElementsByVisualOrder,
 } from '../utils/collision-detection';
 import {
@@ -161,35 +160,25 @@ async function runRawScreenshotPrime(options: {
 
 function buildStoredHighlightPages(options: {
   filteredElements: InteractiveElement[];
-  totalPages: number;
   viewportWidth: number;
   viewportHeight: number;
   keywordMode: boolean;
 }): InteractiveElement[][] {
-  const {
-    filteredElements,
-    totalPages,
-    viewportWidth,
-    viewportHeight,
-    keywordMode,
-  } = options;
+  const { filteredElements, viewportWidth, viewportHeight, keywordMode } =
+    options;
 
   if (keywordMode) {
     return [sortElementsByVisualOrder(filteredElements)];
   }
 
-  const pages: InteractiveElement[][] = [];
-  for (let page = 1; page <= totalPages; page++) {
-    const pageElements = selectCollisionFreePage(
-      filteredElements,
-      page,
-      viewportWidth,
-      viewportHeight,
-    );
-    pages.push(sortElementsByVisualOrder(pageElements));
-  }
-
-  return pages;
+  const collisionFreePages = paginateCollisionFreeElements(
+    filteredElements,
+    viewportWidth,
+    viewportHeight,
+  );
+  return collisionFreePages.map((collisionFreePage) =>
+    sortElementsByVisualOrder(collisionFreePage),
+  );
 }
 
 function buildHighlightConsistencyScript(
@@ -389,38 +378,39 @@ async function captureHighlightedPageState(
       `⏱️ [HighlightTrace] background keyword-filter ${Date.now() - keywordFilterStart}ms (keywords=${keywordList.length}, kept=${filteredElements.length}/${allElements.length})`,
     );
 
+    let storedPages: InteractiveElement[][];
     let paginatedElements: InteractiveElement[];
     let totalPages: number;
     let currentPage = page;
 
     if (keywordList.length > 0) {
-      paginatedElements = filteredElements;
-      totalPages = 1;
+      storedPages = buildStoredHighlightPages({
+        filteredElements,
+        viewportWidth: detectedViewportWidth,
+        viewportHeight: detectedViewportHeight,
+        keywordMode: true,
+      });
+      paginatedElements = storedPages[0] ?? [];
+      totalPages = storedPages.length || 1;
       currentPage = 1;
       console.log(
         `🔍 [${logLabel}] Keywords [${keywordList.join(', ')}] matched ${paginatedElements.length} elements (no pagination)`,
       );
     } else {
-      const paginationSelectionStart = Date.now();
-      paginatedElements = selectCollisionFreePage(
+      const paginationBuildStart = Date.now();
+      storedPages = buildStoredHighlightPages({
         filteredElements,
-        page,
-        detectedViewportWidth,
-        detectedViewportHeight,
-      );
-      const paginationSelectionMs = Date.now() - paginationSelectionStart;
-      const totalPagesStart = Date.now();
-      totalPages = calculateTotalPages(
-        filteredElements,
-        detectedViewportWidth,
-        detectedViewportHeight,
-      );
-      const totalPagesMs = Date.now() - totalPagesStart;
+        viewportWidth: detectedViewportWidth,
+        viewportHeight: detectedViewportHeight,
+        keywordMode: false,
+      });
+      paginatedElements = storedPages[Math.max(0, page - 1)] ?? [];
+      totalPages = storedPages.length;
       console.log(
         `📄 [${logLabel}] Page ${page}/${totalPages}, showing ${paginatedElements.length} of ${filteredElements.length} elements`,
       );
       console.log(
-        `⏱️ [HighlightTrace] background pagination select=${paginationSelectionMs}ms totalPages=${totalPagesMs}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
+        `⏱️ [HighlightTrace] background pagination build-pages=${Date.now() - paginationBuildStart}ms (page=${page}, viewport=${detectedViewportWidth}x${detectedViewportHeight})`,
       );
     }
 
@@ -515,13 +505,6 @@ async function captureHighlightedPageState(
       );
     }
 
-    const storedPages = buildStoredHighlightPages({
-      filteredElements,
-      totalPages,
-      viewportWidth: detectedViewportWidth,
-      viewportHeight: detectedViewportHeight,
-      keywordMode: keywordList.length > 0,
-    });
     const displayOrderedElements = storedPages[currentPage - 1] ?? [];
 
     const cacheStoreStart = Date.now();
diff --git a/extension/src/commands/visual-highlight.ts b/extension/src/commands/visual-highlight.ts
index eba80fd..e368d3f 100644
--- a/extension/src/commands/visual-highlight.ts
+++ b/extension/src/commands/visual-highlight.ts
@@ -36,10 +36,8 @@ const LABEL_BG_COLORS: Record<ElementType, string> = {
   any: 'rgba(0, 204, 204, 0.7)',
 };
 
-/**
- * Maximum number of elements to draw at once to prevent performance issues
- */
-const MAX_ELEMENTS = 50;
+const DRAW_YIELD_EVERY = 12;
+const MAX_ENCODED_IMAGE_SIZE = 10 * 1024 * 1024; // 10MB limit for the final data URL
 
 /**
  * Base sizes in CSS pixels (will be multiplied by scale for device pixels)
@@ -47,13 +45,23 @@ const MAX_ELEMENTS = 50;
 const BASE_BOX_PADDING = 1; // Box padding at scale=1 (minimal to avoid covering content)
 const BASE_LINE_WIDTH = 1.5; // Box border width at scale=1 (thinner to reduce overlap)
 
+interface PreparedBoundingBox {
+  element: InteractiveElement;
+  boxX: number;
+  boxY: number;
+  boxWidth: number;
+  boxHeight: number;
+  color: string;
+  bgColor: string;
+}
+
 /**
  * Draw highlights (bounding boxes with labels) on a screenshot
  *
  * @param screenshotDataUrl - Base64 data URL of the screenshot
  * @param elements - Array of interactive elements to highlight
  * @param options - Highlight options (limit, offset, elementTypes filter, scale)
- * @returns Promise resolving to base64 PNG data URL with highlights drawn
+ * @returns Promise resolving to a base64 data URL with highlights drawn
  */
 export async function drawHighlights(
   screenshotDataUrl: string,
@@ -179,11 +187,6 @@ export async function drawHighlights(
       `📐 [VisualHighlight] Using scale: ${scale.toFixed(3)} (provided: ${providedScale}, calculated: ${actualScale})`,
     );
 
-    // Create OffscreenCanvas with same dimensions as screenshot
-    console.log(
-      `🖼️ [VisualHighlight] Screenshot dimensions: ${imageBitmap.width}x${imageBitmap.height}`,
-    );
-
     // Create OffscreenCanvas with same dimensions as screenshot
     const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
     const ctx = canvas.getContext('2d');
@@ -200,41 +203,22 @@ export async function drawHighlights(
     // Close the bitmap to free memory
     imageBitmap.close();
 
-    // Note: Single-type design - caller already filtered by element_type
-    // All elements passed here are of the same type
     console.log(
       `🎨 [VisualHighlight] Drawing ${elements.length} elements (type: ${elements[0]?.type || 'none'}, scale=${scale})`,
     );
 
-    const elementsToDraw = elements;
-
-    // Draw each element's bounding box and label
-    // Scale coordinates from CSS pixels to device pixels
-    for (const element of elementsToDraw) {
-      drawBoundingBox(ctx, element, scale);
-    }
-
-    const resultBlob = await canvas.convertToBlob({ type: 'image/png' });
-
-    // Convert blob to data URL
-    const dataUrl = await new Promise<string>((resolve, reject) => {
-      const reader = new FileReader();
-      reader.onloadend = () => resolve(reader.result as string);
-      reader.onerror = () =>
-        reject(
-          new Error('[VisualHighlight] Failed to convert result to data URL'),
-        );
-      reader.readAsDataURL(resultBlob);
-    });
-
-    // Compress if too large (>10MB)
-    const MAX_SIZE = 10 * 1024 * 1024; // 10MB limit
-    const compressedDataUrl = await compressDataUrl(dataUrl, MAX_SIZE);
+    const preparedBoxes = await prepareBoundingBoxes(elements, scale);
+    await drawBoundingBoxes(ctx, preparedBoxes, scale);
+    await drawLabels(ctx, preparedBoxes, scale);
+    const encodedResult = await encodeCanvasWithCompression(
+      canvas,
+      MAX_ENCODED_IMAGE_SIZE,
+    );
 
     console.log(
-      `✅ [VisualHighlight] Highlights drawn successfully, size: ${compressedDataUrl.length} bytes`,
+      `✅ [VisualHighlight] Highlights drawn successfully, size: ${encodedResult.byteLength} bytes`,
     );
-    return compressedDataUrl;
+    return encodedResult.dataUrl;
   } catch (error) {
     const errorMsg = `[VisualHighlight] Error drawing highlights: ${error instanceof Error ? error.message : error}`;
     console.error(`❌ ${errorMsg}`);
@@ -242,230 +226,237 @@ export async function drawHighlights(
   }
 }
 
-/**
- * Compress a data URL if it exceeds the maximum size
- * Uses JPEG encoding with quality reduction
- *
- * @param dataUrl - Original data URL (PNG or JPEG)
- * @param maxSize - Maximum allowed size in bytes
- * @returns Compressed data URL (JPEG if compression was needed)
- */
-async function compressDataUrl(
-  dataUrl: string,
-  maxSize: number,
-): Promise<string> {
-  if (dataUrl.length <= maxSize) {
-    console.log(
-      `🖼️ [Compress] Image size ${dataUrl.length} bytes is within limit ${maxSize} bytes`,
-    );
-    return dataUrl;
-  }
+async function prepareBoundingBoxes(
+  elements: InteractiveElement[],
+  scale: number,
+): Promise<PreparedBoundingBox[]> {
+  const preparedBoxes: PreparedBoundingBox[] = [];
 
-  console.log(
-    `⚠️ [Compress] Image size ${dataUrl.length} bytes exceeds limit ${maxSize} bytes, compressing...`,
-  );
+  for (let index = 0; index < elements.length; index++) {
+    preparedBoxes.push(buildPreparedBoundingBox(elements[index], scale));
 
-  // Check if OffscreenCanvas is available
-  if (
-    typeof OffscreenCanvas === 'undefined' ||
-    typeof createImageBitmap === 'undefined'
-  ) {
-    console.warn(
-      '[Compress] OffscreenCanvas not available, returning original image',
-    );
-    return dataUrl;
+    if ((index + 1) % DRAW_YIELD_EVERY === 0) {
+      await yieldToMainThread();
+    }
   }
 
-  try {
-    // Convert data URL to Blob
-    const response = await fetch(dataUrl);
-    const blob = await response.blob();
+  return preparedBoxes;
+}
 
-    // Create ImageBitmap
-    const imageBitmap = await createImageBitmap(blob);
-    const { width, height } = imageBitmap;
+function buildPreparedBoundingBox(
+  element: InteractiveElement,
+  scale: number,
+): PreparedBoundingBox {
+  const { x, y, width, height } = element.bbox;
+  const boxPadding = Math.round(BASE_BOX_PADDING * scale);
+  const boxX = Math.round(x * scale) - boxPadding;
+  const boxY = Math.round(y * scale) - boxPadding;
+  const boxWidth = Math.round(width * scale) + boxPadding * 2;
+  const boxHeight = Math.round(height * scale) + boxPadding * 2;
 
-    console.log(`🖼️ [Compress] Original dimensions: ${width}x${height}`);
+  return {
+    element,
+    boxX,
+    boxY,
+    boxWidth,
+    boxHeight,
+    color: COLORS[element.type] || '#CCCCCC',
+    bgColor: LABEL_BG_COLORS[element.type] || 'rgba(200, 200, 200, 0.7)',
+  };
+}
 
-    // Try different JPEG qualities
-    const qualities = [0.8, 0.7, 0.6, 0.5, 0.4];
+async function drawBoundingBoxes(
+  ctx: OffscreenCanvasRenderingContext2D,
+  preparedBoxes: PreparedBoundingBox[],
+  scale: number,
+): Promise<void> {
+  if (preparedBoxes.length === 0) {
+    return;
+  }
 
-    for (const quality of qualities) {
-      const canvas = new OffscreenCanvas(width, height);
-      const ctx = canvas.getContext('2d');
+  const groupedBoxes = new Map<string, PreparedBoundingBox[]>();
+  for (const preparedBox of preparedBoxes) {
+    const boxes = groupedBoxes.get(preparedBox.color) ?? [];
+    boxes.push(preparedBox);
+    groupedBoxes.set(preparedBox.color, boxes);
+  }
 
-      if (!ctx) {
-        throw new Error('[Compress] Failed to get 2d context');
-      }
+  ctx.lineWidth = BASE_LINE_WIDTH * scale;
 
-      // Fill with white background (for JPEG)
-      ctx.fillStyle = '#FFFFFF';
-      ctx.fillRect(0, 0, width, height);
-      ctx.drawImage(imageBitmap, 0, 0);
-
-      const compressedBlob = await canvas.convertToBlob({
-        type: 'image/jpeg',
-        quality: quality,
-      });
-
-      // Convert to data URL
-      const compressedDataUrl = await new Promise<string>((resolve, reject) => {
-        const reader = new FileReader();
-        reader.onloadend = () => resolve(reader.result as string);
-        reader.onerror = () =>
-          reject(new Error('[Compress] Failed to read compressed blob'));
-        reader.readAsDataURL(compressedBlob);
-      });
+  for (const [color, boxes] of groupedBoxes) {
+    ctx.strokeStyle = color;
+    ctx.beginPath();
 
-      console.log(
-        `🖼️ [Compress] JPEG quality=${quality}: ${compressedDataUrl.length} bytes`,
-      );
+    for (let index = 0; index < boxes.length; index++) {
+      const box = boxes[index];
+      ctx.rect(box.boxX, box.boxY, box.boxWidth, box.boxHeight);
 
-      if (compressedDataUrl.length <= maxSize) {
-        console.log(
-          `✅ [Compress] Compressed to ${compressedDataUrl.length} bytes with quality=${quality}`,
-        );
-        return compressedDataUrl;
+      if ((index + 1) % DRAW_YIELD_EVERY === 0 && index + 1 < boxes.length) {
+        ctx.stroke();
+        ctx.beginPath();
+        await yieldToMainThread();
       }
     }
 
-    // If still too large, try reducing dimensions
-    console.log(
-      `⚠️ [Compress] Quality reduction not enough, trying to reduce dimensions...`,
+    ctx.stroke();
+  }
+}
+
+async function drawLabels(
+  ctx: OffscreenCanvasRenderingContext2D,
+  preparedBoxes: PreparedBoundingBox[],
+  scale: number,
+): Promise<void> {
+  if (preparedBoxes.length === 0) {
+    return;
+  }
+
+  ctx.textBaseline = 'top';
+
+  for (let index = 0; index < preparedBoxes.length; index++) {
+    const preparedBox = preparedBoxes[index];
+    drawLabel(
+      ctx,
+      preparedBox.element.id,
+      preparedBox.boxX,
+      preparedBox.boxY,
+      preparedBox.boxWidth,
+      preparedBox.boxHeight,
+      preparedBox.bgColor,
+      scale,
+      preparedBox.element.labelPosition || 'above',
     );
 
-    const scales = [0.75, 0.5, 0.4];
-    for (const scale of scales) {
-      const newWidth = Math.round(width * scale);
-      const newHeight = Math.round(height * scale);
+    if ((index + 1) % DRAW_YIELD_EVERY === 0) {
+      await yieldToMainThread();
+    }
+  }
+}
 
-      const canvas = new OffscreenCanvas(newWidth, newHeight);
-      const ctx = canvas.getContext('2d');
+async function encodeCanvasWithCompression(
+  canvas: OffscreenCanvas,
+  maxSize: number,
+): Promise<{ dataUrl: string; byteLength: number }> {
+  const pngBlob = await canvas.convertToBlob({ type: 'image/png' });
+  const pngEstimatedSize = estimateDataUrlLength(pngBlob);
 
-      if (!ctx) {
-        throw new Error('[Compress] Failed to get 2d context');
-      }
+  if (pngEstimatedSize <= maxSize) {
+    console.log(
+      `🖼️ [Compress] Image size ${pngEstimatedSize} bytes is within limit ${maxSize} bytes`,
+    );
+    return buildEncodedResult(pngBlob);
+  }
 
-      ctx.fillStyle = '#FFFFFF';
-      ctx.fillRect(0, 0, newWidth, newHeight);
-      ctx.drawImage(imageBitmap, 0, 0, newWidth, newHeight);
+  console.log(
+    `⚠️ [Compress] Image size ${pngEstimatedSize} bytes exceeds limit ${maxSize} bytes, compressing...`,
+  );
 
-      const compressedBlob = await canvas.convertToBlob({
-        type: 'image/jpeg',
-        quality: 0.6,
-      });
+  const qualities = [0.8, 0.7, 0.6, 0.5, 0.4];
+  for (const quality of qualities) {
+    const compressedBlob = await canvas.convertToBlob({
+      type: 'image/jpeg',
+      quality,
+    });
+    const estimatedSize = estimateDataUrlLength(compressedBlob);
 
-      const compressedDataUrl = await new Promise<string>((resolve, reject) => {
-        const reader = new FileReader();
-        reader.onloadend = () => resolve(reader.result as string);
-        reader.onerror = () =>
-          reject(new Error('[Compress] Failed to read compressed blob'));
-        reader.readAsDataURL(compressedBlob);
-      });
+    console.log(
+      `🖼️ [Compress] JPEG quality=${quality}: ${estimatedSize} bytes`,
+    );
 
+    if (estimatedSize <= maxSize) {
       console.log(
-        `🖼️ [Compress] Scale=${scale} (${newWidth}x${newHeight}): ${compressedDataUrl.length} bytes`,
+        `✅ [Compress] Compressed to ${estimatedSize} bytes with quality=${quality}`,
       );
-
-      if (compressedDataUrl.length <= maxSize) {
-        console.log(
-          `✅ [Compress] Compressed to ${compressedDataUrl.length} bytes with scale=${scale}`,
-        );
-        return compressedDataUrl;
-      }
+      return buildEncodedResult(compressedBlob);
     }
+  }
 
-    // Return the smallest version even if still over limit
-    console.warn(
-      `⚠️ [Compress] Could not compress below ${maxSize} bytes, returning smallest version`,
-    );
+  console.log(
+    '⚠️ [Compress] Quality reduction not enough, trying to reduce dimensions...',
+  );
+
+  const scales = [0.75, 0.5, 0.4];
+  for (const scale of scales) {
+    const scaledCanvas = createScaledCanvas(canvas, scale);
+    const compressedBlob = await scaledCanvas.convertToBlob({
+      type: 'image/jpeg',
+      quality: 0.6,
+    });
+    const estimatedSize = estimateDataUrlLength(compressedBlob);
 
-    // Return 50% scale JPEG with 0.4 quality as final fallback
-    const finalCanvas = new OffscreenCanvas(
-      Math.round(width * 0.5),
-      Math.round(height * 0.5),
+    console.log(
+      `🖼️ [Compress] Scale=${scale} (${scaledCanvas.width}x${scaledCanvas.height}): ${estimatedSize} bytes`,
     );
-    const finalCtx = finalCanvas.getContext('2d');
-    if (finalCtx) {
-      finalCtx.fillStyle = '#FFFFFF';
-      finalCtx.fillRect(0, 0, finalCanvas.width, finalCanvas.height);
-      finalCtx.drawImage(
-        imageBitmap,
-        0,
-        0,
-        finalCanvas.width,
-        finalCanvas.height,
+
+    if (estimatedSize <= maxSize) {
+      console.log(
+        `✅ [Compress] Compressed to ${estimatedSize} bytes with scale=${scale}`,
       );
-      const finalBlob = await finalCanvas.convertToBlob({
-        type: 'image/jpeg',
-        quality: 0.4,
-      });
-      return new Promise((resolve, reject) => {
-        const reader = new FileReader();
-        reader.onloadend = () => resolve(reader.result as string);
-        reader.onerror = () =>
-          reject(new Error('[Compress] Failed to read final blob'));
-        reader.readAsDataURL(finalBlob);
-      });
+      return buildEncodedResult(compressedBlob);
     }
-
-    return dataUrl;
-  } catch (error) {
-    console.error(
-      `❌ [Compress] Compression failed: ${error instanceof Error ? error.message : error}`,
-    );
-    return dataUrl;
   }
+
+  console.warn(
+    `⚠️ [Compress] Could not compress below ${maxSize} bytes, returning smallest version`,
+  );
+
+  const fallbackCanvas = createScaledCanvas(canvas, 0.5);
+  const fallbackBlob = await fallbackCanvas.convertToBlob({
+    type: 'image/jpeg',
+    quality: 0.4,
+  });
+  return buildEncodedResult(fallbackBlob);
 }
 
-/**
- * Draw a bounding box with label for an interactive element
- *
- * @param ctx - Canvas 2D rendering context
- * @param element - Interactive element to draw
- * @param scale - Scale factor to convert CSS pixels to device pixels (default: 1)
- */
-function drawBoundingBox(
-  ctx: OffscreenCanvasRenderingContext2D,
-  element: InteractiveElement,
-  scale: number = 1,
-): void {
-  const color = COLORS[element.type] || '#CCCCCC';
-  const { x, y, width, height } = element.bbox;
+async function buildEncodedResult(
+  blob: Blob,
+): Promise<{ dataUrl: string; byteLength: number }> {
+  const dataUrl = await blobToDataUrl(blob);
+  return {
+    dataUrl,
+    byteLength: dataUrl.length,
+  };
+}
 
-  // Calculate device-pixel values from base CSS sizes
-  const boxPadding = Math.round(BASE_BOX_PADDING * scale);
-  const lineWidth = BASE_LINE_WIDTH * scale;
+function estimateDataUrlLength(blob: Blob): number {
+  const headerLength = `data:${blob.type || 'application/octet-stream'};base64,`
+    .length;
+  return headerLength + Math.ceil(blob.size / 3) * 4;
+}
 
-  // Apply scale to convert CSS pixels to device pixels, then apply padding
-  // x, y are viewport-relative from getBoundingClientRect()
-  const boxX = Math.round(x * scale) - boxPadding;
-  const boxY = Math.round(y * scale) - boxPadding;
-  const boxWidth = Math.round(width * scale) + boxPadding * 2;
-  const boxHeight = Math.round(height * scale) + boxPadding * 2;
+function createScaledCanvas(
+  sourceCanvas: OffscreenCanvas,
+  scale: number,
+): OffscreenCanvas {
+  const scaledWidth = Math.max(1, Math.round(sourceCanvas.width * scale));
+  const scaledHeight = Math.max(1, Math.round(sourceCanvas.height * scale));
+  const scaledCanvas = new OffscreenCanvas(scaledWidth, scaledHeight);
+  const scaledCtx = scaledCanvas.getContext('2d');
+
+  if (!scaledCtx) {
+    throw new Error('[Compress] Failed to get 2d context');
+  }
 
-  console.log(
-    `[VisualHighlight] Drawing bbox for ${element.id}: CSS(${x}, ${y}, ${width}, ${height}) → Device(${boxX}, ${boxY}, ${boxWidth}, ${boxHeight}) scale=${scale}`,
-  );
+  scaledCtx.fillStyle = '#FFFFFF';
+  scaledCtx.fillRect(0, 0, scaledWidth, scaledHeight);
+  scaledCtx.drawImage(sourceCanvas, 0, 0, scaledWidth, scaledHeight);
+  return scaledCanvas;
+}
 
-  // Draw bounding box
-  ctx.strokeStyle = color;
-  ctx.lineWidth = lineWidth;
-  ctx.strokeRect(boxX, boxY, boxWidth, boxHeight);
+async function blobToDataUrl(blob: Blob): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const reader = new FileReader();
+    reader.onloadend = () => resolve(reader.result as string);
+    reader.onerror = () =>
+      reject(
+        new Error('[VisualHighlight] Failed to convert result to data URL'),
+      );
+    reader.readAsDataURL(blob);
+  });
+}
 
-  // Draw element ID label with transparent background
-  const bgColor = LABEL_BG_COLORS[element.type] || 'rgba(200, 200, 200, 0.7)';
-  drawLabel(
-    ctx,
-    element.id,
-    boxX,
-    boxY,
-    boxWidth,
-    boxHeight,
-    bgColor,
-    scale,
-    element.labelPosition || 'above',
-  );
+async function yieldToMainThread(): Promise<void> {
+  await new Promise((resolve) => setTimeout(resolve, 0));
 }
 
 /**
diff --git a/extension/src/utils/collision-detection.ts b/extension/src/utils/collision-detection.ts
index 31c69ca..054abc2 100644
--- a/extension/src/utils/collision-detection.ts
+++ b/extension/src/utils/collision-detection.ts
@@ -206,6 +206,31 @@ export function isLabelWithinViewport(
   );
 }
 
+/**
+ * Build collision-free highlight pages using a constraint-aware greedy
+ * algorithm. It places the most-constrained remaining element first, then
+ * chooses the label position that blocks the fewest later elements.
+ *
+ * @param elements - All elements sorted by priority
+ * @param page - 1-indexed page number
+ * @param viewportWidth - Optional viewport width for boundary checks
+ * @param viewportHeight - Optional viewport height for boundary checks
+ * @returns All collision-free pages up to maxPages when provided
+ */
+export function paginateCollisionFreeElements(
+  elements: InteractiveElement[],
+  viewportWidth?: number,
+  viewportHeight?: number,
+  maxPages?: number,
+): InteractiveElement[][] {
+  return buildCollisionFreePages(
+    elements,
+    viewportWidth,
+    viewportHeight,
+    maxPages,
+  );
+}
+
 /**
  * Select a collision-free page of elements using a constraint-aware greedy
  * algorithm. It places the most-constrained remaining element first, then
@@ -227,7 +252,7 @@ export function selectCollisionFreePage(
     return [];
   }
 
-  const pages = buildCollisionFreePages(
+  const pages = paginateCollisionFreeElements(
     elements,
     viewportWidth,
     viewportHeight,
@@ -245,7 +270,7 @@ export function calculateTotalPages(
   viewportWidth?: number,
   viewportHeight?: number,
 ): number {
-  return buildCollisionFreePages(elements, viewportWidth, viewportHeight)
+  return paginateCollisionFreeElements(elements, viewportWidth, viewportHeight)
     .length;
 }
 
diff --git a/server/agent/tools/base.py b/server/agent/tools/base.py
index 66af7bc..37a330e 100644
--- a/server/agent/tools/base.py
+++ b/server/agent/tools/base.py
@@ -383,14 +383,9 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
             text_parts.append("")
             # Format: id: <html> for each element
             element_descriptions = []
-            clickable_count = 0
-            include_clickable_html = bool(self.small_model)
             for el in self.highlighted_elements:
                 el_id = el.get("id", "unknown")
                 el_type = el.get("type")
-                if el_type == "clickable" and not include_clickable_html:
-                    clickable_count += 1
-                    continue
                 raw_hints = (
                     el.get("interactionHints") or el.get("interaction_hints") or []
                 )
@@ -415,16 +410,6 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
                 else:
                     tag = el.get("tagName", "").upper()
                     element_descriptions.append(f"{display_id} ({tag})")
-            if clickable_count:
-                clickable_label = (
-                    f"{clickable_count} clickable element"
-                    if clickable_count == 1
-                    else f"{clickable_count} clickable elements"
-                )
-                if element_descriptions:
-                    element_descriptions.append(f"... and {clickable_label}")
-                else:
-                    element_descriptions.append(clickable_label)
             text_parts.append("\n".join(element_descriptions))
             text_parts.append("")
 
diff --git a/server/agent/tools/browser_executor.py b/server/agent/tools/browser_executor.py
index 778871d..41be4bb 100644
--- a/server/agent/tools/browser_executor.py
+++ b/server/agent/tools/browser_executor.py
@@ -12,7 +12,6 @@
 """
 
 import asyncio
-from collections import OrderedDict
 import logging
 import threading
 from typing import Any, Dict, Optional, Union
@@ -52,26 +51,7 @@
 
 logger = logging.getLogger(__name__)
 
-CONFIRMED_ACTION_ID_CACHE_SIZE = 10
 ELEMENT_HTML_CACHE_MISS_PLACEHOLDER = "<element not found in cache>"
-VISUAL_SAFE_ELEMENT_ID_LENGTH = 3
-VISUAL_SAFE_ELEMENT_ID_CHAR_MAP = {
-    "0": "O",
-    "o": "O",
-    "O": "O",
-    "i": "1",
-    "I": "1",
-    "l": "1",
-    "L": "1",
-    "z": "2",
-    "Z": "2",
-    "s": "5",
-    "S": "5",
-    "g": "6",
-    "G": "6",
-    "b": "8",
-    "B": "8",
-}
 
 # Global registry for shared BrowserExecutor instances per conversation
 # Key: conversation_id (str), Value: BrowserExecutor instance
@@ -121,8 +101,6 @@ def __init__(self):
         self.conversation_id = None
         # Pending confirmations per conversation for 2PC actions.
         self.pending_confirmations: Dict[str, Dict[str, Any]] = {}
-        # Recently confirmed element targets keyed by action_type then element_id.
-        self.confirmed_action_id_lru: Dict[str, Dict[str, OrderedDict[str, None]]] = {}
 
     def _uses_small_model(self) -> bool:
         """Whether the active conversation uses the small-model profile."""
@@ -380,18 +358,6 @@ def _execute_element_interaction_action(
         if action_type == "click":
             if not action.element_id:
                 raise ValueError("click requires element_id parameter")
-            if self._has_confirmed_action_element_id("click", action.element_id):
-                command = ClickElementCommand(
-                    element_id=action.element_id,
-                    conversation_id=self.conversation_id,
-                    tab_id=action.tab_id,
-                )
-                result_dict = self._execute_element_command(command, "click element")
-                return self._build_observation_from_result(
-                    result_dict,
-                    f"Clicked element: {action.element_id}",
-                    element_id=action.element_id,
-                )
             element_preview = self._get_element_full_html(action.element_id, "click")
             full_html = element_preview[0]
             screenshot = element_preview[1]
@@ -504,21 +470,6 @@ def _execute_element_interaction_action(
                 raise ValueError("keyboard_input requires element_id parameter")
             if not action.text:
                 raise ValueError("keyboard_input requires text parameter")
-            if self._has_confirmed_action_element_id(
-                "keyboard_input", action.element_id
-            ):
-                command = KeyboardInputCommand(
-                    element_id=action.element_id,
-                    text=action.text,
-                    conversation_id=self.conversation_id,
-                    tab_id=action.tab_id,
-                )
-                result_dict = self._execute_element_command(command, "input text")
-                return self._build_observation_from_result(
-                    result_dict,
-                    f"Input text to element: {action.element_id}",
-                    element_id=action.element_id,
-                )
             element_preview = self._get_element_full_html(
                 action.element_id, "keyboard_input"
             )
@@ -605,7 +556,6 @@ def _execute_element_interaction_action(
             if not result_dict or not result_dict.get("success"):
                 ext_error = self._extract_result_error(result_dict)
                 raise RuntimeError(f"Failed to click element: {ext_error}")
-            self._remember_confirmed_action_element_id("click", pending_element_id)
             message = f"Confirmed and clicked element: {pending_element_id}"
             self._clear_pending_confirmation()
             return self._build_observation_from_result(
@@ -636,9 +586,6 @@ def _execute_element_interaction_action(
             if not result_dict or not result_dict.get("success"):
                 ext_error = self._extract_result_error(result_dict)
                 raise RuntimeError(f"Failed to input text: {ext_error}")
-            self._remember_confirmed_action_element_id(
-                "keyboard_input", pending_element_id
-            )
             message = f"Confirmed and input text to element: {pending_element_id}"
             self._clear_pending_confirmation()
             return self._build_observation_from_result(
@@ -727,24 +674,6 @@ def _get_pending_confirmation(self) -> Optional[Dict[str, Any]]:
         """Get pending confirmation for current conversation"""
         return self.pending_confirmations.get(self.conversation_id)
 
-    def _normalize_confirmed_action_element_id(
-        self, element_id: Optional[str]
-    ) -> Optional[str]:
-        """Normalize cached element ids used for repeat-confirmation shortcuts."""
-        if not isinstance(element_id, str):
-            return None
-
-        compact = element_id.strip().replace(" ", "")
-        if not compact:
-            return None
-
-        if len(compact) != VISUAL_SAFE_ELEMENT_ID_LENGTH or not compact.isalnum():
-            return compact
-
-        return "".join(
-            VISUAL_SAFE_ELEMENT_ID_CHAR_MAP.get(char, char.upper()) for char in compact
-        )
-
     def _build_element_id_resolution_note(
         self,
         requested_element_id: Optional[str],
@@ -764,47 +693,6 @@ def _build_element_id_resolution_note(
             f"'{resolved_element_id}'."
         )
 
-    def _get_confirmed_action_element_id_lru(
-        self, action_type: str
-    ) -> OrderedDict[str, None]:
-        """Get or create the confirmed-action element-id LRU cache for current conversation."""
-        conversation_lru = self.confirmed_action_id_lru.setdefault(
-            self.conversation_id, {}
-        )
-        return conversation_lru.setdefault(action_type, OrderedDict())
-
-    def _has_confirmed_action_element_id(
-        self, action_type: str, element_id: Optional[str]
-    ) -> bool:
-        """Return whether this exact element id was recently confirmed for the action."""
-        normalized = self._normalize_confirmed_action_element_id(element_id)
-        if normalized is None:
-            return False
-
-        lru = self._get_confirmed_action_element_id_lru(action_type)
-        if normalized not in lru:
-            return False
-
-        lru.move_to_end(normalized)
-        return True
-
-    def _remember_confirmed_action_element_id(
-        self, action_type: str, element_id: Optional[str]
-    ) -> None:
-        """Record a confirmed-action element id in the per-conversation LRU cache."""
-        normalized = self._normalize_confirmed_action_element_id(element_id)
-        if normalized is None:
-            return
-
-        lru = self._get_confirmed_action_element_id_lru(action_type)
-        if normalized in lru:
-            lru.move_to_end(normalized)
-        else:
-            lru[normalized] = None
-
-        while len(lru) > CONFIRMED_ACTION_ID_CACHE_SIZE:
-            lru.popitem(last=False)
-
     def _extract_result_error(
         self, result_dict: Optional[Dict[str, Any]], default: str = "Unknown error"
     ) -> str:
diff --git a/server/tests/unit/test_agent_browser_executor.py b/server/tests/unit/test_agent_browser_executor.py
index 2e93542..db9a669 100644
--- a/server/tests/unit/test_agent_browser_executor.py
+++ b/server/tests/unit/test_agent_browser_executor.py
@@ -8,7 +8,6 @@
 from server.agent.tools.element_interaction_tool import ElementInteractionAction
 from server.agent.tools.highlight_tool import HighlightAction
 from server.models.commands import (
-    ClickElementCommand,
     HoverElementCommand,
     SwipeElementCommand,
 )
@@ -264,31 +263,25 @@ def fake_execute(command):
     assert executor._get_pending_confirmation() is None
 
 
-def test_repeat_click_with_confirmed_element_id_executes_without_pending_confirmation(
+def test_click_always_requires_pending_confirmation_even_after_prior_confirm(
     monkeypatch,
 ) -> None:
     executor = BrowserExecutor()
     executor.conversation_id = "conv-repeat-click"
-    executor._remember_confirmed_action_element_id("click", "15")
 
     monkeypatch.setattr(
         executor,
         "_get_element_full_html",
-        lambda *args, **kwargs: (_ for _ in ()).throw(
-            AssertionError("should not fetch confirmation preview")
+        lambda element_id, intended_action=None: (
+            "<button>Save</button>",
+            "data:image/png;base64,pending",
         ),
     )
-
-    captured = {}
-
-    def fake_execute(command):
-        captured["command"] = command
-        return {
-            "success": True,
-            "data": {"screenshot": "data:image/png;base64,clicked-direct"},
-        }
-
-    monkeypatch.setattr(executor, "_execute_command_sync", fake_execute)
+    monkeypatch.setattr(
+        executor,
+        "_execute_command_sync",
+        lambda command: (_ for _ in ()).throw(AssertionError("should stay in 2PC")),
+    )
 
     observation = executor._execute_action_sync(
         ElementInteractionAction(
@@ -300,12 +293,10 @@ def fake_execute(command):
     )
 
     assert observation.success is True
-    assert observation.message == "Clicked element: 15"
-    assert observation.pending_confirmation is None
-    assert isinstance(captured["command"], ClickElementCommand)
-    assert captured["command"].element_id == "15"
-    assert captured["command"].tab_id == 456
-    assert executor._get_pending_confirmation() is None
+    assert observation.message == "Click action pending confirmation for element: 15"
+    assert observation.pending_confirmation is not None
+    assert observation.pending_confirmation["action_type"] == "click"
+    assert observation.pending_confirmation["element_id"] == "15"
 
 
 def test_unconfirmed_click_element_id_still_enters_pending_confirmation(
@@ -344,53 +335,25 @@ def test_unconfirmed_click_element_id_still_enters_pending_confirmation(
     )
 
 
-def test_confirmed_action_element_id_lru_keeps_only_most_recent_ten_entries() -> None:
-    executor = BrowserExecutor()
-    executor.conversation_id = "conv-repeat-lru"
-
-    for i in range(11):
-        executor._remember_confirmed_action_element_id("click", f"id-{i}")
-
-    lru = executor._get_confirmed_action_element_id_lru("click")
-
-    assert len(lru) == 10
-    assert "id-0" not in lru
-    assert "id-10" in lru
-
-
-def test_confirmed_action_element_id_normalization_only_applies_to_visual_ids() -> None:
-    executor = BrowserExecutor()
-
-    assert executor._normalize_confirmed_action_element_id("D02") == "DO2"
-    assert executor._normalize_confirmed_action_element_id(" d o 2 ") == "DO2"
-    assert executor._normalize_confirmed_action_element_id("id-10") == "id-10"
-
-
-def test_repeat_keyboard_input_with_confirmed_element_id_executes_without_pending_confirmation(
+def test_keyboard_input_always_requires_pending_confirmation_even_after_prior_confirm(
     monkeypatch,
 ) -> None:
     executor = BrowserExecutor()
     executor.conversation_id = "conv-repeat-input"
-    executor._remember_confirmed_action_element_id("keyboard_input", "inp123")
 
     monkeypatch.setattr(
         executor,
         "_get_element_full_html",
-        lambda *args, **kwargs: (_ for _ in ()).throw(
-            AssertionError("should not fetch confirmation preview")
+        lambda element_id, intended_action=None: (
+            '<input type="text" />',
+            "data:image/png;base64,input-pending",
         ),
     )
-
-    captured = {}
-
-    def fake_execute(command):
-        captured["command"] = command
-        return {
-            "success": True,
-            "data": {"screenshot": "data:image/png;base64,input-direct"},
-        }
-
-    monkeypatch.setattr(executor, "_execute_command_sync", fake_execute)
+    monkeypatch.setattr(
+        executor,
+        "_execute_command_sync",
+        lambda command: (_ for _ in ()).throw(AssertionError("should stay in 2PC")),
+    )
 
     observation = executor._execute_action_sync(
         ElementInteractionAction(
@@ -403,12 +366,13 @@ def fake_execute(command):
     )
 
     assert observation.success is True
-    assert observation.message == "Input text to element: inp123"
-    assert observation.pending_confirmation is None
-    assert captured["command"].element_id == "inp123"
-    assert captured["command"].text == "hello"
-    assert captured["command"].tab_id == 321
-    assert executor._get_pending_confirmation() is None
+    assert (
+        observation.message
+        == "Keyboard input action pending confirmation for element: inp123"
+    )
+    assert observation.pending_confirmation is not None
+    assert observation.pending_confirmation["action_type"] == "keyboard_input"
+    assert observation.pending_confirmation["element_id"] == "inp123"
 
 
 def test_click_pending_confirmation_records_corrected_element_id(monkeypatch) -> None:
@@ -458,7 +422,6 @@ def test_confirmed_click_element_id_does_not_skip_keyboard_input_confirmation(
 ) -> None:
     executor = BrowserExecutor()
     executor.conversation_id = "conv-input-action-isolation"
-    executor._remember_confirmed_action_element_id("click", "inp123")
 
     monkeypatch.setattr(
         executor,
@@ -492,6 +455,70 @@ def test_confirmed_click_element_id_does_not_skip_keyboard_input_confirmation(
     assert observation.pending_confirmation["action_type"] == "keyboard_input"
 
 
+def test_confirmed_click_does_not_grant_next_click_shortcut(monkeypatch) -> None:
+    executor = BrowserExecutor()
+    executor.conversation_id = "conv-click-no-shortcut"
+    executor._set_pending_confirmation(
+        element_id="btn13",
+        action_type="click",
+        full_html="<button>Save</button>",
+        extra_data={"tab_id": 456},
+    )
+
+    executed_commands = []
+
+    def fake_execute(command):
+        executed_commands.append(command)
+        return {
+            "success": True,
+            "data": {"screenshot": "data:image/png;base64,clicked"},
+        }
+
+    monkeypatch.setattr(executor, "_execute_command_sync", fake_execute)
+
+    confirmed = executor._execute_action_sync(
+        ElementInteractionAction(
+            action="confirm_click",
+            conversation_id="conv-click-no-shortcut",
+        )
+    )
+
+    assert confirmed.success is True
+    assert confirmed.message == "Confirmed and clicked element: btn13"
+    assert len(executed_commands) == 1
+
+    monkeypatch.setattr(
+        executor,
+        "_get_element_full_html",
+        lambda element_id, intended_action=None: (
+            "<button>Save</button>",
+            "data:image/png;base64,pending-again",
+        ),
+    )
+    monkeypatch.setattr(
+        executor,
+        "_execute_command_sync",
+        lambda command: (_ for _ in ()).throw(AssertionError("should stay in 2PC")),
+    )
+
+    next_observation = executor._execute_action_sync(
+        ElementInteractionAction(
+            action="click",
+            element_id="btn13",
+            tab_id=456,
+            conversation_id="conv-click-no-shortcut",
+        )
+    )
+
+    assert next_observation.success is True
+    assert (
+        next_observation.message
+        == "Click action pending confirmation for element: btn13"
+    )
+    assert next_observation.pending_confirmation is not None
+    assert next_observation.pending_confirmation["action_type"] == "click"
+
+
 def test_confirm_keyboard_input_reports_nested_extension_error(monkeypatch) -> None:
     executor = BrowserExecutor()
     executor.conversation_id = "conv-input-error"
diff --git a/server/tests/unit/test_base_classes.py b/server/tests/unit/test_base_classes.py
index a07c3c9..ae358f8 100644
--- a/server/tests/unit/test_base_classes.py
+++ b/server/tests/unit/test_base_classes.py
@@ -81,7 +81,7 @@ def test_browser_state_prefers_tab_id_field_and_attaches_screenshot(self) -> Non
         assert llm_content[0].image_urls == ["data:image/png;base64,abc123"]
         assert "**[99]** Example" in llm_content[1].text
 
-    def test_highlighted_clickable_elements_are_summarized(self) -> None:
+    def test_highlighted_clickable_elements_include_html(self) -> None:
         observation = OpenBrowserObservation(
             success=True,
             element_type="clickable",
@@ -97,9 +97,8 @@ def test_highlighted_clickable_elements_are_summarized(self) -> None:
 
         text = _text_content(observation)
 
-        assert "1 clickable element" in text
-        assert "abc123(clickable):" not in text
-        assert "<button>Submit</button>" not in text
+        assert "1 clickable element" not in text
+        assert "abc123(clickable): <button>Submit</button>" in text
 
     def test_highlighted_elements_render_page_metadata(self) -> None:
         observation = OpenBrowserObservation(
@@ -122,7 +121,9 @@ def test_highlighted_elements_render_page_metadata(self) -> None:
         assert "**Page**: 2/4" in text
         assert "**Total Elements**: 9" in text
 
-    def test_small_model_highlighted_clickable_elements_keep_html(self) -> None:
+    def test_small_model_highlighted_clickable_elements_still_include_html(
+        self,
+    ) -> None:
         observation = OpenBrowserObservation(
             success=True,
             small_model=True,
@@ -201,10 +202,11 @@ def test_highlighted_elements_include_detected_type_suffix(self) -> None:
 
         text = _text_content(observation)
 
+        assert 'vrtbj5(clickable): <div class="search-icon"></div>' in text
         assert "q4w08w(inputable):" in text
-        assert "... and 1 clickable element" in text
+        assert "clickable element" not in text
 
-    def test_small_model_mixed_highlighted_elements_include_clickable_html(
+    def test_small_model_mixed_highlighted_elements_match_default_rendering(
         self,
     ) -> None:
         observation = OpenBrowserObservation(