From 775fe8753b580afe52cb544f0c8c29aa6ee24dbc Mon Sep 17 00:00:00 2001 From: William Chong Date: Mon, 30 Mar 2026 16:33:12 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=9A=80=20Add=20canvas-based=20text=20?= =?UTF-8?q?measurement=20and=20dirty-flag=20dimension=20caching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layer 1 — Dirty-flag caching in IframeView.expand(): - Cache textWidth/textHeight results and only re-measure when content changes (RESIZE/EXPAND events or setLayout calls) - Pre-populate cache from resizeCheck() to avoid double measurement - Speculative expand() in next()/prev() becomes a no-op automatically Layer 2 — TextMeasurer utility (src/utils/text-measurer.ts): - Canvas-based text measurement via measureText() with zero DOM reflow - Intl.Segmenter for word boundaries (CJK, Thai), space/char fallback - Font-keyed width cache, per-parent getComputedStyle cache - Exotic CSS detection (letter-spacing, word-spacing, text-indent) with fallback Layer 3 — Canvas-optimized Mapping: - Binary search on pre-measured cumulative widths in findTextStartRange and findTextEndRange, reducing O(N) per-word reflows to O(1) - Reuses node bounds from findStart/findEnd to avoid redundant reflow - Falls back to DOM Range loop when canvas path can't verify position No impact on Node.js entry point — TextMeasurer is only imported by browser-only modules. Uses OffscreenCanvas (Chrome 69+, Safari 16.4+) and Intl.Segmenter (Chrome 87+, Safari 15.4+) when available, with automatic fallbacks for older browsers. 
--- PROJECT_STATUS.md | 16 ++ README.md | 4 +- src/managers/default/index.ts | 19 ++- src/managers/views/iframe.ts | 33 +++- src/mapping.ts | 105 +++++++++++-- src/utils/text-measurer.ts | 278 ++++++++++++++++++++++++++++++++++ 6 files changed, 435 insertions(+), 20 deletions(-) create mode 100644 src/utils/text-measurer.ts diff --git a/PROJECT_STATUS.md b/PROJECT_STATUS.md index be8fda3..df63aef 100644 --- a/PROJECT_STATUS.md +++ b/PROJECT_STATUS.md @@ -44,9 +44,25 @@ All formats are single-file bundles. `preserveModules` was considered for ESM bu --- +## Performance Optimizations + +### Dirty-flag dimension caching (Layer 1) +`IframeView.expand()` now caches `textWidth()`/`textHeight()` results and only re-measures when content actually changes (RESIZE/EXPAND events). This eliminates redundant synchronous reflows during page navigation, font size changes, and layout recalculations. The RESIZE event handler pre-populates the cache from `resizeCheck()` measurements, cutting the resize chain from 4 reflows to 2. + +### Canvas-based text measurement (Layers 2–3) +`TextMeasurer` (`src/utils/text-measurer.ts`) measures text widths via `CanvasRenderingContext2D.measureText()` instead of DOM Range + `getBoundingClientRect()`. `Mapping.findTextStartRange()` and `findTextEndRange()` use binary search on pre-measured cumulative widths, reducing per-word reflow loops from O(N) to O(1) for text-heavy content. Falls back to DOM measurement for content with exotic CSS (`letter-spacing`, `word-spacing`, `text-indent`). 
+ +### Browser requirements for optimizations +- `OffscreenCanvas`: Chrome 69+, Firefox 105+, Safari 16.4+ (fallback: `HTMLCanvasElement`) +- `Intl.Segmenter`: Chrome 87+, Firefox 125+, Safari 15.4+ (fallback: space/CJK splitting) +- Older browsers get the same behavior as before — all optimizations are transparent fallbacks + +--- + ## Next Steps - **Annotation rendering** — `highlight()`, `underline()`, `mark()` in `annotations.ts` have TODO stubs needing View/Contents integration - **3 remaining TODOs** — CFI range validation (`epubcfi.ts`), CFI validity check and page list fallback (`pagelist.ts`) - **Logger abstraction** — 9 `eslint-disable no-console` suppressions could be replaced with a pluggable logger +- **Canvas page estimation (Layer 4)** — Optional: estimate page counts from text metrics for instant progress display before full `Locations.generate()` completes diff --git a/README.md b/README.md index 03abb6c..e3dc323 100644 --- a/README.md +++ b/README.md @@ -128,10 +128,12 @@ Key classes: | Environment | Import | Notes | |-------------|--------|-------| -| Modern browsers | `@likecoin/epub-ts` | Chrome, Firefox, Safari, Edge | +| Modern browsers | `@likecoin/epub-ts` | Chrome 64+, Firefox 69+, Safari 13.1+, Edge 79+ | | Vite / webpack | `@likecoin/epub-ts` | ESM or CJS | | Node.js 18+ | `@likecoin/epub-ts/node` | Parsing only (no rendering); requires `linkedom` peer dep | +The minimum browser floor is set by `ResizeObserver`. Canvas-based text measurement optimizations additionally use `OffscreenCanvas` and `Intl.Segmenter` when available, with automatic fallbacks for older browsers. 
+ ## What's Changed from epubjs - Build: webpack + Babel → Vite diff --git a/src/managers/default/index.ts b/src/managers/default/index.ts index 85eeb1f..4da1e24 100644 --- a/src/managers/default/index.ts +++ b/src/managers/default/index.ts @@ -5,6 +5,7 @@ import Mapping from "../../mapping"; import Queue from "../../utils/queue"; import Stage from "../helpers/stage"; import Views from "../helpers/views"; +import TextMeasurer from "../../utils/text-measurer"; import { EVENTS } from "../../utils/constants"; import type Layout from "../../layout"; import type Section from "../../section"; @@ -40,6 +41,7 @@ class DefaultViewManager implements IEventEmitter { overflow!: string; layout!: Layout; mapping!: Mapping; + _measurer!: TextMeasurer; location!: ViewLocation[]; isPaginated!: boolean; scrollLeft!: number; @@ -96,6 +98,7 @@ class DefaultViewManager implements IEventEmitter { allowPopups: this.settings.allowPopups }; + this._measurer = new TextMeasurer(); this.rendered = false; } @@ -208,6 +211,10 @@ class DefaultViewManager implements IEventEmitter { this.stage.destroy(); + if (this._measurer) { + this._measurer.destroy(); + } + this.rendered = false; this.__listeners = {}; @@ -750,6 +757,14 @@ class DefaultViewManager implements IEventEmitter { // this.q.clear(); if (this.views) { + // Invalidate canvas measurement caches for views being removed + if (this._measurer) { + this.views.forEach((view: IframeView) => { + if (view?.document?.body) { + this._measurer.invalidate(view.document.body); + } + }); + } this.views.hide(); this.scrollTo(0,0, true); this.views.clear(); @@ -1070,7 +1085,7 @@ class DefaultViewManager implements IEventEmitter { this.viewSettings.layout = layout; - this.mapping = new Mapping(layout.props, this.settings.direction, this.settings.axis); + this.mapping = new Mapping(layout.props, this.settings.direction, this.settings.axis, false, this._measurer); if(this.views) { @@ -1101,7 +1116,7 @@ class DefaultViewManager implements IEventEmitter { 
this.viewSettings.axis = axis; if (this.mapping) { - this.mapping = new Mapping(this.layout.props, this.settings.direction, this.settings.axis); + this.mapping = new Mapping(this.layout.props, this.settings.direction, this.settings.axis, false, this._measurer); } if (this.layout) { diff --git a/src/managers/views/iframe.ts b/src/managers/views/iframe.ts index ce1a1af..0ad5445 100644 --- a/src/managers/views/iframe.ts +++ b/src/managers/views/iframe.ts @@ -46,6 +46,7 @@ class IframeView implements IEventEmitter { _textHeight: number | undefined; _contentWidth: number | undefined; _contentHeight: number | undefined; + _contentDirty!: boolean; _needsReframe!: boolean; _expanding!: boolean; elementBounds!: { width: number; height: number }; @@ -283,6 +284,7 @@ class IframeView implements IEventEmitter { this._textHeight = undefined; this._contentHeight = undefined; } + this._contentDirty = true; this._needsReframe = true; } @@ -349,8 +351,6 @@ class IframeView implements IEventEmitter { let height = this.lockedHeight; let columns; - let _textWidth, _textHeight; - if(!this.iframe || this._expanding) return; this._expanding = true; @@ -361,8 +361,14 @@ class IframeView implements IEventEmitter { } // Expand Horizontally else if(this.settings.axis === "horizontal") { - // Get the width of the text - width = this.contents!.textWidth(); + // Use cached text width when content hasn't changed (avoids synchronous reflow) + if (!this._contentDirty && this._textWidth !== undefined) { + width = this._textWidth; + } else { + width = this.contents!.textWidth(); + this._textWidth = width; + this._contentDirty = false; + } if (width % this.layout.pageWidth > 0) { width = Math.ceil(width / this.layout.pageWidth) * this.layout.pageWidth; @@ -380,7 +386,15 @@ class IframeView implements IEventEmitter { } // Expand Vertically else if(this.settings.axis === "vertical") { - height = this.contents!.textHeight(); + // Use cached text height when content hasn't changed (avoids synchronous 
reflow) + if (!this._contentDirty && this._textHeight !== undefined) { + height = this._textHeight; + } else { + height = this.contents!.textHeight(); + this._textHeight = height; + this._contentDirty = false; + } + if (this.settings.flow === "paginated" && height % this.layout.height > 0) { height = Math.ceil(height / this.layout.height) * this.layout.height; @@ -505,6 +519,7 @@ class IframeView implements IEventEmitter { this.contents.on(EVENTS.CONTENTS.EXPAND, () => { if(this.displayed && this.iframe) { + this._contentDirty = true; this.expand(); if (this.contents) { this.layout.format(this.contents); @@ -512,8 +527,13 @@ class IframeView implements IEventEmitter { } }); - this.contents.on(EVENTS.CONTENTS.RESIZE, (_e: { width: number; height: number }) => { + this.contents.on(EVENTS.CONTENTS.RESIZE, (e: { width: number; height: number }) => { if(this.displayed && this.iframe) { + // Pre-populate cache with values already measured by resizeCheck(), + // avoiding a redundant reflow when expand() runs next + this._textWidth = e.width; + this._textHeight = e.height; + this._contentDirty = false; this.expand(); if (this.contents) { this.layout.format(this.contents); @@ -529,6 +549,7 @@ class IframeView implements IEventEmitter { if (this.contents) { this.layout.format(this.contents); + this._contentDirty = true; this.expand(); } } diff --git a/src/mapping.ts b/src/mapping.ts index f7d687a..26000ed 100644 --- a/src/mapping.ts +++ b/src/mapping.ts @@ -3,6 +3,8 @@ import { nodeBounds } from "./utils/core"; import type { EpubCFIPair, RangePair, LayoutProps } from "./types"; import type IframeView from "./managers/views/iframe"; import type Contents from "./contents"; +import type TextMeasurer from "./utils/text-measurer"; +import type { PreparedNode } from "./utils/text-measurer"; /** * Map text locations to CFI ranges @@ -17,12 +19,14 @@ class Mapping { horizontal: boolean; direction: string; _dev: boolean; + _measurer: TextMeasurer | null; - constructor(layout: 
LayoutProps, direction?: string, axis?: string, dev: boolean = false) { + constructor(layout: LayoutProps, direction?: string, axis?: string, dev: boolean = false, measurer?: TextMeasurer) { this.layout = layout; this.horizontal = (axis === "horizontal") ? true : false; this.direction = direction || "ltr"; this._dev = dev; + this._measurer = measurer || null; } /** @@ -133,6 +137,7 @@ class Mapping { let $el; let found; let $prev = root; + let lastElPos: DOMRect | undefined; while (stack.length) { @@ -143,6 +148,7 @@ class Mapping { const elPos = nodeBounds(node); + lastElPos = elPos; if (this.horizontal && this.direction === "ltr") { @@ -193,13 +199,13 @@ class Mapping { }); if(found) { - return this.findTextStartRange(found, start, end); + return this.findTextStartRange(found, start, end, lastElPos); } } // Return last element - return this.findTextStartRange($prev, start, end); + return this.findTextStartRange($prev, start, end, lastElPos); } /** @@ -215,6 +221,7 @@ class Mapping { let $el; let $prev = root; let found; + let lastElPos: DOMRect | undefined; while (stack.length) { @@ -225,6 +232,7 @@ class Mapping { let left, right, top, bottom; const elPos = nodeBounds(node); + lastElPos = elPos; if (this.horizontal && this.direction === "ltr") { @@ -275,24 +283,90 @@ class Mapping { if(found){ - return this.findTextEndRange(found, start, end); + return this.findTextEndRange(found, start, end, lastElPos); } } // end of chapter - return this.findTextEndRange($prev, start, end); + return this.findTextEndRange($prev, start, end, lastElPos); + } + + /** + * Try to prepare a text node's root for canvas-based measurement. + * Returns the PreparedNode for this text node, or null if not available. 
+ * @private + */ + private _canvasPrepare(node: Node): PreparedNode | null { + if (!this._measurer || node.nodeType !== Node.TEXT_NODE) return null; + + const textNode = node as Text; + const root = textNode.parentElement; + if (!root) return null; + + const win = root.ownerDocument?.defaultView; + if (!win) return null; + + if (this._measurer.hasExoticCSS(textNode, win)) return null; + + return this._measurer.prepare(root.ownerDocument.body, win) + .find(p => p.node === textNode) || null; + } + + /** + * Canvas fast path: use binary search on pre-measured cumulative widths + * to find a Range at the target position, then verify with one getBoundingClientRect. + * Returns the Range if verification passes, or null to fall through to DOM loop. + * @private + */ + private _canvasFindRange( + node: Node, nodePos: DOMRect, target: number, verifyFn: (pos: DOMRect) => boolean + ): Range | null { + const prepared = this._canvasPrepare(node); + if (!prepared || prepared.segments.length === 0) return null; + + const textNode = node as Text; + const nodeStart = this.horizontal + ? (this.direction === "rtl" ? nodePos.right : nodePos.left) + : nodePos.top; + const relativeTarget = this.direction === "rtl" ? nodeStart - target : target - nodeStart; + + if (relativeTarget < 0) return null; + + const offset = this._measurer!.findOffsetAtPosition(prepared.segments, relativeTarget); + const doc = textNode.ownerDocument!; + const range = doc.createRange(); + const safeOffset = Math.min(offset, textNode.data.length); + range.setStart(textNode, safeOffset); + range.setEnd(textNode, Math.min(safeOffset + 1, textNode.data.length)); + + const pos = range.getBoundingClientRect(); + return verifyFn(pos) ? 
range : null; } /** * Find Text Start Range * @private - * @param {Node} root root node + * @param {Node} node text node * @param {number} start position to start at * @param {number} end position to end at + * @param {DOMRect} [nodePos] pre-computed node bounds from findStart (avoids redundant reflow) * @return {Range} */ - findTextStartRange(node: Node, start: number, end: number): Range { + findTextStartRange(node: Node, start: number, end: number, nodePos?: DOMRect): Range { + // Canvas fast path: reuse nodePos from findStart to avoid a second reflow + if (nodePos) { + const canvasRange = this._canvasFindRange(node, nodePos, start, (pos) => { + const check = this.horizontal + ? (this.direction === "rtl" ? pos.right : pos.left) + : pos.top; + if (this.horizontal && this.direction === "ltr") return check >= start; + if (this.horizontal && this.direction === "rtl") return check <= end; + return check >= start; + }); + if (canvasRange) return canvasRange; + } + const ranges = this.splitTextNodeIntoRanges(node); let range; let pos; @@ -326,8 +400,6 @@ class Mapping { } - // prev = range; - } return ranges[0]!; @@ -336,12 +408,23 @@ class Mapping { /** * Find Text End Range * @private - * @param {Node} root root node + * @param {Node} node text node * @param {number} start position to start at * @param {number} end position to end at + * @param {DOMRect} [nodePos] pre-computed node bounds from findEnd (avoids redundant reflow) * @return {Range} */ - findTextEndRange(node: Node, start: number, end: number): Range { + findTextEndRange(node: Node, start: number, end: number, nodePos?: DOMRect): Range { + // Canvas fast path: reuse nodePos from findEnd to avoid a second reflow + if (nodePos) { + const canvasRange = this._canvasFindRange(node, nodePos, end, (pos) => { + if (this.horizontal && this.direction === "ltr") return pos.left <= end && pos.right >= end; + if (this.horizontal && this.direction === "rtl") return pos.right >= start && pos.left <= start; + return 
pos.top <= end && pos.bottom >= end; + }); + if (canvasRange) return canvasRange; + } + const ranges = this.splitTextNodeIntoRanges(node); let prev; let range; diff --git a/src/utils/text-measurer.ts b/src/utils/text-measurer.ts new file mode 100644 index 0000000..c13ca47 --- /dev/null +++ b/src/utils/text-measurer.ts @@ -0,0 +1,278 @@ +/** + * Canvas-based text measurement utility. + * + * Applies pretext's prepare/layout pattern: expensive measurement is done once + * via CanvasRenderingContext2D.measureText(), then layout queries (finding the + * character offset at a pixel position) are pure binary-search arithmetic with + * zero DOM reflow. + * + * Browser-only — not imported by the Node.js entry point. + */ + +export interface TextSegment { + /** The text node this segment belongs to */ + node: Text; + /** Character offset within the text node where this segment starts */ + charOffset: number; + /** The segment text content */ + text: string; + /** Measured width of this segment in pixels */ + width: number; + /** Cumulative width from the start of the parent element */ + cumWidth: number; +} + +export interface PreparedNode { + node: Text; + segments: TextSegment[]; + totalWidth: number; + font: string; +} + +type SegmenterLike = { segment(text: string): Iterable<{ segment: string; index: number }> }; + +// CJK Unicode ranges for per-character segmentation fallback +const CJK_RE = /[\u2E80-\u9FFF\uF900-\uFAFF\uFE30-\uFE4F\u{20000}-\u{2FA1F}]/u; + +/** + * Determine if a CSS property value is "exotic" (non-default), meaning + * canvas measureText() would not account for it. 
+ */ +function hasExoticTextCSS(style: CSSStyleDeclaration): boolean { + const letterSpacing = style.letterSpacing; + if (letterSpacing && letterSpacing !== "normal" && letterSpacing !== "0px") return true; + + const wordSpacing = style.wordSpacing; + if (wordSpacing && wordSpacing !== "normal" && wordSpacing !== "0px") return true; + + const textIndent = style.textIndent; + if (textIndent && textIndent !== "0px") return true; + + return false; +} + +class TextMeasurer { + private _canvas: OffscreenCanvas | HTMLCanvasElement | null = null; + private _ctx: CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D | null = null; + /** font string → (text → width) */ + private _widthCache: Map> = new Map(); + /** parent element → prepared nodes */ + private _preparedCache: WeakMap = new WeakMap(); + /** shared Intl.Segmenter instance (lazy) */ + private _segmenter: SegmenterLike | null = null; + + private getCanvas(): CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D { + if (this._ctx) return this._ctx; + + if (typeof OffscreenCanvas !== "undefined") { + this._canvas = new OffscreenCanvas(1, 1); + this._ctx = this._canvas.getContext("2d")!; + } else { + this._canvas = document.createElement("canvas"); + this._ctx = this._canvas.getContext("2d")!; + } + return this._ctx; + } + + private getSegmenter(): SegmenterLike | null { + if (this._segmenter) return this._segmenter; + if (typeof Intl !== "undefined" && "Segmenter" in Intl) { + this._segmenter = new (Intl as typeof Intl & { Segmenter: new (locale?: string, options?: { granularity: string }) => SegmenterLike }).Segmenter(undefined, { granularity: "word" }); + return this._segmenter; + } + return null; + } + + /** + * Measure a text string with a given CSS font, returning its width in pixels. + * Results are cached per font+text pair. 
+ */ + measureText(text: string, font: string): number { + let fontMap = this._widthCache.get(font); + if (fontMap) { + const cached = fontMap.get(text); + if (cached !== undefined) return cached; + } else { + fontMap = new Map(); + this._widthCache.set(font, fontMap); + } + + const ctx = this.getCanvas(); + ctx.font = font; + const width = ctx.measureText(text).width; + fontMap.set(text, width); + return width; + } + + /** + * Segment text into word-level pieces suitable for measurement. + * Uses Intl.Segmenter when available, falls back to space-splitting + * (with per-character splitting for CJK). + */ + segmentText(text: string): { text: string; index: number }[] { + const segmenter = this.getSegmenter(); + if (segmenter) { + const result: { text: string; index: number }[] = []; + for (const seg of segmenter.segment(text)) { + result.push({ text: seg.segment, index: seg.index }); + } + return result; + } + + // Fallback: split on spaces, but split CJK characters individually + const result: { text: string; index: number }[] = []; + let current = ""; + let currentStart = 0; + + for (let i = 0; i < text.length; i++) { + const ch = text[i]!; + if (ch === " ") { + if (current) { + result.push({ text: current, index: currentStart }); + } + result.push({ text: " ", index: i }); + current = ""; + currentStart = i + 1; + } else if (CJK_RE.test(ch)) { + if (current) { + result.push({ text: current, index: currentStart }); + current = ""; + } + result.push({ text: ch, index: i }); + currentStart = i + 1; + } else { + if (!current) currentStart = i; + current += ch; + } + } + if (current) { + result.push({ text: current, index: currentStart }); + } + return result; + } + + /** + * Prepare phase: measure all text nodes under a root element. + * Returns PreparedNode[] with cumulative widths for binary search. 
+ * + * Skips subtrees with exotic CSS (letter-spacing, word-spacing, text-indent) + * by returning null for those — the caller should fall back to DOM Range measurement. + * + * @param root The container element (usually document.body) + * @param win The window object for getComputedStyle + * @returns PreparedNode[] with entries for measurable text nodes (may be empty) + */ + prepare(root: Element, win: Window): PreparedNode[] { + const cached = this._preparedCache.get(root); + if (cached) return cached; + + const result: PreparedNode[] = []; + const styleCache = new Map(); + const walker = root.ownerDocument.createTreeWalker(root, NodeFilter.SHOW_TEXT, { + acceptNode(node: Node): number { + return (node as Text).data.trim().length > 0 + ? NodeFilter.FILTER_ACCEPT + : NodeFilter.FILTER_REJECT; + } + }); + + let textNode: Text | null; + while ((textNode = walker.nextNode() as Text | null)) { + const parent = textNode.parentElement; + if (!parent) continue; + + let style = styleCache.get(parent); + if (!style) { + style = win.getComputedStyle(parent); + styleCache.set(parent, style); + } + if (hasExoticTextCSS(style)) continue; + + const font = style.font; + if (!font) continue; + + const text = textNode.data; + const segments = this.segmentText(text); + const measured: TextSegment[] = []; + let cumWidth = 0; + + for (const seg of segments) { + const w = this.measureText(seg.text, font); + measured.push({ + node: textNode, + charOffset: seg.index, + text: seg.text, + width: w, + cumWidth: cumWidth + w, + }); + cumWidth += w; + } + + result.push({ + node: textNode, + segments: measured, + totalWidth: cumWidth, + font, + }); + } + + this._preparedCache.set(root, result); + return result; + } + + /** + * Layout phase: find the character offset within a text node at a given + * pixel position using binary search on cumulative widths. 
+ * + * @param segments The TextSegment[] from a PreparedNode + * @param position Target position in pixels (relative to text node start) + * @returns Character offset within the text node + */ + findOffsetAtPosition(segments: TextSegment[], position: number): number { + if (segments.length === 0) return 0; + + // Binary search for the segment whose cumulative width crosses the position + let lo = 0; + let hi = segments.length - 1; + + while (lo < hi) { + const mid = (lo + hi) >>> 1; + if (segments[mid]!.cumWidth < position) { + lo = mid + 1; + } else { + hi = mid; + } + } + + const seg = segments[lo]!; + return seg.charOffset; + } + + /** + * Check if a text node's parent has exotic CSS that prevents canvas measurement. + */ + hasExoticCSS(node: Text, win: Window): boolean { + const parent = node.parentElement; + if (!parent) return true; + return hasExoticTextCSS(win.getComputedStyle(parent)); + } + + /** + * Invalidate cached preparation for a root element. + */ + invalidate(root: Element): void { + this._preparedCache.delete(root); + } + + /** + * Destroy the measurer, releasing the canvas and all caches. 
+ */ + destroy(): void { + this._widthCache.clear(); + this._ctx = null; + this._canvas = null; + this._segmenter = null; + } +} + +export default TextMeasurer; From 0bb24bfb427c73cc487089377531fd24c043809b Mon Sep 17 00:00:00 2001 From: William Chong Date: Mon, 30 Mar 2026 17:09:03 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Address=20review:=20bo?= =?UTF-8?q?und=20width=20cache,=20fix=20segment=20ranges,=20add=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Cap _widthCache at 32 font entries to prevent unbounded growth - Handle getContext("2d") returning null (OffscreenCanvas fallback) - Fix canvas fast path to span full segment instead of single char - Rename findOffsetAtPosition → findSegmentIndex (returns index) - Add WeakMap index for O(1) node lookup - Replace O(N) .find() in _canvasPrepare with getPreparedNode() - Fix stale JSDoc claiming prepare() returns null - Revert README browser versions to unversioned - Add 6 expand() caching tests (dirty flag, horizontal/vertical) - Add 16 TextMeasurer unit tests (segmentation, binary search, etc.) 
--- README.md | 4 +- src/mapping.ts | 23 +++++-- src/utils/text-measurer.ts | 54 ++++++++++----- test/iframe.test.ts | 87 ++++++++++++++++++++++++ test/text-measurer.test.ts | 134 +++++++++++++++++++++++++++++++++++++ 5 files changed, 276 insertions(+), 26 deletions(-) create mode 100644 test/text-measurer.test.ts diff --git a/README.md b/README.md index e3dc323..03abb6c 100644 --- a/README.md +++ b/README.md @@ -128,12 +128,10 @@ Key classes: | Environment | Import | Notes | |-------------|--------|-------| -| Modern browsers | `@likecoin/epub-ts` | Chrome 64+, Firefox 69+, Safari 13.1+, Edge 79+ | +| Modern browsers | `@likecoin/epub-ts` | Chrome, Firefox, Safari, Edge | | Vite / webpack | `@likecoin/epub-ts` | ESM or CJS | | Node.js 18+ | `@likecoin/epub-ts/node` | Parsing only (no rendering); requires `linkedom` peer dep | -The minimum browser floor is set by `ResizeObserver`. Canvas-based text measurement optimizations additionally use `OffscreenCanvas` and `Intl.Segmenter` when available, with automatic fallbacks for older browsers. 
- ## What's Changed from epubjs - Build: webpack + Babel → Vite diff --git a/src/mapping.ts b/src/mapping.ts index 26000ed..11d86ca 100644 --- a/src/mapping.ts +++ b/src/mapping.ts @@ -301,6 +301,11 @@ class Mapping { if (!this._measurer || node.nodeType !== Node.TEXT_NODE) return null; const textNode = node as Text; + + // O(1) lookup if already prepared + const indexed = this._measurer.getPreparedNode(textNode); + if (indexed) return indexed; + const root = textNode.parentElement; if (!root) return null; @@ -309,8 +314,9 @@ class Mapping { if (this._measurer.hasExoticCSS(textNode, win)) return null; - return this._measurer.prepare(root.ownerDocument.body, win) - .find(p => p.node === textNode) || null; + // Prepare the entire document body (populates _nodeIndex for all text nodes) + this._measurer.prepare(root.ownerDocument.body, win); + return this._measurer.getPreparedNode(textNode); } /** @@ -333,12 +339,17 @@ class Mapping { if (relativeTarget < 0) return null; - const offset = this._measurer!.findOffsetAtPosition(prepared.segments, relativeTarget); + const segIdx = this._measurer!.findSegmentIndex(prepared.segments, relativeTarget); + const seg = prepared.segments[segIdx]!; + const nextSeg = prepared.segments[segIdx + 1]; + const segEnd = nextSeg ? nextSeg.charOffset : textNode.data.length; + const doc = textNode.ownerDocument!; const range = doc.createRange(); - const safeOffset = Math.min(offset, textNode.data.length); - range.setStart(textNode, safeOffset); - range.setEnd(textNode, Math.min(safeOffset + 1, textNode.data.length)); + const safeStart = Math.min(seg.charOffset, textNode.data.length); + const safeEnd = Math.min(segEnd, textNode.data.length); + range.setStart(textNode, safeStart); + range.setEnd(textNode, safeEnd); const pos = range.getBoundingClientRect(); return verifyFn(pos) ? 
range : null; diff --git a/src/utils/text-measurer.ts b/src/utils/text-measurer.ts index c13ca47..f9ffac5 100644 --- a/src/utils/text-measurer.ts +++ b/src/utils/text-measurer.ts @@ -51,26 +51,31 @@ function hasExoticTextCSS(style: CSSStyleDeclaration): boolean { return false; } +/** Max number of font entries in the width cache before eviction */ +const MAX_WIDTH_CACHE_FONTS = 32; + class TextMeasurer { private _canvas: OffscreenCanvas | HTMLCanvasElement | null = null; private _ctx: CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D | null = null; - /** font string → (text → width) */ + /** font string → (text → width), bounded to MAX_WIDTH_CACHE_FONTS entries */ private _widthCache: Map> = new Map(); /** parent element → prepared nodes */ private _preparedCache: WeakMap = new WeakMap(); + /** text node → prepared node, for O(1) lookup in _canvasPrepare */ + private _nodeIndex: WeakMap = new WeakMap(); /** shared Intl.Segmenter instance (lazy) */ private _segmenter: SegmenterLike | null = null; - private getCanvas(): CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D { + private getCanvas(): CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D | null { if (this._ctx) return this._ctx; if (typeof OffscreenCanvas !== "undefined") { this._canvas = new OffscreenCanvas(1, 1); - this._ctx = this._canvas.getContext("2d")!; - } else { - this._canvas = document.createElement("canvas"); - this._ctx = this._canvas.getContext("2d")!; + this._ctx = this._canvas.getContext("2d"); + if (this._ctx) return this._ctx; } + this._canvas = document.createElement("canvas"); + this._ctx = this._canvas.getContext("2d"); return this._ctx; } @@ -93,11 +98,17 @@ class TextMeasurer { const cached = fontMap.get(text); if (cached !== undefined) return cached; } else { + // Evict oldest font entry if cache is full + if (this._widthCache.size >= MAX_WIDTH_CACHE_FONTS) { + const oldest = this._widthCache.keys().next().value; + if (oldest !== undefined) 
this._widthCache.delete(oldest); + } fontMap = new Map(); this._widthCache.set(font, fontMap); } const ctx = this.getCanvas(); + if (!ctx) return 0; ctx.font = font; const width = ctx.measureText(text).width; fontMap.set(text, width); @@ -155,8 +166,9 @@ class TextMeasurer { * Prepare phase: measure all text nodes under a root element. * Returns PreparedNode[] with cumulative widths for binary search. * - * Skips subtrees with exotic CSS (letter-spacing, word-spacing, text-indent) - * by returning null for those — the caller should fall back to DOM Range measurement. + * Text nodes whose parent has exotic CSS (letter-spacing, word-spacing, + * text-indent) are skipped — the caller should fall back to DOM Range + * measurement for those. * * @param root The container element (usually document.body) * @param win The window object for getComputedStyle @@ -208,12 +220,14 @@ class TextMeasurer { cumWidth += w; } - result.push({ + const preparedNode: PreparedNode = { node: textNode, segments: measured, totalWidth: cumWidth, font, - }); + }; + result.push(preparedNode); + this._nodeIndex.set(textNode, preparedNode); } this._preparedCache.set(root, result); @@ -221,17 +235,16 @@ class TextMeasurer { } /** - * Layout phase: find the character offset within a text node at a given - * pixel position using binary search on cumulative widths. + * Layout phase: find the segment index at a given pixel position + * using binary search on cumulative widths. 
* * @param segments The TextSegment[] from a PreparedNode * @param position Target position in pixels (relative to text node start) - * @returns Character offset within the text node + * @returns Index into the segments array */ - findOffsetAtPosition(segments: TextSegment[], position: number): number { + findSegmentIndex(segments: TextSegment[], position: number): number { if (segments.length === 0) return 0; - // Binary search for the segment whose cumulative width crosses the position let lo = 0; let hi = segments.length - 1; @@ -244,8 +257,15 @@ class TextMeasurer { } } - const seg = segments[lo]!; - return seg.charOffset; + return lo; + } + + /** + * Look up a previously prepared text node in O(1). + * Returns null if the node was not prepared (exotic CSS, not yet prepared, etc.). + */ + getPreparedNode(node: Text): PreparedNode | null { + return this._nodeIndex.get(node) || null; } /** diff --git a/test/iframe.test.ts b/test/iframe.test.ts index 61545d6..4e55920 100644 --- a/test/iframe.test.ts +++ b/test/iframe.test.ts @@ -363,6 +363,93 @@ describe("IframeView", () => { expect(view._contentHeight).toBeUndefined(); expect(view._needsReframe).toBe(true); }); + + it("should set _contentDirty to true", () => { + const view = createView(); + view.create(); + view._contentDirty = false; + view.reset(); + expect(view._contentDirty).toBe(true); + }); + }); + + describe("expand()", () => { + it("should use cached _textWidth when _contentDirty is false (horizontal)", () => { + const view = createView(undefined, { axis: "horizontal" }); + view.create(); + view.displayed = true; + view._contentDirty = false; + view._textWidth = 1200; + view.contents = { + textWidth: vi.fn().mockReturnValue(999), + textHeight: vi.fn().mockReturnValue(999), + } as any; + view.expand(); + // Should use cached value, not call textWidth() + expect(view.contents!.textWidth).not.toHaveBeenCalled(); + expect(view._width).toBe(1200); + }); + + it("should measure and cache when _contentDirty is 
true (horizontal)", () => { + const view = createView(undefined, { axis: "horizontal" }); + view.create(); + view.displayed = true; + view._contentDirty = true; + view.contents = { + textWidth: vi.fn().mockReturnValue(1200), + textHeight: vi.fn().mockReturnValue(600), + } as any; + view.expand(); + expect(view.contents!.textWidth).toHaveBeenCalledOnce(); + expect(view._textWidth).toBe(1200); + expect(view._contentDirty).toBe(false); + }); + + it("should use cached _textHeight when _contentDirty is false (vertical)", () => { + const view = createView(undefined, { axis: "vertical" }); + view.create(); + view.displayed = true; + view._contentDirty = false; + view._textHeight = 2000; + view.contents = { + textWidth: vi.fn().mockReturnValue(999), + textHeight: vi.fn().mockReturnValue(999), + } as any; + view.expand(); + expect(view.contents!.textHeight).not.toHaveBeenCalled(); + }); + + it("should measure and cache when _contentDirty is true (vertical)", () => { + const view = createView(undefined, { axis: "vertical" }); + view.create(); + view.displayed = true; + view._contentDirty = true; + view.contents = { + textWidth: vi.fn().mockReturnValue(800), + textHeight: vi.fn().mockReturnValue(2000), + } as any; + view.expand(); + expect(view.contents!.textHeight).toHaveBeenCalledOnce(); + expect(view._textHeight).toBe(2000); + expect(view._contentDirty).toBe(false); + }); + }); + + describe("setLayout()", () => { + it("should mark _contentDirty before expand", () => { + const view = createView(); + view.create(); + view.displayed = true; + view._contentDirty = false; + view.contents = { + textWidth: vi.fn().mockReturnValue(800), + textHeight: vi.fn().mockReturnValue(600), + } as any; + const layout = createMockLayout(); + view.setLayout(layout as any); + // setLayout marks dirty, so expand should re-measure + expect(view.contents!.textWidth).toHaveBeenCalled(); + }); }); describe("reframe()", () => { diff --git a/test/text-measurer.test.ts b/test/text-measurer.test.ts new 
file mode 100644 index 0000000..76024a2 --- /dev/null +++ b/test/text-measurer.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import TextMeasurer from "../src/utils/text-measurer"; + +describe("TextMeasurer", () => { + let measurer: TextMeasurer; + + beforeEach(() => { + measurer = new TextMeasurer(); + }); + + describe("segmentText()", () => { + it("should split Latin text on spaces", () => { + const segments = measurer.segmentText("hello world"); + const texts = segments.map(s => s.text); + expect(texts).toContain("hello"); + expect(texts).toContain(" "); + expect(texts).toContain("world"); + }); + + it("should segment CJK text", () => { + const segments = measurer.segmentText("你好世界"); + expect(segments.length).toBeGreaterThan(0); + // All original text should be accounted for + expect(segments.map(s => s.text).join("")).toBe("你好世界"); + }); + + it("should handle mixed Latin and CJK", () => { + const segments = measurer.segmentText("hi你好"); + const joined = segments.map(s => s.text).join(""); + expect(joined).toBe("hi你好"); + // Latin part should be its own segment + expect(segments[0]!.text).toBe("hi"); + }); + + it("should return correct character offsets", () => { + const segments = measurer.segmentText("ab cd"); + expect(segments[0]).toEqual({ text: "ab", index: 0 }); + expect(segments[1]).toEqual({ text: " ", index: 2 }); + expect(segments[2]).toEqual({ text: "cd", index: 3 }); + }); + + it("should handle empty string", () => { + expect(measurer.segmentText("")).toEqual([]); + }); + }); + + describe("findSegmentIndex()", () => { + const segments = [ + { node: null as any, charOffset: 0, text: "hello", width: 50, cumWidth: 50 }, + { node: null as any, charOffset: 5, text: " ", width: 10, cumWidth: 60 }, + { node: null as any, charOffset: 6, text: "world", width: 50, cumWidth: 110 }, + ]; + + it("should return 0 for position at start", () => { + expect(measurer.findSegmentIndex(segments, 0)).toBe(0); + }); + + it("should 
find segment at exact boundary", () => { + expect(measurer.findSegmentIndex(segments, 50)).toBe(0); + }); + + it("should find segment past first boundary", () => { + expect(measurer.findSegmentIndex(segments, 51)).toBe(1); + }); + + it("should find last segment for large position", () => { + expect(measurer.findSegmentIndex(segments, 200)).toBe(2); + }); + + it("should return 0 for empty segments", () => { + expect(measurer.findSegmentIndex([], 100)).toBe(0); + }); + }); + + describe("getPreparedNode()", () => { + it("should return null for unprepared nodes", () => { + const textNode = document.createTextNode("test"); + expect(measurer.getPreparedNode(textNode)).toBeNull(); + }); + }); + + describe("hasExoticCSS()", () => { + it("should return true for orphan text node", () => { + const textNode = document.createTextNode("test"); + expect(measurer.hasExoticCSS(textNode, window)).toBe(true); + }); + + it("should return false for normal text in a paragraph", () => { + const p = document.createElement("p"); + const text = document.createTextNode("hello"); + p.appendChild(text); + document.body.appendChild(p); + try { + expect(measurer.hasExoticCSS(text, window)).toBe(false); + } finally { + document.body.removeChild(p); + } + }); + }); + + describe("invalidate()", () => { + it("should clear cached preparation for root", () => { + const root = document.createElement("div"); + const text = document.createTextNode("hello"); + root.appendChild(text); + document.body.appendChild(root); + + try { + const result1 = measurer.prepare(root, window); + const result2 = measurer.prepare(root, window); + expect(result2).toBe(result1); // same reference = cached + + measurer.invalidate(root); + const result3 = measurer.prepare(root, window); + expect(result3).not.toBe(result1); // new array = re-prepared + } finally { + document.body.removeChild(root); + } + }); + }); + + describe("destroy()", () => { + it("should not throw", () => { + expect(() => measurer.destroy()).not.toThrow(); 
+ }); + + it("should clear internal state", () => { + measurer.destroy(); + // Should still work after destroy (re-initializes lazily) + const segments = measurer.segmentText("test"); + expect(segments.length).toBeGreaterThan(0); + }); + }); +}); From 0f1c5138d35a3936962e31fb093ad328917aba00 Mon Sep 17 00:00:00 2001 From: William Chong Date: Mon, 30 Mar 2026 21:45:20 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=90=9B=20Fix=20surrogate=20pairs,=20R?= =?UTF-8?q?TL=20vertical,=20and=20invalidation=20in=20TextMeasurer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Iterate by code point (for...of) in segmentText() fallback to handle astral-plane CJK characters (U+20000+) correctly with UTF-16 offsets - Only apply RTL reversal in _canvasFindRange when horizontal, so vertical layouts with RTL languages don't get negative relativeTarget - invalidate() now clears _nodeIndex entries for all PreparedNodes under the root, so _canvasPrepare re-prepares after DOM changes - destroy() resets _preparedCache and _nodeIndex to new WeakMaps --- src/mapping.ts | 4 +++- src/utils/text-measurer.ts | 26 +++++++++++++++++++------- test/text-measurer.test.ts | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/mapping.ts b/src/mapping.ts index 11d86ca..301be8a 100644 --- a/src/mapping.ts +++ b/src/mapping.ts @@ -335,7 +335,9 @@ class Mapping { const nodeStart = this.horizontal ? (this.direction === "rtl" ? nodePos.right : nodePos.left) : nodePos.top; - const relativeTarget = this.direction === "rtl" ? nodeStart - target : target - nodeStart; + const relativeTarget = (this.horizontal && this.direction === "rtl") + ? 
nodeStart - target + : target - nodeStart; if (relativeTarget < 0) return null; diff --git a/src/utils/text-measurer.ts b/src/utils/text-measurer.ts index f9ffac5..c349d3d 100644 --- a/src/utils/text-measurer.ts +++ b/src/utils/text-measurer.ts @@ -130,31 +130,34 @@ class TextMeasurer { return result; } - // Fallback: split on spaces, but split CJK characters individually + // Fallback: split on spaces, but split CJK characters individually. + // Iterate by code point (for...of) to handle surrogate pairs correctly, + // while tracking UTF-16 index for DOM Range offsets. const result: { text: string; index: number }[] = []; let current = ""; let currentStart = 0; + let i = 0; - for (let i = 0; i < text.length; i++) { - const ch = text[i]!; + for (const ch of text) { if (ch === " ") { if (current) { result.push({ text: current, index: currentStart }); } result.push({ text: " ", index: i }); current = ""; - currentStart = i + 1; + currentStart = i + ch.length; } else if (CJK_RE.test(ch)) { if (current) { result.push({ text: current, index: currentStart }); current = ""; } result.push({ text: ch, index: i }); - currentStart = i + 1; + currentStart = i + ch.length; } else { if (!current) currentStart = i; current += ch; } + i += ch.length; } if (current) { result.push({ text: current, index: currentStart }); @@ -278,10 +281,17 @@ class TextMeasurer { } /** - * Invalidate cached preparation for a root element. + * Invalidate cached preparation for a root element, + * including all per-node index entries under it. 
*/ invalidate(root: Element): void { - this._preparedCache.delete(root); + const cached = this._preparedCache.get(root); + if (cached) { + for (const p of cached) { + this._nodeIndex.delete(p.node); + } + this._preparedCache.delete(root); + } } /** @@ -289,6 +299,8 @@ class TextMeasurer { */ destroy(): void { this._widthCache.clear(); + this._preparedCache = new WeakMap(); + this._nodeIndex = new WeakMap(); this._ctx = null; this._canvas = null; this._segmenter = null; diff --git a/test/text-measurer.test.ts b/test/text-measurer.test.ts index 76024a2..495c845 100644 --- a/test/text-measurer.test.ts +++ b/test/text-measurer.test.ts @@ -42,6 +42,20 @@ describe("TextMeasurer", () => { it("should handle empty string", () => { expect(measurer.segmentText("")).toEqual([]); }); + + it("should handle astral-plane CJK (surrogate pairs) with correct UTF-16 offsets", () => { + // U+20000 is 𠀀 (CJK Unified Ideographs Extension B), encoded as 2 UTF-16 code units + const text = "a𠀀b"; + const segments = measurer.segmentText(text); + const joined = segments.map(s => s.text).join(""); + expect(joined).toBe(text); + // "a" starts at UTF-16 index 0 + expect(segments[0]!.index).toBe(0); + // 𠀀 is 2 code units, so "b" should start at index 3 + const lastSeg = segments[segments.length - 1]!; + expect(lastSeg.text).toContain("b"); + expect(lastSeg.index).toBe(3); + }); }); describe("findSegmentIndex()", () => {