diff --git a/packages/core/src/docx/__tests__/header-image-repack.test.ts b/packages/core/src/docx/__tests__/header-image-repack.test.ts new file mode 100644 index 00000000..c5030575 --- /dev/null +++ b/packages/core/src/docx/__tests__/header-image-repack.test.ts @@ -0,0 +1,147 @@ +import { describe, test, expect } from 'bun:test'; +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import JSZip from 'jszip'; +import { parseDocx } from '../parser'; +import { repackDocx } from '../rezip'; +import type { Document } from '../../types/document'; +import type { HeaderFooter, Paragraph, Image } from '../../types/content'; + +// 1x1 transparent PNG as a data URL. +const TINY_PNG_DATA_URL = + 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='; + +const FIXTURE_PATH = join( + __dirname, + '..', + '..', + '..', + '..', + '..', + 'e2e', + 'fixtures', + 'titlePg-header-footer.docx' +); + +async function loadFixture(): Promise { + const buffer = readFileSync(FIXTURE_PATH); + return await parseDocx( + buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength) + ); +} + +function findFirstHeader(doc: Document): HeaderFooter | undefined { + const headers = doc.package.headers; + if (!headers || headers.size === 0) return undefined; + return headers.values().next().value; +} + +function findFirstFooter(doc: Document): HeaderFooter | undefined { + const footers = doc.package.footers; + if (!footers || footers.size === 0) return undefined; + return footers.values().next().value; +} + +function insertImageInto(part: HeaderFooter, image: Image): void { + const paragraph: Paragraph = { + type: 'paragraph', + content: [{ type: 'run', formatting: {}, content: [{ type: 'drawing', image }] }], + }; + part.content.push(paragraph); +} + +describe('Header images — rezip round-trip (issue #251)', () => { + test('newly inserted header image is persisted with a valid relationship', async () => { + const doc = await loadFixture(); + + const header = findFirstHeader(doc); + expect(header).toBeDefined(); + + const insertedImage: Image = { + type: 'image', + rId: '', // placeholder — rezip assigns the real rId + src: TINY_PNG_DATA_URL, + size: { width: 96, height: 96 }, + wrap: { type: 'inline' }, + }; + insertImageInto(header!, insertedImage); + + const repacked = await repackDocx(doc, { updateModifiedDate: false }); + + // The inserted image's rId must have been rewritten to a real value. + expect(insertedImage.rId).toMatch(/^rId\d+$/); + + const outZip = await JSZip.loadAsync(repacked); + + // Binary must have been added to word/media/. Capture the new filename. + const mediaFiles = Object.keys(outZip.files).filter((p) => + /^word\/media\/image\d+\.png$/.test(p) + ); + expect(mediaFiles.length).toBeGreaterThan(0); + const newMediaName = mediaFiles[mediaFiles.length - 1].replace(/^word\/media\//, ''); + + // The header's rels file must reference the new image with the assigned rId. + const headerRelsPaths = Object.keys(outZip.files).filter((p) => + /^word\/_rels\/header\d+\.xml\.rels$/.test(p) + ); + expect(headerRelsPaths.length).toBeGreaterThan(0); + + const relsXmls = await Promise.all(headerRelsPaths.map((p) => outZip.file(p)!.async('text'))); + const combinedRels = relsXmls.join('\n'); + expect(combinedRels).toContain(`Id="${insertedImage.rId}"`); + expect(combinedRels).toContain(`Target="media/${newMediaName}"`); + + // The document's main rels file must NOT reference this image (media target + // is a package-wide namespace, so Target identity is the right check — + // rId numbers are scoped per-rels file and can legitimately collide). + const docRels = await outZip.file('word/_rels/document.xml.rels')!.async('text'); + expect(docRels).not.toContain(`Target="media/${newMediaName}"`); + + // The header XML must reference the new rId via r:embed. + const headerXmlPaths = Object.keys(outZip.files).filter((p) => + /^word\/header\d+\.xml$/.test(p) + ); + const headerXmls = await Promise.all(headerXmlPaths.map((p) => outZip.file(p)!.async('text'))); + expect(headerXmls.join('\n')).toContain(`r:embed="${insertedImage.rId}"`); + + // Content types must register the PNG extension. + const contentTypes = await outZip.file('[Content_Types].xml')!.async('text'); + expect(contentTypes).toContain('Extension="png"'); + }); + + test('newly inserted footer image is persisted to the footer rels file', async () => { + const doc = await loadFixture(); + + const footer = findFirstFooter(doc); + expect(footer).toBeDefined(); + + const insertedImage: Image = { + type: 'image', + rId: '', + src: TINY_PNG_DATA_URL, + size: { width: 96, height: 96 }, + wrap: { type: 'inline' }, + }; + insertImageInto(footer!, insertedImage); + + const repacked = await repackDocx(doc, { updateModifiedDate: false }); + expect(insertedImage.rId).toMatch(/^rId\d+$/); + + const outZip = await JSZip.loadAsync(repacked); + const mediaFiles = Object.keys(outZip.files).filter((p) => + /^word\/media\/image\d+\.png$/.test(p) + ); + expect(mediaFiles.length).toBeGreaterThan(0); + const newMediaName = mediaFiles[mediaFiles.length - 1].replace(/^word\/media\//, ''); + + const footerRelsPaths = Object.keys(outZip.files).filter((p) => + /^word\/_rels\/footer\d+\.xml\.rels$/.test(p) + ); + const relsXmls = await Promise.all(footerRelsPaths.map((p) => outZip.file(p)!.async('text'))); + const combinedRels = relsXmls.join('\n'); + expect(combinedRels).toContain(`Target="media/${newMediaName}"`); + + const docRels = await outZip.file('word/_rels/document.xml.rels')!.async('text'); + expect(docRels).not.toContain(`Target="media/${newMediaName}"`); + }); +}); diff --git a/packages/core/src/docx/rezip.ts b/packages/core/src/docx/rezip.ts index 10dbf113..af541ade 100644 --- a/packages/core/src/docx/rezip.ts +++ b/packages/core/src/docx/rezip.ts @@ -31,7 +31,7 @@ import JSZip from 'jszip'; import type { Document } from '../types/document'; -import type { BlockContent, Image, Hyperlink } from '../types/content'; +import type { BlockContent, HeaderFooter, Image, Hyperlink } from '../types/content'; import { serializeDocument } from './serializer/documentSerializer'; import { serializeHeaderFooter } from './serializer/headerFooterSerializer'; import { @@ -168,96 +168,170 @@ function decodeDataUrl(dataUrl: string): { data: ArrayBuffer; extension: string } /** - * Process newly inserted images: add binary data to ZIP, create relationships, - * update content types, and rewrite rIds in the document model so the serializer - * outputs correct references. - * - * Mutates the images' rId fields in-place. + * A DOCX part (body, header, or footer) that owns a rels file and may contain + * newly inserted images/hyperlinks that need to be registered. */ -async function processNewImages( - newImages: Image[], +interface Part { + /** Path to the rels file for this part, e.g. `word/_rels/header1.xml.rels` */ + relsPath: string; + blocks: BlockContent[]; +} + +const EMPTY_RELS_XML = + '\n' + + ''; + +/** + * Resolve the on-disk filename of a header/footer part from its relationship entry. + * Returns e.g. `word/header1.xml`. + */ +function headerFooterFilename(target: string): string { + return target.startsWith('/') ? target.slice(1) : `word/${target}`; +} + +/** + * Enumerate all parts that may contain newly inserted images/hyperlinks: + * the document body plus every header and footer. + */ +function collectParts(doc: Document): Part[] { + const parts: Part[] = [ + { relsPath: 'word/_rels/document.xml.rels', blocks: doc.package.document.content }, + ]; + + const rels = doc.package.relationships; + if (!rels) return parts; + + const addHeaderFooterParts = (map: Map | undefined, type: string) => { + if (!map) return; + for (const [rId, hf] of map.entries()) { + const rel = rels.get(rId); + if (!rel || rel.type !== type || !rel.target) continue; + const filename = headerFooterFilename(rel.target); + const basename = filename.replace(/^word\//, ''); + parts.push({ relsPath: `word/_rels/${basename}.rels`, blocks: hf.content }); + } + }; + + addHeaderFooterParts(doc.package.headers, RELATIONSHIP_TYPES.header); + addHeaderFooterParts(doc.package.footers, RELATIONSHIP_TYPES.footer); + + return parts; +} + +/** + * Read an existing rels file (or return a minimal stub) and normalize the + * self-closing form `` — which Word emits for empty parts — + * to the open/close form so our `.replace('', ...)` append works. + */ +async function readRelsOrStub(zip: JSZip, relsPath: string): Promise { + const file = zip.file(relsPath); + const xml = file ? await file.async('text') : EMPTY_RELS_XML; + return xml.replace(/]*)\/>/, ''); +} + +/** + * Register new image extensions in [Content_Types].xml (idempotent). + */ +async function registerImageExtensions( zip: JSZip, + extensions: Set, compressionLevel: number ): Promise { - if (newImages.length === 0) return; - - // Read existing relationships - const relsPath = 'word/_rels/document.xml.rels'; - const relsFile = zip.file(relsPath); - if (!relsFile) return; - let relsXml = await relsFile.async('text'); - - // Find highest existing rId - let maxId = findMaxRId(relsXml); + if (extensions.size === 0) return; + const ctFile = zip.file('[Content_Types].xml'); + if (!ctFile) return; + + let ctXml = await ctFile.async('text'); + let changed = false; + for (const ext of extensions) { + if (!ctXml.includes(`Extension="${ext}"`)) { + const contentType = getContentTypeForExtension(ext, ''); + ctXml = ctXml.replace( + '', + `` + ); + changed = true; + } + } + if (changed) { + zip.file('[Content_Types].xml', ctXml, { + compression: 'DEFLATE', + compressionOptions: { level: compressionLevel }, + }); + } +} - // Find highest existing image number in word/media/ - let maxImageNum = 0; +/** + * Find the highest image number currently used in `word/media/`. Media filenames + * are a shared package-wide namespace, so a single counter is used across parts. + */ +function findMaxImageNum(zip: JSZip): number { + let max = 0; zip.forEach((relativePath) => { const m = relativePath.match(/^word\/media\/image(\d+)\./); if (m) { const num = parseInt(m[1], 10); - if (num > maxImageNum) maxImageNum = num; + if (num > max) max = num; } }); + return max; +} - const relEntries: string[] = []; +/** + * Process newly inserted images across all parts (body, headers, footers): + * add binary data to ZIP, create per-part relationships, update content types, + * and rewrite rIds so the serializer outputs correct references. + * + * Mutates each image's rId in-place. + */ +async function processNewImages( + parts: Part[], + zip: JSZip, + compressionLevel: number +): Promise { + let maxImageNum = findMaxImageNum(zip); const extensionsAdded = new Set(); - for (const image of newImages) { - const { data, extension } = decodeDataUrl(image.src!); + for (const { relsPath, blocks } of parts) { + const images = collectNewImages(blocks); + if (images.length === 0) continue; - maxImageNum++; - maxId++; - const mediaFilename = `image${maxImageNum}.${extension}`; - const mediaPath = `word/media/${mediaFilename}`; - const newRId = `rId${maxId}`; + const relsXml = await readRelsOrStub(zip, relsPath); + let maxId = findMaxRId(relsXml); + const relEntries: string[] = []; - // Add binary to ZIP - zip.file(mediaPath, data, { - compression: 'DEFLATE', - compressionOptions: { level: compressionLevel }, - }); + for (const image of images) { + const { data, extension } = decodeDataUrl(image.src!); - // Build relationship entry - relEntries.push( - `` - ); + maxImageNum++; + maxId++; + const mediaFilename = `image${maxImageNum}.${extension}`; + const newRId = `rId${maxId}`; - extensionsAdded.add(extension); + zip.file(`word/media/${mediaFilename}`, data, { + compression: 'DEFLATE', + compressionOptions: { level: compressionLevel }, + }); - // Rewrite the image's rId so the serializer outputs the correct reference - image.rId = newRId; - } + relEntries.push( + `` + ); - // Update relationships XML - if (relEntries.length > 0) { - relsXml = relsXml.replace('', relEntries.join('') + ''); - zip.file(relsPath, relsXml, { + extensionsAdded.add(extension); + image.rId = newRId; + } + + const updatedRelsXml = relsXml.replace( + '', + relEntries.join('') + '' + ); + zip.file(relsPath, updatedRelsXml, { compression: 'DEFLATE', compressionOptions: { level: compressionLevel }, }); } - // Update [Content_Types].xml if new extensions were added - if (extensionsAdded.size > 0) { - const ctFile = zip.file('[Content_Types].xml'); - if (ctFile) { - let ctXml = await ctFile.async('text'); - for (const ext of extensionsAdded) { - if (!ctXml.includes(`Extension="${ext}"`)) { - const contentType = getContentTypeForExtension(ext, ''); - ctXml = ctXml.replace( - '', - `` - ); - } - } - zip.file('[Content_Types].xml', ctXml, { - compression: 'DEFLATE', - compressionOptions: { level: compressionLevel }, - }); - } - } + await registerImageExtensions(zip, extensionsAdded, compressionLevel); } // ============================================================================ @@ -291,39 +365,40 @@ function collectHyperlinksWithoutRId(blocks: BlockContent[]): Hyperlink[] { } /** - * Process newly created hyperlinks: assign rIds and add relationship entries. - * Mutates the hyperlinks' rId fields in-place. + * Process newly created hyperlinks across all parts (body, headers, footers): + * assign rIds and add relationship entries to the owning part's rels file. + * + * Mutates each hyperlink's rId in-place. */ async function processNewHyperlinks( - newHyperlinks: Hyperlink[], + parts: Part[], zip: JSZip, compressionLevel: number ): Promise { - if (newHyperlinks.length === 0) return; + for (const { relsPath, blocks } of parts) { + const hyperlinks = collectHyperlinksWithoutRId(blocks); + if (hyperlinks.length === 0) continue; - const relsPath = 'word/_rels/document.xml.rels'; - const relsFile = zip.file(relsPath); - if (!relsFile) return; - let relsXml = await relsFile.async('text'); - - let maxId = findMaxRId(relsXml); - const relEntries: string[] = []; + const relsXml = await readRelsOrStub(zip, relsPath); + let maxId = findMaxRId(relsXml); + const relEntries: string[] = []; - for (const hyperlink of newHyperlinks) { - maxId++; - const newRId = `rId${maxId}`; + for (const hyperlink of hyperlinks) { + maxId++; + const newRId = `rId${maxId}`; - relEntries.push( - `` - ); + relEntries.push( + `` + ); - // Rewrite the hyperlink's rId so the serializer outputs the correct reference - hyperlink.rId = newRId; - } + hyperlink.rId = newRId; + } - if (relEntries.length > 0) { - relsXml = relsXml.replace('', relEntries.join('') + ''); - zip.file(relsPath, relsXml, { + const updatedRelsXml = relsXml.replace( + '', + relEntries.join('') + '' + ); + zip.file(relsPath, updatedRelsXml, { compression: 'DEFLATE', compressionOptions: { level: compressionLevel }, }); @@ -390,15 +465,11 @@ export async function repackDocx(doc: Document, options: RepackOptions = {}): Pr }); } - // Process newly inserted images (data URLs → binary media files + relationships). - // This mutates image rIds in-place so the serializer outputs correct references. - const newImages = collectNewImages(exportDocument.package.document.content); - await processNewImages(newImages, newZip, compressionLevel); - - // Process newly created hyperlinks (assign rIds + add relationship entries). - // This mutates hyperlink rIds in-place so the serializer outputs correct references. - const newHyperlinks = collectHyperlinksWithoutRId(exportDocument.package.document.content); - await processNewHyperlinks(newHyperlinks, newZip, compressionLevel); + // Process newly inserted images and hyperlinks across body + headers + footers. + // Mutates rIds in-place so serializers emit correct references. + const parts = collectParts(exportDocument); + await processNewImages(parts, newZip, compressionLevel); + await processNewHyperlinks(parts, newZip, compressionLevel); // Serialize and update document.xml (after image/hyperlink rIds have been rewritten) const documentXml = serializeDocument(exportDocument); @@ -479,12 +550,10 @@ export async function repackDocxFromRaw( }); } - // Serialize and update document.xml - const newImages = collectNewImages(exportDocument.package.document.content); - await processNewImages(newImages, newZip, compressionLevel); - - const newHyperlinks = collectHyperlinksWithoutRId(exportDocument.package.document.content); - await processNewHyperlinks(newHyperlinks, newZip, compressionLevel); + // Process newly inserted images and hyperlinks across body + headers + footers. + const parts = collectParts(exportDocument); + await processNewImages(parts, newZip, compressionLevel); + await processNewHyperlinks(parts, newZip, compressionLevel); const documentXml = serializeDocument(exportDocument); newZip.file('word/document.xml', documentXml, { @@ -876,8 +945,7 @@ export function collectHeaderFooterUpdates(doc: Document): Map { for (const [rId, headerFooter] of map.entries()) { const rel = rels.get(rId); if (rel && rel.type === type && rel.target) { - const filename = rel.target.startsWith('/') ? rel.target.slice(1) : `word/${rel.target}`; - updates.set(filename, serializeHeaderFooter(headerFooter)); + updates.set(headerFooterFilename(rel.target), serializeHeaderFooter(headerFooter)); } } }