From 887b1209b0abad309b8665ae51f99afb1e4c4649 Mon Sep 17 00:00:00 2001 From: turky Date: Thu, 7 Aug 2025 15:46:11 +0900 Subject: [PATCH 1/2] Unify path separators in OPF files to slashes. --- sphinx/builders/_epub_base.py | 6 +- tests/test_builders/test_build_epub.py | 151 +++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 2 deletions(-) diff --git a/sphinx/builders/_epub_base.py b/sphinx/builders/_epub_base.py index 3c7c93dfd1f..972feec7103 100644 --- a/sphinx/builders/_epub_base.py +++ b/sphinx/builders/_epub_base.py @@ -613,9 +613,11 @@ def build_content(self) -> None: continue if refnode['refuri'] in self.ignored_files: continue - spine = Spine(html.escape(self.make_id(refnode['refuri'])), True) + spine = Spine( + html.escape(self.make_id(refnode['refuri'].replace(os.sep, '/'))), True + ) metadata['spines'].append(spine) - spinefiles.add(refnode['refuri']) + spinefiles.add(refnode['refuri'].replace(os.sep, '/')) for info in self.domain_indices: spine = Spine(html.escape(self.make_id(info[0] + self.out_suffix)), True) metadata['spines'].append(spine) diff --git a/tests/test_builders/test_build_epub.py b/tests/test_builders/test_build_epub.py index b8c7395c22c..42852c01757 100644 --- a/tests/test_builders/test_build_epub.py +++ b/tests/test_builders/test_build_epub.py @@ -542,3 +542,154 @@ def test_copy_images(app: SphinxTestApp) -> None: 'svgimg.svg', 'testimäge.png', } + + +@pytest.mark.sphinx('epub', testroot='builder-dirhtml') +def test_epub_manifest_path_separator_normalization(app: SphinxTestApp) -> None: + """Test that path separators are normalized to forward slashes + in EPUB manifests, even on Windows. + """ + app.build() + + # Read the content.opf file + opf_path = app.outdir / 'content.opf' + assert opf_path.exists(), 'content.opf was not generated' + + # Parse manifest and spine elements + # Verify that all idrefs in spine match ids in manifest + from xml.etree import ElementTree as ET + + tree = ET.parse(str(opf_path)) # noqa: S314 + root = tree.getroot() + + # Define namespace + ns = {'opf': 'http://www.idpf.org/2007/opf'} + + # Collect items from manifest + manifest_ids: set[str | None] = set() + manifest_hrefs: dict[str, str] = {} + for item in root.findall('.//opf:manifest/opf:item', ns): + item_id: str | None = item.get('id') + item_href: str | None = item.get('href') + manifest_ids.add(item_id) + if item_id is not None and item_href is not None: + manifest_hrefs[item_id] = item_href + + # Check idrefs in spine + spine_idrefs = [] + for itemref in root.findall('.//opf:spine/opf:itemref', ns): + idref: str | None = itemref.get('idref') + spine_idrefs.append(idref) + + # Verify all spine idrefs exist in manifest + for idref in spine_idrefs: + assert idref in manifest_ids, ( + f"spine idref '{idref}' does not exist in manifest" + ) + + # Verify hrefs do not contain backslashes + # (should be normalized to forward slashes even on Windows) + for item_id, href in manifest_hrefs.items(): + assert '\\' not in href, ( + f"manifest item '{item_id}' href '{href}' contains backslashes" + ) + + # Verify no duplicate IDs are assigned to the same href + href_to_ids: dict[str, list[str | None]] = {} + for item_id, href in manifest_hrefs.items(): + # Normalize path for comparison + normalized_href = href.replace('\\', '/') + if normalized_href not in href_to_ids: + href_to_ids[normalized_href] = [] + href_to_ids[normalized_href].append(item_id) + + # Detect duplicate IDs + duplicates: dict[str, list[str | None]] = { + href: ids for href, ids in href_to_ids.items() if len(ids) > 1 + } + assert not duplicates, f'Multiple IDs assigned to the same file: {duplicates}' + + +@pytest.mark.sphinx('epub', testroot='builder-dirhtml') +def test_epub_manifest_subdirectory_paths(app: SphinxTestApp) -> None: + """Test that path separators are correctly normalized to forward slashes + even for paths containing subdirectories. + """ + app.build() + + opf_path = app.outdir / 'content.opf' + assert opf_path.exists() + + from xml.etree import ElementTree as ET + + tree = ET.parse(str(opf_path)) # noqa: S314 + root = tree.getroot() + + ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'} + + # Check all manifest item hrefs + for item in root.findall('.//opf:manifest/opf:item', ns): + href: str | None = item.get('href') + if href is not None: + # Verify no backslashes are present + assert '\\' not in href, ( + f"href '{href}' contains backslashes (should be forward slashes)" + ) + + # For paths with subdirectories, verify they are separated by forward slashes + if href is not None and '/' in href: + # Verify the path is correctly constructed + parts: list[str] = href.split('/') + assert all(part for part in parts), ( + f"href '{href}' contains empty path segments" + ) + + +@pytest.mark.sphinx('epub', testroot='basic') +def test_epub_spine_idref_consistency(app: SphinxTestApp) -> None: + """Test that spine idrefs and manifest ids are consistent. + Verify that path separator normalization ensures the same file + is reliably referenced with the same ID. + """ + app.build() + + opf_path = app.outdir / 'content.opf' + from xml.etree import ElementTree as ET + + tree = ET.parse(str(opf_path)) # noqa: S314 + root = tree.getroot() + + ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'} + + # Create id→href mapping from manifest + id_to_href = {} + for item in root.findall('.//opf:manifest/opf:item', ns): + item_id: str | None = item.get('id') + item_href: str | None = item.get('href') + id_to_href[item_id] = item_href + + # For each idref in spine, verify corresponding href exists + # and that href is unique + spine_hrefs = [] + for itemref in root.findall('.//opf:spine/opf:itemref', ns): + idref: str | None = itemref.get('idref') + assert idref in id_to_href, f"manifest item not found for spine idref '{idref}'" + + href = id_to_href[idref] + spine_hrefs.append(href) + + # Warn if the same href is referenced multiple times + # (normally each file should appear only once in spine) + from collections import Counter + + href_counts = Counter(spine_hrefs) + duplicated_hrefs: list[str | None] = [ + href for href, count in href_counts.items() if count > 1 + ] + + # Note: Some EPUBs may intentionally reference the same file multiple times, + # so this is logged as informational rather than a strict error + if duplicated_hrefs: + print( + f'Info: The following hrefs are referenced multiple times in spine: {duplicated_hrefs}' + ) From e83d00c0445027d176cafaebc1856ad6289d90b1 Mon Sep 17 00:00:00 2001 From: turky Date: Tue, 18 Nov 2025 18:21:32 +0900 Subject: [PATCH 2/2] add CHANGES entry. --- CHANGES.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index cd36d83957b..ef3aa6cdaac 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -140,7 +140,9 @@ Bugs fixed * #14059: LaTeX: Footnotes cause pdflatex error with French language (since late June 2025 upstream change to LaTeX ``babel-french``). Patch by Jean-François B. - +* #14067: EPUB: unify path separators in manifest items to forward slashes, + resolve duplicates manifest on Windows OS. + Patch by Akihiro Takizawa. Testing -------