Skip to content

Commit e04adb4

Browse files
committed
Unify path separators in OPF files to slashes.
1 parent 5d0ad16 commit e04adb4

File tree

2 files changed

+152
-2
lines changed

2 files changed

+152
-2
lines changed

sphinx/builders/_epub_base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,11 @@ def build_content(self) -> None:
613613
continue
614614
if refnode['refuri'] in self.ignored_files:
615615
continue
616-
spine = Spine(html.escape(self.make_id(refnode['refuri'])), True)
616+
spine = Spine(
617+
html.escape(self.make_id(refnode['refuri'].replace(os.sep, '/'))), True
618+
)
617619
metadata['spines'].append(spine)
618-
spinefiles.add(refnode['refuri'])
620+
spinefiles.add(refnode['refuri'].replace(os.sep, '/'))
619621
for info in self.domain_indices:
620622
spine = Spine(html.escape(self.make_id(info[0] + self.out_suffix)), True)
621623
metadata['spines'].append(spine)

tests/test_builders/test_build_epub.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,151 @@ def test_copy_images(app: SphinxTestApp) -> None:
542542
'svgimg.svg',
543543
'testimäge.png',
544544
}
545+
546+
547+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_path_separator_normalization(app: SphinxTestApp) -> None:
    """Test that path separators (backslashes) are normalized to forward slashes
    in EPUB manifests, even on Windows.

    The generated ``content.opf`` is parsed and three properties are checked:
    every spine ``idref`` names an ``id`` present in the manifest, no manifest
    ``href`` contains a backslash, and no ``href`` is listed under more than
    one manifest ``id``.
    """
    from collections import defaultdict
    from xml.etree import ElementTree as ET

    app.build()

    # Read the content.opf file
    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns = {'opf': 'http://www.idpf.org/2007/opf'}

    # Map every manifest id to its href.  ``Element.get`` returns ``None`` for
    # a missing attribute, so fail with a readable message here rather than
    # with a ``TypeError`` in the string checks further down.
    manifest_hrefs: dict[str, str] = {}
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id = item.get('id')
        item_href = item.get('href')
        assert item_id is not None, 'manifest item has no id attribute'
        assert item_href is not None, (
            f"manifest item '{item_id}' has no href attribute"
        )
        manifest_hrefs[item_id] = item_href

    # Verify all spine idrefs exist in manifest
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref = itemref.get('idref')
        assert idref in manifest_hrefs, (
            f"spine idref '{idref}' does not exist in manifest"
        )

    # Verify hrefs do not contain backslashes
    # (should be normalized to forward slashes even on Windows)
    for item_id, href in manifest_hrefs.items():
        assert '\\' not in href, (
            f"manifest item '{item_id}' href '{href}' contains backslashes"
        )

    # Verify no duplicate IDs are assigned to the same href.  The hrefs are
    # already known to be backslash-free, so they can be compared as they are.
    href_to_ids: defaultdict[str, list[str]] = defaultdict(list)
    for item_id, href in manifest_hrefs.items():
        href_to_ids[href].append(item_id)

    duplicates = {href: ids for href, ids in href_to_ids.items() if len(ids) > 1}
    assert not duplicates, f'Multiple IDs assigned to the same file: {duplicates}'
610+
611+
612+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_subdirectory_paths(app: SphinxTestApp) -> None:
    """Test that path separators are correctly normalized to forward slashes
    even for paths containing subdirectories.

    Every manifest ``href`` in the generated ``content.opf`` must be free of
    backslashes, and an ``href`` that contains ``/`` must not have an empty
    path segment (no leading, trailing or doubled slash).
    """
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    # Check all manifest item hrefs
    for item in root.findall('.//opf:manifest/opf:item', ns):
        href = item.get('href')
        # ``Element.get`` returns ``None`` for a missing attribute; report
        # that explicitly instead of raising ``TypeError`` in the checks below.
        assert href is not None, (
            f"manifest item '{item.get('id')}' has no href attribute"
        )

        # Verify no backslashes are present
        assert '\\' not in href, (
            f"href '{href}' contains backslashes (should be forward slashes)"
        )

        # For paths with subdirectories, verify that every segment between
        # forward slashes is non-empty.
        if '/' in href:
            assert all(href.split('/')), (
                f"href '{href}' contains empty path segments"
            )
644+
645+
646+
@pytest.mark.sphinx('epub', testroot='basic')
def test_epub_spine_idref_consistency(app: SphinxTestApp) -> None:
    """Test that spine idrefs and manifest ids are consistent.

    Verify that path separator normalization ensures the same file
    is reliably referenced with the same ID: every spine ``idref`` in the
    generated ``content.opf`` must resolve to a manifest item.

    An href that the spine references more than once is only printed as
    information; it does not fail the test.
    """
    from collections import Counter
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    # Create id -> href mapping from manifest.  ``Element.get`` returns
    # ``str | None``, so narrow to ``str`` before storing.
    id_to_href: dict[str, str] = {}
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id = item.get('id')
        item_href = item.get('href')
        assert item_id is not None, 'manifest item has no id attribute'
        assert item_href is not None, (
            f"manifest item '{item_id}' has no href attribute"
        )
        id_to_href[item_id] = item_href

    # For each idref in spine, verify corresponding href exists
    spine_hrefs: list[str] = []
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref = itemref.get('idref')
        assert idref in id_to_href, f"manifest item not found for spine idref '{idref}'"
        spine_hrefs.append(id_to_href[idref])

    # Collect hrefs that the spine references multiple times
    # (normally each file should appear only once in spine)
    duplicated_hrefs: list[str] = [
        href for href, count in Counter(spine_hrefs).items() if count > 1
    ]

    # Note: Some EPUBs may intentionally reference the same file multiple times,
    # so this is logged as informational rather than a strict error
    if duplicated_hrefs:
        print(
            'Info: The following hrefs are referenced multiple times in spine: '
            f'{duplicated_hrefs}'
        )

0 commit comments

Comments
 (0)