@@ -542,3 +542,151 @@ def test_copy_images(app: SphinxTestApp) -> None:
542542 'svgimg.svg' ,
543543 'testimäge.png' ,
544544 }
545+
546+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_path_separator_normalization(app: SphinxTestApp) -> None:
    """Test that path separators (backslashes) are normalized to forward slashes
    in EPUB manifests, even on Windows.

    After building, ``content.opf`` is parsed and checked for:

    * every manifest item carrying both an ``id`` and an ``href``;
    * every spine ``idref`` pointing at an existing manifest ``id``;
    * no manifest ``href`` containing a backslash;
    * no ``href`` being listed under more than one manifest entry.
    """
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns = {'opf': 'http://www.idpf.org/2007/opf'}

    # Keep the manifest as a list of (id, href) pairs rather than a dict keyed
    # by id: a dict would silently drop entries that share an id and thereby
    # hide exactly the kind of inconsistency this test is looking for.
    manifest_items: list[tuple[str, str]] = []
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id = item.get('id')
        item_href = item.get('href')
        # ``Element.get`` returns None for a missing attribute; fail with a
        # readable message instead of a TypeError further down.
        assert item_id is not None, 'manifest item is missing its id attribute'
        assert item_href is not None, (
            f"manifest item '{item_id}' is missing its href attribute"
        )
        manifest_items.append((item_id, item_href))

    manifest_ids = {item_id for item_id, _ in manifest_items}

    # Verify all spine idrefs exist in manifest
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref = itemref.get('idref')
        assert idref in manifest_ids, (
            f"spine idref '{idref}' does not exist in manifest"
        )

    # Verify hrefs do not contain backslashes
    # (should be normalized to forward slashes even on Windows)
    for item_id, href in manifest_items:
        assert '\\' not in href, (
            f"manifest item '{item_id}' href '{href}' contains backslashes"
        )

    # Group manifest entries by href. No extra separator normalization is
    # needed here: the loop above already guarantees there are no backslashes.
    href_to_ids: dict[str, list[str]] = {}
    for item_id, href in manifest_items:
        href_to_ids.setdefault(href, []).append(item_id)

    duplicates = {href: ids for href, ids in href_to_ids.items() if len(ids) > 1}
    assert not duplicates, f'Multiple IDs assigned to the same file: {duplicates}'
610+
611+
@pytest.mark.sphinx('epub', testroot='builder-dirhtml')
def test_epub_manifest_subdirectory_paths(app: SphinxTestApp) -> None:
    """Test that path separators are correctly normalized to forward slashes
    even for paths containing subdirectories.

    Every manifest ``href`` in ``content.opf`` must be present, free of
    backslashes, and, when it contains ``/``, free of empty path segments
    (no leading, trailing or doubled slash).
    """
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    for item in root.findall('.//opf:manifest/opf:item', ns):
        href = item.get('href')
        # ``Element.get`` returns None for a missing attribute; report that
        # explicitly rather than letting ``in`` raise a TypeError below.
        assert href is not None, (
            f"manifest item '{item.get('id')}' is missing its href attribute"
        )

        # Verify no backslashes are present
        assert '\\' not in href, (
            f"href '{href}' contains backslashes (should be forward slashes)"
        )

        # For paths with subdirectories, every '/'-separated segment must be
        # non-empty.
        if '/' in href:
            assert all(href.split('/')), (
                f"href '{href}' contains empty path segments"
            )
644+
645+
@pytest.mark.sphinx('epub', testroot='basic')
def test_epub_spine_idref_consistency(app: SphinxTestApp) -> None:
    """Test that spine idrefs and manifest ids are consistent.

    Verify that path separator normalization ensures the same file
    is reliably referenced with the same ID: every spine ``idref`` in
    ``content.opf`` must resolve to a manifest item. Hrefs that the spine
    references more than once are only reported, not treated as a failure.
    """
    from collections import Counter
    from xml.etree import ElementTree as ET

    app.build()

    opf_path = app.outdir / 'content.opf'
    assert opf_path.exists(), 'content.opf was not generated'

    root = ET.parse(str(opf_path)).getroot()  # noqa: S314
    ns: dict[str, str] = {'opf': 'http://www.idpf.org/2007/opf'}

    # Create id -> href mapping from manifest
    id_to_href: dict[str, str] = {}
    for item in root.findall('.//opf:manifest/opf:item', ns):
        item_id = item.get('id')
        item_href = item.get('href')
        # ``Element.get`` returns None for a missing attribute.
        assert item_id is not None, 'manifest item is missing its id attribute'
        assert item_href is not None, (
            f"manifest item '{item_id}' is missing its href attribute"
        )
        id_to_href[item_id] = item_href

    # For each idref in spine, verify the corresponding manifest item exists
    spine_hrefs: list[str] = []
    for itemref in root.findall('.//opf:spine/opf:itemref', ns):
        idref = itemref.get('idref')
        assert idref in id_to_href, f"manifest item not found for spine idref '{idref}'"
        spine_hrefs.append(id_to_href[idref])

    # Normally each file should appear only once in the spine.
    duplicated_hrefs: list[str] = [
        href for href, count in Counter(spine_hrefs).items() if count > 1
    ]

    # Note: Some EPUBs may intentionally reference the same file multiple times,
    # so this is logged as informational rather than a strict error
    if duplicated_hrefs:
        print(
            f'Info: The following hrefs are referenced multiple times in spine: {duplicated_hrefs}'
        )
0 commit comments