diff --git a/chandra/input.py b/chandra/input.py
index 56829ab..1b762bc 100644
--- a/chandra/input.py
+++ b/chandra/input.py
@@ -69,8 +69,17 @@ def load_file(filepath: str, config: dict):
         page_range = parse_range_str(page_range)
 
     input_type = filetype.guess(filepath)
+    is_pdf = False
+
+    # Prefer header-based detection but fall back to a case-insensitive ".pdf" extension check.
     if input_type and input_type.extension == "pdf":
+        is_pdf = True
+    elif filepath.lower().endswith(".pdf"):
+        is_pdf = True
+
+    if is_pdf:
         images = load_pdf_images(filepath, page_range)
     else:
+        # Non-PDF inputs are treated as single images.
         images = [load_image(filepath)]
     return images
diff --git a/tests/test_input_loader.py b/tests/test_input_loader.py
new file mode 100644
index 0000000..90f50a1
--- /dev/null
+++ b/tests/test_input_loader.py
@@ -0,0 +1,31 @@
+from chandra import input as input_mod
+
+
+def test_load_file_uses_pdf_loader_when_extension_pdf(monkeypatch):
+    """Ensure load_file routes .pdf paths to load_pdf_images even if filetype.guess fails.
+
+    This simulates a multi-page PDF where only the first page would be used
+    if we treated the file as a single image.
+    """
+
+    calls = {}
+
+    def fake_guess(_):
+        # Simulate failure to detect PDF from file header.
+        return None
+
+    def fake_load_pdf_images(path, page_range):  # pragma: no cover - behavior verified via result
+        calls["path"] = path
+        calls["page_range"] = page_range
+        # Pretend we decoded three pages
+        return ["page0", "page1", "page2"]
+
+    monkeypatch.setattr(input_mod.filetype, "guess", fake_guess)
+    monkeypatch.setattr(input_mod, "load_pdf_images", fake_load_pdf_images)
+
+    images = input_mod.load_file("dummy.pdf", {"page_range": "0-2"})
+
+    assert images == ["page0", "page1", "page2"]
+    assert calls["path"].endswith("dummy.pdf")
+    # Parsed page range should be passed through as a list of ints
+    assert calls["page_range"] == [0, 1, 2]