wustho · mbeavitt · Oct 6, 2025 · Oct 6, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,7 @@ appdirs = "^1.4.4"
 peewee = "^3.16.0"
 fuzzywuzzy = "^0.18.0"
 climage = "^0.2.0"
+filetype = "^1.2.0"
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.1.0"

diff --git a/src/baca/tools/KindleUnpack/mobi_cover.py b/src/baca/tools/KindleUnpack/mobi_cover.py
@@ -8,7 +8,7 @@
 
 from .unipath import pathof
 import os
-import imghdr
+import filetype
 
 import struct
 # note:  struct pack, unpack, unpack_from all require bytestring format
@@ -34,25 +34,26 @@
 
 
 def get_image_type(imgname, imgdata=None):
-    imgtype = unicode_str(imghdr.what(pathof(imgname), imgdata))
-
-    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
-    # with only the magic JPEG bytes out there...
-    # ImageMagick handles those, so, do it too.
-    if imgtype is None:
-        if imgdata is None:
-            with open(pathof(imgname), 'rb') as f:
-                imgdata = f.read()
-        if imgdata[0:2] == b'\xFF\xD8':
-            # Get last non-null bytes
-            last = len(imgdata)
-            while (imgdata[last-1:last] == b'\x00'):
-                last-=1
-            # Be extra safe, check the trailing bytes, too.
-            if imgdata[last-2:last] == b'\xFF\xD9':
-                imgtype = "jpeg"
-    return imgtype
-
+    '''Return the image format of imgname, or of imgdata when it is given.
+
+    The result uses the format names the previous imghdr-based code
+    returned ('jpeg', 'tiff', 'png', 'gif', ...) rather than filetype's
+    file extensions ('jpg', 'tif'). Returns None when the content is not
+    recognised as an image.
+    '''
+    if imgdata is not None:
+        result = filetype.guess(imgdata)
+    else:
+        result = filetype.guess(pathof(imgname))
+    # filetype.guess also matches archives, fonts, documents, etc.; only
+    # image matches count here.
+    if result is None or not result.mime.startswith('image/'):
+        return None
+    imgtype = unicode_str(result.extension)
+    # Map filetype's extensions back to the imghdr names so comparisons
+    # such as imgtype == 'jpeg' keep matching.
+    return {'jpg': 'jpeg', 'tif': 'tiff'}.get(imgtype, imgtype)
+
 
 def get_image_size(imgname, imgdata=None):
     '''Determine the image type of imgname (or imgdata) and return its size.

diff --git a/tests/fixtures/room-with-a-view-gutenberg.mobi b/tests/fixtures/room-with-a-view-gutenberg.mobi
diff --git a/tests/test_mobi_cover.py b/tests/test_mobi_cover.py
@@ -0,0 +1,51 @@
+import os
+
+import pytest
+
+from baca.tools.KindleUnpack.kindleunpack import unpackBook
+from baca.tools.KindleUnpack.mobi_cover import get_image_size, get_image_type
+
+# Fixture book shipped alongside the tests.
+FIXTURE_MOBI = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "fixtures",
+    "room-with-a-view-gutenberg.mobi",
+)
+IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".gif")
+
+
+def test_mobi_file_cover_extraction(tmp_path):
+    """Unpack the fixture mobi and check each extracted image is identified.
+
+    The book is unpacked into pytest's ``tmp_path`` directory, which pytest
+    manages itself, so no manual cleanup is needed. The test is skipped when
+    the fixture file is missing.
+    """
+    if not os.path.exists(FIXTURE_MOBI):
+        pytest.skip(f"Test mobi file not found: {FIXTURE_MOBI}")
+
+    out_dir = str(tmp_path)
+    unpackBook(FIXTURE_MOBI, out_dir)
+
+    # Collect every extracted file that looks like an image by suffix.
+    image_files = []
+    for root, _dirs, names in os.walk(out_dir):
+        image_files.extend(
+            os.path.join(root, name)
+            for name in names
+            if name.lower().endswith(IMAGE_SUFFIXES)
+        )
+    assert image_files, "No images found in extracted mobi file"
+
+    for img_path in image_files:
+        img_type = get_image_type(img_path)
+        assert img_type is not None, f"Could not determine type for {img_path}"
+        assert img_type in ("jpg", "jpeg", "png", "gif"), f"Unexpected image type: {img_type}"
+
+        # NOTE(review): a None size is accepted without failing the test;
+        # confirm whether get_image_size should succeed for every type above.
+        size = get_image_size(img_path)
+        if size is not None:
+            width, height = size
+            assert width > 0, f"Invalid width for {img_path}"
+            assert height > 0, f"Invalid height for {img_path}"