Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ appdirs = "^1.4.4"
peewee = "^3.16.0"
fuzzywuzzy = "^0.18.0"
climage = "^0.2.0"
filetype = "^1.2.0"

[tool.poetry.group.dev.dependencies]
black = "^23.1.0"
Expand Down
28 changes: 8 additions & 20 deletions src/baca/tools/KindleUnpack/mobi_cover.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .unipath import pathof
import os
import imghdr
import filetype

import struct
# note: struct pack, unpack, unpack_from all require bytestring format
Expand All @@ -34,25 +34,13 @@


def get_image_type(imgname, imgdata=None):
# Determine the image type of imgname, or of the raw bytes in imgdata when
# they are supplied.
#
# NOTE(review): this span comes from a rendered diff with no +/- markers.
# The imghdr-based statements and the filetype-based statements further
# down look like the removed and the added side of the same change --
# confirm against the real file before relying on either half.

# --- imghdr-based implementation (presumably the removed side) ---
# Ask imghdr first; the result is passed through unicode_str.
imgtype = unicode_str(imghdr.what(pathof(imgname), imgdata))

# imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
# with only the magic JPEG bytes out there...
# ImageMagick handles those, so, do it too.
if imgtype is None:
if imgdata is None:
# No bytes were passed in, so read the whole file to inspect its markers.
with open(pathof(imgname), 'rb') as f:
imgdata = f.read()
# The data must start with the two bytes FF D8.
if imgdata[0:2] == b'\xFF\xD8':
# Get last non-null bytes
last = len(imgdata)
while (imgdata[last-1:last] == b'\x00'):
last-=1
# Be extra safe, check the trailing bytes, too.
if imgdata[last-2:last] == b'\xFF\xD9':
imgtype = "jpeg"
return imgtype

# --- filetype-based implementation (presumably the added side) ---
# Guess from the in-memory bytes when they were given, otherwise from the path.
if imgdata is not None:
result = filetype.guess(imgdata)
else:
result = filetype.guess(pathof(imgname))
# No guess available: report None instead of reading .extension off it.
if result is None:
return None
# Return the guessed extension, passed through unicode_str.
return unicode_str(result.extension)

def get_image_size(imgname, imgdata=None):
'''Determine the image type of imgname (or imgdata) and return its size.
Expand Down
Binary file added tests/fixtures/room-with-a-view-gutenberg.mobi
Binary file not shown.
53 changes: 53 additions & 0 deletions tests/test_mobi_cover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import tempfile
import shutil
from baca.tools.KindleUnpack.mobi_cover import get_image_type, get_image_size


def test_mobi_file_cover_extraction():
    """Unpack the bundled mobi fixture and validate every extracted image.

    Each file under the unpack directory whose name ends in .jpg, .jpeg,
    .png or .gif must be identified by ``get_image_type`` as one of those
    formats.  When ``get_image_size`` reports a size for it, both
    dimensions must be positive.  The test is skipped when the fixture
    file is not present next to this test module.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    mobi_file = os.path.join(here, "fixtures", "room-with-a-view-gutenberg.mobi")

    # Nothing to check without the fixture, so skip rather than fail.
    if not os.path.exists(mobi_file):
        import pytest
        pytest.skip(f"Test mobi file not found: {mobi_file}")

    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')
    accepted_types = ('jpg', 'jpeg', 'png', 'gif')

    # Scratch directory that receives the unpacked book; removed in `finally`.
    unpack_dir = tempfile.mkdtemp()
    try:
        from baca.tools.KindleUnpack.kindleunpack import unpackBook

        unpackBook(mobi_file, unpack_dir)

        # Collect every extracted file that looks like an image by its suffix.
        extracted = [
            os.path.join(folder, entry)
            for folder, _subdirs, entries in os.walk(unpack_dir)
            for entry in entries
            if entry.lower().endswith(image_suffixes)
        ]
        assert len(extracted) > 0, "No images found in extracted mobi file"

        for img_path in extracted:
            img_type = get_image_type(img_path)
            assert img_type is not None, f"Could not determine type for {img_path}"
            assert img_type in accepted_types, f"Unexpected image type: {img_type}"

            # A missing size is tolerated; only a reported size is validated.
            dimensions = get_image_size(img_path)
            if dimensions is None:
                continue
            width, height = dimensions
            assert width > 0, f"Invalid width for {img_path}"
            assert height > 0, f"Invalid height for {img_path}"
    finally:
        # Best-effort cleanup of the scratch directory.
        shutil.rmtree(unpack_dir, ignore_errors=True)