diff --git a/CHANGES.rst b/CHANGES.rst index d4ba101eda5..fb6b02ff4f5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -154,6 +154,8 @@ Bugs fixed Patch by Harmen Stoppels. * #13944: autodoc: show traceback during import in human readable representation. Patch by Florian Best. +* #14006: Support images with data URIs that aren't base64-encoded. + Patch by Shengyu Zhang and Adam Turner. Testing diff --git a/sphinx/util/images.py b/sphinx/util/images.py index ca6cb66764b..e9963d484fe 100644 --- a/sphinx/util/images.py +++ b/sphinx/util/images.py @@ -5,6 +5,7 @@ import base64 from pathlib import Path from typing import TYPE_CHECKING, NamedTuple, overload +from urllib.parse import unquote_to_bytes import imagesize @@ -90,7 +91,6 @@ def get_image_extension(mimetype: str) -> str | None: def parse_data_uri(uri: str) -> DataURI | None: if not uri.startswith('data:'): return None - uri = uri[5:] if ',' not in uri: msg = 'malformed data URI' @@ -100,16 +100,19 @@ def parse_data_uri(uri: str) -> DataURI | None: mimetype = 'text/plain' charset = 'US-ASCII' + uri = uri[5:] properties, _, data = uri.partition(',') for prop in properties.split(';'): if prop == 'base64': pass # skip - elif prop.startswith('charset='): + elif prop.lower().startswith('charset='): charset = prop[8:] elif prop: - mimetype = prop + mimetype = prop.lower() - image_data = base64.b64decode(data) + image_data = unquote_to_bytes(data) # data might be percent-encoded + if properties.endswith(';base64'): + image_data = base64.decodebytes(image_data) return DataURI(mimetype, charset, image_data) diff --git a/tests/test_util/test_util_images.py b/tests/test_util/test_util_images.py index d0b4f918afc..c4832ce224c 100644 --- a/tests/test_util/test_util_images.py +++ b/tests/test_util/test_util_images.py @@ -86,3 +86,15 @@ def test_parse_data_uri() -> None: ) with pytest.raises(ValueError, match=r'malformed data URI'): parse_data_uri(uri) + + # not base64 + uri = ( + 'data:image/svg+xml,%3Csvg%20width%3D%22100%22%20height%3D%22100%22%20' + 'xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%3Ccircle%20cx' + '%3D%2250%22%20cy%3D%2250%22%20r%3D%2240%22%20fill%3D%22blue%22%2F%3E' + '%3C%2Fsvg%3E' + ) + image = parse_data_uri(uri) + assert image is not None + assert image.mimetype == 'image/svg+xml' + assert b'%' not in image.data