From 09e2c0fd9777013a174a013004cf927172568440 Mon Sep 17 00:00:00 2001
From: Tom Most <twm@freecog.net>
Date: Tue, 27 May 2025 17:08:12 -0700
Subject: [PATCH 1/4] Avoid a copy of the attribute list

normalize_attrs always makes a copy when the list is non-empty, so
there's no need to copy again.
---
 feedparser/urls.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/feedparser/urls.py b/feedparser/urls.py
index e4e83d16..cfea402c 100644
--- a/feedparser/urls.py
+++ b/feedparser/urls.py
@@ -158,13 +158,9 @@ def resolve_uri(self, uri):
 
     def unknown_starttag(self, tag, attrs):
         attrs = self.normalize_attrs(attrs)
-        attrs = [
-            (
-                key,
-                ((tag, key) in self.relative_uris) and self.resolve_uri(value) or value,
-            )
-            for key, value in attrs
-        ]
+        for i, (key, value) in enumerate(attrs):
+            if (tag, key) in self.relative_uris:
+                attrs[i] = (key, self.resolve_uri(value))
         super().unknown_starttag(tag, attrs)
 
 

From 6558d8de55cb7920d84c7684f8692820c9ee2f3c Mon Sep 17 00:00:00 2001
From: Tom Most <twm@freecog.net>
Date: Tue, 27 May 2025 22:06:10 -0700
Subject: [PATCH 2/4] Resolve relative srcset URLs

---
 feedparser/sanitizer.py                       |  1 +
 feedparser/urls.py                            | 63 +++++++++++++++++++
 tests/test_srcset_candidates.py               | 63 +++++++++++++++++++
 .../base/http_entry_content_base_srcset.xml   | 10 +++
 4 files changed, 137 insertions(+)
 create mode 100644 tests/test_srcset_candidates.py
 create mode 100644 tests/wellformed/base/http_entry_content_base_srcset.xml

diff --git a/feedparser/sanitizer.py b/feedparser/sanitizer.py
index 5b3014e1..7e7884fb 100644
--- a/feedparser/sanitizer.py
+++ b/feedparser/sanitizer.py
@@ -259,6 +259,7 @@ class HTMLSanitizer(BaseHTMLProcessor):
         "size",
         "span",
         "src",
+        "srcset",
         "start",
         "step",
         "style",
diff --git a/feedparser/urls.py b/feedparser/urls.py
index cfea402c..d33a8d52 100644
--- a/feedparser/urls.py
+++ b/feedparser/urls.py
@@ -1,3 +1,4 @@
+# Copyright 2025 Tom Most <twm@freecog.net>
 # Copyright 2010-2024 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
@@ -116,6 +117,56 @@ def make_safe_absolute_uri(base, rel=None):
     return uri
 
 
+# Matches image candidate strings within a srcset attribute value as
+# described in https://html.spec.whatwg.org/multipage/images.html#srcset-attributes
+_srcset_candidate = re.compile(
+    r"""
+    # ASCII whitespace: https://infra.spec.whatwg.org/#ascii-whitespace
+    [\t\n\f\r ]*
+    (
+        # URL that doesn't start or end with a comma
+        (?!,)
+        [^\t\n\f\r ]+
+        (?<!,)
+    )
+    (
+        # Width descriptor like "1234w"
+        # https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#non-negative-integers
+        [\t\n\f\r ]+
+        \d+w
+        |
+        # Pixel density descriptor like "2.0x"
+        # https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
+        [\t\n\f\r ]+
+        \d+(?:\.\d+)?(?:[eE][-+]?\d+)?x
+        |
+    )
+    [\t\n\f\r ]*
+    (?:,|\Z)
+    """,
+    re.VERBOSE | re.ASCII,
+)
+
+
+def srcset_candidates(value: str) -> list[tuple[str, str]]:
+    """
+    Split a ``srcset`` attribute value into candidates:
+
+    >>> srcset_candidates("/foo.jpg, /foo.2x.jpg 2x")
+    [('/foo.jpg', ''), ('/foo.2x.jpg', '2x')]
+
+    This doesn't validate the URLs, nor check for duplicate or conflicting
+    descriptors. Parsing stops at the first invalid candidate; earlier ones are kept.
+    """
+    pos = 0
+    candidates = []
+    while m := _srcset_candidate.match(value, pos):
+        desc = m[2].strip("\t\n\f\r ")
+        candidates.append((m[1], desc))
+        pos = m.end(0)
+    return candidates
+
+
 class RelativeURIResolver(BaseHTMLProcessor):
     relative_uris = {
         ("a", "href"),
@@ -156,11 +207,23 @@ def __init__(self, baseuri, encoding, _type):
     def resolve_uri(self, uri):
         return make_safe_absolute_uri(self.baseuri, uri.strip())
 
+    def resolve_srcset(self, srcset):
+        candidates = []
+        for uri, desc in srcset_candidates(srcset):
+            uri = self.resolve_uri(uri)
+            if desc:
+                candidates.append("{} {}".format(uri, desc))
+            else:
+                candidates.append(uri)
+        return ", ".join(candidates)
+
     def unknown_starttag(self, tag, attrs):
         attrs = self.normalize_attrs(attrs)
         for i, (key, value) in enumerate(attrs):
             if (tag, key) in self.relative_uris:
                 attrs[i] = (key, self.resolve_uri(value))
+            elif tag in {"img", "source"} and key == "srcset":
+                attrs[i] = (key, self.resolve_srcset(value))
         super().unknown_starttag(tag, attrs)
 
 
diff --git a/tests/test_srcset_candidates.py b/tests/test_srcset_candidates.py
new file mode 100644
index 00000000..485a0530
--- /dev/null
+++ b/tests/test_srcset_candidates.py
@@ -0,0 +1,63 @@
+import pytest
+
+from feedparser.urls import srcset_candidates
+
+
+def test_empty():
+    assert srcset_candidates("") == []
+    assert srcset_candidates("    \n") == []
+
+
+def test_default():
+    assert srcset_candidates("/1x.jpg") == [("/1x.jpg", "")]
+
+
+def test_pixel_density_descriptor_one():
+    assert srcset_candidates("/1x.jpg 1x") == [("/1x.jpg", "1x")]
+
+
+def test_pixel_density_descriptor_two():
+    assert srcset_candidates("/1x.jpg 1x,/2x.jpg\t2.0x") == [
+        ("/1x.jpg", "1x"),
+        ("/2x.jpg", "2.0x"),
+    ]
+
+
+def test_pixel_density_descriptor_three():
+    assert srcset_candidates("/1x.jpg, /2x.jpg  2x  , /3x.jpg 3x  ") == [
+        ("/1x.jpg", ""),
+        ("/2x.jpg", "2x"),
+        ("/3x.jpg", "3x"),
+    ]
+
+
+@pytest.mark.parametrize(
+    "pd", ["1x", "1.0x", "9.5x", "36x", "39.95x", "100x", "1e1x", "2E2x"]
+)
+def test_pixel_density_descriptor_floats(pd):
+    """A pixel density descriptor allows all the valid float formats."""
+    assert [("/foo.jpg", pd)] == srcset_candidates("/foo.jpg " + pd)
+
+
+def test_url_comma():
+    """A URL containing a comma is not broken."""
+    assert srcset_candidates(" /,.jpg 6x,\n /,,,,.webp \t1e100x") == [
+        ("/,.jpg", "6x"),
+        ("/,,,,.webp", "1e100x"),
+    ]
+
+
+def test_width_one():
+    assert srcset_candidates("/a.png 600w") == [("/a.png", "600w")]
+
+
+def test_width_two():
+    assert srcset_candidates("a.jpg 123w, b.jpg 1234w") == [
+        ("a.jpg", "123w"),
+        ("b.jpg", "1234w"),
+    ]
+
+
+@pytest.mark.parametrize("pd", ["1.5w", "9000X", "-23w", "-60x"])
+def test_invalid(pd):
+    assert srcset_candidates("/x.gif " + pd) == []
diff --git a/tests/wellformed/base/http_entry_content_base_srcset.xml b/tests/wellformed/base/http_entry_content_base_srcset.xml
new file mode 100644
index 00000000..4de53047
--- /dev/null
+++ b/tests/wellformed/base/http_entry_content_base_srcset.xml
@@ -0,0 +1,10 @@
+<!--
+Description: entry content srcset relative to document URI
+Expect:      not bozo and entries[0]['content'][0]['value'] == '<img srcset="http://127.0.0.1:8097/rel/img.png, http://127.0.0.1:8097/rel/img.2x.png 2x" />'
+-->
+<feed version="0.3" xmlns="http://purl.org/atom/ns#">
+<entry>
+    <content type="text/html" mode="escaped">&lt;img srcset="/rel/img.png, /rel/img.2x.png 2x"&gt;</content>
+</entry>
+</feed>
+

From ca863e5be8795204c2c9f701b3fc39d91ba562c2 Mon Sep 17 00:00:00 2001
From: Tom Most <twm@freecog.net>
Date: Tue, 27 May 2025 22:19:15 -0700
Subject: [PATCH 3/4] Add changelog fragment

---
 changelog.d/20250527_twm_srcset.rst | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 changelog.d/20250527_twm_srcset.rst

diff --git a/changelog.d/20250527_twm_srcset.rst b/changelog.d/20250527_twm_srcset.rst
new file mode 100644
index 00000000..725afb24
--- /dev/null
+++ b/changelog.d/20250527_twm_srcset.rst
@@ -0,0 +1,4 @@
+Added
+-----
+
+*   Resolve relative URLs in ``srcset`` attributes and pass through ``srcset`` when sanitizing.

From 95ad1e542a1de2e58ec78dcea1f9b9281dda364b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 28 May 2025 06:05:27 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 feedparser/urls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/feedparser/urls.py b/feedparser/urls.py
index d33a8d52..9679d530 100644
--- a/feedparser/urls.py
+++ b/feedparser/urls.py
@@ -212,7 +212,7 @@ def resolve_srcset(self, srcset):
         for uri, desc in srcset_candidates(srcset):
             uri = self.resolve_uri(uri)
             if desc:
-                candidates.append("{} {}".format(uri, desc))
+                candidates.append(f"{uri} {desc}")
             else:
                 candidates.append(uri)
         return ", ".join(candidates)