From 282408056e0d645e7e13e8d919ae77c2693f56c7 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Mon, 20 Apr 2026 10:09:37 -0400 Subject: [PATCH] fix: make Requirements/Markers pickle-safe Signed-off-by: Henry Schreiner --- pyproject.toml | 1 + src/packaging/_parser.py | 21 ++++++ src/packaging/markers.py | 22 ++++++ src/packaging/requirements.py | 21 ++++++ tests/test_markers.py | 90 ++++++++++++++++++++++++ tests/test_requirements.py | 126 ++++++++++++++++++++++++++++++++++ 6 files changed, 281 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 7b2a3dbe6..4dfaf47e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ exclude = [ iMatix = "iMatix" ANDed = "ANDed" ORed = "ORed" +ba = "ba" [tool.typos.default.extend-words] dynamc = "dynamc" diff --git a/src/packaging/_parser.py b/src/packaging/_parser.py index f6c1f5cd2..93d9951af 100644 --- a/src/packaging/_parser.py +++ b/src/packaging/_parser.py @@ -27,6 +27,27 @@ def __repr__(self) -> str: def serialize(self) -> str: raise NotImplementedError + def __getstate__(self) -> str: + # Return just the value string for compactness and stability. + return self.value + + def __setstate__(self, state: object) -> None: + if isinstance(state, str): + # New format (26.2+): just the value string. + self.value = state + return + if isinstance(state, tuple) and len(state) == 2: + # Old format (packaging <= 26.1, __slots__): (None, {slot: value}). + _, slot_dict = state + if isinstance(slot_dict, dict) and "value" in slot_dict: + self.value = slot_dict["value"] + return + if isinstance(state, dict) and "value" in state: + # Old format (packaging <= 25.0, no __slots__): plain __dict__.
+ self.value = state["value"] + return + raise TypeError(f"Cannot restore {self.__class__.__name__} from {state!r}") + class Variable(Node): __slots__ = () diff --git a/src/packaging/markers.py b/src/packaging/markers.py index 65d7f330b..6655f2f86 100644 --- a/src/packaging/markers.py +++ b/src/packaging/markers.py @@ -381,6 +381,28 @@ def __eq__(self, other: object) -> bool: return str(self) == str(other) + def __getstate__(self) -> str: + # Return the marker expression string for compactness and stability. + # Internal Node objects are excluded; the string is re-parsed on load. + return str(self) + + def __setstate__(self, state: object) -> None: + if isinstance(state, str): + # New format (26.2+): just the marker expression string. + self._markers = _normalize_extra_values(_parse_marker(state)) + return + if isinstance(state, dict) and "_markers" in state: + # Old format (packaging <= 26.0, no __slots__): plain __dict__. + self._markers = state["_markers"] + return + if isinstance(state, tuple) and len(state) == 2: + # Old format (packaging <= 26.1, __slots__): (None, {slot: value}). + _, slot_dict = state + if isinstance(slot_dict, dict) and "_markers" in slot_dict: + self._markers = slot_dict["_markers"] + return + raise TypeError(f"Cannot restore Marker from {state!r}") + def __and__(self, other: Marker) -> Marker: if not isinstance(other, Marker): return NotImplemented diff --git a/src/packaging/requirements.py b/src/packaging/requirements.py index 18640d438..811beb45f 100644 --- a/src/packaging/requirements.py +++ b/src/packaging/requirements.py @@ -73,6 +73,27 @@ def _iter_parts(self, name: str) -> Iterator[str]: if self.marker: yield f"; {self.marker}" + def __getstate__(self) -> str: + # Return the requirement string for compactness and stability. + # Re-parsed on load to reconstruct all fields. + return str(self) + + def __setstate__(self, state: object) -> None: + if isinstance(state, str): + # New format (26.2+): just the requirement string.
+ tmp = Requirement(state) + self.name = tmp.name + self.url = tmp.url + self.extras = tmp.extras + self.specifier = tmp.specifier + self.marker = tmp.marker + return + if isinstance(state, dict): + # Old format (packaging <= 26.1, no __slots__): plain __dict__. + self.__dict__.update(state) + return + raise TypeError(f"Cannot restore Requirement from {state!r}") + def __str__(self) -> str: return "".join(self._iter_parts(self.name)) diff --git a/tests/test_markers.py b/tests/test_markers.py index eda41e409..08ebd982f 100644 --- a/tests/test_markers.py +++ b/tests/test_markers.py @@ -6,6 +6,7 @@ import itertools import os +import pickle import platform import sys from typing import Any, NamedTuple, cast @@ -564,3 +565,92 @@ def test_evaluation_of_combined_markers() -> None: & Marker('platform_system == "Linux"') ) assert m.evaluate(env) is True + + +@pytest.mark.parametrize( + "marker_str", + [ + 'python_version >= "3.8"', + 'python_version >= "3.8" and os_name == "posix"', + 'python_version >= "3.8" or platform_system == "Windows"', + 'extra == "security"', + ], +) +def test_pickle_marker_roundtrip(marker_str: str) -> None: + # Make sure equality and str() work between a pickle/unpickle round trip. + m = Marker(marker_str) + loaded = pickle.loads(pickle.dumps(m)) + assert loaded == m + assert str(loaded) == str(m) + + +def test_pickle_marker_setstate_rejects_invalid_state() -> None: + # Cover the TypeError branches in __setstate__ for invalid input. + m = Marker.__new__(Marker) + with pytest.raises(TypeError, match="Cannot restore Marker"): + m.__setstate__(12345) + with pytest.raises(TypeError, match="Cannot restore Marker"): + m.__setstate__((1, 2, 3)) # Wrong tuple length + + +# Pickle bytes generated with packaging==26.1, Python 3.13.1, pickle protocol 2. +# Format: __slots__ (no __getstate__), state is (None, {slot: value}). 
+_PACKAGING_26_1_PICKLE_MARKER_PYTHON_VERSION_GE_3_8 = ( + b"\x80\x02cpackaging.markers\nMarker\nq\x00)\x81q\x01N}q\x02X\x08\x00" + b"\x00\x00_markersq\x03]q\x04cpackaging._parser\nVariable\nq\x05)\x81" + b"q\x06N}q\x07X\x05\x00\x00\x00valueq\x08X\x0e\x00\x00\x00python_vers" + b"ionq\ts\x86q\nbcpackaging._parser\nOp\nq\x0b)\x81q\x0cN}q\rh\x08X\x02" + b"\x00\x00\x00>=q\x0es\x86q\x0fbcpackaging._parser\nValue\nq\x10)\x81q" + b"\x11N}q\x12h\x08X\x03\x00\x00\x003.8q\x13s\x86q\x14b\x87q\x15as\x86" + b"q\x16b." +) + + +# Pickle bytes generated with packaging==26.0, Python 3.13.1, pickle protocol 2. +# Format: no __slots__ on Marker (no __getstate__), state is plain __dict__. +_PACKAGING_26_0_PICKLE_MARKER_PYTHON_VERSION_GE_3_8 = ( + b"\x80\x02cpackaging.markers\nMarker\nq\x00)\x81q\x01}q\x02X\x08\x00\x00" + b"\x00_markersq\x03]q\x04cpackaging._parser\nVariable\nq\x05)\x81q\x06N}" + b"q\x07X\x05\x00\x00\x00valueq\x08X\x0e\x00\x00\x00python_versionq\ts\x86" + b"q\nbcpackaging._parser\nOp\nq\x0b)\x81q\x0cN}q\rh\x08X\x02\x00\x00" + b"\x00>=q\x0es\x86q\x0fbcpackaging._parser\nValue\nq\x10)\x81q\x11N}q\x12" + b"h\x08X\x03\x00\x00\x003.8q\x13s\x86q\x14b\x87q\x15asb." +) + +# Format: plain __dict__, Node objects also in __dict__ format (packaging <= 25.0). +# Now loadable because Node classes have __getstate__/__setstate__. +_PACKAGING_25_0_PICKLE_MARKER_PYTHON_VERSION_GE_3_8 = ( + b"\x80\x02cpackaging.markers\nMarker\nq\x00)\x81q\x01}q\x02X\x08\x00\x00" + b"\x00_markersq\x03]q\x04cpackaging._parser\nVariable\nq\x05)\x81q\x06}q\x07" + b"X\x05\x00\x00\x00valueq\x08X\x0e\x00\x00\x00python_versionq\tsbcpackaging" + b"._parser\nOp\nq\n)\x81q\x0b}q\x0ch\x08X\x02\x00\x00\x00>=q\rsbcpackaging" + b"._parser\nValue\nq\x0e)\x81q\x0f}q\x10h\x08X\x03\x00\x00\x003.8q\x11sb\x87" + b"q\x12asb." +) + + +def test_pickle_marker_old_format_loads() -> None: + # Verify that Marker pickles created with packaging <= 26.1 (__slots__, + # no __getstate__) can be loaded and produce correct Marker objects.
+ m = pickle.loads(_PACKAGING_26_1_PICKLE_MARKER_PYTHON_VERSION_GE_3_8) + assert isinstance(m, Marker) + assert str(m) == 'python_version >= "3.8"' + assert m == Marker('python_version >= "3.8"') + + +def test_pickle_marker_26_0_format_loads() -> None: + # Verify that Marker pickles created with packaging 26.0 (plain __dict__) + # can be loaded and produce correct Marker objects. + m = pickle.loads(_PACKAGING_26_0_PICKLE_MARKER_PYTHON_VERSION_GE_3_8) + assert isinstance(m, Marker) + assert str(m) == 'python_version >= "3.8"' + assert m == Marker('python_version >= "3.8"') + + +def test_pickle_marker_25_0_format_loads() -> None: + # Verify that Marker pickles created with packaging 25.0 (with Node __dict__) + # can now be loaded thanks to __getstate__/__setstate__ in Node classes. + m = pickle.loads(_PACKAGING_25_0_PICKLE_MARKER_PYTHON_VERSION_GE_3_8) + assert isinstance(m, Marker) + assert str(m) == 'python_version >= "3.8"' + assert m == Marker('python_version >= "3.8"') diff --git a/tests/test_requirements.py b/tests/test_requirements.py index 96da8465d..8db42484b 100644 --- a/tests/test_requirements.py +++ b/tests/test_requirements.py @@ -4,6 +4,8 @@ from __future__ import annotations +import pickle + import pytest from packaging.markers import Marker @@ -705,3 +707,127 @@ def test_different_reqs_different_hashes(self, dep1: str, dep2: str) -> None: def test_compare_with_string(self) -> None: assert Requirement("packaging>=21.3") != "packaging>=21.3" + + +@pytest.mark.parametrize( + "req_str", + [ + "requests", + "requests>=2.0", + "requests>=2.0,<3.0", + 'requests>=2.0; python_version >= "3.8"', + "requests[security,socks]>=2.0", + "my-pkg @ https://example.com", + 'Django>=1.4.2,!=1.5.0,!=1.5.1; python_version < "3"', + ], +) +def test_pickle_requirement_roundtrip(req_str: str) -> None: + # Make sure equality and str() work between a pickle/unpickle round trip. 
+ r = Requirement(req_str) + loaded = pickle.loads(pickle.dumps(r)) + assert loaded == r + assert str(loaded) == str(r) + + +def test_pickle_requirement_setstate_rejects_invalid_state() -> None: + # Cover the TypeError branches in __setstate__ for invalid input. + r = Requirement.__new__(Requirement) + with pytest.raises(TypeError, match="Cannot restore Requirement"): + r.__setstate__(12345) + with pytest.raises(TypeError, match="Cannot restore Requirement"): + r.__setstate__((1, 2, 3)) + + +# Pickle bytes generated with packaging==26.1, Python 3.13.1, pickle protocol 2. +# Format: plain __dict__ (no __getstate__). Contains nested SpecifierSet and +# Marker objects also pickled in their old format. +_PACKAGING_26_1_PICKLE_REQUESTS_GE_2_0_WITH_MARKER = ( + b"\x80\x02cpackaging.requirements\nRequirement\nq\x00)\x81q\x01}q\x02(" + b"X\x04\x00\x00\x00nameq\x03X\x08\x00\x00\x00requestsq\x04X\x03\x00" + b"\x00\x00urlq\x05NX\x06\x00\x00\x00extrasq\x06c__builtin__\nset\nq\x07" + b"]q\x08\x85q\tRq\nX\t\x00\x00\x00specifierq\x0bcpackaging.specifiers\n" + b"SpecifierSet\nq\x0c)\x81q\rcpackaging.specifiers\nSpecifier\nq\x0e)\x81" + b"q\x0fX\x02\x00\x00\x00>=q\x10X\x03\x00\x00\x002.0q\x11\x86q\x12N\x86" + b"q\x13b\x85q\x14N\x86q\x15bX\x06\x00\x00\x00markerq\x16cpackaging." + b"markers\nMarker\nq\x17)\x81q\x18N}q\x19X\x08\x00\x00\x00_markersq\x1a" + b"]q\x1bcpackaging._parser\nVariable\nq\x1c)\x81q\x1dN}q\x1eX\x05\x00" + b"\x00\x00valueq\x1fX\x0e\x00\x00\x00python_versionq s\x86q!b" + b'cpackaging._parser\nOp\nq")\x81q#N}q$h\x1fX\x02\x00\x00\x00>=q%s' + b"\x86q&bcpackaging._parser\nValue\nq')\x81q(N}q)h\x1fX\x03\x00\x00" + b"\x003.8q*s\x86q+b\x87q,as\x86q-bub." +) + + +# Pickle bytes generated with packaging==26.0, Python 3.13.1, pickle protocol 2. +# Format: plain __dict__ (no __getstate__). 
+_PACKAGING_26_0_PICKLE_REQUESTS_GE_2_0 = ( + b"\x80\x02cpackaging.requirements\nRequirement\nq\x00)\x81q\x01}q\x02(" + b"X\x04\x00\x00\x00nameq\x03X\x08\x00\x00\x00requestsq\x04X\x03\x00" + b"\x00\x00urlq\x05NX\x06\x00\x00\x00extrasq\x06c__builtin__\nset\nq\x07" + b"]q\x08\x85q\tRq\nX\t\x00\x00\x00specifierq\x0bcpackaging.specifiers\n" + b"SpecifierSet\nq\x0c)\x81q\rN}q\x0e(X\x0c\x00\x00\x00_prereleasesq\x0f" + b"NX\x06\x00\x00\x00_specsq\x10c__builtin__\nfrozenset\nq\x11]q\x12cpackag" + b"ing.specifiers\nSpecifier\nq\x13)\x81q\x14N}q\x15(h\x0fNX\x05\x00\x00" + b"\x00_specq\x16X\x02\x00\x00\x00>=q\x17X\x03\x00\x00\x002.0q\x18\x86q" + b"\x19X\r\x00\x00\x00_spec_versionq\x1ah\x18cpackaging.version\nVersion\n" + b"q\x1b)\x81q\x1cN}q\x1d(X\x04\x00\x00\x00_devq\x1eNX\x06\x00\x00\x00_epo" + b"chq\x1fK\x00X\n\x00\x00\x00_key_cacheq NX\x06\x00\x00\x00_localq!NX\x05" + b'\x00\x00\x00_postq"NX\x04\x00\x00\x00_preq#NX\x08\x00\x00\x00_releaseq$' + b"K\x02K\x00\x86q%u\x86q&b\x86q'u\x86q(ba\x85q)Rq*u\x86q+bX\x06\x00\x00" + b"\x00markerq,Nub." +) + + +# Pickle bytes generated with packaging==25.0, Python 3.13.1, pickle protocol 2. +# Format: plain __dict__ (no __getstate__). +_PACKAGING_25_0_PICKLE_REQUESTS_GE_2_0 = ( + b"\x80\x02cpackaging.requirements\nRequirement\nq\x00)\x81q\x01}q\x02(" + b"X\x04\x00\x00\x00nameq\x03X\x08\x00\x00\x00requestsq\x04X\x03\x00" + b"\x00\x00urlq\x05NX\x06\x00\x00\x00extrasq\x06c__builtin__\nset\nq\x07" + b"]q\x08\x85q\tRq\nX\t\x00\x00\x00specifierq\x0bcpackaging.specifiers\n" + b"SpecifierSet\nq\x0c)\x81q\r}q\x0e(X\x06\x00\x00\x00_specsq\x0fc__bui" + b"ltin__\nfrozenset\nq\x10]q\x11cpackaging.specifiers\nSpecifier\nq\x12)\x81" + b"q\x13}q\x14(X\x05\x00\x00\x00_specq\x15X\x02\x00\x00\x00>=q\x16X\x03\x00" + b"\x00\x002.0q\x17\x86q\x18X\x0c\x00\x00\x00_prereleasesq\x19Nuba\x85q\x1a" + b"Rq\x1bh\x19NubX\x06\x00\x00\x00markerq\x1cNub." 
+) + + +def test_pickle_requirement_old_format_loads() -> None: + # Verify that Requirement pickles created with packaging <= 26.1 (plain + # __dict__, no __getstate__) can be loaded and produce correct objects. + r = pickle.loads(_PACKAGING_26_1_PICKLE_REQUESTS_GE_2_0_WITH_MARKER) + assert isinstance(r, Requirement) + assert r.name == "requests" + assert r.url is None + assert r.extras == set() + assert str(r.specifier) == ">=2.0" + assert r.marker is not None + assert str(r.marker) == 'python_version >= "3.8"' + assert r == Requirement('requests>=2.0; python_version >= "3.8"') + + +def test_pickle_requirement_26_0_format_loads() -> None: + # Verify that Requirement pickles created with packaging 26.0 (plain __dict__) + # can be loaded and produce correct objects. + r = pickle.loads(_PACKAGING_26_0_PICKLE_REQUESTS_GE_2_0) + assert isinstance(r, Requirement) + assert r.name == "requests" + assert r.url is None + assert r.extras == set() + assert str(r.specifier) == ">=2.0" + assert r.marker is None + assert r == Requirement("requests>=2.0") + + +def test_pickle_requirement_25_0_format_loads() -> None: + # Verify that Requirement pickles created with packaging 25.0 (plain __dict__) + # can be loaded and produce correct objects. + r = pickle.loads(_PACKAGING_25_0_PICKLE_REQUESTS_GE_2_0) + assert isinstance(r, Requirement) + assert r.name == "requests" + assert r.url is None + assert r.extras == set() + assert str(r.specifier) == ">=2.0" + assert r.marker is None + assert r == Requirement("requests>=2.0")