From f7cfbc389bbdeeaf90d4379894f70a4e69446b78 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 29 Jul 2025 21:20:44 +0100 Subject: [PATCH] Improve use of `url2pathname()` Call `_clean_file_url_path()` (thence `url2pathname()`) with a complete URL path, rather than parts of the path after splitting on `/@|%2f/`. This lays the groundwork for using pip's own URL utilities from `link.py`. --- ...fa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst | 0 src/pip/_internal/models/link.py | 23 ++++++++----------- 2 files changed, 9 insertions(+), 14 deletions(-) create mode 100644 news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst diff --git a/news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst b/news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 2e2c0f836ac..aa7c8ae3793 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,7 +1,6 @@ from __future__ import annotations import functools -import itertools import logging import os import posixpath @@ -19,7 +18,6 @@ from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( - pairwise, redact_auth_from_url, split_auth_from_netloc, splitext, @@ -113,12 +111,12 @@ def supported_hashes(hashes: dict[str, str] | None) -> dict[str, str] | None: return hashes -def _clean_url_path_part(part: str) -> str: +def _clean_url_path_part(part: str, safe: str = "/") -> str: """ Clean a "part" of a URL path (i.e. after splitting on "@" characters). """ # We unquote prior to quoting to make sure nothing is double quoted. - return urllib.parse.quote(urllib.parse.unquote(part)) + return urllib.parse.quote(urllib.parse.unquote(part), safe) def _clean_file_url_path(part: str) -> str: @@ -140,6 +138,7 @@ def _clean_file_url_path(part: str) -> str: # percent-encoded: / _reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) +_escaped_chars_re = re.compile("---PIP-(%40|/)-PIP---") def _clean_url_path(path: str, is_local_path: bool) -> str: @@ -151,17 +150,13 @@ def _clean_url_path(path: str, is_local_path: bool) -> str: else: clean_func = _clean_url_path_part - # Split on the reserved characters prior to cleaning so that + # Tag the reserved characters prior to cleaning so that # revision strings in VCS URLs are properly preserved. - parts = _reserved_chars_re.split(path) - - cleaned_parts = [] - for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) - # Normalize %xx escapes (e.g. %2f -> %2F) - cleaned_parts.append(reserved.upper()) - - return "".join(cleaned_parts) + path = _reserved_chars_re.sub(r"---PIP-\1-PIP---", path) + path = clean_func(path) + # Untag and restore the reserved characters. + path = _escaped_chars_re.sub(lambda m: _clean_url_path_part(m[1], safe="@"), path) + return path def _ensure_quoted_url(url: str) -> str: