From f7cfbc389bbdeeaf90d4379894f70a4e69446b78 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Tue, 29 Jul 2025 21:20:44 +0100
Subject: [PATCH] Improve use of `url2pathname()`

Call `_clean_file_url_path()` (and thus `url2pathname()`) with the complete URL
path, rather than with the fragments left after splitting it on `/@|%2f/`.

This lays the groundwork for using pip's own URL utilities from `link.py`.
---
 ...fa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst |  0
 src/pip/_internal/models/link.py              | 23 ++++++++-----------
 2 files changed, 9 insertions(+), 14 deletions(-)
 create mode 100644 news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst

diff --git a/news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst b/news/d8d59afa-40e6-43fc-a9c4-b619c55edfc2.trivial.rst
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 2e2c0f836ac..aa7c8ae3793 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import functools
-import itertools
 import logging
 import os
 import posixpath
@@ -19,7 +18,6 @@
 from pip._internal.utils.filetypes import WHEEL_EXTENSION
 from pip._internal.utils.hashes import Hashes
 from pip._internal.utils.misc import (
-    pairwise,
     redact_auth_from_url,
     split_auth_from_netloc,
     splitext,
@@ -113,12 +111,12 @@ def supported_hashes(hashes: dict[str, str] | None) -> dict[str, str] | None:
     return hashes
 
 
-def _clean_url_path_part(part: str) -> str:
+def _clean_url_path_part(part: str, safe: str = "/") -> str:
     """
     Clean a "part" of a URL path (i.e. after splitting on "@" characters).
     """
     # We unquote prior to quoting to make sure nothing is double quoted.
-    return urllib.parse.quote(urllib.parse.unquote(part))
+    return urllib.parse.quote(urllib.parse.unquote(part), safe)
 
 
 def _clean_file_url_path(part: str) -> str:
@@ -140,6 +138,7 @@ def _clean_file_url_path(part: str) -> str:
 
 # percent-encoded:                   /
 _reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
+_escaped_chars_re = re.compile("---PIP-(%40|/)-PIP---")
 
 
 def _clean_url_path(path: str, is_local_path: bool) -> str:
@@ -151,17 +150,13 @@ def _clean_url_path(path: str, is_local_path: bool) -> str:
     else:
         clean_func = _clean_url_path_part
 
-    # Split on the reserved characters prior to cleaning so that
+    # Tag the reserved characters prior to cleaning so that
     # revision strings in VCS URLs are properly preserved.
-    parts = _reserved_chars_re.split(path)
-
-    cleaned_parts = []
-    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
-        cleaned_parts.append(clean_func(to_clean))
-        # Normalize %xx escapes (e.g. %2f -> %2F)
-        cleaned_parts.append(reserved.upper())
-
-    return "".join(cleaned_parts)
+    path = _reserved_chars_re.sub(r"---PIP-\1-PIP---", path)
+    path = clean_func(path)
+    # Untag and restore the reserved characters.
+    path = _escaped_chars_re.sub(lambda m: _clean_url_path_part(m[1], safe="@"), path)
+    return path
 
 
 def _ensure_quoted_url(url: str) -> str: