From eac855f514446411a2561504db8066cd49592406 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 19:16:44 +0100 Subject: [PATCH 01/17] Stop using `pathname2url()` and `url2pathname()` in `urls` `urllib.request.pathname2url()` and `url2pathname()` are: - Fine in 3.14 - Awkward in latest 3.13 and 3.12 - Buggy in previous versions In this patch we provide our own conversion functions (copy-pasted from the Python 3.14 source tree), rather than calling `urllib.request` functions. This allows us to remove workarounds for upstream bugs. --- ...3b-d572-41f8-b080-58cebf24b89a.trivial.rst | 0 src/pip/_internal/utils/urls.py | 61 +++++++++++-------- 2 files changed, 34 insertions(+), 27 deletions(-) create mode 100644 news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst diff --git a/news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst b/news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 9f34f882a1a..26ce5ca7cc3 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -1,7 +1,6 @@ import os -import string +import sys import urllib.parse -import urllib.request from .compat import WINDOWS @@ -12,8 +11,24 @@ def path_to_url(path: str) -> str: quoted path parts. """ path = os.path.normpath(os.path.abspath(path)) - url = urllib.parse.urljoin("file:", urllib.request.pathname2url(path)) - return url + if WINDOWS: + path = path.replace("\\", "/") + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + + drive, tail = os.path.splitdrive(path) + if drive: + if drive[:4] == "//?/": + drive = drive[4:] + if drive[:4].upper() == "UNC/": + drive = "//" + drive[4:] + if drive[1:] == ":": + drive = "///" + drive + drive = urllib.parse.quote(drive, "/:", encoding, errors) + elif tail.startswith("/"): + tail = "//" + tail + tail = urllib.parse.quote(tail, "/", encoding, errors) + return "file:" + drive + tail def url_to_path(url: str) -> str: @@ -26,30 +41,22 @@ def url_to_path(url: str) -> str: _, netloc, path, _, _ = urllib.parse.urlsplit(url) - if not netloc or netloc == "localhost": - # According to RFC 8089, same as empty authority. - netloc = "" - elif WINDOWS: - # If we have a UNC path, prepend UNC share notation. - netloc = "\\\\" + netloc - else: + if WINDOWS: + if netloc and netloc != "localhost": + path = "//" + netloc + path + elif path[:3] == "///": + path = path[1:] + else: + if path[:1] == "/" and path[2:3] in (":", "|"): + path = path[1:] + if path[1:2] == "|": + path = path[:1] + ":" + path[2:] + path = path.replace("/", "\\") + elif netloc and netloc != "localhost": raise ValueError( f"non-local file URIs are not supported on this platform: {url!r}" ) - path = urllib.request.url2pathname(netloc + path) - - # On Windows, urlsplit parses the path as something like "/C:/Users/foo". - # This creates issues for path-related functions like io.open(), so we try - # to detect and strip the leading slash. - if ( - WINDOWS - and not netloc # Not UNC. - and len(path) >= 3 - and path[0] == "/" # Leading slash to strip. - and path[1] in string.ascii_letters # Drive letter. - and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path. - ): - path = path[1:] - - return path + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + return urllib.parse.unquote(path, encoding, errors) From 3fb3599c2446b177ff807cc0c7abce29e00de22e Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 20:06:21 +0100 Subject: [PATCH 02/17] Add `clean_file_url()` utility. --- src/pip/_internal/models/link.py | 30 +++++------------------ src/pip/_internal/utils/urls.py | 20 +++++++++++++-- src/pip/_internal/vcs/git.py | 19 +++------------ tests/lib/__init__.py | 13 ---------- tests/unit/test_collector.py | 42 +++----------------------------- tests/unit/test_urls.py | 35 +++++++++++++++++++++----- 6 files changed, 59 insertions(+), 100 deletions(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 87651c76e25..20c7ad35333 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -24,7 +24,7 @@ split_auth_from_netloc, splitext, ) -from pip._internal.utils.urls import path_to_url, url_to_path +from pip._internal.utils.urls import clean_file_url, path_to_url, url_to_path if TYPE_CHECKING: from pip._internal.index.collector import IndexContent @@ -121,39 +121,21 @@ def _clean_url_path_part(part: str) -> str: return urllib.parse.quote(urllib.parse.unquote(part)) -def _clean_file_url_path(part: str) -> str: - """ - Clean the first part of a URL path that corresponds to a local - filesystem path (i.e. the first part after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - # Also, on Windows the path part might contain a drive letter which - # should not be quoted. On Linux where drive letters do not - # exist, the colon should be quoted. We rely on urllib.request - # to do the right thing here. - return urllib.request.pathname2url(urllib.request.url2pathname(part)) - - # percent-encoded: / _reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) -def _clean_url_path(path: str, is_local_path: bool) -> str: +def _clean_url_path(path: str) -> str: """ Clean the path portion of a URL. """ - if is_local_path: - clean_func = _clean_file_url_path - else: - clean_func = _clean_url_path_part - # Split on the reserved characters prior to cleaning so that # revision strings in VCS URLs are properly preserved. parts = _reserved_chars_re.split(path) cleaned_parts = [] for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) + cleaned_parts.append(_clean_url_path_part(to_clean)) # Normalize %xx escapes (e.g. %2f -> %2F) cleaned_parts.append(reserved.upper()) @@ -166,12 +148,12 @@ def _ensure_quoted_url(url: str) -> str: For example, if ' ' occurs in the URL, it will be replaced with "%20", and without double-quoting other characters. """ + if url.startswith("file:"): + return clean_file_url(url) # Split the URL into parts according to the general structure # `scheme://netloc/path?query#fragment`. result = urllib.parse.urlsplit(url) - # If the netloc is empty, then the URL refers to a local filesystem path. - is_local_path = not result.netloc - path = _clean_url_path(result.path, is_local_path=is_local_path) + path = _clean_url_path(result.path) return urllib.parse.urlunsplit(result._replace(path=path)) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 26ce5ca7cc3..5bdd5b65fad 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -5,12 +5,13 @@ from .compat import WINDOWS -def path_to_url(path: str) -> str: +def path_to_url(path: str, normalize_path: bool = True) -> str: """ Convert a path to a file: URL. The path will be made absolute and have quoted path parts. """ - path = os.path.normpath(os.path.abspath(path)) + if normalize_path: + path = os.path.normpath(os.path.abspath(path)) if WINDOWS: path = path.replace("\\", "/") encoding = sys.getfilesystemencoding() @@ -60,3 +61,18 @@ def url_to_path(url: str) -> str: encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return urllib.parse.unquote(path, encoding, errors) + + +def clean_file_url(url: str) -> str: + """ + Fix up quoting and leading slashes in the given file: URL. + + e.g. 'file:/c:/foo bar' --> 'file:///c:/foo%20bar'. + """ + tok = "-_-PIP_AT_SYMBOL_-_" + orig_url = url.replace("@", tok) + tidy_url = path_to_url(url_to_path(orig_url), normalize_path=False) + orig_parts = urllib.parse.urlsplit(orig_url) + tidy_parts = urllib.parse.urlsplit(tidy_url) + merged_url = urllib.parse.urlunsplit(tidy_parts[:3] + orig_parts[3:]) + return merged_url.replace(tok, "@") diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 1769da791cb..f387f171d78 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -4,14 +4,13 @@ import os.path import pathlib import re -import urllib.parse -import urllib.request from dataclasses import replace from typing import Any from pip._internal.exceptions import BadCommand, InstallationError from pip._internal.utils.misc import HiddenText, display_path, hide_url from pip._internal.utils.subprocess import make_command +from pip._internal.utils.urls import clean_file_url from pip._internal.vcs.versioncontrol import ( AuthInfo, RemoteNotFoundError, @@ -22,10 +21,6 @@ vcs, ) -urlsplit = urllib.parse.urlsplit -urlunsplit = urllib.parse.urlunsplit - - logger = logging.getLogger(__name__) @@ -502,16 +497,8 @@ def get_url_rev_and_auth(cls, url: str) -> tuple[str, str | None, AuthInfo]: """ # Works around an apparent Git bug # (see https://article.gmane.org/gmane.comp.version-control.git/146500) - scheme, netloc, path, query, fragment = urlsplit(url) - if scheme.endswith("file"): - initial_slashes = path[: -len(path.lstrip("/"))] - newpath = initial_slashes + urllib.request.url2pathname(path).replace( - "\\", "/" - ).lstrip("/") - after_plus = scheme.find("+") + 1 - url = scheme[:after_plus] + urlunsplit( - (scheme[after_plus:], netloc, newpath, query, fragment), - ) + if url.startswith("git+file:"): + url = "git+" + clean_file_url(url[4:]) if "://" not in url: assert "file:" not in url diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index d67271e1885..5c050448524 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -17,7 +17,6 @@ from textwrap import dedent from typing import Any, AnyStr, Callable, Literal, Protocol, Union, cast from urllib.parse import urlparse, urlunparse -from urllib.request import pathname2url from zipfile import ZipFile import pytest @@ -1385,15 +1384,3 @@ def __call__( sys.platform != "win32" or has_new_urlun_behavior, reason="testing windows behavior for older CPython", ) - -# Trailing slashes are now preserved on Windows, matching POSIX behaviour. -# BPO: https://github.com/python/cpython/issues/126212 -does_pathname2url_preserve_trailing_slash = pathname2url("C:/foo/").endswith("/") -skip_needs_new_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif( - sys.platform != "win32" or not does_pathname2url_preserve_trailing_slash, - reason="testing windows (pathname2url) behavior for newer CPython", -) -skip_needs_old_pathname2url_trailing_slash_behavior_win = pytest.mark.skipif( - sys.platform != "win32" or does_pathname2url_preserve_trailing_slash, - reason="testing windows (pathname2url) behavior for older CPython", -) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index d95fa10fea3..fd4ab55e05a 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -41,9 +41,7 @@ from tests.lib import ( TestData, make_test_link_collector, - skip_needs_new_pathname2url_trailing_slash_behavior_win, skip_needs_new_urlun_behavior_win, - skip_needs_old_pathname2url_trailing_slash_behavior_win, skip_needs_old_urlun_behavior_win, ) @@ -298,32 +296,8 @@ def test_get_simple_response_dont_log_clear_text_password( ("a %2f b", "a%20%2F%20b"), ], ) -@pytest.mark.parametrize("is_local_path", [True, False]) -def test_clean_url_path(path: str, expected: str, is_local_path: bool) -> None: - assert _clean_url_path(path, is_local_path=is_local_path) == expected - - -@pytest.mark.parametrize( - "path, expected", - [ - # Test a VCS path with a Windows drive letter and revision. - pytest.param( - "/T:/with space/repo.git@1.0", - "///T:/with%20space/repo.git@1.0", - marks=pytest.mark.skipif("sys.platform != 'win32'"), - ), - # Test a VCS path with a Windows drive letter and revision, - # running on non-windows platform. - pytest.param( - "/T:/with space/repo.git@1.0", - "/T%3A/with%20space/repo.git@1.0", - marks=pytest.mark.skipif("sys.platform == 'win32'"), - ), - ], -) -def test_clean_url_path_with_local_path(path: str, expected: str) -> None: - actual = _clean_url_path(path, is_local_path=True) - assert actual == expected +def test_clean_url_path(path: str, expected: str) -> None: + assert _clean_url_path(path) == expected @pytest.mark.parametrize( @@ -392,18 +366,9 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None: # removed. pytest.param( "file:///T:/path/with spaces/", - "file:///T:/path/with%20spaces", + "file:///T:/path/with%20spaces/", marks=[ skip_needs_old_urlun_behavior_win, - skip_needs_old_pathname2url_trailing_slash_behavior_win, - ], - ), - pytest.param( - "file:///T:/path/with spaces/", - "file://///T:/path/with%20spaces", - marks=[ - skip_needs_new_urlun_behavior_win, - skip_needs_old_pathname2url_trailing_slash_behavior_win, ], ), pytest.param( @@ -411,7 +376,6 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None: "file://///T:/path/with%20spaces/", marks=[ skip_needs_new_urlun_behavior_win, - skip_needs_new_pathname2url_trailing_slash_behavior_win, ], ), # URL with Windows drive letter, running on non-windows diff --git a/tests/unit/test_urls.py b/tests/unit/test_urls.py index 0c145255080..e63d84c2625 100644 --- a/tests/unit/test_urls.py +++ b/tests/unit/test_urls.py @@ -4,7 +4,7 @@ import pytest -from pip._internal.utils.urls import path_to_url, url_to_path +from pip._internal.utils.urls import clean_file_url, path_to_url, url_to_path @pytest.mark.skipif("sys.platform == 'win32'") @@ -18,8 +18,8 @@ def test_path_to_url_unix() -> None: @pytest.mark.parametrize( "path, url", [ - pytest.param("c:/tmp/file", "file:///C:/tmp/file", id="posix-path"), - pytest.param("c:\\tmp\\file", "file:///C:/tmp/file", id="nt-path"), + pytest.param("c:/tmp/file", "file:///c:/tmp/file", id="posix-path"), + pytest.param("c:\\tmp\\file", "file:///c:/tmp/file", id="nt-path"), ], ) def test_path_to_url_win(path: str, url: str) -> None: @@ -46,13 +46,13 @@ def test_relative_path_to_url_win() -> None: "url,win_expected,non_win_expected", [ ("file:tmp", "tmp", "tmp"), - ("file:c:/path/to/file", r"C:\path\to\file", "c:/path/to/file"), + ("file:c:/path/to/file", r"c:\path\to\file", "c:/path/to/file"), ("file:/path/to/file", r"\path\to\file", "/path/to/file"), ("file://localhost/tmp/file", r"\tmp\file", "/tmp/file"), - ("file://localhost/c:/tmp/file", r"C:\tmp\file", "/c:/tmp/file"), + ("file://localhost/c:/tmp/file", r"c:\tmp\file", "/c:/tmp/file"), ("file://somehost/tmp/file", r"\\somehost\tmp\file", None), ("file:///tmp/file", r"\tmp\file", "/tmp/file"), - ("file:///c:/tmp/file", r"C:\tmp\file", "/c:/tmp/file"), + ("file:///c:/tmp/file", r"c:\tmp\file", "/c:/tmp/file"), ], ) def test_url_to_path(url: str, win_expected: str, non_win_expected: str) -> None: @@ -75,3 +75,26 @@ def test_url_to_path_path_to_url_symmetry_win() -> None: unc_path = r"\\unc\share\path" assert url_to_path(path_to_url(unc_path)) == unc_path + + +@pytest.mark.parametrize( + "url, expected", + [ + # Test a VCS path with a Windows drive letter and revision. + pytest.param( + "file:/T:/with space/repo.git@1.0", + "file:///T:/with%20space/repo.git@1.0", + marks=pytest.mark.skipif("sys.platform != 'win32'"), + ), + # Test a VCS path with a Windows drive letter and revision, + # running on non-windows platform. + pytest.param( + "file:/T:/with space/repo.git@1.0", + "file:///T%3A/with%20space/repo.git@1.0", + marks=pytest.mark.skipif("sys.platform == 'win32'"), + ), + ], +) +def test_clean_file_url(url: str, expected: str) -> None: + actual = clean_file_url(url) + assert actual == expected From 9860f51d0dc43f205871d8d05be0806aee577aa4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 20:29:02 +0100 Subject: [PATCH 03/17] Windows tests fixes --- src/pip/_internal/models/link.py | 5 +++-- src/pip/_internal/utils/urls.py | 13 +++++++------ src/pip/_internal/vcs/git.py | 2 +- tests/lib/__init__.py | 21 --------------------- tests/unit/test_collector.py | 27 ++++----------------------- 5 files changed, 15 insertions(+), 53 deletions(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 20c7ad35333..4181d1e402a 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -148,11 +148,12 @@ def _ensure_quoted_url(url: str) -> str: For example, if ' ' occurs in the URL, it will be replaced with "%20", and without double-quoting other characters. """ - if url.startswith("file:"): - return clean_file_url(url) # Split the URL into parts according to the general structure # `scheme://netloc/path?query#fragment`. result = urllib.parse.urlsplit(url) + # If the netloc is empty, then the URL refers to a local filesystem path + if not result.netloc: + return clean_file_url(url) path = _clean_url_path(result.path) return urllib.parse.urlunsplit(result._replace(path=path)) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 5bdd5b65fad..43145ac79d8 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -36,11 +36,9 @@ def url_to_path(url: str) -> str: """ Convert a file: URL to a path. """ - assert url.startswith( - "file:" - ), f"You can only turn file: urls into filenames (not {url!r})" - - _, netloc, path, _, _ = urllib.parse.urlsplit(url) + scheme, netloc, path, _, _ = urllib.parse.urlsplit(url) + if scheme != "file" and not scheme.endswith("+file"): + raise ValueError(f"You can only turn file: urls into filenames (not {url!r})") if WINDOWS: if netloc and netloc != "localhost": @@ -70,9 +68,12 @@ def clean_file_url(url: str) -> str: e.g. 'file:/c:/foo bar' --> 'file:///c:/foo%20bar'. """ tok = "-_-PIP_AT_SYMBOL_-_" + assert tok not in url orig_url = url.replace("@", tok) tidy_url = path_to_url(url_to_path(orig_url), normalize_path=False) - orig_parts = urllib.parse.urlsplit(orig_url) tidy_parts = urllib.parse.urlsplit(tidy_url) + orig_parts = urllib.parse.urlsplit(orig_url) merged_url = urllib.parse.urlunsplit(tidy_parts[:3] + orig_parts[3:]) + if orig_parts.scheme != "file": + merged_url = orig_parts.scheme + merged_url[4:] return merged_url.replace(tok, "@") diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index f387f171d78..dba49389d77 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -498,7 +498,7 @@ def get_url_rev_and_auth(cls, url: str) -> tuple[str, str | None, AuthInfo]: # Works around an apparent Git bug # (see https://article.gmane.org/gmane.comp.version-control.git/146500) if url.startswith("git+file:"): - url = "git+" + clean_file_url(url[4:]) + url = clean_file_url(url) if "://" not in url: assert "file:" not in url diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py index 5c050448524..01a5955572c 100644 --- a/tests/lib/__init__.py +++ b/tests/lib/__init__.py @@ -16,7 +16,6 @@ from io import BytesIO, StringIO from textwrap import dedent from typing import Any, AnyStr, Callable, Literal, Protocol, Union, cast -from urllib.parse import urlparse, urlunparse from zipfile import ZipFile import pytest @@ -1364,23 +1363,3 @@ def __call__( CertFactory = Callable[[], str] - -# ------------------------------------------------------------------------- -# Accommodations for Windows path and URL changes in recent Python releases -# ------------------------------------------------------------------------- - -# versions containing fix/backport from https://github.com/python/cpython/pull/113563 -# which changed the behavior of `urllib.parse.urlun{parse,split}` -url = "////path/to/file" -has_new_urlun_behavior = url == urlunparse(urlparse(url)) - -# the above change seems to only impact tests on Windows, so just add skips for that -skip_needs_new_urlun_behavior_win = pytest.mark.skipif( - sys.platform != "win32" or not has_new_urlun_behavior, - reason="testing windows behavior for newer CPython", -) - -skip_needs_old_urlun_behavior_win = pytest.mark.skipif( - sys.platform != "win32" or has_new_urlun_behavior, - reason="testing windows behavior for older CPython", -) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index fd4ab55e05a..6b306c53432 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -38,12 +38,7 @@ ) from pip._internal.network.session import PipSession -from tests.lib import ( - TestData, - make_test_link_collector, - skip_needs_new_urlun_behavior_win, - skip_needs_old_urlun_behavior_win, -) +from tests.lib import TestData, make_test_link_collector ACCEPT = ", ".join( [ @@ -367,16 +362,7 @@ def test_clean_url_path(path: str, expected: str) -> None: pytest.param( "file:///T:/path/with spaces/", "file:///T:/path/with%20spaces/", - marks=[ - skip_needs_old_urlun_behavior_win, - ], - ), - pytest.param( - "file:///T:/path/with spaces/", - "file://///T:/path/with%20spaces/", - marks=[ - skip_needs_new_urlun_behavior_win, - ], + marks=pytest.mark.skipif("sys.platform != 'win32'"), ), # URL with Windows drive letter, running on non-windows # platform. The `:` after the drive should be quoted. @@ -389,18 +375,13 @@ def test_clean_url_path(path: str, expected: str) -> None: pytest.param( "git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0", "git+file:///T:/with%20space/repo.git@1.0#egg=my-package-1.0", - marks=skip_needs_old_urlun_behavior_win, - ), - pytest.param( - "git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0", - "git+file://///T:/with%20space/repo.git@1.0#egg=my-package-1.0", - marks=skip_needs_new_urlun_behavior_win, + marks=pytest.mark.skipif("sys.platform != 'win32'"), ), # Test a VCS URL with a Windows drive letter and revision, # running on non-windows platform. pytest.param( "git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0", - "git+file:/T%3A/with%20space/repo.git@1.0#egg=my-package-1.0", + "git+file:///T%3A/with%20space/repo.git@1.0#egg=my-package-1.0", marks=pytest.mark.skipif("sys.platform == 'win32'"), ), ], From 2f3d8d990f69ceca9ffbb187d5a274d223756e14 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 21:13:05 +0100 Subject: [PATCH 04/17] More Windows gubbins --- src/pip/_internal/models/link.py | 2 +- tests/unit/test_req.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 4181d1e402a..dc18e752a1c 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -151,7 +151,7 @@ def _ensure_quoted_url(url: str) -> str: # Split the URL into parts according to the general structure # `scheme://netloc/path?query#fragment`. result = urllib.parse.urlsplit(url) - # If the netloc is empty, then the URL refers to a local filesystem path + # If the netloc is empty, then the URL refers to a local filesystem path. if not result.netloc: return clean_file_url(url) path = _clean_url_path(result.path) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 0547131134e..f0d30c00634 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -51,6 +51,7 @@ handle_requirement_line, ) from pip._internal.resolution.legacy.resolver import Resolver +from pip._internal.utils.urls import url_to_path from tests.lib import TestData, make_test_finder, requirements_file, wheel @@ -224,7 +225,7 @@ def test_unsupported_hashes(self, data: TestData) -> None: dir_path = data.packages.joinpath("FSPkg") reqset.add_unnamed_requirement( get_processed_req_from_line( - f"file://{dir_path}", + url_to_path(dir_path), lineno=2, ) ) From 60edfd85dd69a6960e8b123b9b00111e7434d8dc Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 21:17:09 +0100 Subject: [PATCH 05/17] Tweak --- tests/unit/test_req.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index f0d30c00634..69152dedfa9 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -51,7 +51,6 @@ handle_requirement_line, ) from pip._internal.resolution.legacy.resolver import Resolver -from pip._internal.utils.urls import url_to_path from tests.lib import TestData, make_test_finder, requirements_file, wheel @@ -225,7 +224,7 @@ def test_unsupported_hashes(self, data: TestData) -> None: dir_path = data.packages.joinpath("FSPkg") reqset.add_unnamed_requirement( get_processed_req_from_line( - url_to_path(dir_path), + f"file:{dir_path}", lineno=2, ) ) From 0757e0d2254fd75b1db533a12301d3d487bda81e Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 21:22:08 +0100 Subject: [PATCH 06/17] Tweak test regex --- tests/unit/test_req.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 69152dedfa9..e0074c5de4c 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -244,7 +244,7 @@ def test_unsupported_hashes(self, data: TestData) -> None: r"file \(line 1\)\)\n" r"Can't verify hashes for these file:// requirements because " r"they point to directories:\n" - rf" file://.*{sep}data{sep}packages{sep}FSPkg " + rf" file:.*{sep}data{sep}packages{sep}FSPkg " r"\(from -r file \(line 2\)\)" ), ): From f846efde00241a6a86e332e789be69a55ec4aee1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 21:34:19 +0100 Subject: [PATCH 07/17] More Windows test fixes, cleanups. --- src/pip/_internal/utils/urls.py | 7 ++++--- tests/functional/test_install.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 43145ac79d8..567cdded679 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -11,7 +11,7 @@ def path_to_url(path: str, normalize_path: bool = True) -> str: quoted path parts. """ if normalize_path: - path = os.path.normpath(os.path.abspath(path)) + path = os.path.abspath(path) if WINDOWS: path = path.replace("\\", "/") encoding = sys.getfilesystemencoding() @@ -37,8 +37,9 @@ def url_to_path(url: str) -> str: Convert a file: URL to a path. """ scheme, netloc, path, _, _ = urllib.parse.urlsplit(url) - if scheme != "file" and not scheme.endswith("+file"): - raise ValueError(f"You can only turn file: urls into filenames (not {url!r})") + assert scheme == "file" or scheme.endswith( + "+file" + ), f"You can only turn file: urls into filenames (not {url!r})" if WINDOWS: if netloc and netloc != "localhost": diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index f23b156dcf7..8c4e1682a4a 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1880,7 +1880,7 @@ def test_install_editable_with_wrong_egg_name( result = script.pip( "install", "--editable", - f"file://{pkga_path}#egg=pkgb", + f"file:{pkga_path}#egg=pkgb", expect_error=(resolver_variant == "resolvelib"), ) assert ( From 359557e1144bc9e88cea5a1c593988ba02ec91c7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 22:00:24 +0100 Subject: [PATCH 08/17] Undo test changes --- src/pip/_internal/utils/urls.py | 7 +++++-- tests/functional/test_install.py | 2 +- tests/unit/test_req.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 567cdded679..f99e6c26905 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -42,8 +42,11 @@ def url_to_path(url: str) -> str: ), f"You can only turn file: urls into filenames (not {url!r})" if WINDOWS: - if netloc and netloc != "localhost": - path = "//" + netloc + path + if netloc: + if netloc[1:] == ":": + path = netloc + path + elif netloc != "localhost": + path = "//" + netloc + path elif path[:3] == "///": path = path[1:] else: diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 8c4e1682a4a..f23b156dcf7 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1880,7 +1880,7 @@ def test_install_editable_with_wrong_egg_name( result = script.pip( "install", "--editable", - f"file:{pkga_path}#egg=pkgb", + f"file://{pkga_path}#egg=pkgb", expect_error=(resolver_variant == "resolvelib"), ) assert ( diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index e0074c5de4c..0547131134e 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -224,7 +224,7 @@ def test_unsupported_hashes(self, data: TestData) -> None: dir_path = data.packages.joinpath("FSPkg") reqset.add_unnamed_requirement( get_processed_req_from_line( - f"file:{dir_path}", + f"file://{dir_path}", lineno=2, ) ) @@ -244,7 +244,7 @@ def test_unsupported_hashes(self, data: TestData) -> None: r"file \(line 1\)\)\n" r"Can't verify hashes for these file:// requirements because " r"they point to directories:\n" - rf" file:.*{sep}data{sep}packages{sep}FSPkg " + rf" file://.*{sep}data{sep}packages{sep}FSPkg " r"\(from -r file \(line 2\)\)" ), ): From d07a876732c2c3283dd2696e688ac673a777154c Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 22:19:26 +0100 Subject: [PATCH 09/17] Support backslashes in URLs --- src/pip/_internal/utils/urls.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index f99e6c26905..d668650cf47 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -42,11 +42,10 @@ def url_to_path(url: str) -> str: ), f"You can only turn file: urls into filenames (not {url!r})" if WINDOWS: - if netloc: - if netloc[1:] == ":": - path = netloc + path - elif netloc != "localhost": - path = "//" + netloc + path + if netloc[1:2] == ":": + path = netloc + path + elif netloc and netloc != "localhost": + path = "//" + netloc + path elif path[:3] == "///": path = path[1:] else: @@ -71,13 +70,13 @@ def clean_file_url(url: str) -> str: e.g. 'file:/c:/foo bar' --> 'file:///c:/foo%20bar'. """ - tok = "-_-PIP_AT_SYMBOL_-_" - assert tok not in url - orig_url = url.replace("@", tok) + at_symbol_token = "-_-PIP_AT_SYMBOL_-_" + assert at_symbol_token not in url + orig_url = url.replace("@", at_symbol_token) tidy_url = path_to_url(url_to_path(orig_url), normalize_path=False) - tidy_parts = urllib.parse.urlsplit(tidy_url) orig_parts = urllib.parse.urlsplit(orig_url) - merged_url = urllib.parse.urlunsplit(tidy_parts[:3] + orig_parts[3:]) - if orig_parts.scheme != "file": - merged_url = orig_parts.scheme + merged_url[4:] - return merged_url.replace(tok, "@") + tidy_parts = urllib.parse.urlsplit(tidy_url) + url = urllib.parse.urlunsplit(tidy_parts[:3] + orig_parts[3:]) + url = url.replace(tidy_parts.scheme, orig_parts.scheme, 1) + url = url.replace(at_symbol_token, "@") + return url From 071a7e3bd4dfdaac78302f6dc1700e5c55fb3cbf Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 22:37:10 +0100 Subject: [PATCH 10/17] Add news blurb --- news/13501.bugfix.rst | 1 + news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst | 0 2 files changed, 1 insertion(+) create mode 100644 news/13501.bugfix.rst delete mode 100644 news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst diff --git a/news/13501.bugfix.rst b/news/13501.bugfix.rst new file mode 100644 index 00000000000..cb06bbb90d1 --- /dev/null +++ b/news/13501.bugfix.rst @@ -0,0 +1 @@ +Make conversion of file URLs more consistent across Python versions. diff --git a/news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst b/news/47850a3b-d572-41f8-b080-58cebf24b89a.trivial.rst deleted file mode 100644 index e69de29bb2d..00000000000 From 5c23ec0a29f170ae024b5c7b0a9067eb20f93357 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 23:25:15 +0100 Subject: [PATCH 11/17] Add comments, improve naming. --- src/pip/_internal/utils/urls.py | 43 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index d668650cf47..bfa3f0b88a2 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -14,8 +14,6 @@ def path_to_url(path: str, normalize_path: bool = True) -> str: path = os.path.abspath(path) if WINDOWS: path = path.replace("\\", "/") - encoding = sys.getfilesystemencoding() - errors = sys.getfilesystemencodeerrors() drive, tail = os.path.splitdrive(path) if drive: @@ -25,9 +23,12 @@ def path_to_url(path: str, normalize_path: bool = True) -> str: drive = "//" + drive[4:] if drive[1:] == ":": drive = "///" + drive - drive = urllib.parse.quote(drive, "/:", encoding, errors) elif tail.startswith("/"): tail = "//" + tail + + encoding = sys.getfilesystemencoding() + errors = sys.getfilesystemencodeerrors() + drive = urllib.parse.quote(drive, "/:", encoding, errors) tail = urllib.parse.quote(tail, "/", encoding, errors) return "file:" + drive + tail @@ -42,17 +43,22 @@ def url_to_path(url: str) -> str: ), f"You can only turn file: urls into filenames (not {url!r})" if WINDOWS: + # e.g. file://c:/foo if netloc[1:2] == ":": path = netloc + path + + # e.g. file://server/share/foo elif netloc and netloc != "localhost": path = "//" + netloc + path + + # e.g. file://///server/share/foo elif path[:3] == "///": path = path[1:] - else: - if path[:1] == "/" and path[2:3] in (":", "|"): - path = path[1:] - if path[1:2] == "|": - path = path[:1] + ":" + path[2:] + + # e.g. file:///c:/foo + elif path[:1] == "/" and path[2:3] == ":": + path = path[1:] + path = path.replace("/", "\\") elif netloc and netloc != "localhost": raise ValueError( @@ -70,13 +76,20 @@ def clean_file_url(url: str) -> str: e.g. 'file:/c:/foo bar' --> 'file:///c:/foo%20bar'. """ + # Replace "@" characters to protect them from percent-encoding. at_symbol_token = "-_-PIP_AT_SYMBOL_-_" assert at_symbol_token not in url - orig_url = url.replace("@", at_symbol_token) - tidy_url = path_to_url(url_to_path(orig_url), normalize_path=False) - orig_parts = urllib.parse.urlsplit(orig_url) + url = url.replace("@", at_symbol_token) + parts = urllib.parse.urlsplit(url) + + # Convert to a file path and back. This normalizes the URL, but removes + # the original scheme, query and fragment components. + tidy_url = path_to_url(url_to_path(url), normalize_path=False) tidy_parts = urllib.parse.urlsplit(tidy_url) - url = urllib.parse.urlunsplit(tidy_parts[:3] + orig_parts[3:]) - url = url.replace(tidy_parts.scheme, orig_parts.scheme, 1) - url = url.replace(at_symbol_token, "@") - return url + + # Restore the original scheme, query and fragment components. + url = urllib.parse.urlunsplit(tidy_parts[:3] + parts[3:]) + url = url.replace(tidy_parts.scheme, parts.scheme, 1) + + # Restore "@" characters that were replaced earlier. + return url.replace(at_symbol_token, "@") From 8c817be90afb972cb77bb51d789ae82f3b6a7cee Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 23 Jul 2025 23:39:13 +0100 Subject: [PATCH 12/17] =?UTF-8?q?Enable=20Python=203.14=20CI=20?= =?UTF-8?q?=F0=9F=A4=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 2 ++ noxfile.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 815617e1c19..f3d69fa5763 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -122,6 +122,7 @@ jobs: - "3.11" - "3.12" - "3.13" + - "3.14" steps: - uses: actions/checkout@v4 @@ -181,6 +182,7 @@ jobs: # - "3.11" # - "3.12" - "3.13" + - "3.14" group: - { number: 1, pytest-filter: "not test_install" } - { number: 2, pytest-filter: "test_install" } diff --git a/noxfile.py b/noxfile.py index 160e823e9f1..88e9d5dec66 100644 --- a/noxfile.py +++ b/noxfile.py @@ -67,7 +67,7 @@ def should_update_common_wheels() -> bool: # ----------------------------------------------------------------------------- # Development Commands # ----------------------------------------------------------------------------- -@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3"]) +@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "pypy3"]) def test(session: nox.Session) -> None: # Get the common wheels. if should_update_common_wheels(): From b2997be9a809129fd229eecfb9bc61b0eb7a9dfd Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 24 Jul 2025 18:06:00 +0100 Subject: [PATCH 13/17] Undo drive letter case change --- tests/unit/test_urls.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_urls.py b/tests/unit/test_urls.py index e63d84c2625..b88356c0caa 100644 --- a/tests/unit/test_urls.py +++ b/tests/unit/test_urls.py @@ -18,8 +18,8 @@ def test_path_to_url_unix() -> None: @pytest.mark.parametrize( "path, url", [ - pytest.param("c:/tmp/file", "file:///c:/tmp/file", id="posix-path"), - pytest.param("c:\\tmp\\file", "file:///c:/tmp/file", id="nt-path"), + pytest.param("c:/tmp/file", "file:///C:/tmp/file", id="posix-path"), + pytest.param("c:\\tmp\\file", "file:///C:/tmp/file", id="nt-path"), ], ) def test_path_to_url_win(path: str, url: str) -> None: @@ -46,13 +46,13 @@ def test_relative_path_to_url_win() -> None: "url,win_expected,non_win_expected", [ ("file:tmp", "tmp", "tmp"), - ("file:c:/path/to/file", r"c:\path\to\file", "c:/path/to/file"), + ("file:c:/path/to/file", r"C:\path\to\file", "c:/path/to/file"), ("file:/path/to/file", r"\path\to\file", "/path/to/file"), ("file://localhost/tmp/file", r"\tmp\file", "/tmp/file"), - ("file://localhost/c:/tmp/file", r"c:\tmp\file", "/c:/tmp/file"), + ("file://localhost/c:/tmp/file", r"C:\tmp\file", "/c:/tmp/file"), ("file://somehost/tmp/file", r"\\somehost\tmp\file", None), ("file:///tmp/file", r"\tmp\file", "/tmp/file"), - ("file:///c:/tmp/file", r"c:\tmp\file", "/c:/tmp/file"), + ("file:///c:/tmp/file", r"C:\tmp\file", "/c:/tmp/file"), ], ) def test_url_to_path(url: str, win_expected: str, non_win_expected: str) -> None: From 7db59a0840be92cfea4c05849f57f2a5a455dc6f Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 24 Jul 2025 19:51:18 +0100 Subject: [PATCH 14/17] Very minor tweaks --- src/pip/_internal/utils/urls.py | 14 +++++++------- tests/unit/test_collector.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index bfa3f0b88a2..989311c016c 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -7,8 +7,8 @@ def path_to_url(path: str, normalize_path: bool = True) -> str: """ - Convert a path to a file: URL. The path will be made absolute and have - quoted path parts. + Convert a path to a file: URL with quoted path parts. The path will be + normalized and made absolute if *normalize_path* is true (the default.) """ if normalize_path: path = os.path.abspath(path) @@ -37,7 +37,7 @@ def url_to_path(url: str) -> str: """ Convert a file: URL to a path. """ - scheme, netloc, path, _, _ = urllib.parse.urlsplit(url) + scheme, netloc, path = urllib.parse.urlsplit(url)[:3] assert scheme == "file" or scheme.endswith( "+file" ), f"You can only turn file: urls into filenames (not {url!r})" @@ -74,16 +74,16 @@ def clean_file_url(url: str) -> str: """ Fix up quoting and leading slashes in the given file: URL. - e.g. 'file:/c:/foo bar' --> 'file:///c:/foo%20bar'. + e.g. 'file:/c:/foo bar@1.0' --> 'file:///c:/foo%20bar@1.0'. """ # Replace "@" characters to protect them from percent-encoding. - at_symbol_token = "-_-PIP_AT_SYMBOL_-_" + at_symbol_token = "---PIP_AT_SYMBOL---" assert at_symbol_token not in url url = url.replace("@", at_symbol_token) parts = urllib.parse.urlsplit(url) - # Convert to a file path and back. This normalizes the URL, but removes - # the original scheme, query and fragment components. + # Convert URL to a file path and back. This normalizes the netloc and + # path, but resets the other URL components. tidy_url = path_to_url(url_to_path(url), normalize_path=False) tidy_parts = urllib.parse.urlsplit(tidy_url) diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 6b306c53432..5602d7616fd 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -358,7 +358,7 @@ def test_clean_url_path(path: str, expected: str) -> None: ), # URL with Windows drive letter. The `:` after the drive # letter should not be quoted. The trailing `/` should be - # removed. + # retained. pytest.param( "file:///T:/path/with spaces/", "file:///T:/path/with%20spaces/", From 00b7fa04c10b39799197e771cd90a826a1d31864 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 24 Jul 2025 20:18:32 +0100 Subject: [PATCH 15/17] Undo change to `pip._internal.vcs.git` and un-move function. --- src/pip/_internal/models/link.py | 30 ++++++++++++++++++++++++++++-- src/pip/_internal/utils/urls.py | 25 ------------------------- src/pip/_internal/vcs/git.py | 19 ++++++++++++++++--- tests/unit/test_collector.py | 24 ++++++++++++++++++++++++ tests/unit/test_urls.py | 25 +------------------------ 5 files changed, 69 insertions(+), 54 deletions(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index dc18e752a1c..6ef55dc1b27 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -24,7 +24,7 @@ split_auth_from_netloc, splitext, ) -from pip._internal.utils.urls import clean_file_url, path_to_url, url_to_path +from pip._internal.utils.urls import path_to_url, url_to_path if TYPE_CHECKING: from pip._internal.index.collector import IndexContent @@ -121,6 +121,32 @@ def _clean_url_path_part(part: str) -> str: return urllib.parse.quote(urllib.parse.unquote(part)) +def _clean_file_url(url: str) -> str: + """ + Clean a URL that corresponds to a local + filesystem path (i.e. the first part after splitting on "@" characters). + """ + # Replace "@" characters to protect them from percent-encoding. + at_symbol_token = "---PIP_AT_SYMBOL---" + assert at_symbol_token not in url + url = url.replace("@", at_symbol_token) + parts = urllib.parse.urlsplit(url) + + # We unquote prior to quoting to make sure nothing is double quoted. + # Also, on Windows the path part might contain a drive letter which + # should not be quoted. On Linux where drive letters do not + # exist, the colon should be quoted. + tidy_url = path_to_url(url_to_path(url), normalize_path=False) + tidy_parts = urllib.parse.urlsplit(tidy_url) + + # Restore the original scheme, query and fragment components. + url = urllib.parse.urlunsplit(tidy_parts[:3] + parts[3:]) + url = url.replace(tidy_parts.scheme, parts.scheme, 1) + + # Restore "@" characters that were replaced earlier. + return url.replace(at_symbol_token, "@") + + # percent-encoded: / _reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) @@ -153,7 +179,7 @@ def _ensure_quoted_url(url: str) -> str: result = urllib.parse.urlsplit(url) # If the netloc is empty, then the URL refers to a local filesystem path. if not result.netloc: - return clean_file_url(url) + return _clean_file_url(url) path = _clean_url_path(result.path) return urllib.parse.urlunsplit(result._replace(path=path)) diff --git a/src/pip/_internal/utils/urls.py b/src/pip/_internal/utils/urls.py index 989311c016c..e8f3cb25b5b 100644 --- a/src/pip/_internal/utils/urls.py +++ b/src/pip/_internal/utils/urls.py @@ -68,28 +68,3 @@ def url_to_path(url: str) -> str: encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return urllib.parse.unquote(path, encoding, errors) - - -def clean_file_url(url: str) -> str: - """ - Fix up quoting and leading slashes in the given file: URL. - - e.g. 'file:/c:/foo bar@1.0' --> 'file:///c:/foo%20bar@1.0'. - """ - # Replace "@" characters to protect them from percent-encoding. - at_symbol_token = "---PIP_AT_SYMBOL---" - assert at_symbol_token not in url - url = url.replace("@", at_symbol_token) - parts = urllib.parse.urlsplit(url) - - # Convert URL to a file path and back. This normalizes the netloc and - # path, but resets the other URL components. - tidy_url = path_to_url(url_to_path(url), normalize_path=False) - tidy_parts = urllib.parse.urlsplit(tidy_url) - - # Restore the original scheme, query and fragment components. - url = urllib.parse.urlunsplit(tidy_parts[:3] + parts[3:]) - url = url.replace(tidy_parts.scheme, parts.scheme, 1) - - # Restore "@" characters that were replaced earlier. - return url.replace(at_symbol_token, "@") diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index dba49389d77..1769da791cb 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -4,13 +4,14 @@ import os.path import pathlib import re +import urllib.parse +import urllib.request from dataclasses import replace from typing import Any from pip._internal.exceptions import BadCommand, InstallationError from pip._internal.utils.misc import HiddenText, display_path, hide_url from pip._internal.utils.subprocess import make_command -from pip._internal.utils.urls import clean_file_url from pip._internal.vcs.versioncontrol import ( AuthInfo, RemoteNotFoundError, @@ -21,6 +22,10 @@ vcs, ) +urlsplit = urllib.parse.urlsplit +urlunsplit = urllib.parse.urlunsplit + + logger = logging.getLogger(__name__) @@ -497,8 +502,16 @@ def get_url_rev_and_auth(cls, url: str) -> tuple[str, str | None, AuthInfo]: """ # Works around an apparent Git bug # (see https://article.gmane.org/gmane.comp.version-control.git/146500) - if url.startswith("git+file:"): - url = clean_file_url(url) + scheme, netloc, path, query, fragment = urlsplit(url) + if scheme.endswith("file"): + initial_slashes = path[: -len(path.lstrip("/"))] + newpath = initial_slashes + urllib.request.url2pathname(path).replace( + "\\", "/" + ).lstrip("/") + after_plus = scheme.find("+") + 1 + url = scheme[:after_plus] + urlunsplit( + (scheme[after_plus:], netloc, newpath, query, fragment), + ) if "://" not in url: assert "file:" not in url diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index 5602d7616fd..318e6ef8a81 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -33,6 +33,7 @@ Link, LinkHash, MetadataFile, + _clean_file_url, _clean_url_path, _ensure_quoted_url, ) @@ -295,6 +296,29 @@ def test_clean_url_path(path: str, expected: str) -> None: assert _clean_url_path(path) == expected +@pytest.mark.parametrize( + "url, expected", + [ + # Test a VCS path with a Windows drive letter and revision. + pytest.param( + "file:/T:/with space/repo.git@1.0", + "file:///T:/with%20space/repo.git@1.0", + marks=pytest.mark.skipif("sys.platform != 'win32'"), + ), + # Test a VCS path with a Windows drive letter and revision, + # running on non-windows platform. + pytest.param( + "file:/T:/with space/repo.git@1.0", + "file:///T%3A/with%20space/repo.git@1.0", + marks=pytest.mark.skipif("sys.platform == 'win32'"), + ), + ], +) +def test_clean_file_url(url: str, expected: str) -> None: + actual = _clean_file_url(url) + assert actual == expected + + @pytest.mark.parametrize( "url, clean_url", [ diff --git a/tests/unit/test_urls.py b/tests/unit/test_urls.py index b88356c0caa..0c145255080 100644 --- a/tests/unit/test_urls.py +++ b/tests/unit/test_urls.py @@ -4,7 +4,7 @@ import pytest -from pip._internal.utils.urls import clean_file_url, path_to_url, url_to_path +from pip._internal.utils.urls import path_to_url, url_to_path @pytest.mark.skipif("sys.platform == 'win32'") @@ -75,26 +75,3 @@ def test_url_to_path_path_to_url_symmetry_win() -> None: unc_path = r"\\unc\share\path" assert url_to_path(path_to_url(unc_path)) == unc_path - - -@pytest.mark.parametrize( - "url, expected", - [ - # Test a VCS path with a Windows drive letter and revision. - pytest.param( - "file:/T:/with space/repo.git@1.0", - "file:///T:/with%20space/repo.git@1.0", - marks=pytest.mark.skipif("sys.platform != 'win32'"), - ), - # Test a VCS path with a Windows drive letter and revision, - # running on non-windows platform. - pytest.param( - "file:/T:/with space/repo.git@1.0", - "file:///T%3A/with%20space/repo.git@1.0", - marks=pytest.mark.skipif("sys.platform == 'win32'"), - ), - ], -) -def test_clean_file_url(url: str, expected: str) -> None: - actual = clean_file_url(url) - assert actual == expected From f30c187f3f5ce28279d01cbd3b360b3b6e26ef21 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 27 Jul 2025 21:07:21 +0100 Subject: [PATCH 16/17] Fix lint --- src/pip/_internal/models/link.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 4ce42173453..f1bacd7bca1 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -6,7 +6,6 @@ import os import posixpath import re -import sys import urllib.parse from collections.abc import Mapping from dataclasses import dataclass From 1ad7fbebe449e2c81cceffe07eab38328eb591cb Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 27 Jul 2025 21:07:30 +0100 Subject: [PATCH 17/17] =?UTF-8?q?Revert=20"Enable=20Python=203.14=20CI=20?= =?UTF-8?q?=F0=9F=A4=9E"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8c817be90afb972cb77bb51d789ae82f3b6a7cee. --- .github/workflows/ci.yml | 2 -- noxfile.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f3d69fa5763..815617e1c19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -122,7 +122,6 @@ jobs: - "3.11" - "3.12" - "3.13" - - "3.14" steps: - uses: actions/checkout@v4 @@ -182,7 +181,6 @@ jobs: # - "3.11" # - "3.12" - "3.13" - - "3.14" group: - { number: 1, pytest-filter: "not test_install" } - { number: 2, pytest-filter: "test_install" } diff --git a/noxfile.py b/noxfile.py index 88e9d5dec66..160e823e9f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -67,7 +67,7 @@ def should_update_common_wheels() -> bool: # ----------------------------------------------------------------------------- # Development Commands # ----------------------------------------------------------------------------- -@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "pypy3"]) +@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3"]) def test(session: nox.Session) -> None: # Get the common wheels. if should_update_common_wheels():