diff --git a/doc/manual/rl-next/git-url-scp.md b/doc/manual/rl-next/git-url-scp.md new file mode 100644 index 00000000000..b1125f7b32a --- /dev/null +++ b/doc/manual/rl-next/git-url-scp.md @@ -0,0 +1,30 @@ +--- +synopsis: Support SCP-like URLs in fetchGit and type = "git" flake inputs +prs: [14863] +issues: [14852, 14867] +--- + +Nix now (once again) recognizes [SCP-like syntax for Git URLs](https://git-scm.com/docs/git-clone#_git_urls). This partially +restores compatibility with Nix 2.3 for `fetchGit`. The following syntax is once again supported: + +```nix +builtins.fetchGit "host:/absolute/path/to/repo" +``` + +Nix also passes through the tilde (for home directories) verbatim: + +```nix +builtins.fetchGit "host:~/relative/to/home" +``` + +IPv6 addresses are also supported when bracketed: + +```nix +builtins.fetchGit "user@[::1]:~/relative/to/home" +``` + +`builtins.fetchTree` also supports this syntax now: + +```nix +builtins.fetchTree { type = "git"; url = "host:/path/to/repo"; } +``` diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 1614fcc595d..5637b04626b 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -600,7 +600,12 @@ static RegisterPrimOp primop_fetchGit({ - `url` - The URL of the repo. + The [Git URL] of the repo. SCP-like syntax is supported, but relative + paths are rewritten to absolute ones. 
For example: + + `git@github.com:repo/path` becomes `ssh://git@github.com/repo/path` + + [Git URL]: https://git-scm.com/docs/git-clone#_git_urls - `name` (default: `source`) diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 9d1a56a2158..a0fa5289167 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -63,6 +63,49 @@ INSTANTIATE_TEST_SUITE_P( .path = {"", "owner", "repo.git"}, }, }, + // SCP-like URL, no user (rewritten to ssh://) + FixGitURLParam{ + .input = "github.com:owner/repo.git", + .expected = "ssh://github.com/owner/repo.git", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "github.com", + }, + .path = {"", "owner", "repo.git"}, + }, + }, + // SCP-like URL, no user, absolute path (rewritten to ssh://) + FixGitURLParam{ + .input = "github.com:/owner/repo.git", + .expected = "ssh://github.com/owner/repo.git", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "github.com", + }, + .path = {"", "owner", "repo.git"}, + }, + }, + // SCP-like URL (rewritten to ssh://) + FixGitURLParam{ + .input = "user@server.com:/path/to/repo", + .expected = "ssh://user@server.com/path/to/repo", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .host = "server.com", + .user = "user", + }, + .path = {"", "path", "to", "repo"}, + }, + }, // Absolute path (becomes file:) FixGitURLParam{ .input = "/home/me/repo", @@ -75,8 +118,6 @@ INSTANTIATE_TEST_SUITE_P( }, }, // Already file: scheme - // NOTE: Git/SCP treat this as a `:`, so we are - // failing to "fix up" this case. 
FixGitURLParam{ .input = "file:/var/repos/x", .expected = "file:/var/repos/x", @@ -87,10 +128,43 @@ INSTANTIATE_TEST_SUITE_P( .path = {"", "var", "repos", "x"}, }, }, + // git+file scheme + FixGitURLParam{ + .input = "git+file:///var/repos/x", + .expected = "file:///var/repos/x", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"", "var", "repos", "x"}, + }, + }, + // absolute path with a space + FixGitURLParam{ + .input = "/repos/git repo", + .expected = "file:///repos/git%20repo", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"", "repos", "git repo"}, + }, + }, + // quoted path + FixGitURLParam{ + .input = "/repos/\"git repo\"", + .expected = "file:///repos/%22git%20repo%22", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"", "repos", "\"git repo\""}, + }, + }, // IPV6 test case FixGitURLParam{ .input = "user@[2001:db8:1::2]:/home/file", - .expected = "ssh://user@[2001:db8:1::2]//home/file", + .expected = "ssh://user@[2001:db8:1::2]/home/file", .parsed = ParsedURL{ .scheme = "ssh", @@ -100,7 +174,72 @@ INSTANTIATE_TEST_SUITE_P( .host = "2001:db8:1::2", .user = "user", }, - .path = {"", "", "home", "file"}, + .path = {"", "home", "file"}, + }, + }, + // https://github.com/NixOS/nix/issues/14867 + // Verify input doesn't trigger an assert. + // Intent is git@github, but gets parsed as git scheme with a relative path + FixGitURLParam{ + .input = "git:github.com:nixos/nixpkgs", + .expected = "git:github.com:nixos/nixpkgs", + .parsed = + ParsedURL{ + .scheme = "git", + .authority = std::nullopt, + .path = {"github.com:nixos", "nixpkgs"}, + }, + }, + // https://github.com/NixOS/nix/issues/14867#issuecomment-3699499232 + // Verify input doesn't trigger an assert. 
+ // The authority should have a "//" prefix, but instead gets parsed as a path component + FixGitURLParam{ + .input = "git+https:/codeberg.org/forgejo/forgejo", + .expected = "https:/codeberg.org/forgejo/forgejo", + .parsed = + ParsedURL{ + .scheme = "https", + .authority = std::nullopt, + .path = {"", "codeberg.org", "forgejo", "forgejo"}, + }, + }, + FixGitURLParam{ + .input = "user%20@[::1]:repo/path", + .expected = "ssh://user%2520@[::1]/repo/path", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::IPv6, .host = "::1", .user = "user%20"}, + .path = {"", "repo", "path"}, + }, + }, + // IPv6 SCP-like. Looks like a port but is actually a path. + FixGitURLParam{ + .input = "[2a02:8071:8192:c100:311d:192d:81ac:11ea]:12345", + .expected = "ssh://[2a02:8071:8192:c100:311d:192d:81ac:11ea]/12345", + .parsed = + ParsedURL{ + .scheme = "ssh", + .authority = + ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::IPv6, + .host = "2a02:8071:8192:c100:311d:192d:81ac:11ea", + .user = std::nullopt, + }, + .path = {"", "12345"}, + }, + }, + // Treats percent as a literal and not pct-encoding. + FixGitURLParam{ + .input = "/a/b/%20", + .expected = "file:///a/b/%2520", + .parsed = + ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = {"", "a", "b", "%20"}, }, })); @@ -112,16 +251,16 @@ TEST_P(FixGitURLTestSuite, parsesVariedGitUrls) EXPECT_EQ(actual.to_string(), p.expected); } -TEST(FixGitURLTestSuite, rejectScpLikeNoUser) +// This is an idempotence-like condition: every SCP URL has a corresponding bona fide URL that will parse correctly. +TEST_P(FixGitURLTestSuite, parsedNormalized) { - // SCP-like URL without user. Proper support can be implemented, but this is - // a deceptively deep feature - study existing implementations carefully. 
- EXPECT_THAT( - []() { fixGitURL("github.com:owner/repo.git"); }, - ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("SCP-like URL"))); + auto & p = GetParam(); + const auto actual = fixGitURL(p.expected); + EXPECT_EQ(actual, p.parsed); + EXPECT_EQ(actual.to_string(), p.expected); } -TEST(FixGitURLTestSuite, properlyRejectFileURLWithAuthority) +TEST(FixGitURLTestSuite, rejectFileURLWithAuthority) { /* From the underlying `parseURL` validations. */ EXPECT_THAT( @@ -130,24 +269,35 @@ TEST(FixGitURLTestSuite, properlyRejectFileURLWithAuthority) testing::HasSubstrIgnoreANSIMatcher("file:// URL 'file://var/repos/x' has unexpected authority 'var'"))); } -TEST(FixGitURLTestSuite, rejectScpLikeNoUserLeadingSlash) +TEST(FixGitURLTestSuite, rejectRelativePath) { + /* From the underlying `parseURL` validations. */ EXPECT_THAT( - []() { fixGitURL("github.com:/owner/repo.git"); }, - ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("SCP-like URL"))); + []() { fixGitURL("relative/repo"); }, + ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("is not an absolute path"))); } -TEST(FixGitURLTestSuite, relativePath) +TEST(FixGitURLTestSuite, rejectEmptyPathGitScp) { - // Relative path - parsed as file path without authority - auto parsed = fixGitURL("relative/repo"); - EXPECT_EQ( - parsed, - (ParsedURL{ - .scheme = "file", - .path = {"relative", "repo"}, - })); - EXPECT_EQ(parsed.to_string(), "file:relative/repo"); + /* Reject SCP-style URLs with no path component. */ + EXPECT_THAT( + []() { fixGitURL("host:"); }, + ::testing::ThrowsMessage( + testing::HasSubstrIgnoreANSIMatcher("SCP-style Git URL 'host:' has an empty path"))); +} + +TEST(FixGitURLTestSuite, rejectMalformedBracketedURLs) +{ + /* Reject URLs with brackets that don't form valid SCP-style IPv6 syntax. 
*/ + EXPECT_THAT( + []() { fixGitURL("user[2001:db8:1::2]:/home/@file"); }, + ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("is not a valid URL"))); + EXPECT_THAT( + []() { fixGitURL("user:[2001:db8:1::2]:/home/@file"); }, + ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("is not a valid URL"))); + EXPECT_THAT( + []() { fixGitURL("user:@[2001:db8:1::2]:/home/file"); }, + ::testing::ThrowsMessage(testing::HasSubstrIgnoreANSIMatcher("is not a valid URL"))); } struct ParseURLSuccessCase diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 55c475df651..48c37e1f675 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -335,6 +335,25 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme); * them by removing the `:` and assuming a scheme of `ssh://`. Also * drops `git+` from the scheme (e.g. `git+https://` to `https://`) * and changes absolute paths into `file://` URLs. + * + * @see https://git-scm.com/docs/git-clone#_git_urls + * + * ssh://[<user>@]<host>[:<port>]/<path-to-git-repo> + * git://<host>[:<port>]/<path-to-git-repo> + * http[s]://<host>[:<port>]/<path-to-git-repo> + * ftp[s]://<host>[:<port>]/<path-to-git-repo> + * + * An alternative scp-like syntax may also be used with the ssh protocol: + * [<user>@]<host>:/<path-to-git-repo> + * This syntax is only recognized if there are no slashes before the first colon. + * + * For local repositories, also supported by Git natively, the following syntaxes may be used: + * /path/to/repo.git/ + * file:///path/to/repo.git/ + * + * @note file:/path/to/repo is recognised by libfetchers, but not git so this function accepts + * it too. Technically this conflicts with the SCP-like syntax where file is the hostname, but + * it's special-cased. 
*/ ParsedURL fixGitURL(std::string url); diff --git a/src/libutil/url.cc b/src/libutil/url.cc index d6bda4e9f5b..5fe526e443d 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -8,6 +8,8 @@ #include +#include + namespace nix { std::regex refRegex(refRegexS, std::regex::ECMAScript); @@ -412,34 +414,185 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme) }; } -ParsedURL fixGitURL(std::string url) +/** + * If `url` is SCP style, return the parsed URL. + * + * This syntax is only recognized if there are no slashes before the + * first colon, and no double slash immediately after the colon (://). + * + * When Git encounters a URL of the form <transport>://<address>
, where + * <transport> is a protocol that it cannot handle natively, it + * automatically invokes git remote-<transport> with the full URL as the + * second argument. https://git-scm.com/docs/gitremote-helpers. If the + * url doesn't look like it would be accepted by the remote helper, + * treat it as a SCP-style one. Don't do any pct-decoding in that case. + * Schemes supported by git are excluded. + */ +static std::optional tryParseScpStyle(std::string_view url) { - std::regex scpRegex("([^/]*)@(.*):(.*)"); - if (!hasPrefix(url, "/") && std::regex_match(url, scpRegex)) - url = std::regex_replace(url, scpRegex, "ssh://$1@$2/$3"); - if (!hasPrefix(url, "file:") && !hasPrefix(url, "git+file:") && url.find("://") == std::string::npos) { - auto path = splitString>(url, "/"); - // Reject SCP-like URLs without user (e.g., "github.com:path") - colon in first component - if (!path.empty() && path[0].find(':') != std::string::npos) - throw BadURL("SCP-like URL '%s' is not supported; use SSH URL syntax instead (ssh://...)", url); - // Absolute paths get an empty authority (file:///path), relative paths get none (file:path) - if (hasPrefix(url, "/")) - return ParsedURL{ - .scheme = "file", - .authority = ParsedURL::Authority{}, - .path = path, + /* The funny functional structure indicates how this happens in two + parts. + + The first part decides whether we have a SCP-style pseudo-URL or + not --- any throwing errors rather than returning std::nullopt is + just a chance to improve error messages and should not change the + meaning (i.e. regular `parseURL` would have thrown in that case). + + The second part, if we have committed to parsing as SCP-style, + finishes the job without any bailing out --- either we throw an + error, or we finish parsing. */ + + auto opt = [&]() -> std::optional> { + std::string_view schemeOrHost; + /* If SCP pseudo-URL contains `[`, then it must have a bracketed + IPv6 for the host. 
See + https://github.com/git/git/blob/68cb7f9e92a5d8e9824f5b52ac3d0a9d8f653dbe/connect.c#L747-L769 + */ + if (url.find_first_of('[') != url.npos) { + /* Match optional user@, then bracketed IPv6, then colon. + SCP-style IPv6 URLs must have ':' after ']'. If not present, + this might be a proper URL (e.g. ssh://user@[::1]/path) which + has '/' after the bracket. Let parseURL handle it. */ + static std::regex scpIPv6Regex("([^:@]+@)?(\\[[^\\[\\]]+\\]):(.+)"); + std::match_results match; + if (!std::regex_match(url.begin(), url.end(), match, scpIPv6Regex)) + return std::nullopt; + /* schemeOrHost is user@ (if present) plus the bracketed address */ + schemeOrHost = url.substr(0, match.position(3) - 1); /* Everything before the final ':' */ + } else { + /* Otherwise return everything until the first `:`, which must + exist. */ + auto firstColon = url.find_first_of(':'); + if (firstColon == url.npos) + throw BadURL( + "Git URL '%s' doesn't have a scheme, is not an absolute path and doesn't look like an SCP-like URL", + url); + schemeOrHost = url.substr(0, firstColon); + + /* Purely to improve diagnostics for cases like + `git+https:/host/owner/repo` when users forget to specify an + authority (`://`). Otherwise we'd recognize it as an SCP-like URL + (as we rightfully should). + + HACK: Also include `file` / `git+file` in this set. SCP + syntax overlaps with `file:/path/to/repo`. Git itself doesn't + recognize it (or rather treats `file` as the host name), but Nix + accepts `file:/path/to/repo` as well as `file:///path/to/repo`. 
+ */ + static const auto schemesSupportedByGit = []() { + std::unordered_set> res; + for (auto scheme : {"ssh", "http", "https", "file", "ftp", "ftps", "git"}) { + res.insert(scheme); + res.insert(std::string("git+") + scheme); + } + return res; + }(); + if (schemesSupportedByGit.contains(schemeOrHost)) + return std::nullopt; + } + + std::string_view possiblyPathView = url; + possiblyPathView.remove_prefix(schemeOrHost.size()); + assert(possiblyPathView.starts_with(':')); + possiblyPathView.remove_prefix(1); /* Trim the colon. */ + + if (schemeOrHost.contains('/') || possiblyPathView.starts_with("//")) + return std::nullopt; + + return std::pair{schemeOrHost, possiblyPathView}; + }(); + + return opt.transform([&](std::pair pair) -> ParsedURL { + auto [host, pathView] = pair; + ParsedURL::Authority authority; + + /* Handle userinfo. SCP-like case thankfully can't provide a + password in the userinfo component. */ + auto username = splitPrefixTo(host, '@'); + + auto maybeIPv6 = [](std::string_view host) -> std::optional { + if (host.starts_with('[') && host.ends_with(']')) { + host.remove_prefix(1); + host.remove_suffix(1); + auto ipv6 = boost::urls::parse_ipv6_address(host); + if (!ipv6) + throw BadURL("Git SCP bracketed URL is not valid: '%s' is not a valid IPv6 address", host); + return ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::IPv6, + .host = ipv6->to_string(), + }; + } + return std::nullopt; + }; + + if (auto ipv4 = boost::urls::parse_ipv4_address(host)) { + authority = ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::IPv4, + .host = ipv4->to_string(), }; - else - return ParsedURL{ - .scheme = "file", - .path = path, + } else if (auto ipv6Authority = maybeIPv6(host)) { + authority = *ipv6Authority; + } else { + authority = ParsedURL::Authority{ + .hostType = ParsedURL::Authority::HostType::Name, + .host = std::string(host), }; + } + + authority.user = username; + + if (pathView.empty()) + throw BadURL("SCP-style Git 
URL '%s' has an empty path", url); + + ParsedURL res = { + .scheme = "ssh", + .authority = std::move(authority), + /* Everything else is the path. */ + .path = splitString>(pathView, "/"), + }; + + /* Force path to be absolute. FIXME: This is the status quo. + Unfortunately this only really works with git forges. There's + also home expansion to consider. Should be possible to work + around using tilde expansion by specifying something like + `host:~/path/to/repo` instead of `host:path/to/repo`. */ + if (auto & path = res.path; !path.empty() && !path.front().empty()) + path.insert(path.begin(), ""); + + return res; + }); +} + +ParsedURL fixGitURL(std::string url) +{ + /* First handle the absolute path case. TODO: Windows file:// URLs are tricky. See RFC8089. + Needs a forward slash before the drive letter: file:///C:/ + > Instead, such a reference ought to be constructed with a + > leading slash "/" character (e.g., "/c:/foo.txt"). + Git is non-compliant here and doesn't handle the necessary triple slash it seems. + https://github.com/git/git/blob/68cb7f9e92a5d8e9824f5b52ac3d0a9d8f653dbe/connect.c#L1122-L1123 */ + if (std::filesystem::path path = url; path.is_absolute()) { + /* Note that we don't do any percent decoding here, as we shouldn't since the input is not a URL but a local + * path. Any pct-encoded sequences get treated as literals. Should probably use + * std::filesystem::path::generic_string here for normalization, but that would be a slight behaviour change. */ + return ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = splitString>(url, "/"), + }; } + + /* Next, try parsing as an SCP-style URL. */ + if (auto scpStyle = tryParseScpStyle(url)) + return *scpStyle; + + /* TODO: What to do about query parameters? Git should pass those to the * http(s) remotes. Ignore for now and + * just pass through. Will fail later. */ auto parsed = parseURL(url); // Drop the superfluous "git+" from the scheme. 
- auto scheme = parseUrlScheme(parsed.scheme); - if (scheme.application == "git") + if (auto scheme = parseUrlScheme(parsed.scheme); scheme.application == "git") { parsed.scheme = scheme.transport; + } return parsed; }