Skip to content

Commit ec04015

Browse files
committed
Fix authority encoding in URL round-trips
`ParsedURL::to_string()` did not re-encode special characters in the authority component, so URLs with percent-encoded characters in userinfo (such as `%40` for `@`) became invalid after serialization. This fix adds proper authority encoding by:
- Separating userinfo from host at the last `@` character
- Encoding only the userinfo part while preserving `host[:port]`
- Using an appropriate set of allowed characters for authority encoding

Added comprehensive tests that verify correct round-trip behavior for various URL patterns, including an encoded `@`, spaces, and multiple special characters in the authority.
1 parent 7d12190 commit ec04015

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

src/libutil-tests/url.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,39 @@ namespace nix {
295295
ASSERT_EQ(b, "abd%20/%20def");
296296
}
297297

298+
TEST(parseURL, authorityEncodingRoundTrip) {
    // Each entry pairs an input URL with the decoded authority that
    // parseURL() is expected to report for it.
    const std::vector<std::pair<std::string, std::string>> testCases = {
        // URL with encoded @, decoded authority
        {"https://user%40domain@example.com/path", "user@domain@example.com"},
        // URL with encoded space
        {"https://user%20name@example.com/path", "user name@example.com"},
        // URL with colon (colon is allowed in userinfo for username:password)
        {"https://user:pass@example.com/path", "user:pass@example.com"},
        // URL with multiple encoded characters
        {"https://user%40%20name:pass@example.com/path", "user@ name:pass@example.com"},
        // URL with no userinfo
        {"https://example.com/path", "example.com"},
        // URL with port
        {"https://user%40name@example.com:8080/path", "user@name@example.com:8080"},
    };

    for (const auto& [url, expectedAuth] : testCases) {
        // Attach the input URL to every failure raised in this iteration;
        // without it a failing ASSERT does not say which table entry broke.
        SCOPED_TRACE("url = " + url);

        const auto parsed = parseURL(url);
        // Check presence explicitly so a missing authority is reported as a
        // test failure rather than as an exception from the optional access.
        ASSERT_TRUE(parsed.authority.has_value());
        ASSERT_EQ(*parsed.authority, expectedAuth);

        // Verify round-trip: serializing and re-parsing must yield the same
        // decoded authority.
        const auto reconstructed = parsed.to_string();
        const auto reparsed = parseURL(reconstructed);
        ASSERT_TRUE(reparsed.authority.has_value());
        ASSERT_EQ(*reparsed.authority, expectedAuth);

        // For URLs with encoded characters in userinfo, the serialized form
        // must match the input exactly.
        const bool hasEscape = url.find('%') != std::string::npos;
        const bool hasUserinfo = url.find('@') != std::string::npos;
        if (hasEscape && hasUserinfo) {
            ASSERT_EQ(reconstructed, url);
        }
    }
}
330+
298331
TEST(percentEncode, inverseOfDecode) {
299332
std::string original = "%3D%3D%40%3D%3D";
300333
std::string once = percentEncode(original);

src/libutil/url.cc

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ try {
117117

118118
// Extra characters handed to percentEncode() as "leave unescaped" sets.
// NOTE(review): allowedInQuery is presumably consumed by encodeQuery() — confirm.
static const std::string allowedInQuery{":@/?"};
// Used when serializing the path component in ParsedURL::to_string().
static const std::string allowedInPath{":@/"};
// ':' plus the RFC 3986 sub-delims. Despite the name, encodeAuthority()
// applies this set only to the userinfo part; host[:port] is emitted verbatim.
static const std::string allowedInAuthority{":!$&'()*+,;="};
120121

121122
std::string encodeQuery(const StringMap & ss)
122123
{
@@ -132,12 +133,29 @@ std::string encodeQuery(const StringMap & ss)
132133
return res;
133134
}
134135

136+
static std::string encodeAuthority(const std::string& auth)
137+
{
138+
// Find the last @ to separate userinfo from host
139+
// The last @ is the delimiter, any @ before that is part of userinfo
140+
auto lastAt = auth.rfind('@');
141+
if (lastAt != std::string::npos) {
142+
// We have userinfo
143+
auto userinfo = auth.substr(0, lastAt);
144+
auto hostPort = auth.substr(lastAt + 1);
145+
// In the userinfo part, @ needs to be encoded along with other special chars
146+
// But : is allowed in userinfo for username:password
147+
return percentEncode(userinfo, allowedInAuthority) + "@" + hostPort;
148+
}
149+
// No userinfo, just host[:port]
150+
return auth;
151+
}
152+
135153
std::string ParsedURL::to_string() const
136154
{
137155
return
138156
scheme
139157
+ ":"
140-
+ (authority ? "//" + *authority : "")
158+
+ (authority ? "//" + encodeAuthority(*authority) : "")
141159
+ percentEncode(path, allowedInPath)
142160
+ (query.empty() ? "" : "?" + encodeQuery(query))
143161
+ (fragment.empty() ? "" : "#" + percentEncode(fragment));

0 commit comments

Comments
 (0)