@@ -12,40 +12,70 @@ std::regex refRegex(refRegexS, std::regex::ECMAScript);
12
12
/* Matchers compiled from the badGitRefRegexS and revRegexS pattern strings, using the ECMAScript grammar. */
std::regex badGitRefRegex (badGitRefRegexS, std::regex::ECMAScript);
13
13
std::regex revRegex (revRegexS, std::regex::ECMAScript);
14
14
15
- ParsedURL parseURL (const std::string & url)
15
+ /* *
16
+ * Drop the trailing chevron suffix (the ^ marker; spelled shevron in this file) used by the output installable syntax.
+ * Returns a view into the same buffer as the argument, cut just before the last occurrence
+ * of the marker, or the argument unchanged when the marker is absent. Nothing is copied.
17
+ *
18
+ * FIXME: parseURL shouldn't really be used for parsing the OutputSpec, but it does
19
+ * get used. That code should actually use ExtendedOutputsSpec::parseOpt.
20
+ */
21
+ static std::string_view dropShevronSuffix (std::string_view url)
16
22
{
17
- static std::regex uriRegex (
18
- " ((" + schemeNameRegex + " ):" + " (?:(?://(" + authorityRegex + " )(" + absPathRegex + " ))|(/?" + pathRegex
19
- + " )))" + " (?:\\ ?(" + queryRegex + " ))?" + " (?:#(" + fragmentRegex + " ))?" ,
20
- std::regex::ECMAScript);
21
-
22
- std::smatch match;
23
-
24
- if (std::regex_match (url, match, uriRegex)) {
25
- std::string scheme = match[2 ];
26
- auto authority = match[3 ].matched ? std::optional<std::string>(match[3 ]) : std::nullopt;
27
- std::string path = match[4 ].matched ? match[4 ] : match[5 ];
28
- auto & query = match[6 ];
29
- auto & fragment = match[7 ];
30
-
31
- auto transportIsFile = parseUrlScheme (scheme).transport == " file" ;
32
-
33
- if (authority && *authority != " " && transportIsFile)
34
- throw BadURL (" file:// URL '%s' has unexpected authority '%s'" , url, *authority);
35
-
36
- if (transportIsFile && path.empty ())
37
- path = " /" ;
38
-
39
- return ParsedURL{
40
- .scheme = scheme,
41
- .authority = authority,
42
- .path = percentDecode (path),
43
- .query = decodeQuery (query),
44
- .fragment = percentDecode (std::string (fragment))};
45
- }
23
+ auto shevron = url.rfind (" ^" ); // rfind: index of the LAST occurrence of the marker, or npos
24
+ if (shevron == std::string_view::npos)
25
+ return url; // no marker: hand the input back untouched
26
+ return url.substr (0 , shevron); // keep only the part before the marker
27
+ }
28
+
29
+ /* *
30
+ * Percent encode spaces in the url.
+ * Copies the view into a std::string and hands it to replaceStrings, substituting each
+ * occurrence of the search literal with the result of percentEncode on that same literal.
+ * The argument itself is not modified; a new string is returned.
31
+ */
32
+ static std::string percentEncodeSpaces (std::string_view url)
33
+ {
34
+ return replaceStrings (std::string (url), " " , percentEncode (" " ));
35
+ }
46
36
47
- else
48
- throw BadURL (" '%s' is not a valid URL" , url);
37
+ ParsedURL parseURL (const std::string & url)
38
+ try {
39
+ /* Overview: split the input into scheme, authority, path, query and fragment using
+ boost::urls::url_view and return them as a ParsedURL. The whole body is a
+ function-try-block, so a boost::system::system_error raised by any statement below
+ is rethrown as BadURL carrying the message of the error code. BadURL is also thrown
+ directly when the scheme is missing or a file transport URL has an unexpected authority.
+
+ Drop the shevron suffix used for the flakerefs. Shevron character is reserved and
40
+ shouldn't appear in normal URIs. */
41
+ auto unparsedView = dropShevronSuffix (url);
42
+ /* For back-compat literal spaces are allowed. */
43
+ auto withFixedSpaces = percentEncodeSpaces (unparsedView);
44
+ auto urlView = boost::urls::url_view (withFixedSpaces); // built from the space-fixed copy, not from the original argument
45
+
46
+ if (!urlView.has_scheme ())
47
+ throw BadURL (" '%s' doesn't have a scheme" , url);
48
+
49
+ auto scheme = urlView.scheme ();
50
+ auto authority = [&]() -> std::optional<std::string> { // immediately invoked: decoded authority if present, otherwise nullopt
51
+ if (urlView.has_authority ())
52
+ return percentDecode (urlView.authority ().buffer ());
53
+ return std::nullopt;
54
+ }();
55
+
56
+ auto transportIsFile = parseUrlScheme (scheme).transport == " file" ; // decided from the transport part of the parsed scheme
57
+ if (authority && *authority != " " && transportIsFile)
58
+ throw BadURL (" file:// URL '%s' has unexpected authority '%s'" , url, *authority);
59
+
60
+ auto path = urlView.path (); /* Does pct-decoding */
61
+ auto fragment = urlView.fragment (); /* Does pct-decoding */
62
+
63
+ if (transportIsFile && path.empty ())
64
+ path = " /" ; // file transport with no path: substitute the default path literal
65
+
66
+ /* Get the raw query. Store URI supports smuggling doubly nested queries, where
67
+ the inner &/? are pct-encoded.
+ The query is therefore taken still encoded here and passed on to decodeQuery below. */
68
+ auto query = std::string_view (urlView.encoded_query ());
69
+
70
+ return ParsedURL{
71
+ .scheme = scheme,
72
+ .authority = authority,
73
+ .path = path,
74
+ .query = decodeQuery (std::string (query)), // splitting and decoding of parameters is delegated
75
+ .fragment = fragment,
76
+ };
77
+ } catch (boost::system::system_error & e) {
78
+ throw BadURL (" '%s' is not a valid URL: %s" , url, e.code ().message ()); // reports the original argument, not the space-fixed copy
49
79
}
50
80
51
81
std::string percentDecode (std::string_view in)
@@ -64,20 +94,25 @@ std::string percentEncode(std::string_view s, std::string_view keep)
64
94
}
65
95
66
96
/* Decode a raw URI query string into a StringMap of key/value pairs.
   New side of this diff: spaces in the input are percent-encoded first, the result is
   walked as a boost::urls::params_encoded_view, parameters without an equal sign are
   skipped with a warning, and key and value are decoded before being stored.
   Insertion uses emplace; NOTE(review): assuming StringMap behaves like std::map, the
   first occurrence of a repeated key is the one kept. Confirm against the typedef.
   A boost::system::system_error raised in the body is rethrown as BadURL. */
StringMap decodeQuery (const std::string & query)
67
- {
97
+ try {
98
+ /* For back-compat literal spaces are allowed. */
99
+ auto withFixedSpaces = percentEncodeSpaces (query);
100
+
68
101
StringMap result;
69
102
70
- for ( const auto & s : tokenizeString<Strings>(query, " & " )) {
71
- auto e = s. find ( ' = ' );
72
- if (e == std::string::npos ) {
73
- warn (" dubious URI query '%s' is missing equal sign '%s', ignoring" , s , " =" );
103
+ auto encodedQuery = boost::urls::params_encoded_view (withFixedSpaces); // built from the space-fixed copy
104
+ for ( auto && [key, value, value_specified] : encodedQuery) {
105
+ if (!value_specified ) {
106
+ warn (" dubious URI query '%s' is missing equal sign '%s', ignoring" , std::string_view (key) , " =" );
74
107
continue ; // parameters without a value are warned about and left out of the result
75
108
}
76
109
77
- result.emplace (s. substr ( 0 , e ), percentDecode ( std::string_view (s). substr (e + 1 ) ));
110
+ result.emplace (key. decode ( ), value. decode ( ));
78
111
}
79
112
80
113
return result;
114
+ } catch (boost::system::system_error & e) {
115
+ throw BadURL (" invalid URI query '%s': %s" , query, e.code ().message ()); // reports the original argument, not the space-fixed copy
81
116
}
82
117
83
118
/* NOTE(review): judging by the name, these are the extra characters permitted unescaped in a query string. The use sites are outside this view, so confirm there. */
const static std::string allowedInQuery = " :@/?" ;
@@ -150,6 +185,7 @@ std::string fixGitURL(const std::string & url)
150
185
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
151
186
bool isValidSchemeName (std::string_view s)
152
187
{
188
+ const static std::string schemeNameRegex = " (?:[a-z][a-z0-9+.-]*)" ;
153
189
static std::regex regex (schemeNameRegex, std::regex::ECMAScript);
154
190
155
191
return std::regex_match (s.begin (), s.end (), regex, std::regex_constants::match_default);
0 commit comments