@@ -12,45 +12,68 @@ std::regex refRegex(refRegexS, std::regex::ECMAScript);
12
12
std::regex badGitRefRegex (badGitRefRegexS, std::regex::ECMAScript);
13
13
std::regex revRegex (revRegexS, std::regex::ECMAScript);
14
14
15
- ParsedURL parseURL (const std::string & url)
15
+ /* *
16
+ * Drop the trailing chevron (`^`) suffix used by the output installable syntax.
17
+ *
18
+ * FIXME: parseURL shouldn't really be used for parsing the OutputSpec, but it does
19
+ * get used. That code should actually use ExtendedOutputsSpec::parseOpt.
20
+ */
21
+ static std::string_view dropShevronSuffix (std::string_view url)
16
22
{
17
- static std::regex uriRegex (
18
- " ((" + schemeNameRegex + " ):"
19
- + " (?:(?://(" + authorityRegex + " )(" + absPathRegex + " ))|(/?" + pathRegex + " )))"
20
- + " (?:\\ ?(" + queryRegex + " ))?"
21
- + " (?:#(" + fragmentRegex + " ))?" ,
22
- std::regex::ECMAScript);
23
-
24
- std::smatch match;
25
-
26
- if (std::regex_match (url, match, uriRegex)) {
27
- std::string scheme = match[2 ];
28
- auto authority = match[3 ].matched
29
- ? std::optional<std::string>(match[3 ]) : std::nullopt;
30
- std::string path = match[4 ].matched ? match[4 ] : match[5 ];
31
- auto & query = match[6 ];
32
- auto & fragment = match[7 ];
33
-
34
- auto transportIsFile = parseUrlScheme (scheme).transport == " file" ;
35
-
36
- if (authority && *authority != " " && transportIsFile)
37
- throw BadURL (" file:// URL '%s' has unexpected authority '%s'" ,
38
- url, *authority);
39
-
40
- if (transportIsFile && path.empty ())
41
- path = " /" ;
42
-
43
- return ParsedURL{
44
- .scheme = scheme,
45
- .authority = authority,
46
- .path = percentDecode (path),
47
- .query = decodeQuery (query),
48
- .fragment = percentDecode (std::string (fragment))
49
- };
50
- }
23
+ auto shevron = url.rfind (" ^" );
24
+ return url.substr (0 , shevron);
25
+ }
51
26
52
- else
53
- throw BadURL (" '%s' is not a valid URL" , url);
27
+ /* *
28
+ * Percent encode spaces in the url.
29
+ */
30
+ static std::string percentEncodeSpaces (std::string_view url)
31
+ {
32
+ return replaceStrings (std::string (url), " " , percentEncode (" " ));
33
+ }
34
+
35
+ ParsedURL parseURL (const std::string & url)
36
+ try {
37
+ /* Drop the chevron (`^`) suffix used for the flakerefs. The chevron character is reserved and
38
+ shouldn't appear in normal URIs. */
39
+ auto unparsedView = dropShevronSuffix (url);
40
+ /* For back-compat literal spaces are allowed. */
41
+ auto withFixedSpaces = percentEncodeSpaces (unparsedView);
42
+ auto urlView = boost::urls::url_view (withFixedSpaces);
43
+
44
+ if (!urlView.has_scheme ())
45
+ throw BadURL (" '%s' doesn't have a scheme" , url);
46
+
47
+ auto scheme = urlView.scheme ();
48
+ auto authority = [&]() -> std::optional<std::string> {
49
+ if (urlView.has_authority ())
50
+ return percentDecode (urlView.authority ().buffer ());
51
+ return std::nullopt;
52
+ }();
53
+
54
+ auto transportIsFile = parseUrlScheme (scheme).transport == " file" ;
55
+ if (authority && *authority != " " && transportIsFile)
56
+ throw BadURL (" file:// URL '%s' has unexpected authority '%s'" , url, *authority);
57
+
58
+ auto path = urlView.path (); /* Does pct-decoding */
59
+ auto fragment = urlView.fragment (); /* Does pct-decoding */
60
+
61
+ if (transportIsFile && path.empty ())
62
+ path = " /" ;
63
+
64
+ /* Get the raw query. Store URI supports smuggling doubly nested queries, where
65
+ the inner &/? are pct-encoded. */
66
+ auto query = std::string_view (urlView.encoded_query ());
67
+
68
+ return ParsedURL{
69
+ .scheme = scheme,
70
+ .authority = authority,
71
+ .path = path,
72
+ .query = decodeQuery (std::string (query)),
73
+ .fragment = fragment,
74
+ };
75
+ } catch (boost::system::system_error & e) {
76
+ throw BadURL (" '%s' is not a valid URL: %s" , url, e.code ().message ());
54
77
}
55
78
56
79
std::string percentDecode (std::string_view in)
@@ -69,22 +92,25 @@ std::string percentEncode(std::string_view s, std::string_view keep)
69
92
}
70
93
71
94
StringMap decodeQuery (const std::string & query)
72
- {
95
+ try {
96
+ /* For back-compat literal spaces are allowed. */
97
+ auto withFixedSpaces = percentEncodeSpaces (query);
98
+
73
99
StringMap result;
74
100
75
- for ( const auto & s : tokenizeString<Strings>(query, " & " )) {
76
- auto e = s. find ( ' = ' );
77
- if (e == std::string::npos ) {
78
- warn (" dubious URI query '%s' is missing equal sign '%s', ignoring" , s , " =" );
101
+ auto encodedQuery = boost::urls::params_encoded_view (withFixedSpaces);
102
+ for ( auto && [key, value, value_specified] : encodedQuery) {
103
+ if (!value_specified ) {
104
+ warn (" dubious URI query '%s' is missing equal sign '%s', ignoring" , std::string_view (key) , " =" );
79
105
continue ;
80
106
}
81
107
82
- result.emplace (
83
- s.substr (0 , e),
84
- percentDecode (std::string_view (s).substr (e + 1 )));
108
+ result.emplace (key.decode (), value.decode ());
85
109
}
86
110
87
111
return result;
112
+ } catch (boost::system::system_error & e) {
113
+ throw BadURL (" invalid URI query '%s'" , e.code ().message ());
88
114
}
89
115
90
116
const static std::string allowedInQuery = " :@/?" ;
@@ -165,6 +191,7 @@ std::string fixGitURL(const std::string & url)
165
191
// https://www.rfc-editor.org/rfc/rfc3986#section-3.1
166
192
bool isValidSchemeName (std::string_view s)
167
193
{
194
+ const static std::string schemeNameRegex = " (?:[a-z][a-z0-9+.-]*)" ;
168
195
static std::regex regex (schemeNameRegex, std::regex::ECMAScript);
169
196
170
197
return std::regex_match (s.begin (), s.end (), regex, std::regex_constants::match_default);
0 commit comments