@@ -24,6 +24,48 @@ namespace Http
2424
2525const char * SEPARATOR = " ://" ;
2626
// Selects how path segments are escaped by the URL-encoding helper in this file:
//   false (default) - legacy escaping that leaves a few reserved characters untouched
//   true            - delegate escaping to StringUtils::URLEncode
// NOTE(review): this is a plain, non-atomic bool; presumably it is meant to be
// configured once during start-up, before any requests are built - confirm.
bool s_compliantRfc3986Encoding = false;

/**
 * Switches the path-segment escaping mode.
 * @param compliant true to use StringUtils::URLEncode for path segments,
 *                  false to keep the legacy escaping rules.
 */
void SetCompliantRfc3986Encoding(bool compliant)
{
    s_compliantRfc3986Encoding = compliant;
}
29+
30+ Aws::String urlEncodeSegment (const Aws::String& segment)
31+ {
32+ // consolidates legacy escaping logic into one local method
33+ if (s_compliantRfc3986Encoding)
34+ {
35+ return StringUtils::URLEncode (segment.c_str ());
36+ }
37+ else
38+ {
39+ Aws::StringStream ss;
40+ ss << std::hex << std::uppercase;
41+ for (unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
42+ {
43+ // RFC 3986 §2.3 unreserved characters
44+ if (StringUtils::IsAlnum (c))
45+ {
46+ ss << c;
47+ continue ;
48+ }
49+ switch (c)
50+ {
51+ // §2.3 unreserved characters
52+ // The path section of the URL allows unreserved characters to appear unescaped
53+ case ' -' : case ' _' : case ' .' : case ' ~' :
54+ // RFC 3986 §2.2 Reserved characters
55+ // NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
56+ // discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
57+ case ' $' : case ' &' : case ' ,' :
58+ case ' :' : case ' =' : case ' @' :
59+ ss << c;
60+ break ;
61+ default :
62+ ss << ' %' << std::setfill (' 0' ) << std::setw (2 ) << (int )c << std::setw (0 );
63+ }
64+ }
65+ return ss.str ();
66+ }
67+ }
68+
2769} // namespace Http
2870} // namespace Aws
2971
@@ -101,7 +143,7 @@ void URI::SetScheme(Scheme value)
101143
102144Aws::String URI::URLEncodePathRFC3986 (const Aws::String& path)
103145{
104- if (path.empty ())
146+ if (path.empty ())
105147 {
106148 return path;
107149 }
@@ -113,34 +155,10 @@ Aws::String URI::URLEncodePathRFC3986(const Aws::String& path)
113155 // escape characters appearing in a URL path according to RFC 3986
114156 for (const auto & segment : pathParts)
115157 {
116- ss << ' /' ;
117- for (unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
118- {
119- // §2.3 unreserved characters
120- if (StringUtils::IsAlnum (c))
121- {
122- ss << c;
123- continue ;
124- }
125- switch (c)
126- {
127- // §2.3 unreserved characters
128- case ' -' : case ' _' : case ' .' : case ' ~' :
129- // The path section of the URL allow reserved characters to appear unescaped
130- // RFC 3986 §2.2 Reserved characters
131- // NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
132- // discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
133- case ' $' : case ' &' : case ' ,' :
134- case ' :' : case ' =' : case ' @' :
135- ss << c;
136- break ;
137- default :
138- ss << ' %' << std::setfill (' 0' ) << std::setw (2 ) << (int )((unsigned char )c) << std::setw (0 );
139- }
140- }
158+ ss << ' /' << urlEncodeSegment (segment);
141159 }
142160
143- // if the last character was also a slash, then add that back here.
161+ // if the last character was also a slash, then add that back here.
144162 if (path.back () == ' /' )
145163 {
146164 ss << ' /' ;
@@ -216,33 +234,10 @@ Aws::String URI::GetURLEncodedPathRFC3986() const
216234 ss << std::hex << std::uppercase;
217235
218236 // escape characters appearing in a URL path according to RFC 3986
237+ // (mostly; there is some non-standards legacy support that can be disabled)
219238 for (const auto & segment : m_pathSegments)
220239 {
221- ss << ' /' ;
222- for (unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
223- {
224- // §2.3 unreserved characters
225- if (StringUtils::IsAlnum (c))
226- {
227- ss << c;
228- continue ;
229- }
230- switch (c)
231- {
232- // §2.3 unreserved characters
233- case ' -' : case ' _' : case ' .' : case ' ~' :
234- // The path section of the URL allow reserved characters to appear unescaped
235- // RFC 3986 §2.2 Reserved characters
236- // NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
237- // discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
238- case ' $' : case ' &' : case ' ,' :
239- case ' :' : case ' =' : case ' @' :
240- ss << c;
241- break ;
242- default :
243- ss << ' %' << std::setfill (' 0' ) << std::setw (2 ) << (int )((unsigned char )c) << std::setw (0 );
244- }
245- }
240+ ss << ' /' << urlEncodeSegment (segment);
246241 }
247242
248243 if (m_pathSegments.empty () || m_pathHasTrailingSlash)
0 commit comments