-
Notifications
You must be signed in to change notification settings - Fork 144
Description
Hi, while using lhttpc I ran into a URL parsing issue. According to RFC 3986, '@' is allowed in query strings, so I did not URL-encode it when sending an lhttpc request. This resulted in lhttpc parsing the URL incorrectly in the lhttpc_lib.erl:parse_url/1 function. (It interpreted the domain, the path, and the part of the query string before the '@' sign as the username, and the rest of the query string as the host to connect to.)
I ended up using a workaround locally (but please take the following code with a grain of salt; I have only just started programming in Erlang):
%% @doc Parse a URL string into an #lhttpc_url{} record.
%%
%% The URL is split with a single regular expression whose capture groups
%% are, in order: whole match, origin, protocol (including the trailing
%% ':'), username, password, host (with port), hostname, port, path,
%% query (including the leading '?') and fragment. Because the userinfo
%% part is only recognised inside the authority (before the first '/',
%% '?' or '#'), an unencoded '@' in the query string is no longer
%% mistaken for a credentials separator.
%%
%% The host is lower-cased; the path stored in the record is the path
%% followed by the query string. The port defaults to 80 / 443 for
%% "http:" / "https:" when the URL carries no explicit port.
%%
%% Crashes with `badmatch' if the regex does not match, and with
%% `function_clause' for schemes other than "http:" / "https:".
%%
%% NOTE(review): a URL without any path (e.g. "http://host") yields an
%% empty path string here — confirm that callers cope with "" rather
%% than "/".
-spec parse_url(string()) -> #lhttpc_url{}.
parse_url(URL) ->
    %% Adjacent string literals are concatenated at compile time; the
    %% pattern is split up only so each capture group can be labelled.
    UrlParsingRegexp =
        "^(?:(?:("                                   %% group 1: origin
        "([^:/#?]+:)?"                               %% group 2: protocol, e.g. "http:"
        "(?:(?://)"                                  %% authority starts after "//"
        "(?:(?:(?:([^:@/#?]+)(?::([^:@/#?]*))?)@)?"  %% groups 3, 4: username, password
        "(([^:/#?\\]\\[]+|\\[[^/\\]@#?]+\\])"        %% groups 5, 6: host, hostname (or [IPv6])
        "(?::([0-9]+))?"                             %% group 7: port digits
        "))?)?"                                      %% close host, userinfo+host, authority
        ")?"                                         %% close origin
        "((?:/?(?:[^/?#]+/+)*)(?:[^?#]*))"           %% group 8: path
        ")?"
        "(\\?[^#]+)?"                                %% group 9: query, with leading '?'
        ")"
        "(#.*)?",                                    %% group 10: fragment
    %% Assertive match: an unparsable URL is a caller error.
    {match, Matches} = re:run(URL, UrlParsingRegexp, [{capture, all, list}]),
    {Scheme, User, Passwd, Host, Port, Path} = split_matched_url_to_parts(Matches),
    #lhttpc_url{
        host = string:to_lower(Host),
        port = Port,
        path = Path,
        user = User,
        password = Passwd,
        is_ssl = (Scheme =:= https)
    }.
%% @doc Turn the list of regex captures into
%% {Scheme, User, Password, HostName, Port, Path}.
%%
%% The first nine captures (whole match, origin, protocol, username,
%% password, host, hostname, port, path) must be present. A tenth
%% capture, when present, is the query string and is appended to the
%% path; anything after it is ignored.
split_matched_url_to_parts([_Whole, _Origin, Proto, User, Pass, _HostAndPort,
                            Hostname, PortStr, Pathname | Tail]) ->
    Scheme = get_scheme_from_parts(Proto),
    PortNumber = get_port_from_parts(Proto, PortStr),
    FullPath =
        case Tail of
            [Search | _] -> Pathname ++ Search;
            _ -> Pathname
        end,
    {Scheme, User, Pass, Hostname, PortNumber, FullPath}.
%% @doc Resolve the port number for a parsed URL.
%%
%% When the port string is empty, "http:" maps to 80 and "https:" to 443.
%% Otherwise the port string is converted with list_to_integer/1, which
%% raises `badarg' if it is empty (unknown protocol without a port) or
%% not a valid integer.
-spec get_port_from_parts(string(), string()) -> integer().
get_port_from_parts("http:", []) -> 80;
get_port_from_parts("https:", []) -> 443;
%% The protocol is irrelevant once an explicit port is given.
get_port_from_parts(_Protocol, Port) -> list_to_integer(Port).
%% @doc Map the protocol capture (scheme name plus trailing ':') to the
%% corresponding scheme atom. Only "http:" and "https:" are accepted;
%% any other value fails with `function_clause'.
-spec get_scheme_from_parts(string()) -> http | https.
get_scheme_from_parts("https:") ->
    https;
get_scheme_from_parts("http:") ->
    http.