From fa4f7f77db392d4389a50e3ef72d97a8d545d07e Mon Sep 17 00:00:00 2001 From: edgul Date: Fri, 1 Aug 2025 15:46:27 -0400 Subject: [PATCH] Fix hostname canonicalization of badhostnames --- src/canonicalize_and_process.rs | 3 +- src/testdata/urlpatterntestdata.json | 58 ++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/src/canonicalize_and_process.rs b/src/canonicalize_and_process.rs index f528335..46c8beb 100644 --- a/src/canonicalize_and_process.rs +++ b/src/canonicalize_and_process.rs @@ -46,7 +46,8 @@ pub fn canonicalize_hostname(value: &str) -> Result { return Ok(String::new()); } let mut url = url::Url::parse("http://dummy.test").unwrap(); - url.set_host(Some(value)).map_err(Error::Url)?; + url::quirks::set_hostname(&mut url, value) + .map_err(|_| Error::Url(url::ParseError::InvalidDomainCharacter))?; Ok(url::quirks::hostname(&url).to_string()) } diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 5fcda0e..41a400d 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -1883,7 +1883,17 @@ { "pattern": [ "https://{sub.}?example{.com/}foo" ], "inputs": [ "https://example.com/foo" ], - "expected_obj": "error" + "exactly_empty_components": [ "port" ], + "expected_obj": { + "protocol": "https", + "hostname": "{sub.}?example.com", + "pathname": "*" + }, + "expected_match": { + "protocol": { "input": "https", "groups": {} }, + "hostname": { "input": "example.com", "groups": {} }, + "pathname": { "input": "/foo", "groups": { "0": "/foo" } } + } }, { "pattern": [ "{https://}example.com/foo" ], @@ -2441,7 +2451,13 @@ }, { "pattern": [{ "hostname": "bad#hostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "bad" }], + "expected_obj": { + "hostname": "bad" + }, + "expected_match": { + "hostname": { "input": "bad", "groups": {} } + } }, { "pattern": [{ "hostname": "bad%hostname" }], @@ -2449,7 +2465,13 @@ }, { "pattern": [{ "hostname": "bad/hostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "bad" }], + "expected_obj": { + "hostname": "bad" + }, + "expected_match": { + "hostname": { "input": "bad", "groups": {} } + } }, { "skip": "likely a bug in rust-url", @@ -2482,7 +2504,11 @@ }, { "pattern": [{ "hostname": "bad\\\\hostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "bad" + }, + "expected_match": null }, { "pattern": [{ "hostname": "bad^hostname" }], @@ -2494,15 +2520,33 @@ }, { "pattern": [{ "hostname": "bad\nhostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{ "hostname": "bad\rhostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{ "hostname": "bad\thostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{}],