Skip to content

Commit 25aaafb

Browse files
Adds domain exclusion configuration for first-party proxy for creatives.
1 parent d4bd10b commit 25aaafb

File tree

4 files changed

+231
-19
lines changed

4 files changed

+231
-19
lines changed

crates/common/src/creative.rs

Lines changed: 158 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,18 @@ use crate::tsjs;
4343
use lol_html::{element, html_content::ContentType, text, HtmlRewriter, Settings as HtmlSettings};
4444

4545
// Helper: normalize to absolute URL if http/https or protocol-relative. Otherwise None.
46-
pub(super) fn to_abs(u: &str) -> Option<String> {
46+
// Checks against the rewrite blacklist to exclude configured domains/patterns from proxying.
47+
pub(super) fn to_abs(u: &str, settings: &Settings) -> Option<String> {
4748
let t = u.trim();
4849
if t.is_empty() {
4950
return None;
5051
}
52+
53+
// Skip if excluded from rewrites in settings
54+
if settings.rewrite.is_excluded(t) {
55+
return None;
56+
}
57+
5158
// Skip non-network schemes commonly found in creatives
5259
let lower = t.to_ascii_lowercase();
5360
if lower.starts_with("data:")
@@ -59,12 +66,13 @@ pub(super) fn to_abs(u: &str) -> Option<String> {
5966
{
6067
return None;
6168
}
69+
6270
if t.starts_with("//") {
6371
Some(format!("https:{}", t))
6472
} else if lower.starts_with("http://") || lower.starts_with("https://") {
6573
Some(t.to_string())
6674
} else {
67-
None
75+
return None;
6876
}
6977
}
7078

@@ -106,7 +114,7 @@ pub(super) fn rewrite_style_urls(style: &str, settings: &Settings) -> String {
106114
(s, e)
107115
};
108116
let url_val = &style[qs..qe];
109-
let new_val = if let Some(abs) = to_abs(url_val) {
117+
let new_val = if let Some(abs) = to_abs(url_val, settings) {
110118
build_proxy_url(settings, &abs)
111119
} else {
112120
url_val.to_string()
@@ -196,7 +204,7 @@ pub(super) fn build_click_url(settings: &Settings, clear_url: &str) -> String {
196204

197205
#[inline]
198206
pub(super) fn proxy_if_abs(settings: &Settings, val: &str) -> Option<String> {
199-
to_abs(val).map(|abs| build_proxy_url(settings, &abs))
207+
to_abs(val, settings).map(|abs| build_proxy_url(settings, &abs))
200208
}
201209

202210
/// Split a srcset/imagesrcset attribute into candidate strings.
@@ -259,7 +267,7 @@ pub(super) fn rewrite_srcset(srcset: &str, settings: &Settings) -> String {
259267
let mut parts = it.split_whitespace();
260268
let url = parts.next().unwrap_or("");
261269
let descriptor = parts.collect::<Vec<_>>().join(" ");
262-
let rewritten = if let Some(abs) = to_abs(url) {
270+
let rewritten = if let Some(abs) = to_abs(url, settings) {
263271
build_proxy_url(settings, &abs)
264272
} else {
265273
url.to_string()
@@ -404,7 +412,7 @@ pub fn rewrite_creative_html(markup: &str, settings: &Settings) -> String {
404412
// Click-through links
405413
element!("a[href], area[href]", |el| {
406414
if let Some(href) = el.get_attribute("href") {
407-
if let Some(abs) = to_abs(&href) {
415+
if let Some(abs) = to_abs(&href, settings) {
408416
let click = build_click_url(settings, &abs);
409417
let _ = el.set_attribute("href", &click);
410418
let _ = el.set_attribute("data-tsclick", &click);
@@ -508,26 +516,27 @@ mod tests {
508516

509517
#[test]
510518
fn to_abs_conversions() {
519+
let settings = crate::test_support::tests::create_test_settings();
511520
assert_eq!(
512-
to_abs("//cdn.example/x"),
521+
to_abs("//cdn.example/x", &settings),
513522
Some("https://cdn.example/x".to_string())
514523
);
515524
assert_eq!(
516-
to_abs("HTTPS://cdn.example/x"),
525+
to_abs("HTTPS://cdn.example/x", &settings),
517526
Some("HTTPS://cdn.example/x".to_string())
518527
);
519528
assert_eq!(
520-
to_abs("http://cdn.example/x"),
529+
to_abs("http://cdn.example/x", &settings),
521530
Some("http://cdn.example/x".to_string())
522531
);
523-
assert_eq!(to_abs("/local/x"), None);
532+
assert_eq!(to_abs("/local/x", &settings), None);
524533
assert_eq!(
525-
to_abs(" //cdn.example/y "),
534+
to_abs(" //cdn.example/y ", &settings),
526535
Some("https://cdn.example/y".to_string())
527536
);
528-
assert_eq!(to_abs("data:image/png;base64,abcd"), None);
529-
assert_eq!(to_abs("javascript:alert(1)"), None);
530-
assert_eq!(to_abs("mailto:test@example.com"), None);
537+
assert_eq!(to_abs("data:image/png;base64,abcd", &settings), None);
538+
assert_eq!(to_abs("javascript:alert(1)", &settings), None);
539+
assert_eq!(to_abs("mailto:test@example.com", &settings), None);
531540
}
532541

533542
#[test]
@@ -981,13 +990,14 @@ mod tests {
981990

982991
#[test]
983992
fn to_abs_additional_cases() {
993+
let settings = crate::test_support::tests::create_test_settings();
984994
assert_eq!(
985-
to_abs(" https://cdn.example/a "),
995+
to_abs(" https://cdn.example/a ", &settings),
986996
Some("https://cdn.example/a".to_string())
987997
);
988-
assert_eq!(to_abs("blob:xyz"), None);
989-
assert_eq!(to_abs("tel:+123"), None);
990-
assert_eq!(to_abs("about:blank"), None);
998+
assert_eq!(to_abs("blob:xyz", &settings), None);
999+
assert_eq!(to_abs("tel:+123", &settings), None);
1000+
assert_eq!(to_abs("about:blank", &settings), None);
9911001
}
9921002

9931003
#[test]
@@ -1003,4 +1013,134 @@ mod tests {
10031013
// relative candidate remains
10041014
assert!(out.contains("/local/img.png 1x"));
10051015
}
1016+
1017+
#[test]
1018+
fn to_abs_respects_exclude_domains() {
1019+
let mut settings = crate::test_support::tests::create_test_settings();
1020+
settings.rewrite.exclude_domains = vec!["trusted-cdn.example.com".to_string()];
1021+
1022+
// Excluded domain should return None (not proxied)
1023+
assert_eq!(
1024+
to_abs("https://trusted-cdn.example.com/lib.js", &settings),
1025+
None
1026+
);
1027+
1028+
// Non-excluded domain should return Some
1029+
assert_eq!(
1030+
to_abs("https://other-cdn.example.com/lib.js", &settings),
1031+
Some("https://other-cdn.example.com/lib.js".to_string())
1032+
);
1033+
}
1034+
1035+
#[test]
1036+
fn to_abs_respects_wildcard_domains() {
1037+
let mut settings = crate::test_support::tests::create_test_settings();
1038+
settings.rewrite.exclude_domains = vec!["*.cloudflare.com".to_string()];
1039+
1040+
// Should exclude base domain
1041+
assert_eq!(to_abs("https://cloudflare.com/cdn.js", &settings), None);
1042+
1043+
// Should exclude subdomain
1044+
assert_eq!(
1045+
to_abs("https://cdnjs.cloudflare.com/lib.js", &settings),
1046+
None
1047+
);
1048+
1049+
// Should not exclude different domain
1050+
assert_eq!(
1051+
to_abs("https://notcloudflare.com/lib.js", &settings),
1052+
Some("https://notcloudflare.com/lib.js".to_string())
1053+
);
1054+
}
1055+
1056+
#[test]
1057+
fn rewrite_html_excludes_blacklisted_domains() {
1058+
let mut settings = crate::test_support::tests::create_test_settings();
1059+
settings.rewrite.exclude_domains = vec!["trusted-cdn.example.com".to_string()];
1060+
1061+
let html = r#"
1062+
<img src="https://trusted-cdn.example.com/logo.png">
1063+
<img src="https://other-cdn.example.com/banner.jpg">
1064+
"#;
1065+
1066+
let out = rewrite_creative_html(html, &settings);
1067+
1068+
// Excluded domain should NOT be rewritten
1069+
assert!(out.contains(r#"src="https://trusted-cdn.example.com/logo.png"#));
1070+
1071+
// Non-excluded domain SHOULD be rewritten
1072+
assert!(out.contains("/first-party/proxy?tsurl="));
1073+
assert!(out.contains("other-cdn.example.com"));
1074+
}
1075+
1076+
#[test]
1077+
fn rewrite_srcset_excludes_blacklisted_domains() {
1078+
let mut settings = crate::test_support::tests::create_test_settings();
1079+
settings.rewrite.exclude_domains = vec!["trusted.example.com".to_string()];
1080+
1081+
let html = r#"
1082+
<img srcset="https://trusted.example.com/img-1x.png 1x, https://cdn.example.com/img-2x.png 2x">
1083+
"#;
1084+
1085+
let out = rewrite_creative_html(html, &settings);
1086+
1087+
// Excluded domain should remain as-is
1088+
assert!(out.contains("https://trusted.example.com/img-1x.png 1x"));
1089+
1090+
// Non-excluded should be proxied
1091+
assert!(out.contains("/first-party/proxy?tsurl="));
1092+
assert!(out.contains("cdn.example.com"));
1093+
}
1094+
1095+
#[test]
1096+
fn rewrite_style_urls_excludes_blacklisted_domains() {
1097+
let mut settings = crate::test_support::tests::create_test_settings();
1098+
settings.rewrite.exclude_domains = vec!["fonts.googleapis.com".to_string()];
1099+
1100+
let html = r#"
1101+
<style>
1102+
@font-face {
1103+
font-family: 'Test';
1104+
src: url(https://fonts.googleapis.com/font.woff2);
1105+
}
1106+
body {
1107+
background: url(https://cdn.example.com/bg.png);
1108+
}
1109+
</style>
1110+
"#;
1111+
1112+
let out = rewrite_creative_html(html, &settings);
1113+
1114+
// Excluded domain should remain unchanged
1115+
assert!(out.contains("url(https://fonts.googleapis.com/font.woff2)"));
1116+
1117+
// Non-excluded should be proxied
1118+
assert!(out.contains("/first-party/proxy?tsurl="));
1119+
assert!(out.contains("cdn.example.com"));
1120+
}
1121+
1122+
#[test]
1123+
fn rewrite_click_urls_excludes_blacklisted_domains() {
1124+
let mut settings = crate::test_support::tests::create_test_settings();
1125+
settings.rewrite.exclude_domains = vec!["trusted-landing.example.com".to_string()];
1126+
1127+
let html = r#"
1128+
<a href="https://trusted-landing.example.com/page">Trusted Link</a>
1129+
<a href="https://advertiser.example.com/landing">Ad Link</a>
1130+
"#;
1131+
1132+
let out = rewrite_creative_html(html, &settings);
1133+
1134+
// Excluded domain should NOT be rewritten to first-party click
1135+
assert!(out.contains(r#"href="https://trusted-landing.example.com/page"#));
1136+
// The excluded link should NOT have data-tsclick since it wasn't rewritten
1137+
assert!(
1138+
!out.contains(r#"<a href="https://trusted-landing.example.com/page" data-tsclick="#)
1139+
);
1140+
1141+
// Non-excluded should be rewritten and SHOULD have data-tsclick
1142+
assert!(out.contains("/first-party/click?tsurl="));
1143+
assert!(out.contains("advertiser.example.com"));
1144+
assert!(out.contains("data-tsclick=\"/first-party/click"));
1145+
}
10061146
}

crates/common/src/proxy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ pub async fn handle_first_party_proxy_sign(
473473
.unwrap_or_else(|| "https".to_string());
474474
format!("{}:{}", default_scheme, trimmed)
475475
} else {
476-
crate::creative::to_abs(trimmed).ok_or_else(|| {
476+
crate::creative::to_abs(trimmed, settings).ok_or_else(|| {
477477
Report::new(TrustedServerError::Proxy {
478478
message: "unsupported url".to_string(),
479479
})

crates/common/src/settings.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,41 @@ impl Synthetic {
9999
}
100100
}
101101

102+
#[derive(Debug, Default, Deserialize, Serialize, Validate)]
103+
pub struct Rewrite {
104+
/// List of domains to exclude from rewriting. Supports wildcards (e.g., "*.example.com").
105+
/// URLs from these domains will not be proxied through first-party endpoints.
106+
#[serde(default)]
107+
pub exclude_domains: Vec<String>,
108+
}
109+
110+
impl Rewrite {
111+
/// Checks if a URL should be excluded from rewriting based on domain matching
112+
#[allow(dead_code)]
113+
pub fn is_excluded(&self, url: &str) -> bool {
114+
// Parse URL to extract host
115+
let Ok(parsed) = url::Url::parse(url) else {
116+
return false;
117+
};
118+
119+
let host = parsed.host_str().unwrap_or("");
120+
121+
// Check exact domain matches (with wildcard support)
122+
for domain in &self.exclude_domains {
123+
if let Some(suffix) = domain.strip_prefix("*.") {
124+
// Wildcard: *.example.com matches both example.com and sub.example.com
125+
if host == suffix || host.ends_with(&format!(".{}", suffix)) {
126+
return true;
127+
}
128+
} else if host == domain {
129+
return true;
130+
}
131+
}
132+
133+
false
134+
}
135+
}
136+
102137
#[derive(Debug, Default, Deserialize, Serialize, Validate)]
103138
pub struct Handler {
104139
#[validate(length(min = 1), custom(function = validate_path))]
@@ -137,6 +172,9 @@ pub struct Settings {
137172
pub handlers: Vec<Handler>,
138173
#[serde(default)]
139174
pub response_headers: HashMap<String, String>,
175+
#[serde(default)]
176+
#[validate(nested)]
177+
pub rewrite: Rewrite,
140178
}
141179

142180
#[allow(unused)]
@@ -616,4 +654,30 @@ mod tests {
616654
};
617655
assert_eq!(publisher.origin_host(), "[::1]:8080");
618656
}
657+
658+
#[test]
659+
fn test_rewrite_is_excluded() {
660+
let rewrite = Rewrite {
661+
exclude_domains: vec!["cdn.example.com".to_string(), "*.example2.com".to_string()],
662+
};
663+
664+
// Exact domain match
665+
assert!(rewrite.is_excluded("http://cdn.example.com/image.png"));
666+
667+
// Wildcard match - base domain
668+
assert!(rewrite.is_excluded("https://example2.com/cdn.js"));
669+
// Wildcard match - subdomains
670+
assert!(rewrite.is_excluded("https://cdnjs.example2.com/lib.js"));
671+
assert!(rewrite.is_excluded("https://sub.domain.example2.com/asset.js"));
672+
673+
// Should NOT match
674+
assert!(!rewrite.is_excluded("https://other.example.com/asset.js"));
675+
assert!(!rewrite.is_excluded("https://sub.cdn.example.com/asset.js"));
676+
assert!(!rewrite.is_excluded("https://example2.com.fake.com/asset.js"));
677+
assert!(!rewrite.is_excluded("https://notexample.com/asset.js"));
678+
679+
// Invalid URLs should not crash and should return false
680+
assert!(!rewrite.is_excluded("not a url"));
681+
assert!(!rewrite.is_excluded(""));
682+
}
619683
}

trusted-server.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,11 @@ template = "{{ client_ip }}:{{ user_agent }}:{{ first_party_id }}:{{ auth_user_i
3232
# Custom headers to be included in every response
3333
[response_headers]
3434
X-Custom-Header = "custom header value"
35+
36+
# Rewrite configuration for creative HTML/CSS processing
37+
# [rewrite]
38+
# Domains to exclude from first-party rewriting (supports wildcards like "*.example.com")
39+
# URLs from these domains will be left as-is and not proxied
40+
# exclude_domains = [
41+
# "*.edgecompute.app",
42+
# ]

0 commit comments

Comments
 (0)