diff --git a/crates/common/src/html_processor.rs b/crates/common/src/html_processor.rs index f786c78..b91bb9e 100644 --- a/crates/common/src/html_processor.rs +++ b/crates/common/src/html_processor.rs @@ -5,7 +5,6 @@ use std::cell::Cell; use std::rc::Rc; use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings}; -use regex::Regex; use crate::integrations::{ AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationRegistry, @@ -22,14 +21,12 @@ pub struct HtmlProcessorConfig { pub request_host: String, pub request_scheme: String, pub integrations: IntegrationRegistry, - pub nextjs_enabled: bool, - pub nextjs_attributes: Vec, } impl HtmlProcessorConfig { /// Create from settings and request parameters pub fn from_settings( - settings: &Settings, + _settings: &Settings, integrations: &IntegrationRegistry, origin_host: &str, request_host: &str, @@ -40,8 +37,6 @@ impl HtmlProcessorConfig { request_host: request_host.to_string(), request_scheme: request_scheme.to_string(), integrations: integrations.clone(), - nextjs_enabled: settings.publisher.nextjs.enabled, - nextjs_attributes: settings.publisher.nextjs.rewrite_attributes.clone(), } } } @@ -75,39 +70,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso fn protocol_relative_replacement(&self) -> String { format!("//{}", self.request_host) } - - fn rewrite_nextjs_values(&self, content: &str, attributes: &[String]) -> Option { - let mut rewritten = content.to_string(); - let mut changed = false; - let escaped_origin = regex::escape(&self.origin_host); - for attribute in attributes { - let escaped_attr = regex::escape(attribute); - let pattern = format!( - r#"(?P(?:\\*")?{attr}(?:\\*")?:\\*")(?Phttps?://|//){origin}"#, - attr = escaped_attr, - origin = escaped_origin - ); - let regex = Regex::new(&pattern).expect("valid Next.js rewrite regex"); - let new_value = regex.replace_all(&rewritten, |caps: ®ex::Captures| { - let scheme = &caps["scheme"]; - let replacement = if scheme == "//" { - format!("//{}", self.request_host) - } else { - self.replacement_url() - }; - format!("{}{}", &caps["prefix"], replacement) - }); - if new_value != rewritten { - changed = true; - rewritten = new_value.into_owned(); - } - } - if changed { - Some(rewritten) - } else { - None - } - } } let patterns = Rc::new(UrlPatterns { @@ -116,8 +78,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso request_scheme: config.request_scheme.clone(), }); - let nextjs_attributes = Rc::new(config.nextjs_attributes.clone()); - let injected_tsjs = Rc::new(Cell::new(false)); let integration_registry = config.integrations.clone(); let script_rewriters = integration_registry.script_rewriters(); @@ -378,35 +338,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso })); } - if config.nextjs_enabled && !nextjs_attributes.is_empty() { - element_content_handlers.push(text!("script#__NEXT_DATA__", { - let patterns = patterns.clone(); - let attributes = nextjs_attributes.clone(); - move |text| { - let content = text.as_str(); - if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) { - text.replace(&rewritten, ContentType::Text); - } - Ok(()) - } - })); - - element_content_handlers.push(text!("script", { - let patterns = patterns.clone(); - let attributes = nextjs_attributes.clone(); - move |text| { - let content = text.as_str(); - if !content.contains("self.__next_f") { - return Ok(()); - } - if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) { - text.replace(&rewritten, ContentType::Text); - } - Ok(()) - } - })); - } - let rewriter_settings = RewriterSettings { element_content_handlers, ..RewriterSettings::default() @@ -433,8 +364,6 @@ mod tests { request_host: "test.example.com".to_string(), request_scheme: "https".to_string(), integrations: IntegrationRegistry::default(), - nextjs_enabled: false, - nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()], } } @@ -605,9 +534,19 @@ mod tests { "#; - let mut config = create_test_config(); - config.nextjs_enabled = true; - config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()]; + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + "nextjs", + &json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + let registry = IntegrationRegistry::new(&settings); + let config = config_from_settings(&settings, ®istry); let processor = create_html_processor(config); let pipeline_config = PipelineConfig { input_compression: Compression::None, @@ -659,9 +598,19 @@ mod tests { "#; - let mut config = create_test_config(); - config.nextjs_enabled = true; - config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()]; + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + "nextjs", + &json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + let registry = IntegrationRegistry::new(&settings); + let config = config_from_settings(&settings, ®istry); let processor = create_html_processor(config); let pipeline_config = PipelineConfig { input_compression: Compression::None, @@ -774,15 +723,6 @@ mod tests { assert_eq!(config.origin_host, "origin.test-publisher.com"); assert_eq!(config.request_host, "proxy.example.com"); assert_eq!(config.request_scheme, "https"); - assert!( - !config.nextjs_enabled, - "Next.js rewrites should default to disabled" - ); - assert_eq!( - config.nextjs_attributes, - vec!["href".to_string(), "link".to_string(), "url".to_string()], - "Should default to rewriting href/link/url attributes" - ); } #[test] diff --git a/crates/common/src/integrations/mod.rs b/crates/common/src/integrations/mod.rs index 49071b6..6c3f883 100644 --- a/crates/common/src/integrations/mod.rs +++ b/crates/common/src/integrations/mod.rs @@ -2,6 +2,7 @@ use crate::settings::Settings; +pub mod nextjs; pub mod prebid; mod registry; pub mod testlight; @@ -16,5 +17,5 @@ pub use registry::{ type IntegrationBuilder = fn(&Settings) -> Option; pub(crate) fn builders() -> &'static [IntegrationBuilder] { - &[prebid::register, testlight::register] + &[prebid::register, testlight::register, nextjs::register] } diff --git a/crates/common/src/integrations/nextjs.rs b/crates/common/src/integrations/nextjs.rs new file mode 100644 index 0000000..4ee347e --- /dev/null +++ b/crates/common/src/integrations/nextjs.rs @@ -0,0 +1,244 @@ +use std::sync::Arc; + +use regex::{escape, Regex}; +use serde::{Deserialize, Serialize}; +use validator::Validate; + +use crate::integrations::{ + IntegrationRegistration, IntegrationScriptContext, IntegrationScriptRewriter, + ScriptRewriteAction, +}; +use crate::settings::{IntegrationConfig, Settings}; + +const NEXTJS_INTEGRATION_ID: &str = "nextjs"; + +#[derive(Debug, Clone, Deserialize, Serialize, Validate)] +pub struct NextJsIntegrationConfig { + #[serde(default = "default_enabled")] + pub enabled: bool, + #[serde( + default = "default_rewrite_attributes", + deserialize_with = "crate::settings::vec_from_seq_or_map" + )] + #[validate(length(min = 1))] + pub rewrite_attributes: Vec, +} + +impl IntegrationConfig for NextJsIntegrationConfig { + fn is_enabled(&self) -> bool { + self.enabled + } +} + +fn default_enabled() -> bool { + false +} + +fn default_rewrite_attributes() -> Vec { + vec!["href".to_string(), "link".to_string(), "url".to_string()] +} + +pub fn register(settings: &Settings) -> Option { + let config = build(settings)?; + let structured = Arc::new(NextJsScriptRewriter::new( + Arc::clone(&config), + NextJsRewriteMode::Structured, + )); + let streamed = Arc::new(NextJsScriptRewriter::new( + config, + NextJsRewriteMode::Streamed, + )); + + Some( + IntegrationRegistration::builder(NEXTJS_INTEGRATION_ID) + .with_script_rewriter(structured) + .with_script_rewriter(streamed) + .build(), + ) +} + +fn build(settings: &Settings) -> Option> { + let config = settings + .integration_config::(NEXTJS_INTEGRATION_ID) + .ok() + .flatten()?; + Some(Arc::new(config)) +} + +#[derive(Clone, Copy)] +enum NextJsRewriteMode { + Structured, + Streamed, +} + +struct NextJsScriptRewriter { + config: Arc, + mode: NextJsRewriteMode, +} + +impl NextJsScriptRewriter { + fn new(config: Arc, mode: NextJsRewriteMode) -> Self { + Self { config, mode } + } + + fn rewrite_values( + &self, + content: &str, + ctx: &IntegrationScriptContext<'_>, + ) -> ScriptRewriteAction { + if let Some(rewritten) = rewrite_nextjs_values( + content, + ctx.origin_host, + ctx.request_host, + ctx.request_scheme, + &self.config.rewrite_attributes, + ) { + ScriptRewriteAction::replace(rewritten) + } else { + ScriptRewriteAction::keep() + } + } +} + +impl IntegrationScriptRewriter for NextJsScriptRewriter { + fn integration_id(&self) -> &'static str { + NEXTJS_INTEGRATION_ID + } + + fn selector(&self) -> &'static str { + match self.mode { + NextJsRewriteMode::Structured => "script#__NEXT_DATA__", + NextJsRewriteMode::Streamed => "script", + } + } + + fn rewrite(&self, content: &str, ctx: &IntegrationScriptContext<'_>) -> ScriptRewriteAction { + if self.config.rewrite_attributes.is_empty() { + return ScriptRewriteAction::keep(); + } + + match self.mode { + NextJsRewriteMode::Structured => self.rewrite_values(content, ctx), + NextJsRewriteMode::Streamed => { + if !content.contains("self.__next_f") { + return ScriptRewriteAction::keep(); + } + self.rewrite_values(content, ctx) + } + } + } +} + +fn rewrite_nextjs_values( + content: &str, + origin_host: &str, + request_host: &str, + request_scheme: &str, + attributes: &[String], +) -> Option { + if origin_host.is_empty() || request_host.is_empty() || attributes.is_empty() { + return None; + } + + let mut rewritten = content.to_string(); + let mut changed = false; + let escaped_origin = escape(origin_host); + let replacement_scheme = format!("{}://{}", request_scheme, request_host); + + for attribute in attributes { + let escaped_attr = escape(attribute); + let pattern = format!( + r#"(?P(?:\\*")?{attr}(?:\\*")?:\\*")(?Phttps?://|//){origin}"#, + attr = escaped_attr, + origin = escaped_origin, + ); + let regex = Regex::new(&pattern).expect("valid Next.js rewrite regex"); + let next_value = regex.replace_all(&rewritten, |caps: ®ex::Captures<'_>| { + let scheme = &caps["scheme"]; + let replacement = if scheme == "//" { + format!("//{}", request_host) + } else { + replacement_scheme.clone() + }; + format!("{}{}", &caps["prefix"], replacement) + }); + if next_value != rewritten { + changed = true; + rewritten = next_value.into_owned(); + } + } + + changed.then_some(rewritten) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::integrations::{IntegrationScriptContext, ScriptRewriteAction}; + + fn test_config() -> Arc { + Arc::new(NextJsIntegrationConfig { + enabled: true, + rewrite_attributes: vec!["href".into(), "link".into(), "url".into()], + }) + } + + fn ctx(selector: &'static str) -> IntegrationScriptContext<'static> { + IntegrationScriptContext { + selector, + request_host: "ts.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + } + } + + #[test] + fn structured_rewriter_updates_next_data_payload() { + let payload = r#"{"props":{"pageProps":{"primary":{"href":"https://origin.example.com/reviews"},"secondary":{"href":"http://origin.example.com/sign-in"},"fallbackHref":"http://origin.example.com/legacy","protoRelative":"//origin.example.com/assets/logo.png"}}}"#; + let rewriter = NextJsScriptRewriter::new(test_config(), NextJsRewriteMode::Structured); + let result = rewriter.rewrite(payload, &ctx("script#__NEXT_DATA__")); + + match result { + ScriptRewriteAction::Replace(value) => { + assert!(value.contains(r#""href":"https://ts.example.com/reviews""#)); + assert!(value.contains(r#""href":"https://ts.example.com/sign-in""#)); + assert!(value.contains(r#""fallbackHref":"http://origin.example.com/legacy""#)); + assert!(value.contains(r#""protoRelative":"//origin.example.com/assets/logo.png""#)); + } + _ => panic!("Expected rewrite to update payload"), + } + } + + #[test] + fn streamed_rewriter_only_runs_for_next_payloads() { + let rewriter = NextJsScriptRewriter::new(test_config(), NextJsRewriteMode::Streamed); + + let noop = rewriter.rewrite("console.log('hello');", &ctx("script")); + assert!(matches!(noop, ScriptRewriteAction::Keep)); + + let payload = r#"self.__next_f.push(["chunk", "{\"href\":\"https://origin.example.com/app\"}"]); + "#; + let rewritten = rewriter.rewrite(payload, &ctx("script")); + match rewritten { + ScriptRewriteAction::Replace(value) => { + assert!(value.contains(r#"https://ts.example.com/app"#)); + } + _ => panic!("Expected streamed payload rewrite"), + } + } + + #[test] + fn rewrite_helper_handles_protocol_relative_urls() { + let content = r#"{"props":{"pageProps":{"link":"//origin.example.com/image.png"}}}"#; + let rewritten = rewrite_nextjs_values( + content, + "origin.example.com", + "ts.example.com", + "https", + &["link".into()], + ) + .expect("should rewrite protocol relative link"); + + assert!(rewritten.contains(r#""link":"//ts.example.com/image.png""#)); + } +} diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index 4808f58..3206b1c 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -3,10 +3,7 @@ use core::str; use config::{Config, Environment, File, FileFormat}; use error_stack::{Report, ResultExt}; use regex::Regex; -use serde::{ - de::{DeserializeOwned, IntoDeserializer}, - Deserialize, Deserializer, Serialize, -}; +use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize}; use serde_json::Value as JsonValue; use std::collections::HashMap; use std::ops::{Deref, DerefMut}; @@ -27,9 +24,6 @@ pub struct Publisher { /// Secret used to encrypt/decrypt proxied URLs in `/first-party/proxy`. /// Keep this secret stable to allow existing links to decode. pub proxy_secret: String, - #[serde(default)] - #[validate(nested)] - pub nextjs: NextJs, } impl Publisher { @@ -44,7 +38,6 @@ impl Publisher { /// cookie_domain: ".example.com".to_string(), /// origin_url: "https://origin.example.com:8080".to_string(), /// proxy_secret: "proxy-secret".to_string(), - /// nextjs: Default::default(), /// }; /// assert_eq!(publisher.origin_host(), "origin.example.com:8080"); /// ``` @@ -62,30 +55,6 @@ impl Publisher { } } -#[derive(Debug, Deserialize, Serialize, Validate)] -pub struct NextJs { - #[serde(default)] - pub enabled: bool, - #[serde( - default = "default_nextjs_attributes", - deserialize_with = "deserialize_nextjs_attributes" - )] - pub rewrite_attributes: Vec, -} - -fn default_nextjs_attributes() -> Vec { - vec!["href".to_string(), "link".to_string(), "url".to_string()] -} - -impl Default for NextJs { - fn default() -> Self { - Self { - enabled: false, - rewrite_attributes: default_nextjs_attributes(), - } - } -} - #[derive(Debug, Default, Deserialize, Serialize)] pub struct IntegrationSettings { #[serde(flatten)] @@ -187,18 +156,6 @@ impl DerefMut for IntegrationSettings { } } -fn deserialize_nextjs_attributes<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Option::::deserialize(deserializer)?; - match value { - Some(json) => vec_from_seq_or_map(json.into_deserializer()) - .map_err(::custom), - None => Ok(default_nextjs_attributes()), - } -} - #[allow(unused)] #[derive(Debug, Default, Deserialize, Serialize, Validate)] pub struct Synthetic { @@ -419,8 +376,9 @@ where mod tests { use super::*; use regex::Regex; + use serde_json::json; - use crate::integrations::prebid::PrebidIntegrationConfig; + use crate::integrations::{nextjs::NextJsIntegrationConfig, prebid::PrebidIntegrationConfig}; use crate::test_support::tests::{crate_test_settings_str, create_test_settings}; #[test] @@ -441,12 +399,20 @@ mod tests { .expect("Prebid config should load from default settings"); assert!(!prebid_cfg.server_url.is_empty()); assert!( - !settings.publisher.nextjs.enabled, - "Next.js URL rewriting should default to disabled" + settings + .integration_config::("nextjs") + .expect("Next.js config query should succeed") + .is_none(), + "Next.js integration should be disabled by default" ); + let raw_nextjs = settings + .integrations + .get("nextjs") + .expect("embedded config should include nextjs block"); + assert_eq!(raw_nextjs["enabled"], json!(false)); assert_eq!( - settings.publisher.nextjs.rewrite_attributes, - vec!["href".to_string(), "link".to_string(), "url".to_string()], + raw_nextjs["rewrite_attributes"], + json!(["href", "link", "url"]), "Next.js rewrite attributes should default to href/link/url" ); @@ -473,12 +439,20 @@ mod tests { "https://test-prebid.com/openrtb2/auction" ); assert!( - !settings.publisher.nextjs.enabled, - "Next.js URL rewriting should default to disabled" + settings + .integration_config::("nextjs") + .expect("Next.js config query should succeed") + .is_none(), + "Next.js integration should default to disabled" ); + let raw_nextjs = settings + .integrations + .get("nextjs") + .expect("test settings should include nextjs block"); + assert_eq!(raw_nextjs["enabled"], json!(false)); assert_eq!( - settings.publisher.nextjs.rewrite_attributes, - vec!["href".to_string(), "link".to_string(), "url".to_string()], + raw_nextjs["rewrite_attributes"], + json!(["href", "link", "url"]), "Next.js rewrite attributes should default to href/link/url" ); assert_eq!(settings.publisher.domain, "test-publisher.com"); @@ -746,7 +720,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "https://origin.example.com:8080".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "origin.example.com:8080"); @@ -756,7 +729,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "https://origin.example.com".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "origin.example.com"); @@ -766,7 +738,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "http://localhost:9090".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -776,7 +747,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "localhost:9090".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -786,7 +756,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "http://192.168.1.1:8080".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "192.168.1.1:8080"); @@ -796,7 +765,6 @@ mod tests { cookie_domain: ".example.com".to_string(), origin_url: "http://[::1]:8080".to_string(), proxy_secret: "test-secret".to_string(), - nextjs: NextJs::default(), }; assert_eq!(publisher.origin_host(), "[::1]:8080"); } diff --git a/crates/common/src/test_support.rs b/crates/common/src/test_support.rs index d3ff379..3b2f85c 100644 --- a/crates/common/src/test_support.rs +++ b/crates/common/src/test_support.rs @@ -20,6 +20,10 @@ pub mod tests { enabled = true server_url = "https://test-prebid.com/openrtb2/auction" + [integrations.nextjs] + enabled = false + rewrite_attributes = ["href", "link", "url"] + [synthetic] counter_store = "test-counter-store" opid_store = "test-opid-store" diff --git a/trusted-server.toml b/trusted-server.toml index 6a1097a..bdd6350 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -9,11 +9,6 @@ cookie_domain = ".test-publisher.com" origin_url = "https://origin.test-publisher.com" proxy_secret = "change-me-proxy-secret" -[publisher.nextjs] -enabled = false -rewrite_attributes = ["href", "link", "url"] - - [synthetic] counter_store = "counter_store" opid_store = "opid_store" @@ -46,6 +41,10 @@ bidders = ["kargo", "rubicon", "appnexus", "openx"] auto_configure = false debug = false +[integrations.nextjs] +enabled = false +rewrite_attributes = ["href", "link", "url"] + [integrations.testlight] endpoint = "https://testlight.example/openrtb2/auction" timeout_ms = 1200