From 4ad690ec84276d4d00908cb88322df3a94f4c818 Mon Sep 17 00:00:00 2001 From: Zious Date: Fri, 10 Apr 2026 15:44:45 -0400 Subject: [PATCH 1/2] =?UTF-8?q?test:=20e2e=20HTTP=20analyzer=20=E2=86=92?= =?UTF-8?q?=20reporter=20regression=20tests=20(#56)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 3 end-to-end tests that drive HttpAnalyzer with C1 CSI (U+009B) injection and verify the full reporter pipeline escapes dangerous bytes. Key finding during issue validation: httparse rejects C0 control bytes (including ESC 0x1b) in URIs and header values, but accepts C1 codepoints because their UTF-8 encoding (0xC2 0x9B) uses high bytes. C1 CSI is the real injection vector through the HTTP analyzer. Tests added: - test_http_finding_c1_csi_escaped_by_terminal_reporter - test_http_finding_c1_csi_in_json_reporter - test_http_analyzer_summary_c1_csi_escaped_by_terminal_reporter Closes #56 --- tests/reporter_tests.rs | 175 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/tests/reporter_tests.rs b/tests/reporter_tests.rs index db2710f..c5b20d2 100644 --- a/tests/reporter_tests.rs +++ b/tests/reporter_tests.rs @@ -1,4 +1,8 @@ +use std::net::IpAddr; +use wirerust::analyzer::http::HttpAnalyzer; use wirerust::findings::{Confidence, Finding, ThreatCategory, Verdict}; +use wirerust::reassembly::flow::FlowKey; +use wirerust::reassembly::handler::{Direction, StreamAnalyzer, StreamHandler}; use wirerust::reporter::Reporter; use wirerust::reporter::json::JsonReporter; use wirerust::reporter::terminal::TerminalReporter; @@ -276,3 +280,174 @@ fn test_terminal_reporter_escapes_control_bytes_in_analyzer_summaries() { "analyzer summary section must preserve legitimate Cyrillic, got: {output}" ); } + +// --------------------------------------------------------------------------- +// End-to-end: HttpAnalyzer → reporter pipeline (issue #56) +// +// These tests close the coverage gap identified during the ADR 0003 
PR review: +// the existing contract tests above use synthetic Findings, not ones produced +// by the actual HttpAnalyzer. These tests drive HttpAnalyzer::on_data with +// crafted HTTP requests and verify the full pipeline. +// +// Key discovery during issue validation: httparse rejects C0 control bytes +// (including ESC 0x1b) in URIs and header values, but ACCEPTS C1 codepoints +// (U+0080-U+009F) because they encode as high bytes in UTF-8 (e.g., U+009B +// CSI = 0xC2 0x9B). C1 CSI is the real injection vector through httparse. +// --------------------------------------------------------------------------- + +fn http_test_flow_key() -> FlowKey { + FlowKey::new( + "10.0.0.1".parse::<IpAddr>().unwrap(), + 49153, + "10.0.0.2".parse::<IpAddr>().unwrap(), + 80, + ) +} + +/// Build an HTTP/1.1 request with a path-traversal URI containing C1 CSI +/// (U+009B) — the 8-bit equivalent of ESC[. httparse accepts this because +/// the UTF-8 encoding (0xC2 0x9B) consists of high bytes (≥ 0x80). +fn build_path_traversal_with_c1_csi() -> Vec<u8> { + let mut buf = b"GET /../../etc/passwd".to_vec(); + buf.extend_from_slice(&[0xC2, 0x9B]); // U+009B CSI + buf.extend_from_slice(b"31mHACKED HTTP/1.1\r\nHost: target.com\r\n\r\n"); + buf +} + +/// Build an HTTP/1.1 request with C1 CSI in the Host header value. +fn build_request_with_c1_in_host() -> Vec<u8> { + let mut buf = b"GET /index HTTP/1.1\r\nHost: evil".to_vec(); + buf.extend_from_slice(&[0xC2, 0x9B]); // U+009B CSI + buf.extend_from_slice(b"31m.com\r\nUser-Agent: Mozilla/5.0\r\n\r\n"); + buf +} + +#[test] +fn test_http_finding_c1_csi_escaped_by_terminal_reporter() { + // End-to-end: HttpAnalyzer produces a path-traversal finding whose + // summary contains a raw C1 CSI (U+009B). The terminal reporter must + // escape it. This exercises the real injection path — httparse accepts + // C1 in URIs because the UTF-8 encoding uses high bytes. 
+ let mut analyzer = HttpAnalyzer::new(); + let fk = http_test_flow_key(); + analyzer.on_data( + &fk, + Direction::ClientToServer, + &build_path_traversal_with_c1_csi(), + 0, + ); + + let findings = analyzer.findings(); + assert!( + !findings.is_empty(), + "path traversal with C1 CSI should produce at least one finding" + ); + + // The finding's raw summary must contain the C1 CSI bytes (forensic preservation). + let traversal_finding = findings + .iter() + .find(|f| f.summary.contains("Path traversal")) + .expect("expected a path-traversal finding"); + assert!( + traversal_finding + .summary + .as_bytes() + .windows(2) + .any(|w| w == [0xC2, 0x9B]), + "Finding.summary must preserve raw C1 CSI for forensics, got: {:?}", + traversal_finding.summary + ); + + // Render through terminal reporter — no raw C1 bytes in output. + let output = TerminalReporter { use_color: false }.render(&Summary::new(), &findings, &[]); + assert!( + !output.as_bytes().windows(2).any(|w| w == [0xC2, 0x9B]), + "terminal output must not contain raw C1 CSI (0xC2 0x9B), got: {output:?}" + ); + assert!( + output.contains("\\u{9b}"), + "terminal output should contain the escaped form of C1 CSI, got: {output}" + ); +} + +#[test] +fn test_http_finding_c1_csi_in_json_reporter() { + // The JSON reporter renders findings from HttpAnalyzer. serde_json does + // NOT escape C1 codepoints (RFC 8259 only mandates C0 + DEL), so the + // raw C1 CSI UTF-8 bytes pass through. This test verifies the JSON + // round-trip preserves the C1 byte — downstream tools can reconstruct + // the original payload. + let mut analyzer = HttpAnalyzer::new(); + let fk = http_test_flow_key(); + analyzer.on_data( + &fk, + Direction::ClientToServer, + &build_path_traversal_with_c1_csi(), + 0, + ); + + let findings = analyzer.findings(); + let json_output = JsonReporter.render(&Summary::new(), &findings, &[]); + + // JSON must be valid. 
+ let parsed: serde_json::Value = + serde_json::from_str(&json_output).expect("JSON output must be valid"); + + // Round-trip: the deserialized finding summary must contain the C1 CSI + // codepoint, proving the JSON encoding preserved it. + let json_findings = parsed["findings"].as_array().unwrap(); + let traversal = json_findings + .iter() + .find(|f| { + f["summary"] + .as_str() + .is_some_and(|s| s.contains("Path traversal")) + }) + .expect("expected path-traversal finding in JSON"); + let summary_str = traversal["summary"].as_str().unwrap(); + assert!( + summary_str.as_bytes().windows(2).any(|w| w == [0xC2, 0x9B]), + "JSON round-trip must preserve raw C1 CSI in finding summary, got: {summary_str:?}" + ); +} + +#[test] +fn test_http_analyzer_summary_c1_csi_escaped_by_terminal_reporter() { + // End-to-end: HttpAnalyzer accumulates a Host header containing C1 CSI + // into its top_hosts summary. When rendered through the terminal + // reporter's analyzer-summary section, the C1 must be escaped. + let mut analyzer = HttpAnalyzer::new(); + let fk = http_test_flow_key(); + analyzer.on_data( + &fk, + Direction::ClientToServer, + &build_request_with_c1_in_host(), + 0, + ); + + // Verify the host made it into the analyzer summary. + let analyzer_summary = analyzer.summarize(); + let top_hosts_str = analyzer_summary.detail["top_hosts"].to_string(); + assert!( + top_hosts_str + .as_bytes() + .windows(2) + .any(|w| w == [0xC2, 0x9B]), + "analyzer summary top_hosts must contain raw C1 CSI, got: {top_hosts_str:?}" + ); + + // Render through terminal reporter — no raw C1 bytes in output. 
+ let output = TerminalReporter { use_color: false }.render( + &Summary::new(), + &[], + std::slice::from_ref(&analyzer_summary), + ); + assert!( + !output.as_bytes().windows(2).any(|w| w == [0xC2, 0x9B]), + "terminal output must not contain raw C1 CSI in analyzer summary section, got: {output:?}" + ); + assert!( + output.contains("\\u{9b}"), + "terminal output should contain the escaped form of C1 CSI in analyzer summary, got: {output}" + ); +} From 73fbcec2546e91f4618ec61dd45900c1e94b2f21 Mon Sep 17 00:00:00 2001 From: Zious Date: Fri, 10 Apr 2026 15:48:08 -0400 Subject: [PATCH 2/2] fix: correct RFC 8259 scope in test comments RFC 8259 only mandates escaping C0 (U+0000-U+001F). serde_json also escapes DEL (U+007F) as an implementation choice, not per RFC mandate. --- tests/reporter_tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/reporter_tests.rs b/tests/reporter_tests.rs index c5b20d2..01f2ab9 100644 --- a/tests/reporter_tests.rs +++ b/tests/reporter_tests.rs @@ -236,7 +236,7 @@ fn test_json_reporter_preserves_cyrillic_as_readable_unicode() { fn test_terminal_reporter_escapes_control_bytes_in_analyzer_summaries() { // Regression: analyzer_summaries detail values can contain // attacker-controlled strings (HTTP top_hosts, TLS top_snis, etc.). - // serde_json::Value's Display impl escapes C0 + DEL per RFC 8259 but + // serde_json::Value's Display impl escapes C0 (per RFC 8259) and DEL but // passes C1 codepoints (U+0080-U+009F) through as raw UTF-8 — which // is a terminal injection vector on the analyzer summary rendering // path. Per ADR 0003, the terminal reporter must escape at the @@ -373,7 +373,7 @@ fn test_http_finding_c1_csi_escaped_by_terminal_reporter() { #[test] fn test_http_finding_c1_csi_in_json_reporter() { // The JSON reporter renders findings from HttpAnalyzer. 
serde_json does - // NOT escape C1 codepoints (RFC 8259 only mandates C0 + DEL), so the + // NOT escape C1 codepoints (RFC 8259 only mandates C0; serde_json also escapes DEL), so the // raw C1 CSI UTF-8 bytes pass through. This test verifies the JSON // round-trip preserves the C1 byte — downstream tools can reconstruct // the original payload.