Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/adr/0002-modular-protocol-analyzers.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ The `detail` map contains protocol-specific fields as `serde_json::Value`. This
- Include MITRE ATT&CK technique ID only when there's a clean mapping; `None` is better than a forced fit
- Include actionable evidence (specific values, not just "something was wrong")
- Cap findings with `MAX_FINDINGS` to prevent memory exhaustion on adversarial input
- **Output sanitization is a reporter responsibility, not an analyzer responsibility.** Store raw bytes (post-`from_utf8_lossy`) in `Finding.summary` and `Finding.evidence`. Do not escape, debug-format, or otherwise pre-encode untrusted bytes at the analyzer. See ADR 0003 (`docs/adr/0003-reporting-pipeline-layering.md`) for the full layering principle.

## Alternatives Considered

Expand Down
224 changes: 224 additions & 0 deletions docs/adr/0003-reporting-pipeline-layering.md

Large diffs are not rendered by default.

805 changes: 805 additions & 0 deletions docs/superpowers/plans/2026-04-09-reporting-pipeline-layering.md

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions src/analyzer/tls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,13 +340,13 @@ impl TlsAnalyzer {
category: ThreatCategory::Anomaly,
verdict: Verdict::Inconclusive,
confidence: Confidence::Low,
// Use Debug formatter ({:?}) to escape any control codepoints
// that might survive UTF-8 decoding (e.g. U+0085 NEL); the
// hostname here is valid UTF-8 but printable-script content
// is not guaranteed.
// Raw hostname interpolation — the data layer stores raw
// bytes per ADR 0003. Terminal-safety (escaping control
// codes, etc.) is applied by the terminal reporter at
// render time, not here.
summary: format!(
"TLS SNI contains non-ASCII characters (RFC 6066 requires \
A-labels per RFC 5890): {hostname:?}"
A-labels per RFC 5890): {hostname}"
),
evidence: vec![format!("hex: {hex}")],
mitre_technique: None,
Expand All @@ -359,14 +359,13 @@ impl TlsAnalyzer {
category: ThreatCategory::Anomaly,
verdict: Verdict::Inconclusive,
confidence: Confidence::Low,
// Use Debug formatter ({:?}) to escape control bytes (e.g.
// ESC 0x1b) that String::from_utf8_lossy preserves but the
// analyst's terminal would interpret as ANSI control
// sequences. Without this an attacker could craft a
// malformed SNI like b"\x1b[31m..." that recolors or
// overwrites the rendered finding line.
// Raw lossy interpolation — the data layer stores raw
// bytes (including any embedded ASCII control codes) per
// ADR 0003. The terminal reporter is responsible for
// escaping these for safe display; JSON output is already
// safe via serde_json's automatic RFC 8259 escaping.
summary: format!(
"TLS SNI contains non-UTF-8 bytes (RFC 6066 violation): {lossy:?}"
"TLS SNI contains non-UTF-8 bytes (RFC 6066 violation): {lossy}"
),
evidence: vec![format!("hex: {hex}")],
mitre_technique: None,
Expand Down
9 changes: 9 additions & 0 deletions src/findings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ pub struct Finding {
pub timestamp: Option<DateTime<Utc>>,
}

/// Produces the raw text representation of a finding for logging, debugging,
/// or machine-readable output. **Not safe for direct terminal display** — the
/// `summary` field may contain attacker-controlled bytes from packet payloads
/// (including ASCII control codes like ESC `0x1b`) that a terminal would
/// interpret as control sequences. For safe terminal rendering, use the
/// terminal reporter (`src/reporter/terminal.rs`), which escapes every
/// `summary` and `evidence` entry before writing to the output buffer.
/// See ADR 0003 (`docs/adr/0003-reporting-pipeline-layering.md`) for the
/// full layering principle.
impl fmt::Display for Finding {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
Expand Down
149 changes: 146 additions & 3 deletions src/reporter/terminal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,45 @@ use crate::findings::{Confidence, Finding, Verdict};
use crate::reporter::Reporter;
use crate::summary::Summary;

/// Escape control bytes (C0 + DEL + C1 + backslash) for safe terminal display.
///
/// Iterates the input string's characters and applies `char::escape_default`
/// when the character matches `char::is_ascii_control()` (C0 + DEL), falls in
/// the C1 range `U+0080..=U+009F`, or is a backslash. All other characters —
/// printable ASCII and valid non-ASCII Unicode (Cyrillic, CJK, emoji) — pass
/// through unchanged.
///
/// **Why C1?** Codepoints U+0080–U+009F (C1 controls like NEL U+0085 and CSI
/// U+009B) can be encoded in valid UTF-8 as two-byte sequences (e.g., U+009B
/// as `0xC2 0x9B`) and survive `String::from_utf8_lossy`. Most modern
/// terminals in UTF-8 mode do NOT interpret these as controls by default, but
/// the DEC S8C1T mode and some legacy terminals can treat U+009B as an 8-bit
/// equivalent of ESC[. Escaping them closes a narrow but real vector.
///
/// **Why not `str::escape_default`?** It routes *every* character through
/// `char::escape_default`, which escapes non-ASCII as `\u{...}` and would
/// mangle a Cyrillic hostname like `пример.рф` into `\u{43f}\u{440}...`.
/// See ADR 0003 (`docs/adr/0003-reporting-pipeline-layering.md`) for the
/// layering rationale and the empirical verification.
fn escape_for_terminal(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for c in s.chars() {
// Escape C0 (0x00-0x1f), DEL (0x7f), C1 (U+0080-U+009F), and backslash.
// C1 codepoints like U+0085 (NEL) and U+009B (CSI — 8-bit equivalent of
// ESC[) are valid multi-byte UTF-8 in a `String` but can be interpreted
// as controls by terminals in 8-bit C1 mode (DEC S8C1T). Escaping them
// is cheap insurance.
if c.is_ascii_control() || ('\u{80}'..='\u{9f}').contains(&c) || c == '\\' {
for e in c.escape_default() {
out.push(e);
}
} else {
out.push(c);
}
}
out
}

pub struct TerminalReporter {
pub use_color: bool,
}
Expand Down Expand Up @@ -60,9 +99,13 @@ impl Reporter for TerminalReporter {
if !findings.is_empty() {
out.push_str(&self.section("FINDINGS"));
for f in findings {
// Per ADR 0003: the Finding struct stores raw bytes; the
// terminal reporter is responsible for escaping untrusted
// content (summary + evidence) before writing to a TTY.
let escaped_summary = escape_for_terminal(&f.summary);
Comment thread
Zious11 marked this conversation as resolved.
let line = format!(
"[{}] {} ({}) - {}",
f.category, f.verdict, f.confidence, f.summary
f.category, f.verdict, f.confidence, escaped_summary
);
let colored = if self.use_color {
match f.verdict {
Expand All @@ -78,7 +121,8 @@ impl Reporter for TerminalReporter {
};
out.push_str(&format!(" {colored}\n"));
for ev in &f.evidence {
out.push_str(&format!(" > {ev}\n"));
let escaped_ev = escape_for_terminal(ev);
out.push_str(&format!(" > {escaped_ev}\n"));
}
if let Some(ref t) = f.mitre_technique {
out.push_str(&format!(" MITRE: {t}\n"));
Expand All @@ -95,7 +139,15 @@ impl Reporter for TerminalReporter {
asummary.packets_analyzed
));
for (key, val) in &asummary.detail {
out.push_str(&format!(" {key}: {val}\n"));
// Per ADR 0003: analyzer summary detail values can contain
// attacker-controlled bytes (e.g., top_hosts, top_snis,
// recent_uris from HTTP/TLS). serde_json's Display impl
// escapes C0 + DEL per RFC 8259 but passes C1 codepoints
// (U+0080-U+009F) through as raw UTF-8 — so we still need
// to run the JSON rendering through escape_for_terminal to
// close the C1 gap that U+009B (CSI) exploits.
let escaped_val = escape_for_terminal(&val.to_string());
out.push_str(&format!(" {key}: {escaped_val}\n"));
}
out.push('\n');
}
Expand All @@ -113,3 +165,94 @@ impl TerminalReporter {
}
}
}

#[cfg(test)]
mod tests {
use super::escape_for_terminal;

#[test]
fn escapes_esc_byte() {
assert_eq!(
escape_for_terminal("\x1b[31mRED\x1b[0m"),
"\\u{1b}[31mRED\\u{1b}[0m"
);
}

#[test]
fn escapes_bel_and_del() {
assert_eq!(
escape_for_terminal("ring\x07bye\x7f"),
"ring\\u{7}bye\\u{7f}"
);
}

#[test]
fn escapes_tab_newline_cr_as_short_forms() {
// char::escape_default uses short escapes for these three.
assert_eq!(
escape_for_terminal("tab\there\nnewline\rreturn"),
"tab\\there\\nnewline\\rreturn"
);
}

#[test]
fn escapes_backslash() {
assert_eq!(escape_for_terminal("a\\b"), "a\\\\b");
}

#[test]
fn preserves_printable_ascii() {
assert_eq!(
escape_for_terminal("hello world 123 !@#"),
"hello world 123 !@#"
);
}

#[test]
fn preserves_cyrillic() {
assert_eq!(escape_for_terminal("пример.рф"), "пример.рф");
}

#[test]
fn preserves_emoji() {
assert_eq!(escape_for_terminal("crab 🦀 rust"), "crab 🦀 rust");
}

#[test]
fn mixed_content_escapes_only_dangerous_bytes() {
// Cyrillic + ESC injection + emoji — Cyrillic and emoji must survive,
// only the ESC sequence should be escaped.
assert_eq!(
escape_for_terminal("пример\x1b[31m🦀"),
"пример\\u{1b}[31m🦀"
);
}

#[test]
fn empty_string_is_empty() {
assert_eq!(escape_for_terminal(""), "");
}

#[test]
fn escapes_c1_nel_and_csi() {
// U+0085 (NEL, Next Line) and U+009B (CSI, 8-bit Control Sequence
// Introducer). Both are valid multi-byte UTF-8 and survive
// String::from_utf8_lossy — must be escaped to avoid 8-bit terminal
// control interpretation.
assert_eq!(escape_for_terminal("line1\u{85}line2"), "line1\\u{85}line2");
assert_eq!(
escape_for_terminal("before\u{9b}31mafter"),
"before\\u{9b}31mafter"
);
}

#[test]
fn escapes_c1_range_boundaries() {
// U+0080 (start of C1) and U+009F (end of C1) must both escape.
// U+00A0 (NBSP, just past C1) must pass through unchanged — it's a
// legitimate printable whitespace character, not a control code.
assert_eq!(escape_for_terminal("\u{80}"), "\\u{80}");
assert_eq!(escape_for_terminal("\u{9f}"), "\\u{9f}");
assert_eq!(escape_for_terminal("\u{a0}"), "\u{a0}");
}
}
Loading