diff --git a/README.md b/README.md index b4aa0b4..7c86181 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,13 @@ tcpdump -w - -c 1000 | dsct read - tcpdump -w - -i eth0 udp port 53 | dsct read - -f dns ``` +Include the original packet bytes (link-layer included) as a hex string under +`raw_bytes` for downstream parsing or reconstruction: + +```bash +dsct read capture.pcap --raw-bytes --count 1 +``` + Inspect available fields and schemas: ```bash diff --git a/benches/json_escape.rs b/benches/json_escape.rs index 4dd0772..d815ddd 100644 --- a/benches/json_escape.rs +++ b/benches/json_escape.rs @@ -152,6 +152,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&tcp_buf), black_box(tcp_data), None, + false, ) .unwrap(); black_box(&buf); @@ -168,6 +169,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&tcp_buf), black_box(tcp_data), None, + false, ) .unwrap(); black_box(&buf); @@ -186,6 +188,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&tcp_buf), black_box(tcp_data), None, + false, ) .unwrap(); bw.flush().unwrap(); @@ -204,6 +207,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&tcp_buf), black_box(tcp_data), None, + false, ) .unwrap(); let mut ew = JsonEscapeWriter::new(&mut buf); @@ -228,6 +232,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dns_buf), black_box(dns_data), None, + false, ) .unwrap(); black_box(&buf); @@ -244,6 +249,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dns_buf), black_box(dns_data), None, + false, ) .unwrap(); black_box(&buf); @@ -262,6 +268,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dns_buf), black_box(dns_data), None, + false, ) .unwrap(); bw.flush().unwrap(); @@ -280,6 +287,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dns_buf), black_box(dns_data), None, + false, ) .unwrap(); let mut ew = JsonEscapeWriter::new(&mut buf); @@ -306,6 +314,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dissect_buf), black_box(&tp.raw), None, + false, ) .unwrap(); 
buf.push(b'\n'); @@ -330,6 +339,7 @@ fn bench_json_escape(c: &mut Criterion) { black_box(&dissect_buf), black_box(&tp.raw), None, + false, ) .unwrap(); ew.write_all(&pkt_buf).unwrap(); @@ -352,6 +362,7 @@ fn bench_json_escape(c: &mut Criterion) { &tcp_buf, &test_packets[0].raw, None, + false, ) .unwrap(); sample_json.push(b'\n'); @@ -361,6 +372,7 @@ fn bench_json_escape(c: &mut Criterion) { &dns_buf, &test_packets[1].raw, None, + false, ) .unwrap(); sample_json.push(b'\n'); diff --git a/src/main.rs b/src/main.rs index 16e4c6a..fd37565 100644 --- a/src/main.rs +++ b/src/main.rs @@ -111,6 +111,11 @@ struct ReadOptions { /// - `0x1234:aes-256-cbc:0xKEY:hmac-sha1-96:0xKEY` #[arg(long = "esp-sa", num_args = 1)] esp_sa: Vec, + + /// Include the original packet bytes (link-layer included) as a + /// lowercase hex string under the `raw_bytes` field of each record. + #[arg(long)] + raw_bytes: bool, } /// Options for the `dsct stats` command. @@ -303,6 +308,7 @@ fn cmd_read(opts: ReadOptions) -> Result<()> { progress, decode_as: decode_as_args, esp_sa: esp_sa_args, + raw_bytes, } = opts; // Resolve effective count: explicit --count, default limit, or unlimited. let (count, is_default_limit) = if no_limit { @@ -428,6 +434,7 @@ fn cmd_read(opts: ReadOptions) -> Result<()> { dissect_buf, data, field_config.as_ref(), + raw_bytes, )?; pkt_buf.push(b'\n'); writer.write_all(&pkt_buf)?; diff --git a/src/mcp/raw_mcp.rs b/src/mcp/raw_mcp.rs index 00b71bf..181c2b5 100644 --- a/src/mcp/raw_mcp.rs +++ b/src/mcp/raw_mcp.rs @@ -272,6 +272,11 @@ fn read_packets_schema() -> Value { "default": false, "description": "Show all fields including low-level details (checksums, header lengths, etc.)." }, + "raw_bytes": { + "type": "boolean", + "default": false, + "description": "Include the original packet bytes (link-layer included) as a lowercase hex string under the `raw_bytes` field of each record." 
+ }, "sample_rate": { "type": "integer", "minimum": 1, @@ -495,6 +500,10 @@ fn handle_read_packets_streaming( .get("verbose") .and_then(Value::as_bool) .unwrap_or(false); + let raw_bytes = arguments + .get("raw_bytes") + .and_then(Value::as_bool) + .unwrap_or(false); let field_config = if verbose { None } else { @@ -624,6 +633,7 @@ fn handle_read_packets_streaming( dissect_buf, data, field_config.as_ref(), + raw_bytes, )?; w.write_all(&pkt_buf)?; packets_written += 1; @@ -960,6 +970,14 @@ mod tests { assert_eq!(verbose["default"], false); } + #[test] + fn read_packets_schema_has_raw_bytes() { + let schema = read_packets_schema(); + let raw = &schema["properties"]["raw_bytes"]; + assert_eq!(raw["type"], "boolean"); + assert_eq!(raw["default"], false); + } + #[test] fn get_stats_schema_has_esp_sa() { let schema = get_stats_schema(); diff --git a/src/schema.rs b/src/schema.rs index d1fce30..aae4120 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -53,6 +53,10 @@ pub fn read_schema() -> serde_json::Value { } } } + }, + "raw_bytes": { + "type": "string", + "description": "Original packet bytes (link-layer included) as a lowercase hex string. Present only when --raw-bytes is specified." } } }) @@ -214,6 +218,15 @@ mod tests { assert!(required.iter().any(|v| v == "layers")); } + #[test] + fn read_schema_has_optional_raw_bytes_property() { + let schema = read_schema(); + let raw = &schema["properties"]["raw_bytes"]; + assert_eq!(raw["type"], "string"); + let required = schema["required"].as_array().unwrap(); + assert!(!required.iter().any(|v| v == "raw_bytes")); + } + #[test] fn stats_schema_has_required_fields() { let schema = stats_schema(); diff --git a/src/serialize.rs b/src/serialize.rs index 81f8559..dbf7a15 100644 --- a/src/serialize.rs +++ b/src/serialize.rs @@ -155,6 +155,17 @@ fn write_timestamp_to(w: &mut W, secs: u64, usecs: u32) -> std::io::Re // Streaming JSON write — zero-allocation packet serialization via DissectBuffer. 
// --------------------------------------------------------------------------- +/// Write `bytes` as a JSON string of lowercase hex digits, including the +/// enclosing double quotes. +fn write_hex_string<W: Write>(w: &mut W, bytes: &[u8]) -> Result<()> { + w.write_all(b"\"")?; + for byte in bytes { + write!(w, "{byte:02x}")?; + } + w.write_all(b"\"")?; + Ok(()) +} + // --------------------------------------------------------------------------- /// Write a [`FieldValue`] as a JSON token to `w`. @@ -206,20 +217,10 @@ fn write_raw_field_value_json( write!(w, "\"{addr}\"")?; } FieldValue::MacAddr(m) => write!(w, "\"{m}\"")?, - FieldValue::Bytes(b) => { - write!(w, "\"")?; - for byte in *b { - write!(w, "{byte:02x}")?; - } - write!(w, "\"")?; - } + FieldValue::Bytes(b) => write_hex_string(w, b)?, FieldValue::Scratch(range) => { let scratch_bytes = &buf.scratch()[range.start as usize..range.end as usize]; - write!(w, "\"")?; - for byte in scratch_bytes { - write!(w, "{byte:02x}")?; - } - write!(w, "\"")?; + write_hex_string(w, scratch_bytes)?; } FieldValue::Array(_) | FieldValue::Object(_) => { // Container fields should be handled by write_field_json; @@ -437,12 +438,16 @@ fn write_layer_fields( /// /// Uses the flat [`DissectBuffer`] API — no intermediate serde structures /// are allocated. +/// +/// When `raw_bytes` is `true`, a trailing `"raw_bytes":"<hex>"` field is +/// appended with the original packet bytes (including link-layer headers). 
pub fn write_packet_json<W: Write>( w: &mut W, meta: &PacketMeta, buf: &DissectBuffer<'_>, data: &[u8], field_config: Option<&FieldConfig>, + raw_bytes: bool, ) -> Result<()> { // number write!(w, "{{\"number\":{},\"timestamp\":\"", meta.number)?; @@ -473,7 +478,14 @@ pub fn write_packet_json<W: Write>( write_layer_fields(w, layer, buf, data, field_config)?; write!(w, "}}}}")?; // close fields, close layer } - write!(w, "]}}")?; // close layers, close packet + // close layers + w.write_all(b"]")?; + if raw_bytes { + w.write_all(b",\"raw_bytes\":")?; + write_hex_string(w, data)?; + } + // close packet + w.write_all(b"}")?; Ok(()) } @@ -605,7 +617,7 @@ mod tests { field_config: Option<&FieldConfig>, ) -> serde_json::Value { let mut out = Vec::new(); - write_packet_json(&mut out, meta, buf, data, field_config).unwrap(); + write_packet_json(&mut out, meta, buf, data, field_config, false).unwrap(); serde_json::from_slice(&out).unwrap() } diff --git a/tests/cli_output_snapshot_test.rs b/tests/cli_output_snapshot_test.rs index 96a9e1f..1ba138a 100644 --- a/tests/cli_output_snapshot_test.rs +++ b/tests/cli_output_snapshot_test.rs @@ -171,6 +171,40 @@ fn read_verbose_adds_fields_to_ipv4_layer() { ); } +#[test] +fn read_raw_bytes_appends_field_at_end() { + let tmp = write_pcap(1); + + let output = Command::cargo_bin("dsct") + .unwrap() + .args(["read", "--raw-bytes", tmp.path().to_str().unwrap()]) + .output() + .unwrap(); + assert!(output.status.success()); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let first_line = stdout.lines().next().expect("at least one JSONL line"); + let value: Value = serde_json::from_str(first_line).unwrap(); + + // raw_bytes must be appended after `layers`, preserving the existing + // top-level key order. 
+ let expected = [ + "number", + "timestamp", + "length", + "original_length", + "stack", + "layers", + "raw_bytes", + ]; + assert_eq!( + object_keys(&value), + expected, + "raw_bytes must be appended at the end of the record" + ); + assert!(value["raw_bytes"].is_string()); +} + // --------------------------------------------------------------------------- // `dsct stats` — StatsOutput schema // --------------------------------------------------------------------------- diff --git a/tests/cli_read_test.rs b/tests/cli_read_test.rs index 1b32388..5031ed0 100644 --- a/tests/cli_read_test.rs +++ b/tests/cli_read_test.rs @@ -890,3 +890,58 @@ fn esp_null_decoded_without_sa() { "encrypted_data should not be emitted when auto-decoding succeeds" ); } + +#[test] +fn read_raw_bytes_emits_hex_matching_packet() { + let tmp = write_pcap(1); + + let output = Command::cargo_bin("dsct") + .unwrap() + .args(["read", "--raw-bytes", tmp.path().to_str().unwrap()]) + .output() + .unwrap(); + + assert!(output.status.success()); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let line = stdout.trim().lines().next().unwrap(); + let v: serde_json::Value = serde_json::from_str(line).unwrap(); + let raw = v["raw_bytes"].as_str().expect("raw_bytes must be a string"); + + let length = v["length"].as_u64().unwrap() as usize; + assert_eq!(raw.len(), length * 2, "hex length must be 2 * length"); + assert!( + raw.chars() + .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()), + "raw_bytes must be lowercase hex" + ); + + // The fixed Ethernet/IPv4/UDP packet built by build_pcap starts with the + // broadcast destination MAC and has a well-known byte layout. Verify a + // prefix exact match to ensure bytes are passed through unchanged. 
+ let expected_prefix = "ffffffffffff00112233445508004500001c"; + assert!( + raw.starts_with(expected_prefix), + "unexpected raw_bytes prefix: {raw}" + ); +} + +#[test] +fn read_without_raw_bytes_omits_field() { + let tmp = write_pcap(1); + + let output = Command::cargo_bin("dsct") + .unwrap() + .args(["read", tmp.path().to_str().unwrap()]) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + let line = stdout.trim().lines().next().unwrap(); + let v: serde_json::Value = serde_json::from_str(line).unwrap(); + assert!( + v.get("raw_bytes").is_none(), + "raw_bytes must be absent when --raw-bytes is not set" + ); +}