diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..b984ee9 --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2025-05-15 - [Decompression Bomb and Path Collision Protection] +**Vulnerability:** Resource exhaustion via decompression bombs and application crash (SIGBUS) due to self-overwriting memory-mapped files. +**Learning:** Bzip2 blocks typically decompress to about 900KB, but a maliciously crafted block can expand to far more. Memory-mapped files in Rust can trigger a SIGBUS if the underlying file is truncated (e.g., via `File::create`). +**Prevention:** Enforce a strict uncompressed size limit per block (2MB) using `Read::take`. Use `std::fs::canonicalize` to ensure input and output file paths are distinct before opening any output file. diff --git a/bz2zstd/src/main.rs b/bz2zstd/src/main.rs index ced1e01..646acc7 100644 --- a/bz2zstd/src/main.rs +++ b/bz2zstd/src/main.rs @@ -45,7 +45,7 @@ use std::path::PathBuf; use std::thread; mod writer; -use parallel_bzip2_decoder::{extract_bits, MarkerType, Scanner}; +use parallel_bzip2_decoder::{extract_bits, MarkerType, Scanner, MAX_BLOCK_SIZE}; use writer::OutputWriter; /// Command-line arguments for bz2zstd. @@ -143,28 +143,43 @@ fn main() -> Result<()> { let (result_sender, result_receiver) = bounded::<(usize, Vec)>(rayon::current_num_threads() * 2); + // Determine output file path + let output_path = if let Some(path) = &args.output { + path.clone() + } else { + // Auto-generate output filename by replacing .bz2 with .zst + let input_str = args.input.to_string_lossy(); + if input_str.ends_with("bz2") { + PathBuf::from(input_str.replace("bz2", "zst")) + } else { + let mut path = args.input.clone(); + path.set_extension("zst"); + path + } + }; + + // Security Check: Verify that input and output file paths are distinct + // This prevents a 'Bus error' (SIGBUS) caused by truncating a file that is + // currently memory-mapped as input.
+ if let Ok(canon_input) = std::fs::canonicalize(&args.input) { + if let Ok(canon_output) = std::fs::canonicalize(&output_path) { + if canon_input == canon_output { + return Err(anyhow::anyhow!( + "Input and output files must be different: {:?}", + output_path + )); + } + } + } + // === STAGE 3: WRITER THREAD === // // Receives compressed blocks from workers and writes them in order. // Uses a HashMap to buffer out-of-order blocks. + let writer_output_path = output_path.clone(); let writer_handle = thread::spawn(move || -> Result<()> { - // Determine output file path - let output_path = if let Some(path) = args.output { - path - } else { - // Auto-generate output filename by replacing .bz2 with .zst - let input_str = args.input.to_string_lossy(); - if input_str.ends_with("bz2") { - PathBuf::from(input_str.replace("bz2", "zst")) - } else { - let mut path = args.input.clone(); - path.set_extension("zst"); - path - } - }; - let raw_out: Box = - Box::new(File::create(output_path).context("Failed to create output file")?); + Box::new(File::create(writer_output_path).context("Failed to create output file")?); let mut out = OutputWriter::new(raw_out)?; // Buffer for out-of-order blocks @@ -281,8 +296,16 @@ fn main() -> Result<()> { // Decompress the bzip2 block // Note: Last block may not have EOS marker, causing UnexpectedEof decomp_buf.clear(); - let mut decoder = BzDecoder::new(&wrapped_data[..]); + // We take MAX_BLOCK_SIZE + 1 to detect if the limit was exceeded + let mut decoder = + BzDecoder::new(&wrapped_data[..]).take((MAX_BLOCK_SIZE + 1) as u64); match decoder.read_to_end(decomp_buf) { + Ok(_) if decomp_buf.len() > MAX_BLOCK_SIZE => { + return Err(anyhow::anyhow!( + "Decompression limit exceeded ({} bytes) at block {}. 
Possible decompression bomb.", + MAX_BLOCK_SIZE, idx + )); + } Ok(_) => {} // Expected for last block without EOS marker Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {} diff --git a/parallel_bzip2_decoder/src/error.rs b/parallel_bzip2_decoder/src/error.rs index 9f57cdb..72c3a2e 100644 --- a/parallel_bzip2_decoder/src/error.rs +++ b/parallel_bzip2_decoder/src/error.rs @@ -21,6 +21,14 @@ pub enum Bz2Error { /// Memory mapping failed. MmapFailed(std::io::Error), + + /// Decompression limit exceeded (possible decompression bomb). + DecompressionLimitExceeded { + /// Bit offset where the block starts + offset: u64, + /// The limit that was exceeded + limit: usize, + }, } impl fmt::Display for Bz2Error { @@ -36,6 +44,13 @@ impl fmt::Display for Bz2Error { Bz2Error::InvalidFormat(msg) => write!(f, "Invalid bzip2 format: {}", msg), Bz2Error::Io(err) => write!(f, "I/O error: {}", err), Bz2Error::MmapFailed(err) => write!(f, "Memory mapping failed: {}", err), + Bz2Error::DecompressionLimitExceeded { offset, limit } => { + write!( + f, + "Decompression limit exceeded ({} bytes) at bit offset {}. Possible decompression bomb.", + limit, offset + ) + } } } } @@ -47,6 +62,7 @@ impl std::error::Error for Bz2Error { Bz2Error::Io(err) => Some(err), Bz2Error::MmapFailed(err) => Some(err), Bz2Error::InvalidFormat(_) => None, + Bz2Error::DecompressionLimitExceeded { .. } => None, } } } diff --git a/parallel_bzip2_decoder/src/lib.rs b/parallel_bzip2_decoder/src/lib.rs index 410e827..804fe88 100644 --- a/parallel_bzip2_decoder/src/lib.rs +++ b/parallel_bzip2_decoder/src/lib.rs @@ -88,6 +88,10 @@ pub use decoder::Bz2Decoder; pub use error::{Bz2Error, Result}; pub use scanner::{extract_bits, MarkerType, Scanner}; +/// Maximum allowed uncompressed size for a single bzip2 block (2MB). +/// This protects against decompression bomb attacks. 
+pub const MAX_BLOCK_SIZE: usize = 2 * 1024 * 1024; + use bzip2::read::BzDecoder; use crossbeam_channel::bounded; use std::collections::HashMap; @@ -295,8 +299,13 @@ pub fn decompress_block_into( // Decompress using the bzip2 crate // Note: The last block may not have a proper EOS marker, causing UnexpectedEof out.clear(); - let mut decoder = BzDecoder::new(&scratch[..]); + // We take MAX_BLOCK_SIZE + 1 to detect if the limit was exceeded + let mut decoder = BzDecoder::new(&scratch[..]).take((MAX_BLOCK_SIZE + 1) as u64); match decoder.read_to_end(out) { + Ok(_) if out.len() > MAX_BLOCK_SIZE => Err(Bz2Error::DecompressionLimitExceeded { + offset: start_bit, + limit: MAX_BLOCK_SIZE, + }), Ok(_) => Ok(()), // UnexpectedEof is expected for the last block without EOS marker Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => Ok(()),