Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## 2025-05-15 - [Decompression Bomb and Path Collision Protection]
**Vulnerability:** Resource exhaustion via decompression bombs and application crash (SIGBUS) due to self-overwriting memory-mapped files.
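**Learning:** A bzip2 block nominally holds at most 900KB of data, but a maliciously crafted (or extremely repetitive) block can decompress to far more than that, so the decoder's output must be bounded explicitly. Memory-mapped files in Rust can trigger a SIGBUS if the underlying file is truncated (e.g., via `File::create`) while the mapping is still in use.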
**Prevention:** Enforce a strict uncompressed size limit per block (2MB) using `Read::take`. Use `std::fs::canonicalize` to ensure input and output file paths are distinct before opening any output file.
59 changes: 41 additions & 18 deletions bz2zstd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use std::path::PathBuf;
use std::thread;

mod writer;
use parallel_bzip2_decoder::{extract_bits, MarkerType, Scanner};
use parallel_bzip2_decoder::{extract_bits, MarkerType, Scanner, MAX_BLOCK_SIZE};
use writer::OutputWriter;

/// Command-line arguments for bz2zstd.
Expand Down Expand Up @@ -143,28 +143,43 @@ fn main() -> Result<()> {
let (result_sender, result_receiver) =
bounded::<(usize, Vec<u8>)>(rayon::current_num_threads() * 2);

// Determine output file path
let output_path = if let Some(path) = &args.output {
path.clone()
} else {
// Auto-generate output filename by replacing .bz2 with .zst
let input_str = args.input.to_string_lossy();
if input_str.ends_with("bz2") {
PathBuf::from(input_str.replace("bz2", "zst"))
} else {
let mut path = args.input.clone();
path.set_extension("zst");
path
}
};

// Security Check: Verify that input and output file paths are distinct
// This prevents a 'Bus error' (SIGBUS) caused by truncating a file that is
// currently memory-mapped as input.
if let Ok(canon_input) = std::fs::canonicalize(&args.input) {
if let Ok(canon_output) = std::fs::canonicalize(&output_path) {
if canon_input == canon_output {
return Err(anyhow::anyhow!(
"Input and output files must be different: {:?}",
output_path
));
}
}
}

// === STAGE 3: WRITER THREAD ===
//
// Receives compressed blocks from workers and writes them in order.
// Uses a HashMap to buffer out-of-order blocks.
let writer_output_path = output_path.clone();
let writer_handle = thread::spawn(move || -> Result<()> {
// Determine output file path
let output_path = if let Some(path) = args.output {
path
} else {
// Auto-generate output filename by replacing .bz2 with .zst
let input_str = args.input.to_string_lossy();
if input_str.ends_with("bz2") {
PathBuf::from(input_str.replace("bz2", "zst"))
} else {
let mut path = args.input.clone();
path.set_extension("zst");
path
}
};

let raw_out: Box<dyn Write + Send> =
Box::new(File::create(output_path).context("Failed to create output file")?);
Box::new(File::create(writer_output_path).context("Failed to create output file")?);

let mut out = OutputWriter::new(raw_out)?;
// Buffer for out-of-order blocks
Expand Down Expand Up @@ -281,8 +296,16 @@ fn main() -> Result<()> {
// Decompress the bzip2 block
// Note: Last block may not have EOS marker, causing UnexpectedEof
decomp_buf.clear();
let mut decoder = BzDecoder::new(&wrapped_data[..]);
// We take MAX_BLOCK_SIZE + 1 to detect if the limit was exceeded
let mut decoder =
BzDecoder::new(&wrapped_data[..]).take((MAX_BLOCK_SIZE + 1) as u64);
match decoder.read_to_end(decomp_buf) {
Ok(_) if decomp_buf.len() > MAX_BLOCK_SIZE => {
return Err(anyhow::anyhow!(
"Decompression limit exceeded ({} bytes) at block {}. Possible decompression bomb.",
MAX_BLOCK_SIZE, idx
));
}
Ok(_) => {}
// Expected for last block without EOS marker
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {}
Expand Down
16 changes: 16 additions & 0 deletions parallel_bzip2_decoder/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ pub enum Bz2Error {

/// Memory mapping failed.
MmapFailed(std::io::Error),

/// Decompression limit exceeded (possible decompression bomb).
DecompressionLimitExceeded {
/// Bit offset where the block starts
offset: u64,
/// The limit that was exceeded
limit: usize,
},
}

impl fmt::Display for Bz2Error {
Expand All @@ -36,6 +44,13 @@ impl fmt::Display for Bz2Error {
Bz2Error::InvalidFormat(msg) => write!(f, "Invalid bzip2 format: {}", msg),
Bz2Error::Io(err) => write!(f, "I/O error: {}", err),
Bz2Error::MmapFailed(err) => write!(f, "Memory mapping failed: {}", err),
Bz2Error::DecompressionLimitExceeded { offset, limit } => {
write!(
f,
"Decompression limit exceeded ({} bytes) at bit offset {}. Possible decompression bomb.",
limit, offset
)
}
}
}
}
Expand All @@ -47,6 +62,7 @@ impl std::error::Error for Bz2Error {
Bz2Error::Io(err) => Some(err),
Bz2Error::MmapFailed(err) => Some(err),
Bz2Error::InvalidFormat(_) => None,
Bz2Error::DecompressionLimitExceeded { .. } => None,
}
}
}
Expand Down
11 changes: 10 additions & 1 deletion parallel_bzip2_decoder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ pub use decoder::Bz2Decoder;
pub use error::{Bz2Error, Result};
pub use scanner::{extract_bits, MarkerType, Scanner};

/// Maximum allowed uncompressed size for a single bzip2 block (48 MiB).
///
/// This protects against decompression bomb attacks while still accepting
/// every well-formed block. A level-9 block holds up to 900,000 bytes *after*
/// bzip2's initial run-length encoding, and that stage stores a run of up to
/// 259 identical bytes in only 5 bytes. A legitimate block (for example one
/// produced from a file of zeros) can therefore expand to roughly 46.6 MB, so
/// a limit of only a few megabytes would reject valid, highly repetitive
/// input as a "bomb".
pub const MAX_BLOCK_SIZE: usize = 48 * 1024 * 1024;

use bzip2::read::BzDecoder;
use crossbeam_channel::bounded;
use std::collections::HashMap;
Expand Down Expand Up @@ -295,8 +299,13 @@ pub fn decompress_block_into(
// Decompress using the bzip2 crate
// Note: The last block may not have a proper EOS marker, causing UnexpectedEof
out.clear();
let mut decoder = BzDecoder::new(&scratch[..]);
// We take MAX_BLOCK_SIZE + 1 to detect if the limit was exceeded
let mut decoder = BzDecoder::new(&scratch[..]).take((MAX_BLOCK_SIZE + 1) as u64);
match decoder.read_to_end(out) {
Ok(_) if out.len() > MAX_BLOCK_SIZE => Err(Bz2Error::DecompressionLimitExceeded {
offset: start_bit,
limit: MAX_BLOCK_SIZE,
}),
Ok(_) => Ok(()),
// UnexpectedEof is expected for the last block without EOS marker
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => Ok(()),
Expand Down
Loading