Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Sentinel Journal

## 2025-01-27 - [Bus Error via Memory Mapping and File Truncation]
**Vulnerability:** A `Bus error` (SIGBUS) occurred when the input file (memory-mapped) was the same as the output file (opened with `File::create`, which truncates it).
**Learning:** Memory mapping a file and then truncating it via another file handle in the same or another process leads to a crash when the memory-mapped region is accessed. Comparing paths with `canonicalize` alone is insufficient if hardlinks are used, because two different canonical paths can still refer to the same underlying file.
**Prevention:** Check if input and output paths refer to the same file using `std::fs::canonicalize` and, on Unix systems, compare device and inode numbers to catch hardlinks.

## 2025-01-27 - [Decompression Bomb Mitigation]
**Vulnerability:** Lack of size limits during block decompression could lead to resource exhaustion (DoS) if a malicious or malformed bzip2 block is processed.
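**Learning:** Even if a format specifies a maximum block size (like 900KB for standard bzip2), a decoder should never trust the input. A 2MB limit (2,000,000 bytes in the code) was chosen as it is more than double the standard maximum, allowing for safe margins while preventing massive memory allocation from malformed blocks. Caveat to verify: bzip2's 900KB block size is measured after its initial run-length encoding step, so a valid block of highly repetitive data may decompress to far more than 900KB; confirm the limit against such inputs before relying on it.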
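**Prevention:** Use `Read::take()` to limit the amount of data decompressed from a single block and return an error if the limit is exceeded. Set the `take` cap one byte above the limit (e.g. `take(2_000_001)` for a 2,000,000-byte limit) so that a block of exactly the limit size can be told apart from one that exceeds it.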
1 change: 1 addition & 0 deletions a
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test
1 change: 1 addition & 0 deletions b
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test
68 changes: 51 additions & 17 deletions bz2zstd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,28 +143,57 @@ fn main() -> Result<()> {
let (result_sender, result_receiver) =
bounded::<(usize, Vec<u8>)>(rayon::current_num_threads() * 2);

// Determine output file path
let output_path = if let Some(path) = args.output {
path
} else {
// Auto-generate output filename by replacing .bz2 with .zst
let input_str = args.input.to_string_lossy();
if input_str.ends_with("bz2") {
PathBuf::from(input_str.replace("bz2", "zst"))
} else {
let mut path = args.input.clone();
path.set_extension("zst");
path
}
};

// Check if input and output refer to the same file to avoid Bus Error (mmap conflict)
// This handles symlinks via canonicalize and hardlinks via device/inode check on Unix.
let is_same = (|| {
let abs_input = std::fs::canonicalize(&args.input).ok()?;
let abs_output = std::fs::canonicalize(&output_path).ok()?;
if abs_input == abs_output {
return Some(true);
}

#[cfg(unix)]
{
use std::os::unix::fs::MetadataExt;
let meta_in = std::fs::metadata(&abs_input).ok()?;
let meta_out = std::fs::metadata(&abs_output).ok()?;
if meta_in.dev() == meta_out.dev() && meta_in.ino() == meta_out.ino() {
return Some(true);
}
}
Some(false)
})()
.unwrap_or(false);

if is_same {
anyhow::bail!(
"Input and output files cannot be the same (preventing Bus Error with mmap)"
);
}

// === STAGE 3: WRITER THREAD ===
//
// Receives compressed blocks from workers and writes them in order.
// Uses a HashMap to buffer out-of-order blocks.
let writer_path = output_path.clone();
let writer_handle = thread::spawn(move || -> Result<()> {
// Determine output file path
let output_path = if let Some(path) = args.output {
path
} else {
// Auto-generate output filename by replacing .bz2 with .zst
let input_str = args.input.to_string_lossy();
if input_str.ends_with("bz2") {
PathBuf::from(input_str.replace("bz2", "zst"))
} else {
let mut path = args.input.clone();
path.set_extension("zst");
path
}
};

let raw_out: Box<dyn Write + Send> =
Box::new(File::create(output_path).context("Failed to create output file")?);
Box::new(File::create(writer_path).context("Failed to create output file")?);

let mut out = OutputWriter::new(raw_out)?;
// Buffer for out-of-order blocks
Expand Down Expand Up @@ -281,8 +310,13 @@ fn main() -> Result<()> {
// Decompress the bzip2 block
// Note: Last block may not have EOS marker, causing UnexpectedEof
decomp_buf.clear();
let mut decoder = BzDecoder::new(&wrapped_data[..]);
// Limit decompression to 2MB to prevent decompression bombs.
// Standard bzip2 blocks are max 900KB.
let mut decoder = BzDecoder::new(&wrapped_data[..]).take(2_000_001);
match decoder.read_to_end(decomp_buf) {
Ok(n) if n > 2_000_000 => {
return Err(anyhow::anyhow!("Decompressed block exceeds 2MB limit (potential decompression bomb)"));
}
Ok(_) => {}
// Expected for last block without EOS marker
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {}
Expand Down
7 changes: 6 additions & 1 deletion parallel_bzip2_decoder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,13 @@ pub fn decompress_block_into(
// Decompress using the bzip2 crate
// Note: The last block may not have a proper EOS marker, causing UnexpectedEof
out.clear();
let mut decoder = BzDecoder::new(&scratch[..]);
// Limit decompression to 2MB to prevent decompression bombs.
// Standard bzip2 blocks are max 900KB.
let mut decoder = BzDecoder::new(&scratch[..]).take(2_000_001);
match decoder.read_to_end(out) {
Ok(n) if n > 2_000_000 => Err(Bz2Error::InvalidFormat(
"Decompressed block exceeds 2MB limit (potential decompression bomb)".to_string(),
)),
Ok(_) => Ok(()),
// UnexpectedEof is expected for the last block without EOS marker
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => Ok(()),
Expand Down
Binary file added test.bz2
Binary file not shown.
Binary file added test_link.bz2
Binary file not shown.
Loading