diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..c9ef1be --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2025-05-22 - SIGBUS on Memory-Mapped File Truncation +**Vulnerability:** A SIGBUS crash occurs when the output path refers to the same file as the input: creating the output file truncates the input file while it is still memory-mapped. +**Learning:** `File::create` truncates the file immediately. If that file backs an active `mmap`, any subsequent access to the mapping results in a SIGBUS signal because the mapped pages no longer exist in the file. +**Prevention:** Always use `std::fs::canonicalize` to verify that input and output file paths are distinct before opening any file for writing, especially when using memory mapping. diff --git a/bz2zstd/src/main.rs b/bz2zstd/src/main.rs index ced1e01..d777199 100644 --- a/bz2zstd/src/main.rs +++ b/bz2zstd/src/main.rs @@ -86,6 +86,33 @@ fn main() -> Result<()> { .context("Failed to build global thread pool")?; } + // Determine output file path early to check against input + let output_path = if let Some(path) = args.output.clone() { + path + } else { + // Auto-generate output filename by replacing .bz2 with .zst + let input_str = args.input.to_string_lossy(); + if input_str.ends_with("bz2") { + PathBuf::from(input_str.replace("bz2", "zst")) + } else { + let mut path = args.input.clone(); + path.set_extension("zst"); + path + } + }; + + // Security Requirement: Verify that input and output file paths are distinct + // to prevent SIGBUS caused by truncating a memory-mapped input file. 
+ if let Ok(input_canonical) = std::fs::canonicalize(&args.input) { + if output_path.exists() { + if let Ok(output_canonical) = std::fs::canonicalize(&output_path) { + if input_canonical == output_canonical { + anyhow::bail!("Input and output files must be different"); + } + } + } + } + // Memory-map the input file for efficient random access // Benefits: // - No need to load entire file into memory @@ -148,21 +175,6 @@ fn main() -> Result<()> { // Receives compressed blocks from workers and writes them in order. // Uses a HashMap to buffer out-of-order blocks. let writer_handle = thread::spawn(move || -> Result<()> { - // Determine output file path - let output_path = if let Some(path) = args.output { - path - } else { - // Auto-generate output filename by replacing .bz2 with .zst - let input_str = args.input.to_string_lossy(); - if input_str.ends_with("bz2") { - PathBuf::from(input_str.replace("bz2", "zst")) - } else { - let mut path = args.input.clone(); - path.set_extension("zst"); - path - } - }; - let raw_out: Box = Box::new(File::create(output_path).context("Failed to create output file")?);