From 311780dbc75cbf3cd4ab7b23ffa39e94b942cf50 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:09:30 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?= =?UTF-8?q?=20Fix=20SIGBUS=20on=20input/output=20path=20collision?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 🚨 Severity: CRITICAL - 💡 Vulnerability: A SIGBUS crash occurs when the output file path is the same as the input file path. - 🎯 Impact: Opening the output file for writing truncates the input file while it is still memory-mapped, causing the application to crash immediately with a Bus Error (SIGBUS). - 🔧 Fix: Use `std::fs::canonicalize` to verify that input and output file paths are distinct before opening the output file. - ✅ Verification: Verified with a reproduction script that formerly crashed and now reports a proper error message. All existing tests pass. Co-authored-by: kassoulet <1905+kassoulet@users.noreply.github.com> --- .jules/sentinel.md | 4 ++++ bz2zstd/src/main.rs | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 .jules/sentinel.md diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..c9ef1be --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2025-05-22 - SIGBUS on Memory-Mapped File Truncation +**Vulnerability:** A SIGBUS crash occurs when an output file truncates the input file while it is currently memory-mapped. +**Learning:** `File::create` truncates the file immediately. If that file is backed by a `mmap`, any subsequent access to the mapping results in a SIGBUS signal because the mapped pages no longer exist in the file. +**Prevention:** Always use `std::fs::canonicalize` to verify that input and output file paths are distinct before opening any file for writing, especially when using memory mapping.
diff --git a/bz2zstd/src/main.rs b/bz2zstd/src/main.rs index ced1e01..d777199 100644 --- a/bz2zstd/src/main.rs +++ b/bz2zstd/src/main.rs @@ -86,6 +86,33 @@ fn main() -> Result<()> { .context("Failed to build global thread pool")?; } + // Determine output file path early to check against input + let output_path = if let Some(path) = args.output.clone() { + path + } else { + // Auto-generate output filename by replacing .bz2 with .zst + let input_str = args.input.to_string_lossy(); + if input_str.ends_with("bz2") { + PathBuf::from(input_str.replace("bz2", "zst")) + } else { + let mut path = args.input.clone(); + path.set_extension("zst"); + path + } + }; + + // Security Requirement: Verify that input and output file paths are distinct + // to prevent SIGBUS caused by truncating a memory-mapped input file. + if let Ok(input_canonical) = std::fs::canonicalize(&args.input) { + if output_path.exists() { + if let Ok(output_canonical) = std::fs::canonicalize(&output_path) { + if input_canonical == output_canonical { + anyhow::bail!("Input and output files must be different"); + } + } + } + } + // Memory-map the input file for efficient random access // Benefits: // - No need to load entire file into memory @@ -148,21 +175,6 @@ fn main() -> Result<()> { // Receives compressed blocks from workers and writes them in order. // Uses a HashMap to buffer out-of-order blocks. let writer_handle = thread::spawn(move || -> Result<()> { - // Determine output file path - let output_path = if let Some(path) = args.output { - path - } else { - // Auto-generate output filename by replacing .bz2 with .zst - let input_str = args.input.to_string_lossy(); - if input_str.ends_with("bz2") { - PathBuf::from(input_str.replace("bz2", "zst")) - } else { - let mut path = args.input.clone(); - path.set_extension("zst"); - path - } - }; - let raw_out: Box = Box::new(File::create(output_path).context("Failed to create output file")?);