From dcf3835cd8a02a44922c2d1a603aecfba8bd647e Mon Sep 17 00:00:00 2001 From: kassoulet <1905+kassoulet@users.noreply.github.com> Date: Sun, 29 Mar 2026 19:40:40 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[CRITICAL]?= =?UTF-8?q?=20Fix=20SIGBUS=20crash=20on=20hard-linked=20output=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Identify and prevent input/output file collisions caused by hard links on Unix systems. Previously, bz2zstd only checked canonical paths, missing cases where different paths pointed to the same inode. Truncating such a file while it is memory-mapped for input triggers a SIGBUS (Bus error). - Added device and inode comparison using `std::os::unix::fs::MetadataExt`. - Maintained existing canonical path check for cross-platform support. - Encapsulated logic in a clean closure with appropriate error handling. Verified with a reproduction script that formerly triggered SIGBUS. Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- bz2zstd/src/main.rs | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 318e1c2..2ac6cea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,7 +154,7 @@ checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] name = "bz2zstd" -version = "0.2.0" +version = "0.2.1" dependencies = [ "anyhow", "bzip2", @@ -714,7 +714,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "parallel_bzip2_decoder" -version = "0.2.0" +version = "0.2.1" dependencies = [ "aho-corasick", "anyhow", diff --git a/bz2zstd/src/main.rs b/bz2zstd/src/main.rs index 646acc7..8ffda15 100644 --- a/bz2zstd/src/main.rs +++ b/bz2zstd/src/main.rs @@ -158,18 +158,43 @@ fn main() -> Result<()> { } }; - // Security Check: Verify that input and output file 
paths are distinct + // Security Check: Verify that input and output file paths are distinct. // This prevents a 'Bus error' (SIGBUS) caused by truncating a file that is // currently memory-mapped as input. - if let Ok(canon_input) = std::fs::canonicalize(&args.input) { - if let Ok(canon_output) = std::fs::canonicalize(&output_path) { - if canon_input == canon_output { - return Err(anyhow::anyhow!( - "Input and output files must be different: {:?}", - output_path - )); + // + // We check both: + // 1. Canonical paths (detects same file via different paths/symlinks) + // 2. Device/Inode (detects same file via hard links on Unix) + let is_same_file = (|| -> Result<bool> { + let input_canon = std::fs::canonicalize(&args.input); + let output_canon = std::fs::canonicalize(&output_path); + + if let (Ok(in_c), Ok(out_c)) = (input_canon, output_canon) { + if in_c == out_c { + return Ok(true); } } + + #[cfg(unix)] + { + use std::os::unix::fs::MetadataExt; + let input_meta = std::fs::metadata(&args.input); + let output_meta = std::fs::metadata(&output_path); + if let (Ok(in_m), Ok(out_m)) = (input_meta, output_meta) { + if in_m.dev() == out_m.dev() && in_m.ino() == out_m.ino() { + return Ok(true); + } + } + } + + Ok(false) + })()?; + + if is_same_file { + return Err(anyhow::anyhow!( + "Input and output files must be different: {:?}", + output_path + )); } // === STAGE 3: WRITER THREAD ===