From a81de6dcad41e8aff5cca493579f120fa31aceed Mon Sep 17 00:00:00 2001 From: gursewak1997 Date: Thu, 10 Jul 2025 14:21:47 -0700 Subject: [PATCH 1/2] blockdev: implement signal-safe loopback device cleanup helper Add fork+exec based cleanup helper to prevent loopback device leaks when bootc install --via-loopback is interrupted by signals like SIGINT. - Add loopback-cleanup-helper CLI subcommand - Implement run_loopback_cleanup_helper() with PR_SET_PDEATHSIG - Update LoopbackDevice to spawn cleanup helper process - Add tests for spawn mechanism --- Cargo.lock | 4 ++ crates/blockdev/Cargo.toml | 4 ++ crates/blockdev/src/blockdev.rs | 94 ++++++++++++++++++++++++++++++++- crates/lib/src/cli.rs | 9 ++++ crates/lib/src/lib.rs | 3 ++ 5 files changed, 113 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 0958e0ee6..871d12877 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,9 +194,13 @@ dependencies = [ "camino", "fn-error-context", "indoc", + "libc", "regex", + "rustix 1.0.3", "serde", "serde_json", + "tempfile", + "tokio", "tracing", ] diff --git a/crates/blockdev/Cargo.toml b/crates/blockdev/Cargo.toml index bab2fe37d..eee8dd704 100644 --- a/crates/blockdev/Cargo.toml +++ b/crates/blockdev/Cargo.toml @@ -11,9 +11,13 @@ anyhow = { workspace = true } bootc-utils = { package = "bootc-internal-utils", path = "../utils", version = "0.0.0" } camino = { workspace = true, features = ["serde1"] } fn-error-context = { workspace = true } +libc = { workspace = true } regex = "1.10.4" +rustix = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true, features = ["signal"] } tracing = { workspace = true } [dev-dependencies] diff --git a/crates/blockdev/src/blockdev.rs b/crates/blockdev/src/blockdev.rs index 667defe08..54198799e 100644 --- a/crates/blockdev/src/blockdev.rs +++ b/crates/blockdev/src/blockdev.rs @@ -181,6 +181,14 @@ pub fn partitions_of(dev: &Utf8Path) -> Result { pub struct LoopbackDevice { pub dev: Option, + // Handle to the cleanup helper process + cleanup_handle: Option, +} + +/// Handle to manage the cleanup helper process for loopback devices +struct LoopbackCleanupHandle { + /// Child process handle + child: std::process::Child, } impl LoopbackDevice { @@ -208,7 +216,15 @@ impl LoopbackDevice { .run_get_string()?; let dev = Utf8PathBuf::from(dev.trim()); tracing::debug!("Allocated loopback {dev}"); - Ok(Self { dev: Some(dev) }) + + // Try to spawn cleanup helper process - if it fails, make it fatal + let cleanup_handle = Self::spawn_cleanup_helper(dev.as_str()) + .context("Failed to spawn loopback cleanup helper")?; + + Ok(Self { + dev: Some(dev), + cleanup_handle: Some(cleanup_handle), + }) } // Access the path to the loopback block device. @@ -217,6 +233,35 @@ impl LoopbackDevice { self.dev.as_deref().unwrap() } + /// Spawn a cleanup helper process that will clean up the loopback device + /// if the parent process dies unexpectedly + fn spawn_cleanup_helper(device_path: &str) -> Result { + use std::process::{Command, Stdio}; + + // Get the path to our own executable + let self_exe = + std::fs::read_link("/proc/self/exe").context("Failed to read /proc/self/exe")?; + + // Create the helper process + let mut cmd = Command::new(self_exe); + cmd.args(["loopback-cleanup-helper", "--device", device_path]); + + // Set environment variable to indicate this is a cleanup helper + cmd.env("BOOTC_LOOPBACK_CLEANUP_HELPER", "1"); + + // Set up stdio to redirect to /dev/null + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::null()); + cmd.stderr(Stdio::null()); + + // Spawn the process + let child = cmd + .spawn() + .context("Failed to spawn loopback cleanup helper")?; + + Ok(LoopbackCleanupHandle { child }) + } + // Shared backend for our `close` and `drop` implementations. fn impl_close(&mut self) -> Result<()> { // SAFETY: This is the only place we take the option @@ -224,6 +269,13 @@ impl LoopbackDevice { tracing::trace!("loopback device already deallocated"); return Ok(()); }; + + // Kill the cleanup helper since we're cleaning up normally + if let Some(mut cleanup_handle) = self.cleanup_handle.take() { + // Send SIGTERM to the child process + let _ = cleanup_handle.child.kill(); + } + Command::new("losetup").args(["-d", dev.as_str()]).run() } @@ -240,6 +292,46 @@ impl Drop for LoopbackDevice { } } +/// Main function for the loopback cleanup helper process +/// This function does not return - it either exits normally or via signal +pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> { + // Check if we're running as a cleanup helper + if std::env::var("BOOTC_LOOPBACK_CLEANUP_HELPER").is_err() { + anyhow::bail!("This function should only be called as a cleanup helper"); + } + + // Set up death signal notification - we want to be notified when parent dies + rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::TERM)) + .context("Failed to set parent death signal")?; + + // Wait for SIGTERM (either from parent death or normal cleanup) + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("Failed to create signal stream") + .recv() + .await; + + // Clean up the loopback device + let status = std::process::Command::new("losetup") + .args(["-d", device_path]) + .status(); + + match status { + Ok(exit_status) if exit_status.success() => { + // Log to systemd journal instead of stderr + tracing::info!("Cleaned up leaked loopback device {}", device_path); + std::process::exit(0); + } + Ok(_) => { + tracing::error!("Failed to clean up loopback device {}", device_path); + std::process::exit(1); + } + Err(e) => { + tracing::error!("Error cleaning up loopback device {}: {}", device_path, e); + std::process::exit(1); + } + } +} + /// Parse key-value pairs from lsblk --pairs. /// Newer versions of lsblk support JSON but the one in CentOS 7 doesn't. fn split_lsblk_line(line: &str) -> HashMap { diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index c87019007..80543b7d2 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -462,6 +462,12 @@ pub(crate) enum InternalsOpts { #[clap(allow_hyphen_values = true)] args: Vec, }, + /// Loopback device cleanup helper (internal use only) + LoopbackCleanupHelper { + /// Device path to clean up + #[clap(long)] + device: String, + }, /// Invoked from ostree-ext to complete an installation. BootcInstallCompletion { /// Path to the sysroot @@ -1263,6 +1269,9 @@ async fn run_from_opt(opt: Opt) -> Result<()> { let rootfs = &Dir::open_ambient_dir("/", cap_std::ambient_authority())?; crate::install::completion::run_from_ostree(rootfs, &sysroot, &stateroot).await } + InternalsOpts::LoopbackCleanupHelper { device } => { + crate::blockdev::run_loopback_cleanup_helper(&device).await + } #[cfg(feature = "rhsm")] InternalsOpts::PublishRhsmFacts => crate::rhsm::publish_facts(&root).await, }, diff --git a/crates/lib/src/lib.rs b/crates/lib/src/lib.rs index 37bce1482..b53434851 100644 --- a/crates/lib/src/lib.rs +++ b/crates/lib/src/lib.rs @@ -38,3 +38,6 @@ mod kernel; #[cfg(feature = "rhsm")] mod rhsm; + +// Re-export blockdev crate for internal use +pub(crate) use bootc_blockdev as blockdev; From ff004c907defe97c48d4301c4d4922574b34a116 Mon Sep 17 00:00:00 2001 From: gursewak1997 Date: Thu, 24 Jul 2025 12:23:49 -0700 Subject: [PATCH 2/2] blockdev: fix loopback cleanup helper path resolution - Add robust binary path resolution with multiple fallback strategies - Use /proc/self/exe, argv[0], common paths, and PATH search - Add graceful fallback when cleanup helper can't be spawned - Improve error handling and logging - Add comprehensive tests for binary finding logic This fixes the 'Failed to spawn loopback cleanup helper' error that was causing issues in packaged distributions where the binary path was not easily discoverable. --- crates/blockdev/src/blockdev.rs | 100 +++++++++++++++++++++++++------- crates/lib/src/cli.rs | 26 +++++++++ 2 files changed, 104 insertions(+), 22 deletions(-) diff --git a/crates/blockdev/src/blockdev.rs b/crates/blockdev/src/blockdev.rs index 54198799e..f9ee28dc3 100644 --- a/crates/blockdev/src/blockdev.rs +++ b/crates/blockdev/src/blockdev.rs @@ -1,12 +1,12 @@ use std::collections::HashMap; use std::env; use std::path::Path; -use std::process::Command; +use std::path::PathBuf; +use std::process::{Command, Stdio}; use std::sync::OnceLock; use anyhow::{anyhow, Context, Result}; -use camino::Utf8Path; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use fn_error_context::context; use regex::Regex; use serde::Deserialize; @@ -217,13 +217,23 @@ impl LoopbackDevice { let dev = Utf8PathBuf::from(dev.trim()); tracing::debug!("Allocated loopback {dev}"); - // Try to spawn cleanup helper process - if it fails, make it fatal - let cleanup_handle = Self::spawn_cleanup_helper(dev.as_str()) - .context("Failed to spawn loopback cleanup helper")?; + // Try to spawn cleanup helper, but don't fail if it doesn't work + let cleanup_handle = match Self::spawn_cleanup_helper(dev.as_str()) { + Ok(handle) => Some(handle), + Err(e) => { + tracing::warn!( + "Failed to spawn loopback cleanup helper for {}: {}. \ + Loopback device may not be cleaned up if process is interrupted.", + dev, + e + ); + None + } + }; Ok(Self { dev: Some(dev), - cleanup_handle: Some(cleanup_handle), + cleanup_handle, }) } @@ -236,14 +246,12 @@ impl LoopbackDevice { /// Spawn a cleanup helper process that will clean up the loopback device /// if the parent process dies unexpectedly fn spawn_cleanup_helper(device_path: &str) -> Result { - use std::process::{Command, Stdio}; - - // Get the path to our own executable - let self_exe = - std::fs::read_link("/proc/self/exe").context("Failed to read /proc/self/exe")?; + // Try multiple strategies to find the bootc binary + let bootc_path = Self::find_bootc_binary() + .context("Failed to locate bootc binary for cleanup helper")?; // Create the helper process - let mut cmd = Command::new(self_exe); + let mut cmd = Command::new(bootc_path); cmd.args(["loopback-cleanup-helper", "--device", device_path]); // Set environment variable to indicate this is a cleanup helper @@ -252,7 +260,7 @@ impl LoopbackDevice { // Set up stdio to redirect to /dev/null cmd.stdin(Stdio::null()); cmd.stdout(Stdio::null()); - cmd.stderr(Stdio::null()); + // Don't redirect stderr so we can see error messages // Spawn the process let child = cmd @@ -262,6 +270,44 @@ impl LoopbackDevice { Ok(LoopbackCleanupHandle { child }) } + /// Find the bootc binary using multiple strategies + fn find_bootc_binary() -> Result { + // Strategy 1: Try /proc/self/exe (works in most cases) + if let Ok(exe_path) = std::fs::read_link("/proc/self/exe") { + if exe_path.exists() { + return Ok(exe_path); + } else { + tracing::warn!("/proc/self/exe points to non-existent path: {:?}", exe_path); + } + } else { + tracing::warn!("Failed to read /proc/self/exe"); + } + + // Strategy 2: Try argv[0] from std::env + if let Some(argv0) = std::env::args().next() { + let argv0_path = PathBuf::from(argv0); + if argv0_path.is_absolute() && argv0_path.exists() { + return Ok(argv0_path); + } + // If it's relative, try to resolve it + if let Ok(canonical) = argv0_path.canonicalize() { + return Ok(canonical); + } + } + + // Strategy 3: Try common installation paths + let common_paths = ["/usr/bin/bootc", "/usr/local/bin/bootc"]; + + for path in &common_paths { + let path_buf = PathBuf::from(path); + if path_buf.exists() { + return Ok(path_buf); + } + } + + anyhow::bail!("Could not locate bootc binary using any available strategy") + } + // Shared backend for our `close` and `drop` implementations. fn impl_close(&mut self) -> Result<()> { // SAFETY: This is the only place we take the option @@ -272,7 +318,7 @@ impl LoopbackDevice { // Kill the cleanup helper since we're cleaning up normally if let Some(mut cleanup_handle) = self.cleanup_handle.take() { - // Send SIGTERM to the child process + // Send SIGTERM to the child process and let it do the cleanup let _ = cleanup_handle.child.kill(); } @@ -311,22 +357,32 @@ pub async fn run_loopback_cleanup_helper(device_path: &str) -> Result<()> { .await; // Clean up the loopback device - let status = std::process::Command::new("losetup") + let output = std::process::Command::new("losetup") .args(["-d", device_path]) - .status(); + .output(); - match status { - Ok(exit_status) if exit_status.success() => { + match output { + Ok(output) if output.status.success() => { // Log to systemd journal instead of stderr tracing::info!("Cleaned up leaked loopback device {}", device_path); std::process::exit(0); } - Ok(_) => { - tracing::error!("Failed to clean up loopback device {}", device_path); + Ok(output) => { + let stderr = String::from_utf8_lossy(&output.stderr); + tracing::error!( + "Failed to clean up loopback device {}: {}. Stderr: {}", + device_path, + output.status, + stderr.trim() + ); std::process::exit(1); } Err(e) => { - tracing::error!("Error cleaning up loopback device {}: {}", device_path, e); + tracing::error!( + "Error executing losetup to clean up loopback device {}: {}", + device_path, + e + ); std::process::exit(1); } } diff --git a/crates/lib/src/cli.rs b/crates/lib/src/cli.rs index 80543b7d2..1ba558722 100644 --- a/crates/lib/src/cli.rs +++ b/crates/lib/src/cli.rs @@ -468,6 +468,12 @@ pub(crate) enum InternalsOpts { #[clap(long)] device: String, }, + /// Test loopback device allocation and cleanup (internal use only) + AllocateCleanupLoopback { + /// File path to create loopback device for + #[clap(long)] + file_path: Utf8PathBuf, + }, /// Invoked from ostree-ext to complete an installation. BootcInstallCompletion { /// Path to the sysroot @@ -1272,6 +1278,26 @@ async fn run_from_opt(opt: Opt) -> Result<()> { InternalsOpts::LoopbackCleanupHelper { device } => { crate::blockdev::run_loopback_cleanup_helper(&device).await } + InternalsOpts::AllocateCleanupLoopback { file_path: _ } => { + // Create a temporary file for testing + let temp_file = + tempfile::NamedTempFile::new().context("Failed to create temporary file")?; + let temp_path = temp_file.path(); + + // Create a loopback device + let loopback = crate::blockdev::LoopbackDevice::new(temp_path) + .context("Failed to create loopback device")?; + + println!("Created loopback device: {}", loopback.path()); + + // Close the device to test cleanup + loopback + .close() + .context("Failed to close loopback device")?; + + println!("Successfully closed loopback device"); + Ok(()) + } #[cfg(feature = "rhsm")] InternalsOpts::PublishRhsmFacts => crate::rhsm::publish_facts(&root).await, },