From 89c61dcf1172945f9149d2a9f71c574712f39c38 Mon Sep 17 00:00:00 2001 From: Pierre Tomasina Date: Mon, 27 Apr 2026 14:38:57 +0200 Subject: [PATCH] feat(executor): forward signals to sandboxed process group to prevent orphans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes issue #37: when sx receives SIGINT/SIGTERM/SIGHUP it now delivers the signal to the entire sandboxed process group via kill(-pgid, ...), followed by a 2s grace period and SIGKILL escalation. A RAII PgidKillGuard ensures the subtree is cleaned up even on unexpected exit paths. - Add signal-hook and libc dependencies - Spawn child in its own process group (process_group(0)) - Add spawn_with_signal_forwarding with SIGTERM→SIGKILL escalation - Add PgidKillGuard for panic-safe cleanup - Add unit test for process group isolation - Add integration tests (tests/signal_test.rs) covering SIGTERM propagation and document the known SIGKILL limitation --- Cargo.lock | 22 +++++ Cargo.toml | 4 + src/sandbox/executor.rs | 122 +++++++++++++++++++++++- tests/signal_test.rs | 203 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 349 insertions(+), 2 deletions(-) create mode 100644 tests/signal_test.rs diff --git a/Cargo.lock b/Cargo.lock index 0034c5b..3f99a3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -528,6 +528,26 @@ dependencies = [ "dirs", ] +[[package]] +name = "signal-hook" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a0c28ca5908dbdbcd52e6fdaa00358ab88637f8ab33e1f188dd510eb44b53d" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -548,9 +568,11 @@ dependencies = [ "assert_cmd", "clap", "dirs", + "libc", "predicates", "serde", "shellexpand", + "signal-hook", "tempfile", "thiserror", "toml", diff --git a/Cargo.toml b/Cargo.toml index d8429ad..a0a6111 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,10 @@ thiserror = "2" anyhow = "1" tempfile = "3" +# Signal handling and syscalls +signal-hook = "0.4" +libc = "0.2" + [dev-dependencies] assert_cmd = "2" predicates = "3" diff --git a/src/sandbox/executor.rs b/src/sandbox/executor.rs index 6834113..5ccce63 100644 --- a/src/sandbox/executor.rs +++ b/src/sandbox/executor.rs @@ -1,12 +1,22 @@ // sandbox-exec invocation use crate::sandbox::seatbelt::{generate_seatbelt_profile, SandboxParams, SeatbeltError}; use crate::sandbox::trace::TraceSession; +use signal_hook::consts::{SIGHUP, SIGINT, SIGTERM}; +use signal_hook::iterator::Signals; use std::fs; use std::io; +use std::os::unix::process::CommandExt; use std::path::Path; use std::process::{Command, ExitStatus, Stdio}; +use std::time::Duration; use tempfile::{NamedTempFile, TempDir}; +/// Grace period between SIGTERM and SIGKILL when forwarding shutdown signals +/// to the sandboxed process group. Long enough for typical cleanup (closing +/// IPC peers, flushing buffers) but short enough that orphaned processes do +/// not linger after `sx` is signalled. +const SIGTERM_TO_SIGKILL_GRACE: Duration = Duration::from_secs(2); + /// Exit codes for sandbox execution pub mod exit_codes { pub const SUCCESS: i32 = 0; @@ -171,8 +181,11 @@ SAVEHIST=0 .stdout(Stdio::inherit()) .stderr(Stdio::inherit()); - // Execute and wait - let status = cmd.spawn()?.wait()?; + // Execute and wait, forwarding shutdown signals to the entire sandboxed subtree. + // Without this, sx exits without signalling its sandboxed descendants — IPC + // children (e.g. Node `--useNodeIpc` workers) get reparented to launchd and + // accumulate forever. + let status = spawn_with_signal_forwarding(cmd)?; // Stop trace session if let Some(ref mut session) = trace_session { @@ -217,6 +230,74 @@ pub fn dry_run(params: &SandboxParams) -> Result { generate_seatbelt_profile(params) } +/// RAII guard that SIGKILLs an entire process group on drop. +/// Provides panic / early-return safety so the sandbox subtree is never +/// orphaned if `sx` exits along an unexpected path. +struct PgidKillGuard { + pgid: Option, +} + +impl PgidKillGuard { + fn new(pgid: i32) -> Self { + Self { pgid: Some(pgid) } + } + + /// Disarm the guard after a clean exit so we do not signal a dead pgid. + fn disarm(&mut self) { + self.pgid = None; + } +} + +impl Drop for PgidKillGuard { + fn drop(&mut self) { + if let Some(pgid) = self.pgid { + // Best-effort: ignore errors (group may already be gone). + unsafe { + libc::kill(-pgid, libc::SIGKILL); + } + } + } +} + +/// Send `sig` to the entire process group identified by `pgid`. +fn signal_pgroup(pgid: i32, sig: libc::c_int) { + unsafe { + libc::kill(-pgid, sig); + } +} + +/// Spawn `cmd` in its own process group and wait for it, forwarding +/// SIGINT/SIGTERM/SIGHUP to the sandboxed subtree with a SIGTERM → +/// grace-period → SIGKILL escalation. +fn spawn_with_signal_forwarding(mut cmd: Command) -> io::Result { + // Put the child in its own process group so kill(-pgid, ...) reaches every + // descendant in one syscall and does not loop back to sx itself. + cmd.process_group(0); + + let mut child = cmd.spawn()?; + let pgid = child.id() as i32; + let mut kill_guard = PgidKillGuard::new(pgid); + + let mut signals = Signals::new([SIGINT, SIGTERM, SIGHUP])?; + let signal_handle = signals.handle(); + + let signal_thread = std::thread::spawn(move || { + if signals.forever().next().is_some() { + signal_pgroup(pgid, libc::SIGTERM); + std::thread::sleep(SIGTERM_TO_SIGKILL_GRACE); + signal_pgroup(pgid, libc::SIGKILL); + } + }); + + let status = child.wait()?; + + kill_guard.disarm(); + signal_handle.close(); + let _ = signal_thread.join(); + + Ok(status) +} + /// Check if an environment variable name matches any glob-like pattern. /// Supports `*` wildcards: `AWS_*` matches `AWS_SECRET_KEY`, `*_SECRET*` matches `MY_SECRET_VALUE`. fn matches_env_pattern(name: &str, patterns: &[String]) -> bool { @@ -363,4 +444,41 @@ mod tests { &["DYLD_*".to_string()] )); } + + /// Verifies the foundational mechanism for issue #37: spawning a child via + /// `Command::process_group(0)` isolates it into its own process group so we + /// can deliver group-wide signals via `kill(-pgid, ...)`. This is a direct + /// unit test of the kernel behavior `spawn_with_signal_forwarding` relies + /// on; it does not require `sandbox-exec` and runs in any environment. + #[test] + fn test_process_group_isolation_for_signal_forwarding() { + let mut cmd = Command::new("/bin/sh"); + cmd.args(["-c", "sleep 5"]) + .process_group(0) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + + let mut child = cmd.spawn().expect("spawn /bin/sh"); + let child_pid = child.id() as i32; + + let child_pgid = unsafe { libc::getpgid(child_pid) }; + assert_eq!( + child_pgid, child_pid, + "child should lead its own process group (pgid == pid)" + ); + + let test_pgid = unsafe { libc::getpgid(0) }; + assert_ne!( + test_pgid, child_pgid, + "child pgid must be isolated from test process pgid" + ); + + // Cleanup: signal the entire group, mirroring what the production + // signal handler does, then reap the child. + unsafe { + libc::kill(-child_pid, libc::SIGTERM); + } + let _ = child.wait(); + } } diff --git a/tests/signal_test.rs b/tests/signal_test.rs new file mode 100644 index 0000000..24a6bb7 --- /dev/null +++ b/tests/signal_test.rs @@ -0,0 +1,203 @@ +//! Integration tests for signal forwarding (issue #37). +//! +//! Verifies that `sx` forwards SIGINT/SIGTERM/SIGHUP to the entire sandboxed +//! process subtree so descendants are not orphaned to launchd when `sx` exits. + +use std::fs; +use std::process::{Command, Stdio}; +use std::thread; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +/// Path to the `sx` binary produced by Cargo for this test target. +const SX_BIN: &str = env!("CARGO_BIN_EXE_sx"); + +/// Probe whether `sandbox-exec` accepts a custom deny-default profile on this +/// system. On hardened macOS configurations custom profiles can be blocked, +/// in which case there is nothing meaningful to assert about signal forwarding. +fn is_custom_sandbox_available() -> bool { + let probe = r#"(version 1) +(deny default) +(allow process-fork) +(allow process-exec) +(allow signal (target self)) +(allow sysctl-read) +(allow file-read-metadata) +(allow mach-lookup) +(allow file-read* (subpath "/usr")) +(allow file-read* (subpath "/bin")) +"#; + let Ok(temp) = tempfile::NamedTempFile::new() else { + return false; + }; + if fs::write(temp.path(), probe).is_err() { + return false; + } + Command::new("/usr/bin/sandbox-exec") + .arg("-f") + .arg(temp.path()) + .arg("/bin/echo") + .arg("ok") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) +} + +macro_rules! skip_if_no_sandbox { + () => { + if !is_custom_sandbox_available() { + eprintln!("Skipping test: sandbox-exec custom profiles unavailable"); + return; + } + }; +} + +/// Generate a sleep duration unlikely to collide with anything else on the +/// system, so we can identify our descendants in `ps` output. ~115 days is +/// clearly synthetic and highly unlikely to match an unrelated process. +fn unique_marker() -> u64 { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0); + 9_000_000 + nanos % 1_000_000 +} + +/// Count running processes whose `ps -axo command` line contains `pattern`, +/// excluding the `ps` invocation itself. +fn count_processes(pattern: &str) -> usize { + let output = Command::new("/bin/ps") + .args(["-axo", "command"]) + .output() + .expect("invoke ps"); + String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|line| line.contains(pattern) && !line.contains("/bin/ps")) + .count() +} + +/// Best-effort: SIGKILL every process whose command line contains `pattern`. +/// Used as test cleanup to avoid leaking orphans across test runs. +fn pkill_pattern(pattern: &str) { + let Ok(output) = Command::new("/bin/ps") + .args(["-axo", "pid,command"]) + .output() + else { + return; + }; + let stdout = String::from_utf8_lossy(&output.stdout); + for line in stdout.lines() { + if !line.contains(pattern) || line.contains("/bin/ps") { + continue; + } + let Some(pid_str) = line.split_whitespace().next() else { + continue; + }; + if let Ok(pid) = pid_str.parse::() { + unsafe { + libc::kill(pid, libc::SIGKILL); + } + } + } +} + +#[test] +fn test_sigterm_to_sx_propagates_to_sandbox_subtree() { + skip_if_no_sandbox!(); + + let marker = unique_marker(); + let pattern = format!("sleep {}", marker); + let shell_cmd = format!("/bin/sleep {} & /bin/sleep {} & wait", marker, marker); + + let mut child = Command::new(SX_BIN) + .arg("--no-config") + .arg("--") + .arg("/bin/sh") + .arg("-c") + .arg(&shell_cmd) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("spawn sx"); + let sx_pid = child.id(); + + // Give sandbox-exec → sh → sleep chain time to come up. + thread::sleep(Duration::from_millis(800)); + let before = count_processes(&pattern); + assert!( + before >= 2, + "expected at least 2 sleep descendants before SIGTERM, saw {}", + before + ); + + unsafe { + libc::kill(sx_pid as i32, libc::SIGTERM); + } + + // Issue #37 acceptance criterion: zero descendants remain after 2s. + // The 3s total budget covers SIGTERM → grace → SIGKILL plus reaping. + thread::sleep(Duration::from_secs(3)); + let after = count_processes(&pattern); + + // Always reap and cleanup before asserting so a failure does not leak processes. + let _ = child.wait(); + if after != 0 { + pkill_pattern(&pattern); + } + + assert_eq!( + after, 0, + "expected sandbox subtree to be gone 3s after SIGTERM, {} stragglers remain", + after + ); +} + +#[test] +fn test_sigkill_to_sx_orphans_subtree_known_limitation() { + skip_if_no_sandbox!(); + + // SIGKILL is uncatchable; sx has no opportunity to forward it. This test + // pins down the limitation: `kill_on_drop` / signal handlers do not help + // when sx itself is force-killed. A future supervisor process could fix + // this — when it lands, this test will start failing and should be updated. + let marker = unique_marker(); + let pattern = format!("sleep {}", marker); + let shell_cmd = format!("/bin/sleep {}", marker); + + let mut child = Command::new(SX_BIN) + .arg("--no-config") + .arg("--") + .arg("/bin/sh") + .arg("-c") + .arg(&shell_cmd) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("spawn sx"); + let sx_pid = child.id(); + + thread::sleep(Duration::from_millis(800)); + let before = count_processes(&pattern); + assert!( + before >= 1, + "expected sleep descendant to be running before SIGKILL, saw {}", + before + ); + + unsafe { + libc::kill(sx_pid as i32, libc::SIGKILL); + } + let _ = child.wait(); + + // Brief settle; orphan is reparented to launchd but stays alive. + thread::sleep(Duration::from_millis(500)); + let after_kill = count_processes(&pattern); + + // Cleanup orphans before asserting, regardless of outcome. + pkill_pattern(&pattern); + + assert!( + after_kill >= 1, + "expected SIGKILL to leave at least one orphan (uncatchable signal). \ + If this fails, supervisor logic has improved — update this test." + ); +}