From eee861093e94e0d750195007259d9d9373dee9b9 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 30 Mar 2026 14:15:20 +0200 Subject: [PATCH 1/2] fix(bridge): kill full process group on CLI timeout to prevent orphaned subprocesses When run_cli times out and kills agent-deck, grandchild processes spawned by agent-deck (e.g. tmux send-keys -l) survive as orphans because the SIGKILL is delivered only to agent-deck itself and is never propagated to the processes it spawned. These orphans continue feeding characters into the tmux pane's PTY input buffer, jamming subsequent sends indefinitely. Fix: - start_new_session=True on subprocess.run puts agent-deck in its own process group, isolated from the bridge's group - os.killpg() on TimeoutExpired kills the entire process group, including all grandchildren - Fallback to proc.kill() if the process group is already gone --- conductor/bridge.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/conductor/bridge.py b/conductor/bridge.py index 99524b0c6..ba8e63d73 100755 --- a/conductor/bridge.py +++ b/conductor/bridge.py @@ -18,6 +18,7 @@ import logging import os import re +import signal import subprocess import sys import time @@ -117,11 +118,20 @@ def run_cli( log.debug("CLI: %s", " ".join(cmd)) try: result = subprocess.run( - cmd, capture_output=True, text=True, timeout=timeout + cmd, capture_output=True, text=True, timeout=timeout, + start_new_session=True, # own process group → killpg kills grandchildren too ) return result - except subprocess.TimeoutExpired: + except subprocess.TimeoutExpired as exc: log.warning("CLI timeout: %s", " ".join(cmd)) + if exc.proc: + try: + # Kill the entire process group so grandchildren (e.g. tmux send-keys) + # don't survive as orphans and jam the pane's input queue. 
+ os.killpg(os.getpgid(exc.proc.pid), signal.SIGKILL) + except (ProcessLookupError, PermissionError): + exc.proc.kill() # fallback: kill direct child only + exc.proc.communicate() return subprocess.CompletedProcess(cmd, 1, "", "timeout") except FileNotFoundError: log.error("agent-deck not found in PATH") From c3ea844e228ee3d9bc21165a036af8be522ff5f3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 30 Mar 2026 17:23:58 +0200 Subject: [PATCH 2/2] fix(bridge): use Popen instead of subprocess.run for proper process group kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit subprocess.TimeoutExpired never carries a .proc attribute (it exposes only cmd, timeout, output/stdout and stderr), so the exc.proc check added in the previous patch raises AttributeError, and subprocess.run() gives the caller no handle on the child process. Replace with Popen + communicate(timeout=) so we hold the process object ourselves and can reliably kill the process group on timeout. --- conductor/bridge.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/conductor/bridge.py b/conductor/bridge.py index ba8e63d73..91ffbb00f 100755 --- a/conductor/bridge.py +++ b/conductor/bridge.py @@ -117,22 +117,28 @@ def run_cli( cmd += list(args) log.debug("CLI: %s", " ".join(cmd)) try: - result = subprocess.run( - cmd, capture_output=True, text=True, timeout=timeout, + # Use Popen + communicate(timeout=) so we have the proc object available + # when TimeoutExpired fires — subprocess.run() does NOT set exc.proc. + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, start_new_session=True, # own process group → killpg kills grandchildren too ) - return result - except subprocess.TimeoutExpired as exc: - log.warning("CLI timeout: %s", " ".join(cmd)) - if exc.proc: + try: + stdout, stderr = proc.communicate(timeout=timeout) + return subprocess.CompletedProcess(cmd, proc.returncode, stdout, stderr) + except subprocess.TimeoutExpired: + log.warning("CLI timeout: %s", " ".join(cmd)) try: # Kill the entire process group so grandchildren (e.g. 
tmux send-keys) # don't survive as orphans and jam the pane's input queue. - os.killpg(os.getpgid(exc.proc.pid), signal.SIGKILL) + os.killpg(os.getpgid(proc.pid), signal.SIGKILL) except (ProcessLookupError, PermissionError): - exc.proc.kill() # fallback: kill direct child only - exc.proc.communicate() - return subprocess.CompletedProcess(cmd, 1, "", "timeout") + proc.kill() # fallback: kill direct child only + proc.communicate() + return subprocess.CompletedProcess(cmd, 1, "", "timeout") except FileNotFoundError: log.error("agent-deck not found in PATH") return subprocess.CompletedProcess(cmd, 1, "", "not found")