From f867701bce723fc55cabfa2d857b59893fd5aa40 Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 08:53:35 -0500 Subject: [PATCH] fix(airc daemon): sentinel-marker for intentional re-exec (#203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Joel + continuum-b69f 2026-04-28: Windows daemon launcher's `:loop` respawned a fresh airc 5s after the original bash exited, racing the new airc that just took over via host-mode re-exec. Continuous crashloop on `airc daemon install` from a project dir whose room gist had a stale heartbeat (a common state on cold start). Root cause specific to Windows MSYS-bash: `exec env ... "$0" connect` is true execve on Linux/Mac (PID stays, parent never observes exit), but emulated as spawn-and-exit on Windows MSYS (parent bash exits + new airc bash takes over with a different PID). The daemon launcher's `bash -c "exec airc connect"` thus returns to the .bat after every host-takeover, which the .bat treats as a crash. Fix: - New helper `_write_reexec_marker` writes `PID:UNIX_TIMESTAMP` to `$AIRC_WRITE_DIR/airc.reexec-marker`. - Called immediately before all 5 `exec env ... "$0" connect ...` sites: 4 host-takeover paths (cmd_connect's stale-heartbeat self-heal in two different code paths × {rejoin-as-joiner, host}) + 1 cold-host split-brain race-loser path. - Daemon launcher .bat checks for the marker between iterations using `forfiles /p SCOPE_DIR /m airc.reexec-marker /d 0` (file mtime today). If marker is fresh, the launcher prints a "re-exec'd; new process is now daemon, launcher exiting" message and exit /b 0 (no respawn). The new airc process from the exec is the running daemon now — competing-respawn would just kill it. On Linux/Mac the marker write is harmless: `exec` keeps the same PID, the parent bash never observes an exit, the launcher script (where applicable: launchd KeepAlive=true / systemd Restart=always) never sees the marker because it never re-enters its monitor loop. 
Trade-off: after intentional re-exec, the .bat exits → no auto- restart for crashes that happen LATER in the new airc's lifetime. User must wait until next logon or re-run `airc daemon install`. This is acceptable vs the current behavior (continuous crashloop after first re-exec). Future enhancement: .bat could transition to a "monitor mode" that polls airc.pid and only restarts if all PIDs in it are dead, but the simple exit-on-marker is the minimal viable fix for #203. Closes #203 once continuum-b69f re-tests on real Windows. --- airc | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/airc b/airc index 2339e6d..189dad2 100755 --- a/airc +++ b/airc @@ -291,6 +291,22 @@ fi unset _gh_resolved AIRC_WRITE_DIR="$(detect_scope)" + +# Write a sentinel marker before any intentional `exec env ... "$0" ...` +# call, so the Windows daemon launcher .bat can distinguish "intentional +# re-exec into different mode" from "actual crash" (#203). On Linux/Mac +# `exec` is a true execve — the parent bash's PID becomes the new +# program, so the launcher script never observes an exit and the marker +# is harmless. On Windows MSYS-bash, exec is emulated as spawn-and-exit: +# the original bash exits + a new airc bash takes over. The launcher +# .bat sees the original bash exit, would normally treat it as a crash, +# and respawn — racing the new airc that just took over (Joel/continuum- +# b69f's #203 crashloop). Marker contents: "PID:UNIX_TIMESTAMP". Caller +# is responsible for invoking this immediately before exec. 
+_write_reexec_marker() { + local marker="$AIRC_WRITE_DIR/airc.reexec-marker" + printf '%d:%d\n' "$$" "$(date +%s)" 2>/dev/null > "$marker" || true +} CONFIG="$AIRC_WRITE_DIR/config.json" IDENTITY_DIR="$AIRC_WRITE_DIR/identity" PEERS_DIR="$AIRC_WRITE_DIR/peers" @@ -2182,11 +2198,13 @@ cmd_connect() { if [ -n "$_new_picked" ]; then echo " ✓ Another tab beat us to it — joining their fresh gist ($_new_picked)" echo "" + _write_reexec_marker exec env ${_preserved_name:+AIRC_NAME="$_preserved_name"} "$0" connect "$_new_picked" fi echo " Re-execing into host mode for #${resolved_room_name}..." echo "" + _write_reexec_marker exec env AIRC_NO_DISCOVERY=1 ${_preserved_name:+AIRC_NAME="$_preserved_name"} "$0" connect --room "$resolved_room_name" fi @@ -2386,6 +2404,7 @@ except Exception: echo " ✓ Another tab beat us to it — joining their fresh gist ($_new_picked)" echo "" # Re-exec as joiner pointing at the winner's gist. + _write_reexec_marker exec env ${_preserved_name:+AIRC_NAME="$_preserved_name"} "$0" connect "$_new_picked" fi @@ -2394,6 +2413,7 @@ except Exception: # exec replaces the current bash process. AIRC_NO_DISCOVERY=1 # prevents the new instance from re-finding the just-deleted gist # (gh's gist-list cache might still show it for a few seconds). + _write_reexec_marker exec env AIRC_NO_DISCOVERY=1 ${_preserved_name:+AIRC_NAME="$_preserved_name"} "$0" connect --room "$resolved_room_name" fi # Either not a room flow, or no gh, or no resolved_room_name → original die. 
@@ -2834,6 +2854,7 @@ JSON "$AIRC_WRITE_DIR/room_gist_id" \ "$AIRC_WRITE_DIR/room_name" local _preserved_name; _preserved_name=$(get_config_val name "") + _write_reexec_marker exec env ${_preserved_name:+AIRC_NAME="$_preserved_name"} "$0" connect "$_winner_id" fi fi @@ -5046,15 +5067,46 @@ _daemon_install_schtasks() { cwd_win=$(printf '%s' "$(pwd -P)" | sed 's|^/\([a-z]\)/|\U\1:\\\\|; s|/|\\\\|g') airc_bin_unix="$airc_bin" fi + # Marker path the daemon-launcher polls between iterations to + # distinguish "intentional re-exec into different mode" from "actual + # crash" (#203). airc itself writes this file via _write_reexec_marker + # right before any `exec env ... "$0" connect ...` call. On Windows + # MSYS-bash, exec is emulated as spawn-and-exit (not a true execve), + # so the launcher .bat sees the original bash exit while the new + # airc takes over — the marker tells the .bat to step aside instead + # of racing-respawn the new airc with another instance. + local marker_win + if command -v cygpath >/dev/null 2>&1; then + marker_win=$(cygpath -w "$scope/airc.reexec-marker") + else + marker_win=$(printf '%s' "$scope/airc.reexec-marker" | sed 's|^/\([a-z]\)/|\U\1:\\\\|; s|/|\\\\|g') + fi local launcher_bash="$scope/airc-daemon.bat" cat > "$launcher_bash" <nul 2>&1 + if not errorlevel 1 ( + echo [%date% %time%] airc re-exec'd into different mode ^(host-takeover or rejoin^); new process is now daemon, launcher exiting. >> daemon.err + del "$marker_win" >nul 2>&1 + exit /b 0 + ) +) echo [%date% %time%] airc connect exited. Restarting in 5s. >> daemon.err timeout /t 5 /nobreak >nul goto loop