From 987296fb123b14c84ca0e701b9d3be3fbee4512b Mon Sep 17 00:00:00 2001 From: shenw Date: Sun, 26 Apr 2026 23:30:57 -0700 Subject: [PATCH 1/3] plugin/scripts: Windows (Git Bash) support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five Git-Bash-on-Windows bugs prevented the plugin install + service auto-start from working. None of them affect Linux/macOS — every change is gated on `claude_smart_is_windows` (uname matches MINGW*/MSYS*/CYGWIN*). 1. uv install: the astral.sh bash installer downloads a zip and unzips it, but Git Bash's bundled unzip corrupts the Windows uv.exe (bad CRC on the inflated binary). On Windows, run the official PowerShell installer (install.ps1) instead — same destination (~/.local/bin/uv.exe), so the post-install PATH prepend works uniformly. 2. python3 App Execution Alias stub: on Windows, %LocalAppData%\Microsoft\WindowsApps\python3.exe is a Microsoft Store launcher stub. `command -v python3` returns truthy but invoking it prints "Python was not found" and exits non-zero. Add claude_smart_resolve_python which probes interpreters with `-V` to filter the stub out, preferring `python` (the real installed one) over `python3` on Windows. Used by smart-install.sh's settings.json patcher and by the spawn-detached fallback. 3. spawn_detached on Windows: Git Bash has no setsid, no process groups, and os.setsid() is POSIX-only — the existing python3-based fallback would never work even if a real Python were on PATH. Use plain nohup on Windows: ignoring SIGHUP is sufficient for the child to survive the parent console closing, and Windows doesn't have the zombie-on-parent-exit problem POSIX has. 4. kill_tree on Windows: kill -TERM -- "-pgid" (negative pid = process group) is a POSIX construct with no Windows equivalent. Use taskkill /T /F /PID, which walks the child-process tree via the parent-pid/job-object relationships. Subtlety: in Git Bash, $! 
for a backgrounded job returns the MSYS pid (an internal counter), not the native Windows pid taskkill needs. ps's default output exposes the WINPID column; awk extracts it. Service scripts now delegate kill_group to claude_smart_kill_tree so the logic stays in one place. 5. Service start scripts (backend-service.sh, dashboard-service.sh): the inline setsid/python3/nohup detach blocks would land in the broken python3 branch on Windows. Replace with the unified claude_smart_spawn_detached helper. PID-file recording is split: Windows records the spawned MSYS pid (translated to WINPID by kill_tree at signal time); POSIX still records the pgid where setsid guarantees pid==pgid, and falls back to ps -o pgid otherwise. Tested on Windows 11 / Git Bash (MINGW64): smart-install.sh now completes (uv install + uv sync + dashboard build), and claude_smart_spawn_detached + kill_tree round-trip a `ping -n 100` through start-alive-kill-gone end-to-end. Co-Authored-By: Claude Opus 4.7 (1M context) --- plugin/scripts/_lib.sh | 100 ++++++++++++++++++++++++++-- plugin/scripts/backend-service.sh | 49 ++++++-------- plugin/scripts/dashboard-service.sh | 43 +++++------- plugin/scripts/smart-install.sh | 30 +++++++-- 4 files changed, 159 insertions(+), 63 deletions(-) diff --git a/plugin/scripts/_lib.sh b/plugin/scripts/_lib.sh index 3ecfac0..cd6f63a 100644 --- a/plugin/scripts/_lib.sh +++ b/plugin/scripts/_lib.sh @@ -21,22 +21,112 @@ claude_smart_prepend_astral_bins() { export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH" } +# Return 0 (true) if running under a Windows-flavoured bash (Git Bash, +# MSYS, Cygwin). Used to gate POSIX-only primitives (setsid, process +# groups) and route around Windows-specific potholes (the python3 App +# Execution Alias stub at WindowsApps\python3.exe). 
+claude_smart_is_windows() { + case "$(uname -s 2>/dev/null)" in + MINGW*|MSYS*|CYGWIN*) return 0 ;; + *) return 1 ;; + esac +} + +# Print the absolute path of a working python interpreter, or nothing +# (and return non-zero) if none is usable. On Windows, `python3` is +# usually the Microsoft Store "App Execution Alias" stub at +# %LocalAppData%\Microsoft\WindowsApps\python3.exe — `command -v python3` +# returns truthy but invoking it just prints a "Python was not found" +# message and exits non-zero. We probe with `-V` to filter the stub out +# and prefer `python` (the real interpreter when one is installed). +claude_smart_resolve_python() { + if claude_smart_is_windows; then + for cand in python python3; do + if command -v "$cand" >/dev/null 2>&1 && "$cand" -V >/dev/null 2>&1; then + command -v "$cand" + return 0 + fi + done + return 1 + fi + for cand in python3 python; do + if command -v "$cand" >/dev/null 2>&1; then + command -v "$cand" + return 0 + fi + done + return 1 +} + # Spawn a command fully detached from the current shell so a hook timeout # (Claude Code's install/SessionStart budget) cannot kill it mid-flight. -# Picks the strongest available primitive: setsid → python3 os.setsid → nohup. -# Caller is responsible for redirecting stdout/stderr; we do not impose a -# log destination here. Stdin is closed so the child cannot inherit a tty. +# POSIX: setsid → python3 os.setsid → nohup (in that order of strength). +# Windows: nohup alone — Git Bash has no setsid, no process groups, and +# `os.setsid()` is POSIX-only; nohup ignores SIGHUP which is enough to +# survive the parent console closing. The python3 fallback is gated on a +# real-interpreter probe (-V) so the Windows App Execution Alias stub +# doesn't get invoked. Caller is responsible for redirecting stdout/stderr; +# we do not impose a log destination here. Stdin is closed so the child +# cannot inherit a tty. Use `$!` after this call to capture the pid. 
claude_smart_spawn_detached() { + if claude_smart_is_windows; then + nohup "$@" < /dev/null & + return 0 + fi if command -v setsid >/dev/null 2>&1; then setsid nohup "$@" < /dev/null & - elif command -v python3 >/dev/null 2>&1; then - python3 -c 'import os,sys; os.setsid(); os.execvp(sys.argv[1], sys.argv[1:])' \ + elif _CS_PY=$(claude_smart_resolve_python) && [ -n "$_CS_PY" ]; then + "$_CS_PY" -c 'import os,sys; os.setsid(); os.execvp(sys.argv[1], sys.argv[1:])' \ "$@" < /dev/null & else nohup "$@" < /dev/null & fi } +# Terminate a process and (on POSIX) its whole process group, escalating +# from TERM to KILL after a short grace period. On Windows there are no +# POSIX process groups, so we use `taskkill /T /F /PID` which walks the +# child-process tree via the Windows job-object/parent-pid relationships +# — the closest equivalent to a group kill. +# +# Windows-specific subtlety: in Git Bash / MSYS, `$!` for a backgrounded +# job returns the MSYS pid (an internal counter), NOT the native Windows +# pid that taskkill needs. The default `ps -p` listing exposes the WINPID +# column for the translation. If the lookup fails we fall back to +# treating the input as a native pid, so callers can pass either an MSYS +# pid (recorded via $!) or a Windows pid (from tasklist) interchangeably. +# The `//T //F //PID` syntax escapes Git Bash's MSYS path-mangling of +# arguments that begin with `/`. +claude_smart_kill_tree() { + pid="$1" + [ -z "$pid" ] && return 0 + if claude_smart_is_windows; then + # Git Bash's `ps` is the Cygwin/MSYS2 one, not procps or BSD ps; it has no + # -o option but its default header is `PID PPID PGID WINPID TTY ...`, + # so column 4 of the data row is the Windows pid. awk extracts it + # without depending on -o support. 
+ target="" + if command -v ps >/dev/null 2>&1; then + target=$(ps -p "$pid" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d ' \r\n' || true) + fi + [ -z "$target" ] && target="$pid" + if command -v taskkill >/dev/null 2>&1; then + taskkill //T //F //PID "$target" >/dev/null 2>&1 || true + else + kill -TERM "$pid" 2>/dev/null || true + sleep 0.5 + kill -KILL "$pid" 2>/dev/null || true + fi + return 0 + fi + kill -TERM -- "-$pid" 2>/dev/null || true + for _ in 1 2 3 4 5; do + kill -0 -- "-$pid" 2>/dev/null || return 0 + sleep 0.2 + done + kill -KILL -- "-$pid" 2>/dev/null || true +} + # Return 0 (true) if $1 names a pid file whose pid is currently alive. # Silent on missing/empty/stale files. claude_smart_pid_alive_file() { diff --git a/plugin/scripts/backend-service.sh b/plugin/scripts/backend-service.sh index b0a1fe3..00c2085 100755 --- a/plugin/scripts/backend-service.sh +++ b/plugin/scripts/backend-service.sh @@ -54,17 +54,10 @@ mkdir -p "$STATE_DIR" emit_ok() { echo '{"continue":true,"suppressOutput":true}'; } -# Kill a process group started via setsid. Same pattern as -# dashboard-service.sh: SIGTERM, short grace, SIGKILL. Silent on failure. +# Tree-kill the recorded process. Delegates to claude_smart_kill_tree +# (POSIX: signal the process group; Windows: taskkill /T /F /PID). kill_group() { - pgid="$1" - [ -z "$pgid" ] && return 0 - kill -TERM -- "-$pgid" 2>/dev/null || true - for _ in 1 2 3 4 5; do - kill -0 -- "-$pgid" 2>/dev/null || return 0 - sleep 0.2 - done - kill -KILL -- "-$pgid" 2>/dev/null || true + claude_smart_kill_tree "$1" } # True if /health returns 200. 
Reflexio's /health is a plain GET with no @@ -165,25 +158,27 @@ case "$CMD" in export INTERACTION_CLEANUP_THRESHOLD="${INTERACTION_CLEANUP_THRESHOLD:-500}" export INTERACTION_CLEANUP_DELETE_COUNT="${INTERACTION_CLEANUP_DELETE_COUNT:-200}" - # --no-reload: uvicorn's reloader forks a supervisor; makes PGID + # --no-reload: uvicorn's reloader forks a supervisor; makes # bookkeeping harder and we don't need hot-reload for a user-facing - # service. Same detach pattern as dashboard-service.sh. - if command -v setsid >/dev/null 2>&1; then - setsid nohup uv run --project "$PLUGIN_ROOT" --quiet \ - reflexio services start --only backend --no-reload \ - >>"$LOG_FILE" 2>&1 < /dev/null & - echo $! > "$PID_FILE" - elif command -v python3 >/dev/null 2>&1; then - python3 -c 'import os,sys; os.setsid(); os.execvp(sys.argv[1], sys.argv[1:])' \ - uv run --project "$PLUGIN_ROOT" --quiet \ - reflexio services start --only backend --no-reload \ - >>"$LOG_FILE" 2>&1 < /dev/null & - echo $! > "$PID_FILE" + # service. Detach via claude_smart_spawn_detached so the same code + # path covers Linux (setsid), macOS (python3 os.setsid), and Windows + # (nohup; no process groups). Caller-side stdout/stderr redirection + # works across all three primitives — Git Bash routes the > and 2>&1 + # through to the underlying CRT before nohup execs the child. + claude_smart_spawn_detached uv run --project "$PLUGIN_ROOT" --quiet \ + reflexio services start --only backend --no-reload \ + >>"$LOG_FILE" 2>&1 + svc_pid=$! + if claude_smart_is_windows; then + # Windows has no process groups; record the spawned pid directly. + # claude_smart_kill_tree uses taskkill /T to walk the child tree. + echo "$svc_pid" > "$PID_FILE" + elif command -v setsid >/dev/null 2>&1; then + # setsid made the child its own session/group leader, so pid==pgid. 
+ echo "$svc_pid" > "$PID_FILE" else - nohup uv run --project "$PLUGIN_ROOT" --quiet \ - reflexio services start --only backend --no-reload \ - >>"$LOG_FILE" 2>&1 < /dev/null & - svc_pid=$! + # macOS / fallback path: python3 os.setsid or bare nohup. Derive + # the real pgid via ps so kill_group can signal the whole tree. actual_pgid="" if command -v ps >/dev/null 2>&1; then actual_pgid=$(ps -o pgid= -p "$svc_pid" 2>/dev/null | tr -d ' ') diff --git a/plugin/scripts/dashboard-service.sh b/plugin/scripts/dashboard-service.sh index 419a864..6baaabc 100755 --- a/plugin/scripts/dashboard-service.sh +++ b/plugin/scripts/dashboard-service.sh @@ -36,18 +36,10 @@ mkdir -p "$STATE_DIR" emit_ok() { echo '{"continue":true,"suppressOutput":true}'; } -# Kill a process group started via setsid. Sends SIGTERM, waits briefly, -# then SIGKILL. Silent on failure — the PID file may point at a process -# that already exited. +# Tree-kill the recorded process. Delegates to claude_smart_kill_tree +# (POSIX: signal the process group; Windows: taskkill /T /F /PID). kill_group() { - pgid="$1" - [ -z "$pgid" ] && return 0 - kill -TERM -- "-$pgid" 2>/dev/null || true - for _ in 1 2 3 4 5; do - kill -0 -- "-$pgid" 2>/dev/null || return 0 - sleep 0.2 - done - kill -KILL -- "-$pgid" 2>/dev/null || true + claude_smart_kill_tree "$1" } # True if the marker header served by app/api/health is present on the @@ -126,23 +118,20 @@ case "$CMD" in cd "$DASHBOARD_DIR" - # Detach so the hook returns immediately, and put the child in its own - # session so kill_group can signal the whole tree via a negative PID. - # - Linux: setsid is standard. - # - macOS: setsid is not installed; use python3 (ships with the OS) - # to call os.setsid() before execing npm, which makes the child - # session/group leader with PID==PGID. - # - Fallback: bare nohup, then derive the real PGID via ps -o pgid. - if command -v setsid >/dev/null 2>&1; then - setsid nohup npm run start >>"$LOG_FILE" 2>&1 < /dev/null & - echo $! 
> "$PID_FILE" - elif command -v python3 >/dev/null 2>&1; then - python3 -c 'import os,sys; os.setsid(); os.execvp(sys.argv[1], sys.argv[1:])' \ - npm run start >>"$LOG_FILE" 2>&1 < /dev/null & - echo $! > "$PID_FILE" + # Detach so the hook returns immediately. claude_smart_spawn_detached + # picks the strongest primitive available: + # - Linux: setsid (puts child in its own session/group, pid==pgid). + # - macOS: python3 os.setsid + execvp (same effect as setsid). + # - Windows: nohup alone (no process groups; tree-kill via taskkill). + # Caller-side `>>file 2>&1` redirection is honoured before the child + # detaches, so per-OS log paths stay identical. + claude_smart_spawn_detached npm run start >>"$LOG_FILE" 2>&1 + dash_pid=$! + if claude_smart_is_windows; then + echo "$dash_pid" > "$PID_FILE" + elif command -v setsid >/dev/null 2>&1; then + echo "$dash_pid" > "$PID_FILE" else - nohup npm run start >>"$LOG_FILE" 2>&1 < /dev/null & - dash_pid=$! actual_pgid="" if command -v ps >/dev/null 2>&1; then actual_pgid=$(ps -o pgid= -p "$dash_pid" 2>/dev/null | tr -d ' ') diff --git a/plugin/scripts/smart-install.sh b/plugin/scripts/smart-install.sh index 068812b..8a74c39 100755 --- a/plugin/scripts/smart-install.sh +++ b/plugin/scripts/smart-install.sh @@ -43,13 +43,29 @@ fi if ! command -v uv >/dev/null 2>&1; then echo "[claude-smart] uv not found — installing from astral.sh..." >&2 - if ! curl -LsSf https://astral.sh/uv/install.sh | sh >&2; then - write_failure "uv install failed — install manually from https://docs.astral.sh/uv/" + # The astral.sh bash installer downloads a zip and unzips it. On + # Windows-flavoured bash (Git Bash / MSYS) the bundled `unzip` corrupts + # the Windows uv binary (bad CRC on the inflated uv.exe), leaving the + # install half-finished. 
Use the official PowerShell installer + # (install.ps1) on Windows, which writes uv.exe to ~/.local/bin + # natively — same destination the bash installer targets on POSIX, so + # claude_smart_prepend_astral_bins picks it up uniformly afterwards. + if claude_smart_is_windows; then + if ! command -v powershell >/dev/null 2>&1; then + write_failure "uv install needs PowerShell on Windows but powershell is not on PATH — install uv manually from https://docs.astral.sh/uv/" + fi + if ! powershell -NoProfile -ExecutionPolicy Bypass -Command "irm https://astral.sh/uv/install.ps1 | iex" >&2; then + write_failure "uv install via PowerShell failed — install manually from https://docs.astral.sh/uv/" + fi + else + if ! curl -LsSf https://astral.sh/uv/install.sh | sh >&2; then + write_failure "uv install failed — install manually from https://docs.astral.sh/uv/" + fi fi claude_smart_prepend_astral_bins if ! command -v uv >/dev/null 2>&1; then UV_FOUND="" - for candidate in "$HOME/.local/bin/uv" "$HOME/.cargo/bin/uv" "$HOME/bin/uv"; do + for candidate in "$HOME/.local/bin/uv" "$HOME/.local/bin/uv.exe" "$HOME/.cargo/bin/uv" "$HOME/bin/uv"; do if [ -x "$candidate" ]; then UV_FOUND="$candidate" break @@ -103,9 +119,15 @@ fi # Allowlist cs-cite globally so Claude's citation Bash calls don't pop a # permission prompt mid-turn. Idempotent: no-ops when the entry is already # present. Uses Python to preserve the rest of settings.json intact. +# Resolves python via claude_smart_resolve_python so we don't fire the +# Windows App Execution Alias stub (which exits non-zero with "Python +# was not found" when no real interpreter is installed). 
CLAUDE_SETTINGS="$HOME/.claude/settings.json" mkdir -p "$(dirname "$CLAUDE_SETTINGS")" -if python3 - "$CLAUDE_SETTINGS" <<'PY' >&2 +PY_BIN=$(claude_smart_resolve_python || true) +if [ -z "$PY_BIN" ]; then + echo "[claude-smart] WARNING: no working python interpreter found; skipping cs-cite allowlist" >&2 +elif "$PY_BIN" - "$CLAUDE_SETTINGS" <<'PY' >&2 import json import sys from pathlib import Path From 5f6c2581c5803c5284ba3e2bdabb3d7963bb9139 Mon Sep 17 00:00:00 2001 From: Yi Lu Date: Sun, 3 May 2026 09:39:09 -0700 Subject: [PATCH 2/3] plugin/scripts: verify POSIX python runtime --- plugin/scripts/_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/scripts/_lib.sh b/plugin/scripts/_lib.sh index cd6f63a..b4ccd82 100644 --- a/plugin/scripts/_lib.sh +++ b/plugin/scripts/_lib.sh @@ -50,7 +50,7 @@ claude_smart_resolve_python() { return 1 fi for cand in python3 python; do - if command -v "$cand" >/dev/null 2>&1; then + if command -v "$cand" >/dev/null 2>&1 && "$cand" -V >/dev/null 2>&1; then command -v "$cand" return 0 fi From f0f05f60e991b07e4b2892e7ab5657ffdf2ceff7 Mon Sep 17 00:00:00 2001 From: Yi Lu Date: Sun, 3 May 2026 09:53:55 -0700 Subject: [PATCH 3/3] plugin/scripts: avoid macos cleanup self-termination --- plugin/scripts/_lib.sh | 14 +++++++++++++- plugin/scripts/backend-service.sh | 21 +++++---------------- plugin/scripts/dashboard-service.sh | 16 +++++----------- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/plugin/scripts/_lib.sh b/plugin/scripts/_lib.sh index b4ccd82..f10bfbf 100644 --- a/plugin/scripts/_lib.sh +++ b/plugin/scripts/_lib.sh @@ -119,7 +119,19 @@ claude_smart_kill_tree() { fi return 0 fi - kill -TERM -- "-$pid" 2>/dev/null || true + current_pgid="" + if command -v ps >/dev/null 2>&1; then + current_pgid=$(ps -o pgid= -p "$$" 2>/dev/null | tr -d ' ') + fi + if [ -n "$current_pgid" ] && [ "$pid" = "$current_pgid" ]; then + return 0 + fi + if ! 
kill -TERM -- "-$pid" 2>/dev/null; then + kill -TERM "$pid" 2>/dev/null || true + sleep 0.5 + kill -KILL "$pid" 2>/dev/null || true + return 0 + fi for _ in 1 2 3 4 5; do kill -0 -- "-$pid" 2>/dev/null || return 0 sleep 0.2 diff --git a/plugin/scripts/backend-service.sh b/plugin/scripts/backend-service.sh index 00c2085..3c7444a 100755 --- a/plugin/scripts/backend-service.sh +++ b/plugin/scripts/backend-service.sh @@ -169,22 +169,11 @@ case "$CMD" in reflexio services start --only backend --no-reload \ >>"$LOG_FILE" 2>&1 svc_pid=$! - if claude_smart_is_windows; then - # Windows has no process groups; record the spawned pid directly. - # claude_smart_kill_tree uses taskkill /T to walk the child tree. - echo "$svc_pid" > "$PID_FILE" - elif command -v setsid >/dev/null 2>&1; then - # setsid made the child its own session/group leader, so pid==pgid. - echo "$svc_pid" > "$PID_FILE" - else - # macOS / fallback path: python3 os.setsid or bare nohup. Derive - # the real pgid via ps so kill_group can signal the whole tree. - actual_pgid="" - if command -v ps >/dev/null 2>&1; then - actual_pgid=$(ps -o pgid= -p "$svc_pid" 2>/dev/null | tr -d ' ') - fi - echo "${actual_pgid:-$svc_pid}" > "$PID_FILE" - fi + # Record the spawned pid, not a pgid sampled with ps. On POSIX, + # setsid/python os.setsid make this pid the new process group leader; + # sampling immediately can race and capture the caller's pgid instead. + # On Windows, claude_smart_kill_tree translates the MSYS pid to WINPID. + echo "$svc_pid" > "$PID_FILE" # Give uvicorn up to ~10s to answer /health. The very first boot # after a fresh checkout may be slower (LiteLLM import, chromadb diff --git a/plugin/scripts/dashboard-service.sh b/plugin/scripts/dashboard-service.sh index 6baaabc..a10eeef 100755 --- a/plugin/scripts/dashboard-service.sh +++ b/plugin/scripts/dashboard-service.sh @@ -127,17 +127,11 @@ case "$CMD" in # detaches, so per-OS log paths stay identical. 
claude_smart_spawn_detached npm run start >>"$LOG_FILE" 2>&1 dash_pid=$! - if claude_smart_is_windows; then - echo "$dash_pid" > "$PID_FILE" - elif command -v setsid >/dev/null 2>&1; then - echo "$dash_pid" > "$PID_FILE" - else - actual_pgid="" - if command -v ps >/dev/null 2>&1; then - actual_pgid=$(ps -o pgid= -p "$dash_pid" 2>/dev/null | tr -d ' ') - fi - echo "${actual_pgid:-$dash_pid}" > "$PID_FILE" - fi + # Record the spawned pid, not a pgid sampled with ps. On POSIX, + # setsid/python os.setsid make this pid the new process group leader; + # sampling immediately can race and capture the caller's pgid instead. + # On Windows, claude_smart_kill_tree translates the MSYS pid to WINPID. + echo "$dash_pid" > "$PID_FILE" emit_ok ;; stop)