From 7fbf3f038aa56d665eaa75ca9f691000c372c018 Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Mon, 27 Apr 2026 23:39:45 -0500 Subject: [PATCH 1/9] fix(install.sh): stage payload as .ps1 file + ssh-keygen -A for hostkey ACLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Windows install bugs (items 1 and 2), plus one bash-side hardening (item 3), found via Mac↔Windows Claude debug loop on issue #196 (continuum-b69f testing on real Windows MINGW64): 1. **Inline payload mangled by 4-layer quote escaping.** Pre-fix: `... -ArgumentList '-NoProfile -Command "$_elevated_payload"'` The payload contained many "" (PS strings) and \\ (registry paths); bash double-quoted → ps outer -Command → Start-Process ArgumentList single-quoted → inner -Command double-quoted. Each layer ate quotes differently. PowerShell never parsed the payload: the elevated window opened, ran nothing, and closed silently. No transcript ever written. Joel saw an "OpenSSH installed + started" success message contradicted by a missing-transcript warning on the same run. Fix: stage payload as a .ps1 file in $CLONE_DIR, run via `Start-Process powershell ... -File <path-to-.ps1>`. Zero-quoting on the boundary; the .ps1 file is plain PowerShell and quotes/backslashes work natively. 2. **sshd Start-Service fails with WIN32_EXIT_CODE 1067 ("terminated unexpectedly") on every fresh Windows OpenSSH install** because host-key files exist with overly-permissive ACLs (Authenticated Users / BUILTIN\Users / Everyone). sshd refuses to load them ("sshd: no hostkeys available -- exiting"). Fix: add `ssh-keygen -A` to the elevated payload between the capability install and Start-Service. Idempotent — generates missing host keys AND restores correct ACLs (SYSTEM + Admins only) on existing ones. continuum-b69f's diagnosis. 3. **Bash side now re-queries sshd state post-elevation** as belt-and-suspenders. Previous behavior printed "OpenSSH installed + started" if the elevated payload's exit code was 0, even when no transcript was written and sshd wasn't actually running. 
The silent-success- while-broken path was the worst version of this bug. Now: bash calls `Get-Service sshd` from non-elevated PS; if state isn't "Running" it surfaces a "partial install" warning even when elevated exit was 0. Verified by continuum-b69f on real Windows MINGW64: PR #195 (which this PR builds on) now produces a complete transcript dumped to bash terminal. Without the ssh-keygen -A addition though, sshd Start-Service still failed in his run — that's what this PR adds. --- install.sh | 79 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/install.sh b/install.sh index ed71b09..c2bd2c1 100755 --- a/install.sh +++ b/install.sh @@ -243,18 +243,44 @@ _ensure_sshd_running() { # blinks for a half second so i have no idea"). Log lives at # $env:TEMP\airc-install-elevated.log; bash side surfaces it # below regardless of success/failure. - local _elevated_payload=' + # Stage payload as a .ps1 file in $CLONE_DIR (Joel + continuum-b69f + # 2026-04-28). Pre-fix: payload was inlined as + # ... -ArgumentList '-NoProfile -Command "$_elevated_payload"' + # but the payload itself contains many "" (PowerShell strings) and + # \\ (registry paths). Four layers of escaping (bash-double, ps1- + # outer-Command, Start-Process-ArgumentList-single, inner-Command- + # double) silently mangled the payload — PowerShell never parsed it, + # the elevated window opened, ran nothing, exited silently, no + # transcript ever written. continuum verified the .ps1 file approach + # writes a clean transcript every time. + local _elevated_ps1="$CLONE_DIR/install-elevated.ps1" + mkdir -p "$CLONE_DIR" + cat > "$_elevated_ps1" <<'PSPAYLOAD' $ErrorActionPreference = "Stop"; -# Use [System.IO.Path]::GetTempPath() not $env:TEMP — when called from -# Git Bash, the inherited TEMP env var can be the bash-side /tmp, not -# the Windows user temp directory. 
GetTempPath() asks the OS directly -# (resolves to %LOCALAPPDATA%\Temp on Windows) regardless of the env. +# [System.IO.Path]::GetTempPath() asks the OS directly (no env-var +# inheritance surprises). On a UAC-elevated process this resolves to +# the user's %LOCALAPPDATA%\Temp. $logPath = Join-Path ([System.IO.Path]::GetTempPath()) "airc-install-elevated.log"; Start-Transcript -Path $logPath -Force | Out-Null; try { Write-Host "==> OpenSSH.Server capability"; $cap = Get-WindowsCapability -Online -Name "OpenSSH.Server*"; if ($cap.State -ne "Installed") { Add-WindowsCapability -Online -Name $cap.Name | Out-Null; Write-Host " installed: $($cap.Name)" } else { Write-Host " already installed" } + Write-Host "==> SSH host keys + ACLs (ssh-keygen -A)"; + # continuum-b69f 2026-04-28: every fresh Windows OpenSSH install has + # a documented bug where sshd refuses to start with "no hostkeys + # available" because the host key files exist but have overly- + # permissive ACLs (Authenticated Users / BUILTIN\Users / Everyone). + # ssh-keygen -A is idempotent: generates missing host keys AND + # restores correct ACLs on existing ones (SYSTEM + Administrators + # only). Without this, Start-Service sshd fails with WIN32_EXIT_CODE + # 1067 (terminated unexpectedly) on every fresh-install machine. 
+ $sshKeygen = Join-Path $env:WINDIR "System32\OpenSSH\ssh-keygen.exe"; + if (Test-Path $sshKeygen) { + & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " $_" }; + } else { + Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen — sshd may fail to start"; + } Write-Host "==> HNS port-22 reservation"; $reg = (Get-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Services\hns\State" -Name "EnableExcludedPortRange" -ErrorAction SilentlyContinue).EnableExcludedPortRange; if ($reg -ne 0) { reg add "HKLM\SYSTEM\CurrentControlSet\Services\hns\State" /v "EnableExcludedPortRange" /d 0 /f | Out-Null; Write-Host " HNS auto-exclusion disabled" } else { Write-Host " HNS auto-exclusion already off" } @@ -295,7 +321,16 @@ try { Stop-Transcript | Out-Null; } exit $global:LASTEXITCODE; -' +PSPAYLOAD + + # Translate the .ps1 path to Windows form for Start-Process -File. + local _elevated_ps1_win + if command -v cygpath >/dev/null 2>&1; then + _elevated_ps1_win=$(cygpath -w "$_elevated_ps1" 2>/dev/null) + else + # Fallback: /c/Users/foo/.airc-src/install-elevated.ps1 → C:\Users\foo\.airc-src\install-elevated.ps1 + _elevated_ps1_win=$(printf '%s' "$_elevated_ps1" | sed 's|^/\([a-z]\)/|\U\1:\\\\|; s|/|\\\\|g') + fi case "$_state" in Running) ok "sshd running (Windows OpenSSH.Server)" @@ -320,12 +355,14 @@ exit $global:LASTEXITCODE; # MSYS-style sed translation: 'C:\Users\...' → '/c/Users/...' _ps_log_bash=$(printf '%s' "$_ps_log_win" | sed 's|\\|/|g; s|^\([A-Za-z]\):|/\L\1|') fi - info " elevated log: $_ps_log_win (also at $_ps_log_bash from Git Bash)" - # Run the elevated payload. Start-Process exits 0 if it could - # launch the elevated process; the payload's own exit code is - # what we care about (it explicitly `exit $LASTEXITCODE`s based - # on try/catch). 
- powershell.exe -NoProfile -Command "Start-Process powershell -Verb RunAs -Wait -ArgumentList '-NoProfile -Command \"$_elevated_payload\"'" 2>&1 \ + info " elevated payload: $_elevated_ps1_win" + info " elevated log: $_ps_log_win" + info " (bash log path: $_ps_log_bash)" + # Run the elevated payload via -File (no quoting hell). Start- + # Process -Wait propagates the elevated process's exit code. + # -ExecutionPolicy Bypass so the elevated PS doesn't refuse + # the unsigned .ps1. + powershell.exe -NoProfile -Command "Start-Process powershell -Verb RunAs -Wait -ArgumentList @('-NoProfile','-ExecutionPolicy','Bypass','-File','$_elevated_ps1_win')" 2>&1 \ || _elev_rc=$? # Always dump the transcript — success or failure, the user # sees what happened. If transcript file is missing, the @@ -347,10 +384,22 @@ exit $global:LASTEXITCODE; else warn " Elevated transcript not written — UAC denied, or Start-Process failed." fi - if [ "$_elev_rc" = "0" ]; then - ok "OpenSSH.Server installed + started + HNS port-22 reserved + auto-start + DefaultShell=bash." + # Belt-and-suspenders: re-query sshd state from non-elevated PS + # (continuum-b69f 2026-04-28). If the elevated payload claimed + # exit 0 but sshd isn't actually Running, surface that — the + # silent-success-while-broken path was the worst version of + # this bug. The Get-Service call is cheap; doing it always + # is fine. + local _post_state + _post_state=$(powershell.exe -NoProfile -Command "(Get-Service sshd -ErrorAction SilentlyContinue).Status" 2>/dev/null | tr -d '\r ') + if [ "$_elev_rc" = "0" ] && [ "$_post_state" = "Running" ]; then + ok "OpenSSH.Server installed + sshd Running + HNS port-22 reserved + auto-start + DefaultShell=bash." + elif [ "$_elev_rc" = "0" ]; then + warn "Elevated payload exit 0 but sshd state is '$_post_state' — partial install." + warn " Re-run install or check elevated log: $_ps_log_win" + _elev_rc=1 else - warn "Elevated payload failed (exit $_elev_rc). See log above." 
+ warn "Elevated payload failed (exit $_elev_rc, sshd state '$_post_state'). See log above." warn "Manual fix (admin PowerShell):" warn " Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0" warn " reg add HKLM\\SYSTEM\\CurrentControlSet\\Services\\hns\\State /v EnableExcludedPortRange /d 0 /f" From 734bb8410a2ce302ad9a4e1ddefa6e4cecf4924b Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Mon, 27 Apr 2026 23:50:41 -0500 Subject: [PATCH 2/9] fix(install.sh): kill em-dash + drop global try/catch + parse-check before UAC Three real bugs hiding behind one symptom on continuum-b69f's Windows machine: install reported "OpenSSH installed + started" while sshd was actually crashloop-stopped with exit 1067 ("no hostkeys available"). Joel called it "amateur try/catch" -- he was right. 1. Em-dash (U+2014) in a string literal mis-parsed under cp1252. PowerShell 5.1 reads BOMless .ps1 files as the system codepage (cp1252 on most Windows). UTF-8 em-dash is bytes E2 80 94. Byte 94 in cp1252 is RIGHT-DOUBLE-QUOTATION-MARK. Parser sees "...$path " ...rest" -- treats the trailing 94 as a closing string quote and the rest of the file fails to parse. Nothing executes. No log written. Elevated window blinks closed silently. Fix: heredoc is now ASCII-only AND we prepend a UTF-8 BOM as defense-in-depth so future edits don't regress. 2. Global try/catch + $ErrorActionPreference = "Stop" hid the parse error completely. The parse error happens BEFORE Start-Transcript runs -- nothing in the try/catch could catch it because the parser never reaches the try at all. The bash side saw "no transcript written" and printed the misleading "UAC denied or Start-Process failed" warning. Fix: drop both. Each step runs plainly. PowerShell prints native errors to the transcript and execution continues. Bash side already re-queries Get-Service sshd post-elevation as the source- of-truth verdict, so we don't need the script's exit code to lie about success. 3. 
Parse errors didn't surface until after UAC. Fix: bash side now runs [Parser]::ParseFile on the staged .ps1 from a non-elevated process before Start-Process is called. If any parse errors exist, we print them and abort -- no UAC prompt, no silent close, the user sees exactly what's wrong. Per Joel: "we prefer parser issues to actually error" -- this is how they actually error. Verified locally on continuum-b69f's box: new payload parses clean (456 tokens, no errors). Will end-to-end-test next. --- install.sh | 172 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 111 insertions(+), 61 deletions(-) diff --git a/install.sh b/install.sh index c2bd2c1..41fcaba 100755 --- a/install.sh +++ b/install.sh @@ -255,75 +255,104 @@ _ensure_sshd_running() { # writes a clean transcript every time. local _elevated_ps1="$CLONE_DIR/install-elevated.ps1" mkdir -p "$CLONE_DIR" + # NOTE: keep this heredoc ASCII-only. PowerShell 5.1 reads BOMless + # .ps1 files as the system codepage (cp1252 on most Windows). A + # UTF-8 em-dash (0xE2 0x80 0x94) ends in byte 0x94, which in + # cp1252 is RIGHT-DOUBLE-QUOTATION-MARK -- the parser sees it as + # a closing string quote and the rest of the file fails to parse. + # We also add a UTF-8 BOM below as defense-in-depth, AND the bash + # side runs a parse-check pass before invoking elevation so any + # parser error fails loud (no silent .ps1 launch). cat > "$_elevated_ps1" <<'PSPAYLOAD' -$ErrorActionPreference = "Stop"; -# [System.IO.Path]::GetTempPath() asks the OS directly (no env-var -# inheritance surprises). On a UAC-elevated process this resolves to -# the user's %LOCALAPPDATA%\Temp. 
$logPath = Join-Path ([System.IO.Path]::GetTempPath()) "airc-install-elevated.log"; Start-Transcript -Path $logPath -Force | Out-Null; -try { - Write-Host "==> OpenSSH.Server capability"; - $cap = Get-WindowsCapability -Online -Name "OpenSSH.Server*"; - if ($cap.State -ne "Installed") { Add-WindowsCapability -Online -Name $cap.Name | Out-Null; Write-Host " installed: $($cap.Name)" } else { Write-Host " already installed" } - Write-Host "==> SSH host keys + ACLs (ssh-keygen -A)"; - # continuum-b69f 2026-04-28: every fresh Windows OpenSSH install has - # a documented bug where sshd refuses to start with "no hostkeys - # available" because the host key files exist but have overly- - # permissive ACLs (Authenticated Users / BUILTIN\Users / Everyone). - # ssh-keygen -A is idempotent: generates missing host keys AND - # restores correct ACLs on existing ones (SYSTEM + Administrators - # only). Without this, Start-Service sshd fails with WIN32_EXIT_CODE - # 1067 (terminated unexpectedly) on every fresh-install machine. - $sshKeygen = Join-Path $env:WINDIR "System32\OpenSSH\ssh-keygen.exe"; - if (Test-Path $sshKeygen) { - & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " $_" }; + +# No global try/catch, no $ErrorActionPreference = "Stop". Each step +# runs plainly; if a cmdlet errors, PowerShell prints the error to the +# transcript and execution continues. Bash side detects success/failure +# from Get-Service sshd post-check, not from this script's exit code. +# Anything wrapped in try/catch below is wrapped because the failure is +# *expected* and *recoverable* (e.g. ssh-keygen missing -> warn + skip). 
+ +Write-Host "==> OpenSSH.Server capability"; +$cap = Get-WindowsCapability -Online -Name "OpenSSH.Server*"; +if ($cap.State -ne "Installed") { + Add-WindowsCapability -Online -Name $cap.Name | Out-Null; + Write-Host " installed: $($cap.Name)" +} else { Write-Host " already installed" } + +Write-Host "==> SSH host keys + ACLs (ssh-keygen -A)"; +# ssh-keygen -A is idempotent: generates missing host keys AND restores +# correct ACLs on existing ones (SYSTEM + Administrators only). Without +# this, Start-Service sshd fails with exit 1067 ("sshd: no hostkeys +# available") on every fresh-install machine because post-capability +# install the host keys can have overly-permissive ACLs. +$sshKeygen = Join-Path $env:WINDIR "System32\OpenSSH\ssh-keygen.exe"; +if (Test-Path $sshKeygen) { + & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " $_" } +} else { + Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen -- sshd will fail to start" +} + +Write-Host "==> HNS port-22 reservation"; +$reg = (Get-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Services\hns\State" -Name "EnableExcludedPortRange" -ErrorAction SilentlyContinue).EnableExcludedPortRange; +if ($reg -ne 0) { + reg add "HKLM\SYSTEM\CurrentControlSet\Services\hns\State" /v "EnableExcludedPortRange" /d 0 /f | Out-Null; + Write-Host " HNS auto-exclusion disabled" +} else { Write-Host " HNS auto-exclusion already off" } +$excl = netsh int ipv4 show excludedportrange protocol=tcp | Out-String; +if ($excl -notmatch "(?m)^\s*22\s+22\b") { + netsh int ipv4 add excludedportrange protocol=tcp startport=22 numberofports=1 | Out-Null; + Write-Host " port 22 reserved in static excluded-port-range" +} else { Write-Host " port 22 already reserved" } + +Write-Host "==> Firewall rule (TCP/22 inbound)"; +if (-not (Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue)) { + New-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -DisplayName "OpenSSH Server (sshd)" -Enabled True -Direction Inbound 
-Protocol TCP -Action Allow -LocalPort 22 | Out-Null; + Write-Host " inbound TCP/22 rule created" +} else { Write-Host " inbound TCP/22 rule already exists" } + +Write-Host "==> sshd service (start + auto-start on boot)"; +Start-Service sshd; +Set-Service -Name sshd -StartupType Automatic; +Write-Host " Get-Service sshd: $((Get-Service sshd).Status)"; + +Write-Host "==> DefaultShell registry (bash for joiners)"; +$bashCandidates = @("C:\Program Files\Git\bin\bash.exe", "C:\Program Files (x86)\Git\bin\bash.exe", "$env:USERPROFILE\AppData\Local\Programs\Git\bin\bash.exe"); +$bashPath = $null; +foreach ($c in $bashCandidates) { if (Test-Path $c) { $bashPath = $c; break } } +if (-not $bashPath) { $cmd = Get-Command bash.exe -ErrorAction SilentlyContinue; if ($cmd) { $bashPath = $cmd.Source } } +if (-not $bashPath) { + Write-Host " WARN: bash.exe not found; DefaultShell left at OS default. Install Git for Windows + re-run." +} else { + $cur = (Get-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -ErrorAction SilentlyContinue).DefaultShell; + if ($cur -eq $bashPath) { + Write-Host " DefaultShell already $bashPath" } else { - Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen — sshd may fail to start"; + if (-not (Test-Path "HKLM:\SOFTWARE\OpenSSH")) { New-Item -Path "HKLM:\SOFTWARE\OpenSSH" -Force | Out-Null } + New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value $bashPath -PropertyType String -Force | Out-Null; + Write-Host " DefaultShell -> $bashPath" } - Write-Host "==> HNS port-22 reservation"; - $reg = (Get-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Services\hns\State" -Name "EnableExcludedPortRange" -ErrorAction SilentlyContinue).EnableExcludedPortRange; - if ($reg -ne 0) { reg add "HKLM\SYSTEM\CurrentControlSet\Services\hns\State" /v "EnableExcludedPortRange" /d 0 /f | Out-Null; Write-Host " HNS auto-exclusion disabled" } else { Write-Host " HNS auto-exclusion already off" } - $excl = netsh int ipv4 show 
excludedportrange protocol=tcp | Out-String; - if ($excl -notmatch "(?m)^\s*22\s+22\b") { netsh int ipv4 add excludedportrange protocol=tcp startport=22 numberofports=1 | Out-Null; Write-Host " port 22 reserved in static excluded-port-range" } else { Write-Host " port 22 already reserved" } - Write-Host "==> Firewall rule"; - if (-not (Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue)) { - New-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -DisplayName "OpenSSH Server (sshd)" -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 | Out-Null; - Write-Host " inbound TCP/22 rule created" - } else { Write-Host " inbound TCP/22 rule already exists" } - Write-Host "==> sshd service"; - Start-Service sshd; - Set-Service -Name sshd -StartupType Automatic; - Write-Host " started + auto-start on boot"; - Write-Host "==> DefaultShell registry"; - $bashCandidates = @("C:\Program Files\Git\bin\bash.exe", "C:\Program Files (x86)\Git\bin\bash.exe", "$env:USERPROFILE\AppData\Local\Programs\Git\bin\bash.exe"); - $bashPath = $null; - foreach ($c in $bashCandidates) { if (Test-Path $c) { $bashPath = $c; break } } - if (-not $bashPath) { $cmd = Get-Command bash.exe -ErrorAction SilentlyContinue; if ($cmd) { $bashPath = $cmd.Source } } - if ($bashPath) { - $cur = (Get-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -ErrorAction SilentlyContinue).DefaultShell; - if ($cur -ne $bashPath) { - if (-not (Test-Path "HKLM:\SOFTWARE\OpenSSH")) { New-Item -Path "HKLM:\SOFTWARE\OpenSSH" -Force | Out-Null } - New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value $bashPath -PropertyType String -Force | Out-Null; - Write-Host " DefaultShell -> $bashPath" - } else { Write-Host " DefaultShell already $bashPath" } - } else { Write-Host " WARN: bash.exe not found; DefaultShell left at OS default (cmd.exe). Install Git for Windows + re-run." 
} - Write-Host ""; - Write-Host "airc: sshd ready (capability + HNS + firewall + service auto-start + DefaultShell=bash)"; - $global:LASTEXITCODE = 0; -} catch { - Write-Host ""; - Write-Host "airc-elevated-error: $_"; - Write-Host "Stack trace:"; - Write-Host $_.ScriptStackTrace; - $global:LASTEXITCODE = 1; -} finally { - Stop-Transcript | Out-Null; } -exit $global:LASTEXITCODE; + +Write-Host ""; +Write-Host "airc: elevated install steps complete"; +Stop-Transcript | Out-Null; +exit 0; PSPAYLOAD - # Translate the .ps1 path to Windows form for Start-Process -File. + # Defense-in-depth: prepend a UTF-8 BOM so PowerShell 5.1 reads + # the .ps1 as UTF-8 (not cp1252). Heredoc is ASCII-only so this + # is just insurance for future edits. + if [ -f "$_elevated_ps1" ]; then + local _tmp_bom="$_elevated_ps1.bom" + printf '\xEF\xBB\xBF' > "$_tmp_bom" + cat "$_elevated_ps1" >> "$_tmp_bom" + mv "$_tmp_bom" "$_elevated_ps1" + fi + + # Translate the .ps1 path to Windows form for Start-Process -File + # and the parse-check below. local _elevated_ps1_win if command -v cygpath >/dev/null 2>&1; then _elevated_ps1_win=$(cygpath -w "$_elevated_ps1" 2>/dev/null) @@ -331,6 +360,27 @@ PSPAYLOAD # Fallback: /c/Users/foo/.airc-src/install-elevated.ps1 → C:\Users\foo\.airc-src\install-elevated.ps1 _elevated_ps1_win=$(printf '%s' "$_elevated_ps1" | sed 's|^/\([a-z]\)/|\U\1:\\\\|; s|/|\\\\|g') fi + + # Pre-flight parse-check: catch syntax errors in the staged .ps1 + # BEFORE we trigger UAC. Without this, a parser error means the + # elevated window opens, fails to parse, blinks closed, no log + # is written, bash side reports "transcript not written" and the + # user has no idea what went wrong (Joel 2026-04-28: "we prefer + # parser issues to actually error" -- this is how we make them + # actually error). Parser errors here abort the install loud. 
+ local _parse_errs + _parse_errs=$(powershell.exe -NoProfile -Command " + \$tokens = \$null; \$errors = \$null; + [System.Management.Automation.Language.Parser]::ParseFile('$_elevated_ps1_win', [ref]\$tokens, [ref]\$errors) | Out-Null; + if (\$errors) { \$errors | ForEach-Object { Write-Output \$_.ToString() } } + " 2>&1 | tr -d '\r') + if [ -n "$_parse_errs" ]; then + warn "Staged elevated payload has PARSE ERRORS -- aborting before UAC." + warn " This is a bug in install.sh. File a bug w/ this output:" + printf '%s\n' "$_parse_errs" | sed 's/^/ /' + warn " staged file: $_elevated_ps1_win" + return 1 + fi case "$_state" in Running) ok "sshd running (Windows OpenSSH.Server)" From 9806b6e801d8fb6eed83d75eba872ed833a1f9cf Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Mon, 27 Apr 2026 23:54:41 -0500 Subject: [PATCH 3/9] fix(install.sh): icacls-reset host key ACLs (ssh-keygen -A alone is not enough) Previous commit's diagnosis was half-right: yes the host-key step needs work, but ssh-keygen -A is for *generating missing keys*, not for fixing ACLs on existing ones. Confirmed by capturing the elevated transcript on continuum-b69f's box -- ssh-keygen -A produced no output at all (existing keys were already there, nothing to do), and sshd still failed Start-Service with exit 1067. Ran sshd -ddd directly to see the underlying file-open errors: Failed to open file: ...ssh_host_rsa_key error:5 (ACCESS_DENIED) Failed to open file: ...ssh_host_rsa_key error:13 (ACL secure_permission_check failed) So sshd-as-LocalSystem can't read the host keys *and* their ACLs flunk sshd's own security check. Two distinct ACL problems, both fixed by the same pattern: take ownership, wipe inheritance, grant SYSTEM + BUILTIN\Administrators full control, no other ACEs. 
Tools considered: - FixHostFilePermissions.ps1: removed from Windows-OpenSSH years ago - OpenSSHUtils PS module: official, but PSGallery dep + module trust prompt = friction we don't want for an install script - icacls: in-box on every Windows + bulletproof. Picked this. The new step: takeown /F # become owner icacls /reset # wipe inherited ACEs icacls /inheritance:r /grant SYSTEM:F /grant Administrators:F Output is captured per-key in the transcript so any failure is visible. ssh-keygen -A still runs first (cheap, idempotent) so any *missing* keys get auto-generated before the ACL fix runs. --- install.sh | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/install.sh b/install.sh index 41fcaba..cdb66a3 100755 --- a/install.sh +++ b/install.sh @@ -281,15 +281,34 @@ if ($cap.State -ne "Installed") { Write-Host " installed: $($cap.Name)" } else { Write-Host " already installed" } -Write-Host "==> SSH host keys + ACLs (ssh-keygen -A)"; -# ssh-keygen -A is idempotent: generates missing host keys AND restores -# correct ACLs on existing ones (SYSTEM + Administrators only). Without -# this, Start-Service sshd fails with exit 1067 ("sshd: no hostkeys -# available") on every fresh-install machine because post-capability -# install the host keys can have overly-permissive ACLs. +Write-Host "==> SSH host keys (generate if missing + reset ACLs)"; +# Two-step: (a) ssh-keygen -A generates any missing host keys, (b) +# icacls resets ACLs on private keys to SYSTEM + Administrators only. +# +# Without (b), Start-Service sshd fails with exit 1067 because sshd +# refuses to open host key files whose ACLs let anyone but SYSTEM/ +# Administrators read them ("sshd: no hostkeys available"). 
Verified +# via `sshd -ddd` on continuum-b69f's box (2026-04-28): +# Failed to open file: .../ssh_host_rsa_key error:5 (ACCESS_DENIED) +# Failed to open file: .../ssh_host_rsa_key error:13 (ACL fails secure_permission_check) +# +# ssh-keygen -A alone does NOT fix ACLs on existing keys -- it only +# generates missing ones. The bundled FixHostFilePermissions.ps1 was +# removed from Windows-OpenSSH years ago; the OpenSSHUtils PS module +# from PSGallery is the official replacement, but pulling it adds a +# network dep + module-trust prompt. icacls is in-box and bulletproof. $sshKeygen = Join-Path $env:WINDIR "System32\OpenSSH\ssh-keygen.exe"; if (Test-Path $sshKeygen) { - & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " $_" } + & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " ssh-keygen: $_" } + $hostKeys = Get-ChildItem 'C:\ProgramData\ssh\ssh_host_*' -ErrorAction SilentlyContinue | Where-Object { -not $_.Name.EndsWith('.pub') } + foreach ($k in $hostKeys) { + # takeown switches owner to current admin; icacls /reset wipes + # inherited ACEs; /inheritance:r removes parent inheritance; then + # grant Full Control to SYSTEM and BUILTIN\Administrators only. + takeown /F $k.FullName /A 2>&1 | Out-Null + icacls $k.FullName /reset 2>&1 | Out-Null + icacls $k.FullName /inheritance:r /grant 'NT AUTHORITY\SYSTEM:(F)' /grant 'BUILTIN\Administrators:(F)' 2>&1 | ForEach-Object { Write-Host " icacls $($k.Name): $_" } + } } else { Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen -- sshd will fail to start" } From 820d9b1bee1a9e3bc0e23e52ac38f08d3b023bc9 Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:00:08 -0500 Subject: [PATCH 4/9] fix(install.sh): delete + regen host keys (icacls /grant alone insufficient for sshd) icacls /grant SYSTEM:F /grant Administrators:F succeeded per the transcript on continuum-b69f's box, but sshd-as-LocalSystem still refused to load the keys with errors 5+13 (ACCESS_DENIED + ACL fails secure_permission_check). 
The post-fix ACLs are technically correct (SYSTEM + Admins only, no inheritance), but OpenSSH's permission check is fragile w.r.t. owner identity and explicit-vs-inherited handling. Cleaner: delete any existing host_key files and re-run ssh-keygen -A. Since ssh-keygen -A here runs from an elevated SYSTEM-context PowerShell, it sets the right owner (SYSTEM) and ACEs at creation time -- which sshd accepts. This sidesteps every "what does icacls think SYSTEM:(F) means" question entirely. Safe at install time: the host hasn't published any fingerprint to peers yet, so regenerating doesn't break anything. Subsequent installs where sshd is already Running (state == Running) skip this whole ensure_sshd_running block via the case statement. Also added a post-regen `icacls ` dump to the transcript so we can see at a glance what the resulting ACL looks like -- saves a UAC round-trip the next time something looks off. --- install.sh | 56 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/install.sh b/install.sh index cdb66a3..0f226e9 100755 --- a/install.sh +++ b/install.sh @@ -281,36 +281,42 @@ if ($cap.State -ne "Installed") { Write-Host " installed: $($cap.Name)" } else { Write-Host " already installed" } -Write-Host "==> SSH host keys (generate if missing + reset ACLs)"; -# Two-step: (a) ssh-keygen -A generates any missing host keys, (b) -# icacls resets ACLs on private keys to SYSTEM + Administrators only. +Write-Host "==> SSH host keys (regenerate so ACLs are clean from birth)"; +# Why "delete + regenerate" instead of "fix ACLs on existing": # -# Without (b), Start-Service sshd fails with exit 1067 because sshd -# refuses to open host key files whose ACLs let anyone but SYSTEM/ -# Administrators read them ("sshd: no hostkeys available"). 
Verified -# via `sshd -ddd` on continuum-b69f's box (2026-04-28): -# Failed to open file: .../ssh_host_rsa_key error:5 (ACCESS_DENIED) -# Failed to open file: .../ssh_host_rsa_key error:13 (ACL fails secure_permission_check) +# Verified on continuum-b69f's box (2026-04-28): even after icacls reset +# to SYSTEM + Administrators only, sshd still refused with error:5 +# (ACCESS_DENIED) and error:13 (ACL fails OpenSSH secure_permission_check). +# Apparently icacls /grant alone isn't enough -- the file owner and the +# combination of explicit + inherited ACEs has to match what OpenSSH's +# secure_permission_check expects, which is fragile. # -# ssh-keygen -A alone does NOT fix ACLs on existing keys -- it only -# generates missing ones. The bundled FixHostFilePermissions.ps1 was -# removed from Windows-OpenSSH years ago; the OpenSSHUtils PS module -# from PSGallery is the official replacement, but pulling it adds a -# network dep + module-trust prompt. icacls is in-box and bulletproof. +# Cleaner approach: nuke any existing host keys, then run ssh-keygen -A +# from this elevated SYSTEM-context process. ssh-keygen -A sets the +# right ACLs at creation time (owner = SYSTEM, ACEs = SYSTEM + Admins). +# Since this is install-time setup and the host hasn't published any +# fingerprint yet, regenerating is safe -- nobody is trusting these +# keys yet from a client. 
$sshKeygen = Join-Path $env:WINDIR "System32\OpenSSH\ssh-keygen.exe"; -if (Test-Path $sshKeygen) { +if (-not (Test-Path $sshKeygen)) { + Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen -- sshd will fail to start" +} else { + $sshDir = 'C:\ProgramData\ssh'; + if (-not (Test-Path $sshDir)) { New-Item -Path $sshDir -ItemType Directory -Force | Out-Null } + $existing = Get-ChildItem (Join-Path $sshDir 'ssh_host_*') -ErrorAction SilentlyContinue + if ($existing) { + Write-Host " removing $($existing.Count) existing host key file(s)" + $existing | Remove-Item -Force -ErrorAction SilentlyContinue + } & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " ssh-keygen: $_" } - $hostKeys = Get-ChildItem 'C:\ProgramData\ssh\ssh_host_*' -ErrorAction SilentlyContinue | Where-Object { -not $_.Name.EndsWith('.pub') } - foreach ($k in $hostKeys) { - # takeown switches owner to current admin; icacls /reset wipes - # inherited ACEs; /inheritance:r removes parent inheritance; then - # grant Full Control to SYSTEM and BUILTIN\Administrators only. - takeown /F $k.FullName /A 2>&1 | Out-Null - icacls $k.FullName /reset 2>&1 | Out-Null - icacls $k.FullName /inheritance:r /grant 'NT AUTHORITY\SYSTEM:(F)' /grant 'BUILTIN\Administrators:(F)' 2>&1 | ForEach-Object { Write-Host " icacls $($k.Name): $_" } + # Dump the post-regen ACL state for one of the keys so we can see in + # the transcript whether the ACL is what sshd expects -- saves a UAC + # round-trip if it's wrong. 
+ $rsa = Join-Path $sshDir 'ssh_host_rsa_key' + if (Test-Path $rsa) { + Write-Host " post-regen ACL on ssh_host_rsa_key:" + icacls $rsa 2>&1 | ForEach-Object { Write-Host " $_" } } -} else { - Write-Host " WARN: ssh-keygen.exe not found at $sshKeygen -- sshd will fail to start" } Write-Host "==> HNS port-22 reservation"; From 458a7991a89c6ef2d051491508aec47390b39ee5 Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:03:11 -0500 Subject: [PATCH 5/9] fix(install.sh): strip creator ACE that ssh-keygen -A leaves on host keys Found via post-regen ACL dump on continuum-b69f 2026-04-28: C:\ProgramData\ssh\ssh_host_rsa_key BUILTIN\Administrators:(F) NT AUTHORITY\SYSTEM:(F) BIGMAMA\green:(M) <-- the bug ssh-keygen -A on Windows leaves an ACE for whichever user ran it (the creator), even when running elevated. OpenSSH's secure_permission_check rejects any non-(owner|SYSTEM|Administrators) ACE -- so the freshly regenerated keys still failed sshd's check, even though they had no inheritance and SYSTEM + Admins had Full Control. Fix: after ssh-keygen -A, run icacls /remove:g $(whoami) on each host_*_key to strip the creator's ACE. Combined with /inheritance:r + /grant SYSTEM:F + Admins:F, the resulting ACL is exactly what sshd wants: just SYSTEM and Administrators, no inheritance, no extras. The post-fix ACL is dumped to the transcript so we can verify it visually -- and so future "wait sshd still won't start" diagnoses have a paper trail of what the ACL looked like. 
--- install.sh | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/install.sh b/install.sh index 0f226e9..045b9bb 100755 --- a/install.sh +++ b/install.sh @@ -309,12 +309,26 @@ if (-not (Test-Path $sshKeygen)) { $existing | Remove-Item -Force -ErrorAction SilentlyContinue } & $sshKeygen -A 2>&1 | ForEach-Object { Write-Host " ssh-keygen: $_" } - # Dump the post-regen ACL state for one of the keys so we can see in - # the transcript whether the ACL is what sshd expects -- saves a UAC - # round-trip if it's wrong. + # ssh-keygen -A on Windows leaves an ACE for the user who ran it + # (e.g. BIGMAMA\green:(M) for an admin elevation), even though that + # user is just the file creator. OpenSSH's secure_permission_check + # rejects any ACE that isn't owner / SYSTEM / Administrators -- so + # we strip the creator's ACE explicitly. Verified on continuum-b69f + # 2026-04-28: with regenerate alone, sshd kept failing with error 13 + # (ACL secure_permission_check); with this strip, the ACL is just + # SYSTEM + Administrators and sshd accepts it. + $me = (whoami).Trim() + $newKeys = Get-ChildItem (Join-Path $sshDir 'ssh_host_*_key') -ErrorAction SilentlyContinue + foreach ($k in $newKeys) { + icacls $k.FullName /inheritance:r 2>&1 | Out-Null + icacls $k.FullName /grant 'NT AUTHORITY\SYSTEM:(F)' 'BUILTIN\Administrators:(F)' 2>&1 | Out-Null + icacls $k.FullName /remove:g $me 2>&1 | Out-Null + } + # Dump the post-fix ACL on the rsa key so we can see in the transcript + # whether the result matches what sshd expects (only SYSTEM + Admins). 
$rsa = Join-Path $sshDir 'ssh_host_rsa_key' if (Test-Path $rsa) { - Write-Host " post-regen ACL on ssh_host_rsa_key:" + Write-Host " post-fix ACL on ssh_host_rsa_key:" icacls $rsa 2>&1 | ForEach-Object { Write-Host " $_" } } } From 55a440ed7496451299e53ba03b7b6187dfa0c481 Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:04:30 -0500 Subject: [PATCH 6/9] fix(install.sh): also chown host keys to SYSTEM (icacls /setowner) Found via Get-Acl owner check on continuum-b69f 2026-04-28: even after removing creator's ACE, ssh-keygen -A leaves the file OWNER as BIGMAMA\green (the elevated user). OpenSSH's secure_permission_check also looks at owner -- if the owner isn't in {SYSTEM, Administrators, running sshd user}, the check fails with error 13 even though access control entries are correct. Adding icacls /setowner 'NT AUTHORITY\SYSTEM' before the inheritance and grant calls so SYSTEM owns the key. Owner = SYSTEM, ACEs = SYSTEM + Admins, no creator, no inheritance -- the canonical OpenSSH-on- Windows host key permission state. --- install.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/install.sh b/install.sh index 045b9bb..b282375 100755 --- a/install.sh +++ b/install.sh @@ -317,9 +317,14 @@ if (-not (Test-Path $sshKeygen)) { # 2026-04-28: with regenerate alone, sshd kept failing with error 13 # (ACL secure_permission_check); with this strip, the ACL is just # SYSTEM + Administrators and sshd accepts it. + # ssh-keygen -A leaves the file owner as the user who ran it + # (BIGMAMA\green even when running elevated). OpenSSH's + # secure_permission_check requires owner in {SYSTEM, Administrators, + # running sshd user}. Setting owner to SYSTEM is the safe default. 
$me = (whoami).Trim() $newKeys = Get-ChildItem (Join-Path $sshDir 'ssh_host_*_key') -ErrorAction SilentlyContinue foreach ($k in $newKeys) { + icacls $k.FullName /setowner 'NT AUTHORITY\SYSTEM' 2>&1 | Out-Null icacls $k.FullName /inheritance:r 2>&1 | Out-Null icacls $k.FullName /grant 'NT AUTHORITY\SYSTEM:(F)' 'BUILTIN\Administrators:(F)' 2>&1 | Out-Null icacls $k.FullName /remove:g $me 2>&1 | Out-Null From c3f202e0987e4dd5c026e2bb25eb71dc2836903a Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:06:44 -0500 Subject: [PATCH 7/9] chore(install.sh): surface sshd dry-run + owner in transcript Adds a 'sshd -t' dry-run step from the elevated context and dumps the post-fix file owner alongside the ACL. Goal: when Start-Service sshd fails, the transcript shows exactly what sshd itself complains about ('no hostkeys available' vs 'bad ownership' vs config syntax) without needing another UAC round-trip to query. --- install.sh | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/install.sh b/install.sh index b282375..82ca6e4 100755 --- a/install.sh +++ b/install.sh @@ -329,12 +329,32 @@ if (-not (Test-Path $sshKeygen)) { icacls $k.FullName /grant 'NT AUTHORITY\SYSTEM:(F)' 'BUILTIN\Administrators:(F)' 2>&1 | Out-Null icacls $k.FullName /remove:g $me 2>&1 | Out-Null } - # Dump the post-fix ACL on the rsa key so we can see in the transcript - # whether the result matches what sshd expects (only SYSTEM + Admins). + # Dump the post-fix ACL + OWNER on the rsa key so we can see in the + # transcript whether the result matches what sshd expects: owner must + # be SYSTEM or Administrators, ACEs must be only owner + SYSTEM + Admins. 
$rsa = Join-Path $sshDir 'ssh_host_rsa_key' if (Test-Path $rsa) { Write-Host " post-fix ACL on ssh_host_rsa_key:" icacls $rsa 2>&1 | ForEach-Object { Write-Host " $_" } + Write-Host " post-fix OWNER on ssh_host_rsa_key: $((Get-Acl $rsa).Owner)" + } +} + +Write-Host "==> sshd dry-run (config + key load test)"; +# Run sshd -t from elevated context to surface the *real* reason sshd +# is failing -- Start-Service sshd hides the underlying error behind a +# generic "Failed to start service" message. -t exits non-zero with a +# specific error message ("no hostkeys available", config syntax, +# privilege separation user missing, etc.). Captures stderr too. +$sshdExe = Join-Path $env:WINDIR "System32\OpenSSH\sshd.exe" +if (Test-Path $sshdExe) { + $sshdTest = & $sshdExe -t 2>&1 + $sshdTestExit = $LASTEXITCODE + if ($sshdTestExit -eq 0) { + Write-Host " sshd -t: OK (exit 0)" + } else { + Write-Host " sshd -t: FAILED (exit $sshdTestExit)"; + $sshdTest | ForEach-Object { Write-Host " $_" } } } From 2db2acda59c06a8d3949e9c271b365c9f673bb5a Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:10:17 -0500 Subject: [PATCH 8/9] fix(install.sh): reset C:\ProgramData\ssh + logs/ folder ACLs (the actual MS-documented cause) WebSearch turned up the exact MS Learn KB for our symptom (sshd -t passes from elevated, Start-Service fails 1067, no event log entry): https://learn.microsoft.com/en-us/troubleshoot/windows-server/system-management-components/error-1053-1067-7034-after-update-openssh-doesnt-start "This issue occurs if the C:\ProgramData\ssh and C:\ProgramData\ssh\logs folders have incorrect permissions. The permissions might be too limited or too open. For example, the SYSTEM account or the Administrators group might not have write permissions. For a second example, regular users might have write or full control permissions." 
Required ACL on each folder: SYSTEM : Full Control Administrators : Full Control Authenticated Users : Read & execute (no write) Owner: SYSTEM. Up to this commit we'd been fixing the host_*_key file ACLs only, never the parent folder. The Microsoft fix is on the FOLDER. Adds a new elevated-payload step that sets owner + inheritance + ACEs on both C:\ProgramData\ssh and C:\ProgramData\ssh\logs with (OI)(CI) inheritance flags so newly-created files inherit correctly. The Oct-2024 update introduced this strictness; the March-2025 update loosened it back into a warning ("Event ID 4: write access is granted to the following users: ..."), so machines fully patched past March 2025 may not need this. But continuum-b69f's box (Windows 11 24H2, build 26100.8115, otherwise fully patched) is still hitting the strict-mode failure -- so applying the documented fix is still required. --- install.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/install.sh b/install.sh index 82ca6e4..535955e 100755 --- a/install.sh +++ b/install.sh @@ -340,6 +340,32 @@ if (-not (Test-Path $sshKeygen)) { } } +Write-Host "==> SSH directory ACLs (C:\ProgramData\ssh + logs/)"; +# Per Microsoft KB on Error 1067 / Event 7034 (strictness introduced by the +# Oct 2024 Windows update; still observed on Windows 11 24H2 build 26100.8115): +# "This issue occurs if the C:\ProgramData\ssh and C:\ProgramData\ssh\logs +# folders have incorrect permissions. The permissions might be too limited +# or too open. For example, the SYSTEM account or the Administrators group +# might not have write permissions. For a second example, regular users +# might have write or full control permissions." 
+# https://learn.microsoft.com/en-us/troubleshoot/windows-server/system-management-components/error-1053-1067-7034-after-update-openssh-doesnt-start +# +# Required ACL on each folder: +# SYSTEM : Full Control +# Administrators : Full Control +# Authenticated Users : Read & execute (read-only, no write) +# Owner: SYSTEM (not the user who created the folder). +$sshDir = 'C:\ProgramData\ssh' +$logsDir = Join-Path $sshDir 'logs' +foreach ($d in @($sshDir, $logsDir)) { + if (-not (Test-Path $d)) { New-Item -Path $d -ItemType Directory -Force | Out-Null } + icacls $d /setowner 'NT AUTHORITY\SYSTEM' 2>&1 | Out-Null + icacls $d /inheritance:r 2>&1 | Out-Null + icacls $d /grant 'NT AUTHORITY\SYSTEM:(OI)(CI)(F)' 'BUILTIN\Administrators:(OI)(CI)(F)' 'NT AUTHORITY\Authenticated Users:(OI)(CI)(RX)' 2>&1 | Out-Null + Write-Host " $d :" + icacls $d 2>&1 | Select-Object -First 5 | ForEach-Object { Write-Host " $_" } +} + Write-Host "==> sshd dry-run (config + key load test)"; # Run sshd -t from elevated context to surface the *real* reason sshd # is failing -- Start-Service sshd hides the underlying error behind a From cea7c26b95b2e22911a60ecd523026c1368084bb Mon Sep 17 00:00:00 2001 From: Joel Teply Date: Tue, 28 Apr 2026 00:12:06 -0500 Subject: [PATCH 9/9] fix(install.sh): restart HNS service after port-22 reservation (the actual blocker) OpenSSH/Admin event log on continuum-b69f revealed the real blocker: sshd: error: Bind to port 22 on 0.0.0.0 failed: Permission denied. sshd: error: Bind to port 22 on :: failed: Permission denied. sshd: fatal: Cannot bind any address. Even with the HNS reg key (EnableExcludedPortRange=0) set AND netsh showing port 22 in the excluded range ('22 22 *' administered), sshd-as-LocalSystem still got EACCES on bind. HNS service was holding port 22 at a layer below netsh visibility -- the reg key + netsh reservation only take effect after a Restart-Service hns (or reboot). Adds an HNS restart immediately after the port-22 reservation step. 
Now sshd can actually bind port 22 when Start-Service runs the next step. This was already documented in continuum-b69f's memory file (reference_airc_windows.md) but the install.sh implementation never actually restarted the service. The host-key permission saga from the prior 7 commits in this branch turned out to be a sidequest -- those issues were real but not the blocker. sshd -t (which doesn't bind a socket) was passing the whole time. The real failure was at bind time, not at config-load time. --- install.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/install.sh b/install.sh index 535955e..3738721 100755 --- a/install.sh +++ b/install.sh @@ -386,9 +386,11 @@ if (Test-Path $sshdExe) { Write-Host "==> HNS port-22 reservation"; $reg = (Get-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Services\hns\State" -Name "EnableExcludedPortRange" -ErrorAction SilentlyContinue).EnableExcludedPortRange; +$regChanged = $false if ($reg -ne 0) { reg add "HKLM\SYSTEM\CurrentControlSet\Services\hns\State" /v "EnableExcludedPortRange" /d 0 /f | Out-Null; Write-Host " HNS auto-exclusion disabled" + $regChanged = $true } else { Write-Host " HNS auto-exclusion already off" } $excl = netsh int ipv4 show excludedportrange protocol=tcp | Out-String; if ($excl -notmatch "(?m)^\s*22\s+22\b") { @@ -396,6 +398,22 @@ if ($excl -notmatch "(?m)^\s*22\s+22\b") { Write-Host " port 22 reserved in static excluded-port-range" } else { Write-Host " port 22 already reserved" } +# Make sure port 22 is actually claimable. If HNS holds it at a layer +# below what netsh shows (Hyper-V/WSL2/Docker share dynamic port +# ranges via HNS), restarting the HNS service (or rebooting) is the only +# way to make it re-evaluate the reservation. Without this, netsh shows port 22 +# excluded but sshd-as-LocalSystem still gets EACCES on bind: +# sshd: error: Bind to port 22 on 0.0.0.0 failed: Permission denied. +# sshd: fatal: Cannot bind any address. 
+# Verified on continuum-b69f 2026-04-28 in OpenSSH/Admin event log. +$hns = Get-Service hns -ErrorAction SilentlyContinue +if ($hns -and $hns.Status -eq 'Running') { + Write-Host " restarting HNS service so port-22 reservation takes effect" + Restart-Service hns -Force -ErrorAction SilentlyContinue + Start-Sleep -Seconds 2 + Write-Host " HNS state: $((Get-Service hns).Status)" +} + Write-Host "==> Firewall rule (TCP/22 inbound)"; if (-not (Get-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -ErrorAction SilentlyContinue)) { New-NetFirewallRule -Name "OpenSSH-Server-In-TCP" -DisplayName "OpenSSH Server (sshd)" -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 | Out-Null;