Skip to content

Commit aa968d0

Browse files
committed
feat: [#14] add SSH utilities and host key verification improvements
- Add ssh-utils.sh script for managing SSH host key verification issues
- Integrate SSH cleanup into infrastructure provisioning workflow
- Add Makefile targets for SSH troubleshooting (ssh-clean, ssh-prepare)
- Update documentation with SSH troubleshooting guidance

SSH Utilities (infrastructure/scripts/ssh-utils.sh):
- clean_vm_known_hosts() - Remove host keys for specific VM IP
- clean_libvirt_known_hosts() - Clean entire libvirt network range
- prepare_vm_ssh() - Comprehensive SSH preparation workflow
- Support for both specific IP and network-wide cleanup

Infrastructure Integration:
- Auto-clean SSH known_hosts before and after VM provisioning
- Prevent host key verification warnings during deployment
- Non-critical operations (won't fail deployment if SSH cleanup fails)

Makefile Enhancements:
- make ssh-clean: Fix host key verification warnings
- make ssh-prepare: Clean and test SSH connectivity
- Updated help documentation and troubleshooting guide

Benefits:
- Eliminates common SSH host key verification warnings
- Smoother VM development workflow
- Better developer experience with local testing
- Automated SSH maintenance during infrastructure operations
1 parent 80df86f commit aa968d0

File tree

4 files changed

+212
-2
lines changed

4 files changed

+212
-2
lines changed

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,14 @@ ssh: ## SSH into the VM
104104
exit 1; \
105105
fi
106106

107+
# Runs `ssh-utils.sh clean` without an IP argument, so the script
# auto-detects the VM address before removing its known_hosts entries.
ssh-clean: ## Clean SSH known_hosts for VM (fixes host key verification warnings)
108+
@echo "Cleaning SSH known_hosts for VM..."
109+
@$(SCRIPTS_DIR)/ssh-utils.sh clean
110+
111+
# Runs `ssh-utils.sh prepare`, which cleans the VM's known_hosts entries and
# then retries an SSH connection test.
ssh-prepare: ## Clean SSH known_hosts and test connectivity
112+
@echo "Preparing SSH connection to VM..."
113+
@$(SCRIPTS_DIR)/ssh-utils.sh prepare
114+
107115
console: ## Access VM console (text-based)
108116
@echo "Accessing VM console..."
109117
@virsh console $(VM_NAME) || echo "VM console not accessible. Try 'make vm-console' for graphical console."

infrastructure/docs/quick-start.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ make destroy
140140
| `make test` | Run complete test suite |
141141
| `make apply` | Deploy VM |
142142
| `make ssh` | Connect to VM |
143+
| `make ssh-clean` | Fix SSH host key verification warnings |
143144
| `make destroy` | Remove VM |
144145
| `make status` | Show infrastructure status |
145146
| `make refresh-state` | Refresh Terraform state to detect IP changes |
@@ -151,8 +152,9 @@ make destroy
151152
1. **Permission errors**: Make sure you logged out/in after `make dev-setup`
152153
2. **VM won't start**: Check with `sudo kvm-ok` that virtualization is enabled
153154
3. **SSH connection fails**: VM might still be booting, wait 2-3 minutes
154-
4. **libvirt file ownership errors**: Run `make fix-libvirt` to fix permissions
155-
5. **"No IP assigned yet" issue**: If `make status` shows no IP but VM is running:
155+
4. **SSH host key verification warnings**: Use `make ssh-clean` to fix automatically
156+
5. **libvirt file ownership errors**: Run `make fix-libvirt` to fix permissions
157+
6. **"No IP assigned yet" issue**: If `make status` shows no IP but VM is running:
156158

157159
```bash
158160
# Check if VM actually has an IP
@@ -172,6 +174,9 @@ make destroy
172174
# Fix libvirt permissions automatically
173175
make fix-libvirt
174176

177+
# Clean SSH known_hosts (fixes host key verification warnings)
178+
make ssh-clean
179+
175180
# Check test logs
176181
make logs
177182

infrastructure/scripts/provision-infrastructure.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ provision_infrastructure() {
106106

107107
log_info "Applying infrastructure changes"
108108
init_terraform
109+
110+
# Clean SSH known_hosts to prevent host key verification issues
111+
log_info "Cleaning SSH known_hosts to prevent host key verification warnings"
112+
if command -v "${SCRIPT_DIR}/ssh-utils.sh" >/dev/null 2>&1; then
113+
"${SCRIPT_DIR}/ssh-utils.sh" clean-all || log_warning "SSH cleanup failed (non-critical)"
114+
fi
115+
109116
tofu apply -auto-approve -var-file="local.tfvars"
110117

111118
# Get VM IP and display connection info
@@ -115,6 +122,12 @@ provision_infrastructure() {
115122
if [[ -n "${vm_ip}" ]]; then
116123
log_success "Infrastructure provisioned successfully"
117124
log_info "VM IP: ${vm_ip}"
125+
126+
# Clean specific IP from known_hosts
127+
if command -v "${SCRIPT_DIR}/ssh-utils.sh" >/dev/null 2>&1; then
128+
"${SCRIPT_DIR}/ssh-utils.sh" clean "${vm_ip}" || log_warning "SSH cleanup for ${vm_ip} failed (non-critical)"
129+
fi
130+
118131
log_info "SSH Access: ssh torrust@${vm_ip}"
119132
log_info "Next step: make app-deploy ENVIRONMENT=${ENVIRONMENT}"
120133
else
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#!/bin/bash
# SSH helper routines for VM development environments.
# Works around recurring SSH problems such as host key verification failures.

# Strict mode: stop on errors, on unset variables and on failed pipeline stages
set -o errexit -o nounset -o pipefail

# Locate this script's directory and the project root two levels above it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Load the shared shell utilities (presumably the source of the log_* helpers
# used below - confirm against scripts/shell-utils.sh)
# shellcheck source=scripts/shell-utils.sh
. "${PROJECT_ROOT}/scripts/shell-utils.sh"
12+
13+
#######################################
# Remove stale SSH known_hosts entries for a single VM IP address.
# Arguments:
#   $1 - VM IP address. When missing, empty or the placeholder
#        "No IP assigned yet", nothing is cleaned and a warning is logged.
#   $2 - VM name, used in log messages only (default: torrust-tracker-demo)
# Returns:
#   0 in every case: the cleanup is best-effort and must never abort a caller.
#######################################
clean_vm_known_hosts() {
  # ${1:-} keeps a call without arguments from aborting under `set -u`
  local vm_ip="${1:-}"
  local vm_name="${2:-torrust-tracker-demo}"
  local known_hosts
  known_hosts=~/.ssh/known_hosts

  if [[ -z "${vm_ip}" || "${vm_ip}" == "No IP assigned yet" ]]; then
    log_warning "No VM IP provided for known_hosts cleanup"
    return 0
  fi

  log_info "Cleaning SSH known_hosts entries for VM ${vm_name} (${vm_ip})"

  if [[ ! -f "${known_hosts}" ]]; then
    log_info "No ~/.ssh/known_hosts file found"
    return 0
  fi

  # `ssh-keygen -R` exits 0 even when the host is absent, so its status cannot
  # tell "removed" from "nothing to remove"; `-F` exits non-zero when the host
  # is not listed.
  if ! ssh-keygen -F "${vm_ip}" -f "${known_hosts}" >/dev/null 2>&1; then
    log_info "No existing SSH host key entries found for ${vm_ip}"
    return 0
  fi

  if ssh-keygen -f "${known_hosts}" -R "${vm_ip}" >/dev/null 2>&1; then
    log_success "Removed old SSH host key entries for ${vm_ip}"
  else
    log_warning "Could not remove SSH host key entries for ${vm_ip}"
  fi
  return 0
}
37+
38+
#######################################
# Remove known_hosts entries for every host address (.1 to .254) of a /24
# network.
# Arguments:
#   $1 - first three octets of the network (default: 192.168.122, the
#        libvirt default network)
# Returns:
#   0 in every case; the cleanup is best-effort.
#######################################
clean_libvirt_known_hosts() {
  local network_prefix="${1:-192.168.122}"
  local known_hosts
  known_hosts=~/.ssh/known_hosts
  local cleaned_count=0
  local host_octet
  local host

  log_info "Cleaning SSH known_hosts entries for entire libvirt network range"

  if [[ ! -f "${known_hosts}" ]]; then
    log_info "No ~/.ssh/known_hosts file found"
    return 0
  fi

  for host_octet in {1..254}; do
    host="${network_prefix}.${host_octet}"
    # `-R` exits 0 whether or not the host was listed, so `-F` decides if
    # there is anything to remove (and therefore to count).
    if ssh-keygen -F "${host}" -f "${known_hosts}" >/dev/null 2>&1 \
      && ssh-keygen -f "${known_hosts}" -R "${host}" >/dev/null 2>&1; then
      # Plain assignment on purpose: ((cleaned_count++)) returns status 1 when
      # the old value is 0, which terminates the script under `set -e`.
      cleaned_count=$((cleaned_count + 1))
    fi
  done

  if [[ ${cleaned_count} -gt 0 ]]; then
    log_success "Cleaned ${cleaned_count} SSH host key entries for libvirt network"
  else
    log_info "No libvirt network SSH host key entries found"
  fi
  return 0
}
61+
62+
#######################################
# Print the VM's IP address on stdout.
# Lookup order: the `vm_ip` OpenTofu output (only when the `tofu` CLI is
# available), then the addresses reported by `virsh domifaddr`.
# Globals:
#   PROJECT_ROOT (read) - the tofu lookup runs in
#                         ${PROJECT_ROOT}/infrastructure/terraform
# Arguments:
#   $1 - libvirt domain name (default: torrust-tracker-demo)
# Outputs:
#   Exactly one IP address on stdout when one is found.
# Returns:
#   0 when an address was printed, 1 when neither source yielded one.
#######################################
get_vm_ip() {
  local vm_name="${1:-torrust-tracker-demo}"
  local vm_ip=""

  # Try the tofu output first
  if command -v tofu >/dev/null 2>&1; then
    vm_ip=$(cd "${PROJECT_ROOT}/infrastructure/terraform" \
      && tofu output -raw vm_ip 2>/dev/null) || vm_ip=""
    # "No IP assigned yet" is a placeholder value, not an address
    if [[ -n "${vm_ip}" && "${vm_ip}" != "No IP assigned yet" ]]; then
      echo "${vm_ip}"
      return 0
    fi
  fi

  # Fall back to libvirt. Only the first ipv4 row is used: a domain with
  # several interfaces would otherwise yield a multi-line "address". awk reads
  # all input (no early exit) so virsh never sees a closed pipe.
  vm_ip=$(
    virsh domifaddr "${vm_name}" 2>/dev/null \
      | awk '/ipv4/ && !seen { sub(/\/.*$/, "", $4); print $4; seen = 1 }'
  ) || vm_ip=""
  if [[ -n "${vm_ip}" ]]; then
    echo "${vm_ip}"
    return 0
  fi

  return 1
}
85+
86+
#######################################
# Prepare an SSH connection to the VM: look up its IP, drop stale known_hosts
# entries for it, then probe SSH until it answers or the attempts run out.
# Arguments:
#   $1 - VM name (default: torrust-tracker-demo)
#   $2 - maximum number of SSH attempts (default: 3)
# Outputs:
#   The VM IP on stdout once an SSH probe succeeds.
# Returns:
#   0 on a successful probe; 1 when the IP cannot be determined or every
#   attempt fails.
#######################################
prepare_vm_ssh() {
  local vm_name="${1:-torrust-tracker-demo}"
  local max_attempts="${2:-3}"
  local retry_delay=5
  local vm_ip
  local attempt

  log_info "Preparing SSH connection to VM ${vm_name}"

  if ! vm_ip=$(get_vm_ip "${vm_name}"); then
    log_error "Could not get IP address for VM ${vm_name}"
    return 1
  fi

  log_info "VM IP: ${vm_ip}"

  clean_vm_known_hosts "${vm_ip}" "${vm_name}"

  log_info "Testing SSH connectivity (up to ${max_attempts} attempts)"
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # BatchMode=yes makes ssh fail instead of prompting for a password
    if ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o BatchMode=yes \
      "torrust@${vm_ip}" "echo 'SSH OK'" >/dev/null 2>&1; then
      log_success "SSH connection established to ${vm_ip}"
      echo "${vm_ip}"
      return 0
    fi

    # Only wait when another attempt follows; sleeping after the last failure
    # just delays the error report.
    if ((attempt < max_attempts)); then
      log_warning "SSH attempt ${attempt}/${max_attempts} failed, waiting ${retry_delay} seconds..."
      sleep "${retry_delay}"
    else
      log_warning "SSH attempt ${attempt}/${max_attempts} failed"
    fi
  done

  log_error "Failed to establish SSH connection after ${max_attempts} attempts"
  log_error "Common causes:"
  log_error "  1. VM is still booting (cloud-init may take 2-5 minutes)"
  log_error "  2. SSH service is not ready yet"
  log_error "  3. Firewall blocking connections"
  log_error "Try manually: ssh -o StrictHostKeyChecking=no torrust@${vm_ip}"
  return 1
}
129+
130+
# Print the command-line usage text on stdout.
_ssh_utils_usage() {
  cat <<'EOF'
SSH utilities for VM development environments

Usage:
  ssh-utils.sh clean [IP] - Clean known_hosts for specific IP (or auto-detect)
  ssh-utils.sh clean-all - Clean known_hosts for entire libvirt network
  ssh-utils.sh prepare [VM_NAME] - Clean known_hosts and test SSH connectivity
  ssh-utils.sh get-ip [VM_NAME] - Get VM IP address
  ssh-utils.sh help - Show this help

Examples:
  ./infrastructure/scripts/ssh-utils.sh clean
  ./infrastructure/scripts/ssh-utils.sh clean 192.168.122.25
  ./infrastructure/scripts/ssh-utils.sh prepare torrust-tracker-demo
  ./infrastructure/scripts/ssh-utils.sh clean-all

This script helps resolve SSH host key verification issues that occur when
VMs are recreated with the same IP addresses but different host keys.
EOF
}

#######################################
# Command-line entry point: dispatch a subcommand to the matching helper.
# Arguments:
#   $1 - subcommand: clean | clean-all | prepare | get-ip | help
#        (default: help)
#   $2 - optional subcommand argument: an IP for `clean`, a VM name for
#        `prepare` and `get-ip`
# Returns:
#   The status of the dispatched helper; 1 when the VM IP cannot be determined
#   for `clean` or when the subcommand is unknown.
#######################################
main() {
  local action="${1:-help}"
  local vm_ip

  case "${action}" in
    clean)
      vm_ip="${2:-}"
      # Without an explicit IP, fall back to auto-detection
      if [[ -z "${vm_ip}" ]] && ! vm_ip=$(get_vm_ip); then
        log_error "Could not determine VM IP. Please provide IP as argument."
        # `return`, not `exit`: the status still ends a directly executed
        # script, but does not kill a shell that sourced this file.
        return 1
      fi
      clean_vm_known_hosts "${vm_ip}"
      ;;
    clean-all)
      clean_libvirt_known_hosts
      ;;
    prepare)
      prepare_vm_ssh "${2:-torrust-tracker-demo}"
      ;;
    get-ip)
      get_vm_ip "${2:-torrust-tracker-demo}"
      ;;
    help)
      _ssh_utils_usage
      ;;
    *)
      # A mistyped subcommand must fail loudly instead of printing help and
      # reporting success to the calling Makefile or script.
      log_error "Unknown command: ${action}"
      _ssh_utils_usage >&2
      return 1
      ;;
  esac
}
180+
181+
# Call main only on direct execution; when the file is sourced, BASH_SOURCE[0]
# differs from $0 and only the function definitions are loaded.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"

0 commit comments

Comments
 (0)