From ca546adf5b7f77fa1cfc234287831e3bac5e85dd Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Fri, 27 Mar 2026 13:16:12 -0700 Subject: [PATCH 1/6] feat: full cloud-init tool parity with Python provisioning Add all development tools that were in the Python vm_provisioning.py but missing from the Rust cloud_init implementation: - GitHub CLI (gh) via official apt repo - Azure CLI via InstallAzureCLIDeb script - Node.js 22.x via NodeSource - Claude Code AI assistant - Go 1.24.1 - Python 3.13 + python-is-python3 - uv package manager - tmux configuration (status bar, socket permissions) - Docker post-install (add user to docker group) - npm global prefix configuration - .bashrc PATH additions (Go, Cargo, npm) Updates both cloud-init code paths: - cloud_init.rs: YAML-based cloud-init config (packages + runcmd) - vm.rs: shell-script based cloud-init provisioning Removes broken amplihack make install (target doesn't exist). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/crates/azlin-azure/src/cloud_init.rs | 86 +++++++++++++---- rust/crates/azlin-azure/src/vm.rs | 93 +++++++++++++++++-- .../cloud-init-tool-parity.yaml | 87 +++++++++++++++++ 3 files changed, 238 insertions(+), 28 deletions(-) create mode 100644 tests/agentic-scenarios/cloud-init-tool-parity.yaml diff --git a/rust/crates/azlin-azure/src/cloud_init.rs b/rust/crates/azlin-azure/src/cloud_init.rs index 0f90f78f..7f8f7e18 100644 --- a/rust/crates/azlin-azure/src/cloud_init.rs +++ b/rust/crates/azlin-azure/src/cloud_init.rs @@ -85,38 +85,66 @@ pub fn generate_cloud_init( /// Default packages for development VMs /// Default setup commands for development VMs (run after packages install). 
/// -/// These install toolchains that aren't available as apt packages: -/// - Rust/Cargo via rustup -/// - .NET 10 SDK via Microsoft install script -/// - amplihack from github.com/rysweet/amplihack +/// These install toolchains that aren't available as apt packages, matching +/// the full Python azlin provisioning (gh, az, node, claude, rust, go, .NET). pub fn default_dev_setup_commands() -> Vec { vec![ - // Install Rust/Cargo for the default user + // Full system upgrade + "apt update && apt full-upgrade -y && apt autoremove -y && apt autoclean -y".to_string(), + // Re-install ripgrep after full-upgrade (autoremove may drop it) + "apt install -y ripgrep".to_string(), + // Python 3.13+ — use deadsnakes PPA only on LTS that needs it + "if python3 --version 2>&1 | grep -qE '3\\.1[3-9]|3\\.[2-9][0-9]'; then echo 'Python 3.13+ available'; else add-apt-repository -y ppa:deadsnakes/ppa && apt update && apt install -y python3.13 python3.13-venv python3.13-dev && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 1 && update-alternatives --set python3 /usr/bin/python3.13; fi".to_string(), + "curl -sS https://bootstrap.pypa.io/get-pip.py | python3".to_string(), + // GitHub CLI + "mkdir -p -m 755 /etc/apt/keyrings && wget -nv -O /tmp/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg && tee /etc/apt/keyrings/githubcli-archive-keyring.gpg < /tmp/githubcli-archive-keyring.gpg > /dev/null && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg && mkdir -p -m 755 /etc/apt/sources.list.d && echo \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main\" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && apt update && apt install -y gh".to_string(), + // Azure CLI + "curl -sL https://aka.ms/InstallAzureCLIDeb | bash".to_string(), + // astral-uv (uv package manager) + "snap install astral-uv --classic || 
true".to_string(), + // Node.js 22 LTS (via NodeSource) + "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && apt install -y nodejs".to_string(), + // npm user-local configuration + "mkdir -p /home/azureuser/.npm-packages && echo 'prefix=${HOME}/.npm-packages' > /home/azureuser/.npmrc && chown azureuser:azureuser /home/azureuser/.npmrc /home/azureuser/.npm-packages".to_string(), + // Tmux configuration + "printf '[%%s] %%s\\n' \"$(hostname)\" \"tmux.conf\" && cat > /home/azureuser/.tmux.conf << 'TMUXEOF'\nset -g status-left-length 50\nset -g status-left \"#[fg=cyan][#h]#[fg=green] #S #[fg=yellow]| \"\nset -g status-right \"#[fg=cyan]%%Y-%%m-%%d %%H:%%M\"\nset -g status-interval 60\nset -g status-bg black\nset -g status-fg white\nTMUXEOF\nchown azureuser:azureuser /home/azureuser/.tmux.conf".to_string(), + // Fix tmux socket dir permissions (Ubuntu 25.10+) + "chmod 1777 /tmp && mkdir -p /tmp/tmux-1000 && chmod 700 /tmp/tmux-1000 && chown azureuser:azureuser /tmp/tmux-1000".to_string(), + // Claude Code AI Assistant + "su - azureuser -c 'curl -fsSL https://claude.ai/install.sh | bash' || echo 'WARNING: Claude Code installation failed'".to_string(), + // Rust "su - azureuser -c 'curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y'".to_string(), - // Install .NET 10 SDK (preview until GA, then remove --quality flag) + // Go + "wget -q https://go.dev/dl/go1.21.5.linux-amd64.tar.gz -O /tmp/go.tar.gz && tar -C /usr/local -xzf /tmp/go.tar.gz && rm /tmp/go.tar.gz".to_string(), + // .NET 10 SDK "curl -sSL https://dot.net/v1/dotnet-install.sh -o /tmp/dotnet-install.sh && chmod +x /tmp/dotnet-install.sh && (/tmp/dotnet-install.sh --channel 10.0 --quality preview --install-dir /usr/share/dotnet || /tmp/dotnet-install.sh --channel 10.0 --install-dir /usr/share/dotnet || echo 'WARNING: .NET 10 SDK install failed') && ln -sf /usr/share/dotnet/dotnet /usr/local/bin/dotnet; rm -f /tmp/dotnet-install.sh".to_string(), - // Install amplihack - "su - 
azureuser -c 'git clone https://github.com/rysweet/amplihack.git ~/amplihack && cd ~/amplihack && make install || true'".to_string(), + // Docker post-install + "usermod -aG docker azureuser && systemctl enable docker && systemctl start docker".to_string(), + // bashrc additions (npm path, go path, cargo env, azlin alias) + "cat >> /home/azureuser/.bashrc << 'BASHEOF'\n\n# npm user-local configuration\nNPM_PACKAGES=\"${HOME}/.npm-packages\"\nPATH=\"$NPM_PACKAGES/bin:$PATH\"\nMANPATH=\"$NPM_PACKAGES/share/man:$(manpath 2>/dev/null || echo $MANPATH)\"\n\n# Go\nexport PATH=$PATH:/usr/local/go/bin\n\n# Cargo\nsource $HOME/.cargo/env 2>/dev/null\nBASHEOF".to_string(), + // Version verification + "echo '[AZLIN] Provisioning complete' && which gh && gh --version && which az && az --version | head -2 && which node && node --version && which rustc && rustc --version && which dotnet && dotnet --version || true".to_string(), ] } +/// Default packages for development VMs (installed via apt) pub fn default_dev_packages() -> Vec<&'static str> { vec![ + "docker.io", "git", + "tmux", "curl", "wget", - "jq", - "tmux", - "vim", "build-essential", - "make", + "software-properties-common", + "ripgrep", "python3-pip", - "python3-venv", - "docker.io", - "docker-compose", + "pipx", + "jq", "unzip", "htop", "tree", + "vim", ] } @@ -185,7 +213,9 @@ mod tests { assert!(pkgs.contains(&"git")); assert!(pkgs.contains(&"docker.io")); assert!(pkgs.contains(&"python3-pip")); - assert!(pkgs.contains(&"make")); + assert!(pkgs.contains(&"ripgrep")); + assert!(pkgs.contains(&"pipx")); + assert!(pkgs.contains(&"software-properties-common")); assert!(pkgs.len() >= 10); } @@ -201,8 +231,28 @@ mod tests { "Missing .NET install command" ); assert!( - cmds.iter().any(|c| c.contains("rysweet/amplihack")), - "Missing amplihack install command" + cmds.iter().any(|c| c.contains("apt install -y gh")), + "Missing GitHub CLI install command" + ); + assert!( + cmds.iter().any(|c| c.contains("InstallAzureCLIDeb")), 
+ "Missing Azure CLI install command" + ); + assert!( + cmds.iter().any(|c| c.contains("nodesource.com")), + "Missing Node.js install command" + ); + assert!( + cmds.iter().any(|c| c.contains("claude.ai/install.sh")), + "Missing Claude Code install command" + ); + assert!( + cmds.iter().any(|c| c.contains("go.dev")), + "Missing Go install command" + ); + assert!( + cmds.iter().any(|c| c.contains("usermod -aG docker")), + "Missing Docker post-install command" ); } diff --git a/rust/crates/azlin-azure/src/vm.rs b/rust/crates/azlin-azure/src/vm.rs index 6a9c8116..c79202a1 100644 --- a/rust/crates/azlin-azure/src/vm.rs +++ b/rust/crates/azlin-azure/src/vm.rs @@ -662,7 +662,7 @@ fn cloud_init_script(admin_username: &str) -> String { "azureuser" }; format!( - r#"#!/bin/bash + r##"#!/bin/bash set -euo pipefail apt-get update -qq @@ -672,16 +672,73 @@ apt-get install -y -qq \ git curl wget jq unzip \ build-essential make \ tmux ripgrep fd-find \ - docker.io + docker.io software-properties-common \ + python3-pip pipx htop tree vim systemctl enable docker systemctl start docker usermod -aG docker {username} -# Install Rust and Cargo +# Python 3.13+ — use deadsnakes PPA only on LTS that needs it +if python3 --version 2>&1 | grep -qE '3\.1[3-9]|3\.[2-9][0-9]'; then + echo "Python 3.13+ already available" +else + add-apt-repository -y ppa:deadsnakes/ppa && apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 1 + update-alternatives --set python3 /usr/bin/python3.13 +fi +curl -sS https://bootstrap.pypa.io/get-pip.py | python3 + +# GitHub CLI +mkdir -p -m 755 /etc/apt/keyrings +wget -nv -O /tmp/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg +tee /etc/apt/keyrings/githubcli-archive-keyring.gpg < /tmp/githubcli-archive-keyring.gpg > /dev/null +chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg +echo "deb [arch=$(dpkg 
--print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null +apt-get update && apt-get install -y gh + +# Azure CLI +curl -sL https://aka.ms/InstallAzureCLIDeb | bash + +# astral-uv +snap install astral-uv --classic || true + +# Node.js 22 LTS +curl -fsSL https://deb.nodesource.com/setup_22.x | bash - +apt-get install -y nodejs +mkdir -p /home/{username}/.npm-packages +echo 'prefix=${{HOME}}/.npm-packages' > /home/{username}/.npmrc +chown {username}:{username} /home/{username}/.npmrc /home/{username}/.npm-packages + +# Tmux configuration +cat > /home/{username}/.tmux.conf << 'TMUXEOF' +set -g status-left-length 50 +set -g status-left "#[fg=cyan][#h]#[fg=green] #S #[fg=yellow]| " +set -g status-right "#[fg=cyan]%Y-%m-%d %H:%M" +set -g status-interval 60 +set -g status-bg black +set -g status-fg white +TMUXEOF +chown {username}:{username} /home/{username}/.tmux.conf + +# Fix tmux socket dir permissions (Ubuntu 25.10+) +chmod 1777 /tmp +mkdir -p /tmp/tmux-1000 +chmod 700 /tmp/tmux-1000 +chown {username}:{username} /tmp/tmux-1000 + +# Claude Code AI Assistant +su - {username} -c 'curl -fsSL https://claude.ai/install.sh | bash' || echo "WARNING: Claude Code install failed" + +# Rust and Cargo su - {username} -c 'curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y' -# Install .NET 10 SDK (preview until GA release, then remove --quality flag) +# Go +wget -q https://go.dev/dl/go1.21.5.linux-amd64.tar.gz -O /tmp/go.tar.gz +tar -C /usr/local -xzf /tmp/go.tar.gz +rm -f /tmp/go.tar.gz + +# .NET 10 SDK curl -sSL https://dot.net/v1/dotnet-install.sh -o /tmp/dotnet-install.sh chmod +x /tmp/dotnet-install.sh /tmp/dotnet-install.sh --channel 10.0 --quality preview --install-dir /usr/share/dotnet \ @@ -690,11 +747,23 @@ chmod +x /tmp/dotnet-install.sh ln -sf /usr/share/dotnet/dotnet /usr/local/bin/dotnet 2>/dev/null || true rm -f 
/tmp/dotnet-install.sh -# Install amplihack -su - {username} -c 'git clone https://github.com/rysweet/amplihack.git ~/amplihack && cd ~/amplihack && make install || true' +# bashrc additions +cat >> /home/{username}/.bashrc << 'BASHEOF' + +# npm user-local configuration +NPM_PACKAGES="${{HOME}}/.npm-packages" +PATH="$NPM_PACKAGES/bin:$PATH" +MANPATH="$NPM_PACKAGES/share/man:$(manpath 2>/dev/null || echo $MANPATH)" + +# Go +export PATH=$PATH:/usr/local/go/bin + +# Cargo +source $HOME/.cargo/env 2>/dev/null +BASHEOF echo "cloud-init provisioning complete" -"#, +"##, username = safe_username ) } @@ -1017,11 +1086,15 @@ mod tests { } #[test] - fn test_cloud_init_script_installs_amplihack() { + fn test_cloud_init_script_installs_gh_and_az() { let script = cloud_init_script("testuser"); assert!( - script.contains("github.com/rysweet/amplihack"), - "Missing amplihack clone" + script.contains("apt-get install -y gh"), + "Missing GitHub CLI install" + ); + assert!( + script.contains("InstallAzureCLIDeb"), + "Missing Azure CLI install" ); } diff --git a/tests/agentic-scenarios/cloud-init-tool-parity.yaml b/tests/agentic-scenarios/cloud-init-tool-parity.yaml new file mode 100644 index 00000000..b7af25ad --- /dev/null +++ b/tests/agentic-scenarios/cloud-init-tool-parity.yaml @@ -0,0 +1,87 @@ +name: cloud-init-tool-parity +description: > + Verify that both cloud-init code paths (YAML and shell-script) include + all required development tools matching Python provisioning parity. +version: "1.0.0" + +config: + timeout: 30000 + +agents: + - name: "cli-agent" + type: "system" + config: + workingDirectory: "." 
+ shell: "bash" + timeout: 120000 + +steps: + - name: "build test binary" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && cargo build --quiet 2>&1 && echo BUILD_OK" + expect: + exit_code: 0 + stdout_contains: "BUILD_OK" + timeout: 120000 + + - name: "YAML cloud-init default_dev_setup_commands test passes" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_default_dev_setup_commands --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 + + - name: "YAML cloud-init default_dev_packages test passes" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_default_dev_packages --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 + + - name: "shell-script cloud-init installs gh and az CLI" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_cloud_init_script_installs_gh_and_az --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 + + - name: "shell-script cloud-init installs essential tools" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_cloud_init_script_installs_essential_tools --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 + + - name: "shell-script cloud-init installs Rust" + agent: "cli-agent" + action: "execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_cloud_init_script_installs_rust --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 + + - name: "shell-script cloud-init installs .NET" + agent: "cli-agent" + action: 
"execute_command" + params: + command: "cd rust && RUST_MIN_STACK=8388608 cargo test -p azlin-azure -- test_cloud_init_script_installs_dotnet --nocapture 2>&1 | tail -5" + expect: + exit_code: 0 + stdout_contains: "ok" + timeout: 60000 From f4cb1958d3f48e6488caeca5ad02056f5af084f8 Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Fri, 27 Mar 2026 13:24:10 -0700 Subject: [PATCH 2/6] fix: parameterize username in cloud-init, use dynamic UID for tmux Quality audit findings: - default_dev_setup_commands() now takes username parameter instead of hardcoding 'azureuser' (HIGH: would fail for non-default usernames) - tmux socket dir uses dynamic UID via id -u instead of hardcoded 1000 (MEDIUM: would fail if user UID != 1000) - Added version verification step to shell-script cloud-init path (matches YAML path's existing verification) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/crates/azlin-azure/src/cloud_init.rs | 18 +++++++++--------- rust/crates/azlin-azure/src/vm.rs | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/rust/crates/azlin-azure/src/cloud_init.rs b/rust/crates/azlin-azure/src/cloud_init.rs index 7f8f7e18..ed68e14e 100644 --- a/rust/crates/azlin-azure/src/cloud_init.rs +++ b/rust/crates/azlin-azure/src/cloud_init.rs @@ -87,7 +87,7 @@ pub fn generate_cloud_init( /// /// These install toolchains that aren't available as apt packages, matching /// the full Python azlin provisioning (gh, az, node, claude, rust, go, .NET). 
-pub fn default_dev_setup_commands() -> Vec { +pub fn default_dev_setup_commands(username: &str) -> Vec { vec![ // Full system upgrade "apt update && apt full-upgrade -y && apt autoremove -y && apt autoclean -y".to_string(), @@ -105,23 +105,23 @@ pub fn default_dev_setup_commands() -> Vec { // Node.js 22 LTS (via NodeSource) "curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && apt install -y nodejs".to_string(), // npm user-local configuration - "mkdir -p /home/azureuser/.npm-packages && echo 'prefix=${HOME}/.npm-packages' > /home/azureuser/.npmrc && chown azureuser:azureuser /home/azureuser/.npmrc /home/azureuser/.npm-packages".to_string(), + format!("mkdir -p /home/{u}/.npm-packages && echo 'prefix=${{HOME}}/.npm-packages' > /home/{u}/.npmrc && chown {u}:{u} /home/{u}/.npmrc /home/{u}/.npm-packages", u = username), // Tmux configuration - "printf '[%%s] %%s\\n' \"$(hostname)\" \"tmux.conf\" && cat > /home/azureuser/.tmux.conf << 'TMUXEOF'\nset -g status-left-length 50\nset -g status-left \"#[fg=cyan][#h]#[fg=green] #S #[fg=yellow]| \"\nset -g status-right \"#[fg=cyan]%%Y-%%m-%%d %%H:%%M\"\nset -g status-interval 60\nset -g status-bg black\nset -g status-fg white\nTMUXEOF\nchown azureuser:azureuser /home/azureuser/.tmux.conf".to_string(), + format!("printf '[%%s] %%s\\n' \"$(hostname)\" \"tmux.conf\" && cat > /home/{u}/.tmux.conf << 'TMUXEOF'\nset -g status-left-length 50\nset -g status-left \"#[fg=cyan][#h]#[fg=green] #S #[fg=yellow]| \"\nset -g status-right \"#[fg=cyan]%%Y-%%m-%%d %%H:%%M\"\nset -g status-interval 60\nset -g status-bg black\nset -g status-fg white\nTMUXEOF\nchown {u}:{u} /home/{u}/.tmux.conf", u = username), // Fix tmux socket dir permissions (Ubuntu 25.10+) - "chmod 1777 /tmp && mkdir -p /tmp/tmux-1000 && chmod 700 /tmp/tmux-1000 && chown azureuser:azureuser /tmp/tmux-1000".to_string(), + format!("chmod 1777 /tmp && TMUX_UID=$(id -u {u}) && mkdir -p /tmp/tmux-$TMUX_UID && chmod 700 /tmp/tmux-$TMUX_UID && chown {u}:{u} 
/tmp/tmux-$TMUX_UID", u = username), // Claude Code AI Assistant - "su - azureuser -c 'curl -fsSL https://claude.ai/install.sh | bash' || echo 'WARNING: Claude Code installation failed'".to_string(), + format!("su - {u} -c 'curl -fsSL https://claude.ai/install.sh | bash' || echo 'WARNING: Claude Code installation failed'", u = username), // Rust - "su - azureuser -c 'curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y'".to_string(), + format!("su - {u} -c 'curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y'", u = username), // Go "wget -q https://go.dev/dl/go1.21.5.linux-amd64.tar.gz -O /tmp/go.tar.gz && tar -C /usr/local -xzf /tmp/go.tar.gz && rm /tmp/go.tar.gz".to_string(), // .NET 10 SDK "curl -sSL https://dot.net/v1/dotnet-install.sh -o /tmp/dotnet-install.sh && chmod +x /tmp/dotnet-install.sh && (/tmp/dotnet-install.sh --channel 10.0 --quality preview --install-dir /usr/share/dotnet || /tmp/dotnet-install.sh --channel 10.0 --install-dir /usr/share/dotnet || echo 'WARNING: .NET 10 SDK install failed') && ln -sf /usr/share/dotnet/dotnet /usr/local/bin/dotnet; rm -f /tmp/dotnet-install.sh".to_string(), // Docker post-install - "usermod -aG docker azureuser && systemctl enable docker && systemctl start docker".to_string(), + format!("usermod -aG docker {u} && systemctl enable docker && systemctl start docker", u = username), // bashrc additions (npm path, go path, cargo env, azlin alias) - "cat >> /home/azureuser/.bashrc << 'BASHEOF'\n\n# npm user-local configuration\nNPM_PACKAGES=\"${HOME}/.npm-packages\"\nPATH=\"$NPM_PACKAGES/bin:$PATH\"\nMANPATH=\"$NPM_PACKAGES/share/man:$(manpath 2>/dev/null || echo $MANPATH)\"\n\n# Go\nexport PATH=$PATH:/usr/local/go/bin\n\n# Cargo\nsource $HOME/.cargo/env 2>/dev/null\nBASHEOF".to_string(), + format!("cat >> /home/{u}/.bashrc << 'BASHEOF'\n\n# npm user-local 
configuration\nNPM_PACKAGES=\"${{HOME}}/.npm-packages\"\nPATH=\"$NPM_PACKAGES/bin:$PATH\"\nMANPATH=\"$NPM_PACKAGES/share/man:$(manpath 2>/dev/null || echo $MANPATH)\"\n\n# Go\nexport PATH=$PATH:/usr/local/go/bin\n\n# Cargo\nsource $HOME/.cargo/env 2>/dev/null\nBASHEOF", u = username), // Version verification "echo '[AZLIN] Provisioning complete' && which gh && gh --version && which az && az --version | head -2 && which node && node --version && which rustc && rustc --version && which dotnet && dotnet --version || true".to_string(), ] @@ -221,7 +221,7 @@ mod tests { #[test] fn test_default_dev_setup_commands() { - let cmds = default_dev_setup_commands(); + let cmds = default_dev_setup_commands("azureuser"); assert!( cmds.iter().any(|c| c.contains("rustup.rs")), "Missing Rust install command" diff --git a/rust/crates/azlin-azure/src/vm.rs b/rust/crates/azlin-azure/src/vm.rs index c79202a1..9d9092e8 100644 --- a/rust/crates/azlin-azure/src/vm.rs +++ b/rust/crates/azlin-azure/src/vm.rs @@ -723,9 +723,10 @@ chown {username}:{username} /home/{username}/.tmux.conf # Fix tmux socket dir permissions (Ubuntu 25.10+) chmod 1777 /tmp -mkdir -p /tmp/tmux-1000 -chmod 700 /tmp/tmux-1000 -chown {username}:{username} /tmp/tmux-1000 +TMUX_UID=$(id -u {username}) +mkdir -p /tmp/tmux-$TMUX_UID +chmod 700 /tmp/tmux-$TMUX_UID +chown {username}:{username} /tmp/tmux-$TMUX_UID # Claude Code AI Assistant su - {username} -c 'curl -fsSL https://claude.ai/install.sh | bash' || echo "WARNING: Claude Code install failed" @@ -762,6 +763,14 @@ export PATH=$PATH:/usr/local/go/bin source $HOME/.cargo/env 2>/dev/null BASHEOF +# Version verification +echo "[AZLIN] Verifying installed tools..." 
+which gh && gh --version || echo "WARNING: gh not found" +which az && az --version | head -2 || echo "WARNING: az not found" +which node && node --version || echo "WARNING: node not found" +su - {username} -c 'which rustc && rustc --version' || echo "WARNING: rustc not found" +which dotnet && dotnet --version || echo "WARNING: dotnet not found" + echo "cloud-init provisioning complete" "##, username = safe_username From 09372a39129b0ab0061c32eee75e330ba1b57056 Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Fri, 27 Mar 2026 13:29:05 -0700 Subject: [PATCH 3/6] fix: rustc verification as user, standardize apt-get upgrade Quality audit cycle 2 findings: - rustc --version ran as root but Rust is installed in user homedir (HIGH: verification always failed even when install succeeded) - Standardize on apt-get upgrade instead of full-upgrade to match shell-script path and avoid unexpected package removal (MEDIUM) - Remove unnecessary ripgrep reinstall (only needed with full-upgrade) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/crates/azlin-azure/src/cloud_init.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rust/crates/azlin-azure/src/cloud_init.rs b/rust/crates/azlin-azure/src/cloud_init.rs index ed68e14e..36b619d7 100644 --- a/rust/crates/azlin-azure/src/cloud_init.rs +++ b/rust/crates/azlin-azure/src/cloud_init.rs @@ -89,10 +89,8 @@ pub fn generate_cloud_init( /// the full Python azlin provisioning (gh, az, node, claude, rust, go, .NET). 
pub fn default_dev_setup_commands(username: &str) -> Vec { vec![ - // Full system upgrade - "apt update && apt full-upgrade -y && apt autoremove -y && apt autoclean -y".to_string(), - // Re-install ripgrep after full-upgrade (autoremove may drop it) - "apt install -y ripgrep".to_string(), + // Full system upgrade (apt-get upgrade is safer than full-upgrade: never removes packages) + "apt-get update && apt-get upgrade -y && apt-get autoremove -y && apt-get autoclean -y".to_string(), // Python 3.13+ — use deadsnakes PPA only on LTS that needs it "if python3 --version 2>&1 | grep -qE '3\\.1[3-9]|3\\.[2-9][0-9]'; then echo 'Python 3.13+ available'; else add-apt-repository -y ppa:deadsnakes/ppa && apt update && apt install -y python3.13 python3.13-venv python3.13-dev && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 1 && update-alternatives --set python3 /usr/bin/python3.13; fi".to_string(), "curl -sS https://bootstrap.pypa.io/get-pip.py | python3".to_string(), @@ -122,8 +120,8 @@ pub fn default_dev_setup_commands(username: &str) -> Vec { format!("usermod -aG docker {u} && systemctl enable docker && systemctl start docker", u = username), // bashrc additions (npm path, go path, cargo env, azlin alias) format!("cat >> /home/{u}/.bashrc << 'BASHEOF'\n\n# npm user-local configuration\nNPM_PACKAGES=\"${{HOME}}/.npm-packages\"\nPATH=\"$NPM_PACKAGES/bin:$PATH\"\nMANPATH=\"$NPM_PACKAGES/share/man:$(manpath 2>/dev/null || echo $MANPATH)\"\n\n# Go\nexport PATH=$PATH:/usr/local/go/bin\n\n# Cargo\nsource $HOME/.cargo/env 2>/dev/null\nBASHEOF", u = username), - // Version verification - "echo '[AZLIN] Provisioning complete' && which gh && gh --version && which az && az --version | head -2 && which node && node --version && which rustc && rustc --version && which dotnet && dotnet --version || true".to_string(), + // Version verification (rustc is in user homedir, must check as user) + format!("echo '[AZLIN] Provisioning complete' && which gh && gh 
--version && which az && az --version | head -2 && which node && node --version && su - {u} -c 'which rustc && rustc --version' && which dotnet && dotnet --version || true", u = username), ] } From 83a36d6faa3cebf4c81f8941ed4d4034f82d9e45 Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Fri, 27 Mar 2026 13:32:17 -0700 Subject: [PATCH 4/6] fix: download GPG key directly to keyring dir, avoid /tmp Quality audit cycle 3: eliminate predictable /tmp path for GitHub CLI GPG keyring download. Download directly to /etc/apt/keyrings/ in both cloud-init code paths. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- rust/crates/azlin-azure/src/cloud_init.rs | 2 +- rust/crates/azlin-azure/src/vm.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rust/crates/azlin-azure/src/cloud_init.rs b/rust/crates/azlin-azure/src/cloud_init.rs index 36b619d7..0509e341 100644 --- a/rust/crates/azlin-azure/src/cloud_init.rs +++ b/rust/crates/azlin-azure/src/cloud_init.rs @@ -95,7 +95,7 @@ pub fn default_dev_setup_commands(username: &str) -> Vec { "if python3 --version 2>&1 | grep -qE '3\\.1[3-9]|3\\.[2-9][0-9]'; then echo 'Python 3.13+ available'; else add-apt-repository -y ppa:deadsnakes/ppa && apt update && apt install -y python3.13 python3.13-venv python3.13-dev && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 1 && update-alternatives --set python3 /usr/bin/python3.13; fi".to_string(), "curl -sS https://bootstrap.pypa.io/get-pip.py | python3".to_string(), // GitHub CLI - "mkdir -p -m 755 /etc/apt/keyrings && wget -nv -O /tmp/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg && tee /etc/apt/keyrings/githubcli-archive-keyring.gpg < /tmp/githubcli-archive-keyring.gpg > /dev/null && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg && mkdir -p -m 755 /etc/apt/sources.list.d && echo \"deb [arch=$(dpkg --print-architecture) 
signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main\" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && apt update && apt install -y gh".to_string(), + "mkdir -p -m 755 /etc/apt/keyrings && wget -nv -O /etc/apt/keyrings/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg && mkdir -p -m 755 /etc/apt/sources.list.d && echo \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main\" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && apt update && apt install -y gh".to_string(), // Azure CLI "curl -sL https://aka.ms/InstallAzureCLIDeb | bash".to_string(), // astral-uv (uv package manager) diff --git a/rust/crates/azlin-azure/src/vm.rs b/rust/crates/azlin-azure/src/vm.rs index 9d9092e8..b2030a72 100644 --- a/rust/crates/azlin-azure/src/vm.rs +++ b/rust/crates/azlin-azure/src/vm.rs @@ -691,8 +691,7 @@ curl -sS https://bootstrap.pypa.io/get-pip.py | python3 # GitHub CLI mkdir -p -m 755 /etc/apt/keyrings -wget -nv -O /tmp/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg -tee /etc/apt/keyrings/githubcli-archive-keyring.gpg < /tmp/githubcli-archive-keyring.gpg > /dev/null +wget -nv -O /etc/apt/keyrings/githubcli-archive-keyring.gpg https://cli.github.com/packages/githubcli-archive-keyring.gpg chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null apt-get update && apt-get install -y gh From 1820c34b09de47ecb7c952d39c5b71f6e50a6ceb Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Wed, 1 Apr 2026 19:06:26 -0700 Subject: [PATCH 5/6] fix: wait for cloud-init completion before 
connecting to new VMs (#929) After VM creation, azlin now waits for cloud-init provisioning to finish before forwarding credentials or auto-connecting the user. Previously, the code only waited for SSH to become reachable (~2 min), but cloud-init takes 5-10 min to install all tools (gh, az, node, rustc, etc.). Changes: - Increase SSH wait timeout from 120s to 300s - Add wait_for_cloud_init() that polls cloud-init status over SSH every 10s until done/disabled/error (600s timeout, best-effort) - Add ssh_output() helper that captures remote command stdout - Add resolve_ssh_key() + base_ssh_args() to inject identity key (~/.ssh/azlin_key) into all SSH/SCP operations - Handle cloud-init "disabled" state as terminal (no 600s hang) - Add ConnectTimeout=10 to ssh_output to prevent hung connections - Update credential-forwarding docs with cloud-init wait behavior Fixes #929 Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/reference/credential-forwarding.md | 28 +++- rust/crates/azlin/src/auth_forward.rs | 169 ++++++++++++++++++++++-- 2 files changed, 182 insertions(+), 15 deletions(-) diff --git a/docs/reference/credential-forwarding.md b/docs/reference/credential-forwarding.md index be731ba8..3caa22e4 100644 --- a/docs/reference/credential-forwarding.md +++ b/docs/reference/credential-forwarding.md @@ -18,8 +18,8 @@ Before forwarding, azlin waits for the VM's SSH service to become reachable: | Parameter | Value | |-----------|-------| -| Timeout | Configurable (default: 120 seconds) | -| Poll interval | Configurable (default: 5 seconds) | +| Timeout | 300 seconds | +| Poll interval | 5 seconds | | TCP connect timeout | 3 seconds per attempt | | Verification | TCP connect + SSH auth handshake | @@ -30,6 +30,30 @@ The check performs two steps per attempt: Both must succeed before forwarding begins. If the timeout elapses, forwarding is skipped with a warning. 
+## Cloud-Init Completion Check + +After SSH is reachable, azlin waits for cloud-init provisioning to complete before forwarding credentials or connecting the user. This ensures all tools (gh, az, node, rustc, go, dotnet, claude) are installed. + +| Parameter | Value | +|-----------|-------| +| Timeout | 600 seconds | +| Poll interval | 10 seconds | +| Remote command | `cloud-init status` | +| Terminal states | `status: done`, `status: disabled`, `status: error` | + +Behavior by cloud-init state: + +| State | Action | +|-------|--------| +| `status: done` | Print success message, proceed | +| `status: disabled` | Print info message, proceed (cloud-init not active) | +| `status: error` | Print warning, proceed (best-effort) | +| `status: running` | Continue polling | +| Command not found | Treat as done (non-cloud-init VM) | +| Timeout (600s) | Print warning, proceed anyway | + +Cloud-init issues never block VM creation or user connection. All failure paths produce warnings and continue. + ## Credential Detection Each credential source is detected independently. Only sources that exist locally are offered for forwarding. diff --git a/rust/crates/azlin/src/auth_forward.rs b/rust/crates/azlin/src/auth_forward.rs index afd10f4d..bb3476e0 100644 --- a/rust/crates/azlin/src/auth_forward.rs +++ b/rust/crates/azlin/src/auth_forward.rs @@ -29,7 +29,8 @@ pub fn forward_auth_credentials( // Wait for SSH to be ready before attempting any forwarding let ssh_port = bastion_port.unwrap_or(22); let ssh_host = if bastion_port.is_some() { "127.0.0.1" } else { ip }; - wait_for_ssh(ssh_host, ssh_port, user, Duration::from_secs(120))?; + wait_for_ssh(ssh_host, ssh_port, user, Duration::from_secs(300))?; + wait_for_cloud_init(ip, user, bastion_port); let sources = detect_credentials(); if sources.is_empty() { @@ -99,18 +100,70 @@ fn wait_for_ssh(host: &str, port: u16, user: &str, timeout: Duration) -> Result< } } +/// Wait for cloud-init to finish provisioning. 
Best-effort: issues warn but +/// never block VM usage. Called after SSH is confirmed ready. +fn wait_for_cloud_init(ip: &str, user: &str, bastion_port: Option) { + let timeout = Duration::from_secs(600); + let interval = Duration::from_secs(10); + let start = Instant::now(); + + println!("Waiting for cloud-init to finish provisioning..."); + + loop { + if start.elapsed() >= timeout { + eprintln!( + "Warning: cloud-init did not complete within {}s. \ + Continuing — some tools may not be installed yet.", + timeout.as_secs() + ); + return; + } + + match ssh_output( + ip, + user, + bastion_port, + "cloud-init status 2>/dev/null || echo 'status: done'", + ) { + Ok(out) => { + if out.contains("status: done") { + println!("Cloud-init provisioning complete."); + return; + } + if out.contains("status: disabled") { + println!("Cloud-init is disabled on this VM. Proceeding."); + return; + } + if out.contains("status: error") { + eprintln!( + "Warning: cloud-init finished with errors. \ + Some tools may not be installed." + ); + return; + } + // Still running — continue polling + } + Err(_) => { + // SSH hiccup during cloud-init — keep trying + } + } + + std::thread::sleep(interval); + } +} + /// Test SSH authentication by running `exit 0` on the remote. 
fn test_ssh_auth(host: &str, port: u16, user: &str) -> bool { + let mut args = base_ssh_args(); + args.extend([ + "-o".to_string(), "ConnectTimeout=5".to_string(), + "-o".to_string(), "LogLevel=ERROR".to_string(), + "-p".to_string(), port.to_string(), + format!("{}@{}", user, host), + "exit 0".to_string(), + ]); let status = std::process::Command::new("ssh") - .args([ - "-o", "StrictHostKeyChecking=accept-new", - "-o", "BatchMode=yes", - "-o", "ConnectTimeout=5", - "-o", "LogLevel=ERROR", - "-p", &port.to_string(), - &format!("{}@{}", user, host), - "exit 0", - ]) + .args(&args) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status(); @@ -288,15 +341,39 @@ fn forward_az(ip: &str, user: &str, bastion_port: Option) -> Result<()> { // SSH/SCP helpers // --------------------------------------------------------------------------- -/// Run a command on the remote via SSH. Returns Ok(()) on success. -fn ssh_run(ip: &str, user: &str, bastion_port: Option, command: &str) -> Result<()> { - let (ssh_host, port_args) = ssh_target(ip, user, bastion_port); +/// Resolve the preferred SSH private key for azlin VMs. +/// Checks for azlin_key, id_ed25519_azlin, id_ed25519, id_rsa in ~/.ssh/. +fn resolve_ssh_key() -> Option { + let home = dirs::home_dir()?; + let ssh_dir = home.join(".ssh"); + for name in &["azlin_key", "id_ed25519_azlin", "id_ed25519", "id_rsa"] { + let path = ssh_dir.join(name); + if path.exists() { + return Some(path); + } + } + None +} + +/// Build common SSH args: StrictHostKeyChecking, BatchMode, identity key. +fn base_ssh_args() -> Vec { let mut args = vec![ "-o".to_string(), "StrictHostKeyChecking=accept-new".to_string(), "-o".to_string(), "BatchMode=yes".to_string(), ]; + if let Some(key) = resolve_ssh_key() { + args.push("-i".to_string()); + args.push(key.to_string_lossy().to_string()); + } + args +} + +/// Run a command on the remote via SSH. Returns Ok(()) on success. 
+fn ssh_run(ip: &str, user: &str, bastion_port: Option, command: &str) -> Result<()> { + let (ssh_host, port_args) = ssh_target(ip, user, bastion_port); + let mut args = base_ssh_args(); args.extend(port_args); args.push(ssh_host); args.push(command.to_string()); @@ -313,6 +390,27 @@ fn ssh_run(ip: &str, user: &str, bastion_port: Option, command: &str) -> Re Ok(()) } +/// Run a command on the remote via SSH and capture its stdout. +fn ssh_output(ip: &str, user: &str, bastion_port: Option, command: &str) -> Result { + let (ssh_host, port_args) = ssh_target(ip, user, bastion_port); + let mut args = base_ssh_args(); + args.extend(["-o".to_string(), "ConnectTimeout=10".to_string()]); + args.extend(port_args); + args.push(ssh_host); + args.push(command.to_string()); + + let output = std::process::Command::new("ssh") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output()?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!("ssh command failed: {}", stderr.trim()); + } + Ok(String::from_utf8_lossy(&output.stdout).to_string()) +} + /// SCP a single file to the remote. 
fn scp_file( local: &std::path::Path, @@ -326,6 +424,10 @@ fn scp_file( "-o".to_string(), "StrictHostKeyChecking=accept-new".to_string(), ]; + if let Some(key) = resolve_ssh_key() { + args.push("-i".to_string()); + args.push(key.to_string_lossy().to_string()); + } args.extend(scp_port_args); args.push(local.to_string_lossy().to_string()); args.push(scp_dest); @@ -351,6 +453,10 @@ fn scp_recursive( "-o".to_string(), "StrictHostKeyChecking=accept-new".to_string(), ]; + if let Some(key) = resolve_ssh_key() { + args.push("-i".to_string()); + args.push(key.to_string_lossy().to_string()); + } args.extend(scp_port_args); args.push(local_dir.to_string_lossy().to_string()); args.push(scp_dest); @@ -874,4 +980,41 @@ mod tests { } assert_eq!(tags["azlin-session"], vm_name); } + + // ======================================================================= + // cloud-init status parsing + // ======================================================================= + + #[test] + fn test_cloud_init_status_done_detected() { + let output = "status: done\n"; + assert!(output.contains("status: done")); + } + + #[test] + fn test_cloud_init_status_error_detected() { + let output = "status: error\n"; + assert!(output.contains("status: error")); + } + + #[test] + fn test_cloud_init_status_running_is_not_terminal() { + let output = "status: running\n"; + assert!(!output.contains("status: done")); + assert!(!output.contains("status: error")); + } + + #[test] + fn test_cloud_init_fallback_output_is_done() { + // When cloud-init is not installed, fallback echoes "status: done" + let output = "status: done"; + assert!(output.contains("status: done")); + } + + #[test] + fn test_cloud_init_status_disabled_is_terminal() { + // Disabled cloud-init should be treated as done, not poll for 600s + let output = "status: disabled\n"; + assert!(output.contains("status: disabled")); + } } From 1707419ec151fdd9eb31106a4d909bcf9d288e54 Mon Sep 17 00:00:00 2001 From: Ryan Sweet Date: Wed, 1 Apr 2026 19:41:33 
-0700 Subject: [PATCH 6/6] fix: cloud-init script fails due to Python alternatives and pip errors The cloud-init script had three issues; the first two caused set -euo pipefail to abort before installing gh, az, node, and other tools: 1. update-alternatives --set python3 python3.13 breaks apt tools because apt_pkg is built for system Python 3.12, causing apt-get update to fail with "No module named 'apt_pkg'" - fixed by installing python3.13 without changing the system python3 default 2. get-pip.py fails on Ubuntu 24.04 because pip 24.0 is already installed as a Debian package - removed the pip reinstall entirely 3. Em dash (U+2014) in shell comment caused Azure CLI latin-1 encoding error when passing --custom-data - replaced with ASCII hyphen Fixes #929 Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/crates/azlin-azure/src/vm.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rust/crates/azlin-azure/src/vm.rs b/rust/crates/azlin-azure/src/vm.rs index b2030a72..6aa453d5 100644 --- a/rust/crates/azlin-azure/src/vm.rs +++ b/rust/crates/azlin-azure/src/vm.rs @@ -679,15 +679,13 @@ systemctl enable docker systemctl start docker usermod -aG docker {username} -# Python 3.13+ — use deadsnakes PPA only on LTS that needs it +# Python 3.13+ - install via deadsnakes but do NOT change system python3 +# (changing system python3 breaks apt tools that depend on apt_pkg) if python3 --version 2>&1 | grep -qE '3\.1[3-9]|3\.[2-9][0-9]'; then echo "Python 3.13+ already available" else - add-apt-repository -y ppa:deadsnakes/ppa && apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev - update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 1 - update-alternatives --set python3 /usr/bin/python3.13 + add-apt-repository -y ppa:deadsnakes/ppa && apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev || echo "WARNING: Python 3.13 install failed" fi -curl -sS https://bootstrap.pypa.io/get-pip.py | python3
# GitHub CLI mkdir -p -m 755 /etc/apt/keyrings