diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5e842b9 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install -e '.[dev]' + + - name: Lint (ruff) + run: ruff check edcloud/ tests/ + + - name: Type check (mypy) + run: mypy edcloud/ + + - name: Tests (pytest) + run: pytest --tb=short -q diff --git a/.secrets.baseline b/.secrets.baseline index 2fa2702..2ea7c00 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -133,23 +133,23 @@ "filename": "tests/test_ec2.py", "hashed_secret": "059a14a1755ad2889811498f0e9011790ef01488", "is_verified": false, - "line_number": 153 + "line_number": 159 }, { "type": "Secret Keyword", "filename": "tests/test_ec2.py", "hashed_secret": "b8d67157d88dc68b2822821147842da5ee55cc05", "is_verified": false, - "line_number": 154 + "line_number": 160 }, { "type": "Secret Keyword", "filename": "tests/test_ec2.py", "hashed_secret": "cc9c39d66e511682ab3fa0bc3cd2c72e25cc30e6", "is_verified": false, - "line_number": 155 + "line_number": 161 } ] }, - "generated_at": "2026-02-16T17:37:43Z" + "generated_at": "2026-03-25T22:33:11Z" } diff --git a/AGENTS.md b/AGENTS.md index c6aa82c..931ad56 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -59,7 +59,10 @@ grep -RInE "TODO|FIXME|TBD|\[ \]" README.md CHANGELOG.md RUNBOOK.md AGENTS.md do ### Git discipline (LLM + operator) -- Never commit directly to `main`. +- Default: do not commit directly to `main`. +- Exception (personal repos only): direct commits/pushes to `main` are allowed for + small, low-risk changes when the user explicitly requests it in the current task. + If there is any uncertainty or elevated risk, use task branch + PR. - Create one task branch per change: `agent/-YYYYMMDD`. - Local WIP commits are allowed, but do not push noisy history (`wip`, `fix typo`, machine-specific checkpoints). @@ -111,7 +114,7 @@ Run when requested: ```bash pytest -q ruff check . -mypy edcloud tests +mypy edcloud/ pre-commit run --all-files ``` diff --git a/CHANGELOG.md b/CHANGELOG.md index f1b3a8f..0680d11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,20 @@ semantic version tags. ### Recently Completed +- Code quality pass for demo readiness: + - Fixed all mypy strict errors (8 across 4 files) and ruff lint violations; tooling now passes clean. + - Added CI workflow (`.github/workflows/ci.yml`) running pytest, ruff, and mypy on push/PR. + - Removed dead code: unused `auto_snapshot_before_destroy`, vestigial `destroy(force=)` parameter, uncalled `_ec2_resource` wrapper, test-only `tailscale.ssh_command`. + - Collapsed redundant wrapper functions in `ec2.py`, `resource_audit.py`, and `cli.py` that added indirection without logic. + - Made `cleanup.py` UI-agnostic by accepting I/O callbacks instead of importing `click` directly, consistent with `lifecycle.py`. + - Fixed N+1 `describe_volumes` API calls in `ec2.status()` (now a single batched call). + - Single-sourced package version via `importlib.metadata` instead of duplicating in `__init__.py` and `pyproject.toml`. +- Hardened dotfiles bootstrap path in cloud-init: + - Added `DOTFILES_REPO` / `DOTFILES_BRANCH` template variables rendered from `InstanceConfig`. + - Added CLI options/env support on `provision` and `reprovision` (`--dotfiles-repo`, `--dotfiles-branch`, `EDCLOUD_DOTFILES_REPO`, `EDCLOUD_DOTFILES_BRANCH`). + - Implemented repo/branch input validation in `edcloud.ec2` to reduce template-injection risk. + - Updated cloud-init logic to resolve dotfiles source with fallback order (`gh` user URL, then persisted local origin for `auto`) and continue bootstrap on non-fatal sync failures. + - Updated tests (`tests/test_ec2.py`, `tests/test_cli.py`) and docs (`README.md`, `RUNBOOK.md`, `docs/ARCHITECTURE.md`) to reflect new behavior. - Wired Dropbox FUSE mount via rclone: rclone config stored as SecureString at `/edcloud/rclone_config` in SSM; cloud-init fetches it on every rebuild and enables `rclone-dropbox.service` (user systemd, `~/Dropbox` mount); `RCLONE_CONFIG_SSM_PARAMETER` added to `config.py`. - Added oldspeak MCP bootstrap integration while keeping app code in a separate repo: cloud-init now best-effort syncs `~/src/oldspeak` (via `gh` auth path), bootstraps a local venv/install + spaCy model, and installs local wrappers (`~/.local/bin/oldspeak-mcp-stdio`, `~/.local/bin/oldspeak-mcp-http`) for on-host Cline/Claude Code usage. Docs updated in README, RUNBOOK, and ARCHITECTURE. diff --git a/README.md b/README.md index 934ec27..88acd4d 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,6 @@ Console. ## Public collaboration expectations -- Open issues for bugs, reliability gaps, or unclear operator docs. -- Keep changes scoped and reviewable; prefer focused PRs over broad rewrites. -- Preserve the core guardrails: Tailscale-only access, managed-tag discovery, - and SSM-backed secret handling. -- For lifecycle-risking changes, include validation notes (for example: - `pre-commit run --all-files`, `pytest -q`). - **Core design:** - Tailscale-only access (zero inbound rules) @@ -160,12 +153,18 @@ LazyVim compatibility: **Baseline:** Docker, Portainer, Node.js, Python, and dev tooling are defined in `cloud-init/user-data.yaml`. -Bootstrap repo sync (when `gh` auth is available on the instance): - -- `https://github.com//dotfiles.git` → `~/src/dotfiles` -- `https://github.com//bin.git` → `~/src/bin` -- `https://github.com//llm-config.git` → `~/src/llm-config` -- `https://github.com//oldspeak.git` → `~/src/oldspeak` +Bootstrap repo sync: + +- Dotfiles are always attempted first via cloud-init using configurable inputs: + - `--dotfiles-repo` / `EDCLOUD_DOTFILES_REPO` (`auto` default) + - `--dotfiles-branch` / `EDCLOUD_DOTFILES_BRANCH` (`main` default) +- `--dotfiles-repo auto` resolution order: + 1. `https://github.com//dotfiles.git` when `gh auth` is available + 2. existing `~/src/dotfiles` origin URL (if present on persisted home) +- Additional non-secret repos still sync from GitHub user namespace when `gh` auth is available: + - `https://github.com//bin.git` → `~/src/bin` + - `https://github.com//llm-config.git` → `~/src/llm-config` + - `https://github.com//oldspeak.git` → `~/src/oldspeak` For local MCP usage on edcloud, cloud-init also installs best-effort wrappers: diff --git a/RUNBOOK.md b/RUNBOOK.md index e1a137f..5839a9e 100644 --- a/RUNBOOK.md +++ b/RUNBOOK.md @@ -17,7 +17,7 @@ This file is the stable operator procedure guide. Open items: - [x] Add a safe rebuild workflow (`snapshot -> reprovision -> verify`) as a single documented operator path. (`edc reprovision` now prints a post-run reminder to run `edc verify`.) -- [ ] Improve automatic repo loading: currently dotfiles/bin/llm-config/oldspeak cloning depends on gh auth during cloud-init; consider making repo list configurable and/or adding explicit clone step to provision workflow (e.g., `edc provision --sync-repos`). +- [x] Improve automatic repo loading: dotfiles bootstrap now supports explicit CLI/env configuration (`--dotfiles-repo`, `--dotfiles-branch`) with `auto` fallback to gh user or existing local origin; non-dotfiles repo sync remains gh-auth-driven. - [ ] Evaluate a secure operator login workflow that starts from one memorized string without weakening Tailscale/AWS MFA controls. - [ ] Centralize default SSH username in repo config (for example `edcloud/config.py`) and have `edc ssh`/`edc verify` read that value. - [ ] Keep snapshot spend under soft cap `$2/month`; adjust DLM retention (`edc backup-policy apply --daily-keep N --weekly-keep M --monthly-keep K`) if exceeded. @@ -631,8 +631,13 @@ Operating policy: Non-secret repo sync baseline: -- If `gh` is authenticated during cloud-init, bootstrap attempts to pull/update: - - `https://github.com//dotfiles.git` → `~/src/dotfiles` +- Dotfiles sync is configurable at provision/reprovision time: + - `--dotfiles-repo` / `EDCLOUD_DOTFILES_REPO` (`auto` default) + - `--dotfiles-branch` / `EDCLOUD_DOTFILES_BRANCH` (`main` default) +- For `--dotfiles-repo auto`, cloud-init resolves in order: + 1. `https://github.com//dotfiles.git` when `gh` auth is available + 2. existing `~/src/dotfiles` origin URL from persistent home state +- Additional repos still require `gh` auth and are synced from the authenticated user namespace: - `https://github.com//bin.git` → `~/src/bin` - `https://github.com//llm-config.git` → `~/src/llm-config` - `https://github.com//oldspeak.git` → `~/src/oldspeak` diff --git a/cloud-init/user-data.yaml b/cloud-init/user-data.yaml index 6febb27..7ef5655 100644 --- a/cloud-init/user-data.yaml +++ b/cloud-init/user-data.yaml @@ -1,6 +1,6 @@ #cloud-config # edcloud instance bootstrap -# Variables: ${TAILSCALE_AUTH_KEY_SSM_PARAMETER}, ${TAILSCALE_HOSTNAME}, ${AWS_REGION} +# Variables: ${TAILSCALE_AUTH_KEY_SSM_PARAMETER}, ${TAILSCALE_HOSTNAME}, ${AWS_REGION}, ${DOTFILES_REPO}, ${DOTFILES_BRANCH} package_update: true package_upgrade: true @@ -459,6 +459,15 @@ runcmd: BREWENV chmod 0644 /etc/profile.d/edcloud-brew.sh + # Install default Homebrew packages + runuser -u ubuntu -- bash -lc ' + set -euo pipefail + if [ -x /home/linuxbrew/.linuxbrew/bin/brew ]; then + eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)" + brew install lazygit || true + fi + ' + runuser -u ubuntu -- bash -lc ' set -euo pipefail export HOME="${HOME:-/home/ubuntu}" @@ -544,39 +553,80 @@ runcmd: runuser -u ubuntu -- bash -lc ' set -euo pipefail - if ! command -v gh &>/dev/null; then - exit 0 - fi + mkdir -p "$HOME/src" "$HOME/.local/bin" - if ! gh auth status &>/dev/null; then - echo "ℹ️ gh not authenticated; skipping dotfiles/bin/llm-config sync" - exit 0 - fi + DOTFILES_REPO_INPUT="${DOTFILES_REPO}" + DOTFILES_BRANCH_INPUT="${DOTFILES_BRANCH}" - GH_USER=$(gh api user --jq .login 2>/dev/null || true) - if [ -z "$GH_USER" ]; then - echo "ℹ️ could not resolve GitHub username; skipping repo sync" - exit 0 + GH_USER="" + if command -v gh &>/dev/null && gh auth status &>/dev/null; then + GH_USER=$(gh api user --jq .login 2>/dev/null || true) fi - mkdir -p "$HOME/src" "$HOME/.local/bin" + sync_git_repo() { + local repo_url="$1" + local target="$2" + local branch="$3" - sync_repo() { - local name="$1" - local repo_url="https://github.com/${GH_USER}/${name}.git" - local target="$HOME/src/${name}" + if [ -z "$repo_url" ]; then + return 1 + fi if [ -d "$target/.git" ]; then - git -C "$target" pull --ff-only || true + git -C "$target" remote set-url origin "$repo_url" || true + if git -C "$target" fetch --depth=1 origin "$branch"; then + git -C "$target" checkout -B "$branch" "origin/$branch" || true + else + git -C "$target" pull --ff-only || true + fi else - git clone "$repo_url" "$target" || true + if ! git clone --depth=1 --branch "$branch" "$repo_url" "$target"; then + git clone "$repo_url" "$target" || return 1 + git -C "$target" checkout "$branch" || true + fi fi } - sync_repo dotfiles - sync_repo bin - sync_repo llm-config - sync_repo oldspeak + sync_named_repo_for_gh_user() { + local name="$1" + local target="$HOME/src/$name" + + if [ -z "$GH_USER" ]; then + return 0 + fi + + local repo_url="https://github.com/${GH_USER}/${name}.git" + sync_git_repo "$repo_url" "$target" "main" || true + } + + DOTFILES_REPO_URL="" + if [ "$DOTFILES_REPO_INPUT" = "auto" ]; then + if [ -n "$GH_USER" ]; then + DOTFILES_REPO_URL="https://github.com/${GH_USER}/dotfiles.git" + elif [ -d "$HOME/src/dotfiles/.git" ]; then + DOTFILES_REPO_URL=$(git -C "$HOME/src/dotfiles" remote get-url origin 2>/dev/null || true) + fi + else + DOTFILES_REPO_URL="$DOTFILES_REPO_INPUT" + fi + + if [ -n "$DOTFILES_REPO_URL" ]; then + if sync_git_repo "$DOTFILES_REPO_URL" "$HOME/src/dotfiles" "$DOTFILES_BRANCH_INPUT"; then + echo "✅ dotfiles synced from ${DOTFILES_REPO_URL} (${DOTFILES_BRANCH_INPUT})" + else + echo "⚠️ dotfiles sync failed for ${DOTFILES_REPO_URL}; continuing bootstrap" + fi + else + echo "ℹ️ dotfiles repo not resolved (DOTFILES_REPO=auto without gh auth or existing clone); skipping dotfiles sync" + fi + + if [ -n "$GH_USER" ]; then + sync_named_repo_for_gh_user bin + sync_named_repo_for_gh_user llm-config + sync_named_repo_for_gh_user oldspeak + else + echo "ℹ️ gh not authenticated; skipping bin/llm-config/oldspeak sync" + fi # Link bashrc from dotfiles (includes git-prompt PS1) if [ -x "$HOME/src/dotfiles/install.sh" ]; then @@ -669,6 +719,15 @@ HTTP_EOF - systemctl daemon-reload - systemctl enable --now edcloud-idle-shutdown.timer + # --- browsh (text-based browser, requires Firefox) --- + - | + BROWSH_VERSION="1.8.2" + BROWSH_DEB="/tmp/browsh_${BROWSH_VERSION}_linux_amd64.deb" + curl -fsSL "https://github.com/browsh-org/browsh/releases/download/v${BROWSH_VERSION}/browsh_${BROWSH_VERSION}_linux_amd64.deb" \ + -o "$BROWSH_DEB" + dpkg -i "$BROWSH_DEB" + rm -f "$BROWSH_DEB" + # --- Signal completion --- - echo "edcloud bootstrap complete $(date -Iseconds)" > /tmp/edcloud-ready diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index b63fdf0..6bf0c63 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -9,10 +9,10 @@ edcloud/ ├── verify_catalog.py # Declarative `edc verify` check catalog ├── resource_queries.py # Shared managed-resource query/filter helpers ├── ec2.py # EC2 lifecycle core (provision/start/stop/status/destroy/resize) -├── snapshot.py # Snapshot create/list/prune + auto pre-destroy snapshots +├── snapshot.py # Snapshot create/list/prune ├── backup_policy.py # AWS DLM backup policy management -├── cleanup.py # Tailscale + orphaned volume cleanup workflow -├── tailscale.py # Tailscale discovery/conflict/SSH helpers +├── cleanup.py # Tailscale + orphaned volume cleanup (UI-agnostic) +├── tailscale.py # Tailscale discovery/conflict helpers ├── iam.py # IAM role/profile setup + teardown ├── resource_audit.py # Drift + cost audit ├── aws_clients.py # Shared boto3 session/client factories @@ -28,6 +28,7 @@ edcloud/ - **Declarative checks:** verification checks live in `verify_catalog.py`, not inline command code. - **Shared query primitives:** managed-resource filter/query composition lives in `resource_queries.py`. - **Tag-based source of truth:** no local state file; AWS tags define ownership and discovery. +- **UI-agnostic library modules:** only `cli.py` depends on `click`. Library modules accept I/O callbacks when they need user interaction. ## Architecture decisions (ADR summary) @@ -37,7 +38,7 @@ edcloud/ - **Durable state volume + disposable root:** host runtime is replaceable; durable data lives under `/opt/edcloud/state`. - **CLI-managed snapshot queue:** a single flat pool capped at 3 snapshots, enforced by the CLI. Every snapshot trigger runs `prune(3) → snapshot → prune(3)` so drift self-heals within one cycle. Triggers: `edc up` (on-start, fire-and-forget), `edc provision`/`edc reprovision`/`edc destroy` (blocking, pre-destructive-op). DLM (`backup-policy`) remains available but is not wired automatically. - **SSM-backed runtime secrets:** secrets stay out of git and host bootstrap payloads. The instance IAM role grants `ssm:GetParameter` on `/edcloud/*`. Three parameters are consumed automatically by cloud-init: `tailscale_auth_key` (required), `github_token` (optional, authenticates `gh`), and `rclone_config` (optional, writes rclone config and enables the Dropbox FUSE mount). -- **Separate app/infrastructure repos:** application MCP code (for example `oldspeak`) remains in its own repository (`~/src/oldspeak` on-host). edcloud bootstraps checkout/update and local wrappers (`oldspeak-mcp-stdio`, `oldspeak-mcp-http`) without vendoring app code into this infra repo. +- **Separate app/infrastructure repos:** application MCP code (for example `oldspeak`) remains in its own repository (`~/src/oldspeak` on-host). edcloud bootstraps checkout/update and local wrappers (`oldspeak-mcp-stdio`, `oldspeak-mcp-http`) without vendoring app code into this infra repo. Dotfiles bootstrap is configurable via `InstanceConfig.dotfiles_repo` / `dotfiles_branch` and rendered into cloud-init at provision time. - **Cloud-init as baseline contract:** reproducible host/tooling baseline is codified in `cloud-init/user-data.yaml`. - **CLI-first operations model:** commands must remain safe/repeatable from lightweight ARM/Linux operator nodes. diff --git a/edcloud/__init__.py b/edcloud/__init__.py index fbf22df..ff149cf 100644 --- a/edcloud/__init__.py +++ b/edcloud/__init__.py @@ -1,3 +1,5 @@ """edcloud — Personal cloud lab CLI.""" -__version__ = "0.1.0" +from importlib.metadata import version + +__version__ = version("edcloud") diff --git a/edcloud/aws_clients.py b/edcloud/aws_clients.py index b9df2e5..84b252e 100644 --- a/edcloud/aws_clients.py +++ b/edcloud/aws_clients.py @@ -23,12 +23,12 @@ def aws_region() -> str | None: def aws_client(service_name: str) -> Any: """Return a boto3 client for ``service_name``.""" - return aws_session().client(service_name) + return aws_session().client(service_name) # type: ignore[call-overload] def aws_resource(service_name: str) -> Any: """Return a boto3 resource for ``service_name``.""" - return aws_session().resource(service_name) + return aws_session().resource(service_name) # type: ignore[call-overload] def ec2_client() -> Any: diff --git a/edcloud/backup_policy.py b/edcloud/backup_policy.py index 62c5463..11a0f71 100644 --- a/edcloud/backup_policy.py +++ b/edcloud/backup_policy.py @@ -82,7 +82,7 @@ def _find_policy_summary() -> dict[str, Any] | None: resp = dlm.get_lifecycle_policies() for policy in resp.get("Policies", []): if policy.get("Description") == DLM_LIFECYCLE_POLICY_NAME: - return policy + return dict(policy) return None diff --git a/edcloud/cleanup.py b/edcloud/cleanup.py index 6e560f8..1c12a45 100644 --- a/edcloud/cleanup.py +++ b/edcloud/cleanup.py @@ -2,10 +2,9 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence from typing import Any -import click from botocore.exceptions import BotoCoreError, ClientError from edcloud import tailscale @@ -18,6 +17,19 @@ ) from edcloud.resource_queries import list_managed_volumes +# Default I/O callbacks — callers can override these to decouple from any +# particular UI framework (click, logging, etc.). +_ECHO: Callable[[str], None] = print + + +def _CONFIRM(msg: str) -> bool: # noqa: N802 + return input(f"{msg} [y/N] ").strip().lower() == "y" + + +def _PROMPT_INT(msg: str, default: int) -> int: # noqa: N802 + raw = input(f"{msg} [{default}]: ") + return int(raw) if raw else default + def _is_state_volume(vol: Mapping[str, Any]) -> bool: """Return ``True`` if *vol* is tagged as a persistent state volume.""" @@ -29,31 +41,44 @@ def _is_root_volume(vol: Mapping[str, Any]) -> bool: return tag_value(vol.get("Tags", []), VOLUME_ROLE_TAG_KEY) == ROOT_VOLUME_ROLE -def cleanup_tailscale_devices(interactive: bool = True) -> bool: +def cleanup_tailscale_devices( + interactive: bool = True, + *, + echo: Callable[[str], None] = _ECHO, + confirm: Callable[[str], bool] = _CONFIRM, +) -> bool: """Clean up offline edcloud Tailscale devices. Args: - interactive: If True, prompts user for confirmation + interactive: If True, prompts user for confirmation. + echo: Callable for output messages. + confirm: Callable for yes/no confirmation prompts. Returns: - True if cleanup completed (or skipped), False if user aborted + True if cleanup completed (or skipped), False if user aborted. """ count, message = tailscale.cleanup_offline_edcloud_devices() if count == 0: if interactive: - click.echo("✅ No offline edcloud devices found in Tailscale.") + echo("No offline edcloud devices found in Tailscale.") return True - click.echo(message) - click.echo() + echo(message) + echo("") if not interactive: return True - return click.confirm("Have you cleaned up the Tailscale devices? Continue?") + return confirm("Have you cleaned up the Tailscale devices? Continue?") -def cleanup_orphaned_volumes(mode: str = "interactive", allow_delete_state: bool = False) -> bool: +def cleanup_orphaned_volumes( + mode: str = "interactive", + allow_delete_state: bool = False, + *, + echo: Callable[[str], None] = _ECHO, + prompt_int: Callable[[str, int], int] = _PROMPT_INT, +) -> bool: """Clean up orphaned (available, unattached) managed EBS volumes. Args: @@ -61,6 +86,8 @@ def cleanup_orphaned_volumes(mode: str = "interactive", allow_delete_state: bool or ``"keep"`` (skip). allow_delete_state: When ``True``, state and unknown-role volumes are also eligible for deletion. + echo: Callable for output messages. + prompt_int: Callable for integer prompts ``(message, default) -> int``. Returns: ``True`` if cleanup completed, ``False`` if the user aborted. @@ -69,17 +96,17 @@ def cleanup_orphaned_volumes(mode: str = "interactive", allow_delete_state: bool orphaned_volumes = list_managed_volumes(ec2_client, status="available") if not orphaned_volumes: - click.echo("✅ No orphaned volumes found.") + echo("No orphaned volumes found.") return True # Show volumes - click.echo(f"Found {len(orphaned_volumes)} orphaned volume(s):") + echo(f"Found {len(orphaned_volumes)} orphaned volume(s):") for vol in orphaned_volumes: vol_id = vol["VolumeId"] size = vol["Size"] vol_type = vol.get("VolumeType", "unknown") role = tag_value(vol.get("Tags", []), VOLUME_ROLE_TAG_KEY) or "unknown" - click.echo(f" - {vol_id} ({size}GB {vol_type}, role={role})") + echo(f" - {vol_id} ({size}GB {vol_type}, role={role})") state_volumes = [v for v in orphaned_volumes if _is_state_volume(v)] unknown_role_volumes = [ @@ -91,65 +118,70 @@ def cleanup_orphaned_volumes(mode: str = "interactive", allow_delete_state: bool deletable_volumes = [v for v in orphaned_volumes if _is_root_volume(v)] if state_volumes and not allow_delete_state: - click.echo() - click.echo("🔒 Protected state volume(s) detected; they will not be deleted by default.") + echo("") + echo("Protected state volume(s) detected; they will not be deleted by default.") for vol in state_volumes: - click.echo(f" - {vol['VolumeId']}") + echo(f" - {vol['VolumeId']}") if unknown_role_volumes and not allow_delete_state: - click.echo() - click.echo("🔒 Untagged/unknown-role volume(s) detected; they are protected by default.") - click.echo( + echo("") + echo("Untagged/unknown-role volume(s) detected; they are protected by default.") + echo( " (Use --allow-delete-state-volume only when you intentionally want full deletion.)" ) for vol in unknown_role_volumes: - click.echo(f" - {vol['VolumeId']}") + echo(f" - {vol['VolumeId']}") # Handle based on mode if mode == "keep": - click.echo("Keeping volumes (will reuse if possible).") + echo("Keeping volumes (will reuse if possible).") return True if mode == "delete": target = orphaned_volumes if allow_delete_state else deletable_volumes if not target: - click.echo("No deletable orphaned volumes found.") + echo("No deletable orphaned volumes found.") return True - return _delete_volumes(ec2_client, target) + return _delete_volumes(ec2_client, target, echo=echo) # Interactive mode - click.echo() - click.echo("Options:") + echo("") + echo("Options:") if allow_delete_state: - click.echo(" 1. Delete all orphaned volumes (including state)") + echo(" 1. Delete all orphaned volumes (including state)") else: - click.echo(" 1. Delete orphaned non-state volumes (state protected)") - click.echo(" 2. Keep volumes (will reuse state volume if available)") - click.echo(" 3. Abort") - choice = click.prompt("Choose option", type=int, default=2) + echo(" 1. Delete orphaned non-state volumes (state protected)") + echo(" 2. Keep volumes (will reuse state volume if available)") + echo(" 3. Abort") + choice = prompt_int("Choose option", 2) if choice == 1: target = orphaned_volumes if allow_delete_state else deletable_volumes if not target: - click.echo("No deletable orphaned volumes found.") + echo("No deletable orphaned volumes found.") return True - return _delete_volumes(ec2_client, target) + return _delete_volumes(ec2_client, target, echo=echo) elif choice == 3: return False else: - click.echo("Keeping volumes (will reuse if possible).") + echo("Keeping volumes (will reuse if possible).") return True -def _delete_volumes(ec2_client: Any, volumes: Sequence[Mapping[str, Any]]) -> bool: +def _delete_volumes( + ec2_client: Any, + volumes: Sequence[Mapping[str, Any]], + *, + echo: Callable[[str], None] = _ECHO, +) -> bool: """Delete a list of EBS volumes, logging each result.""" for vol in volumes: vol_id = vol["VolumeId"] try: ec2_client.delete_volume(VolumeId=vol_id) - click.echo(f"✅ Deleted {vol_id}") + echo(f"Deleted {vol_id}") except (ClientError, BotoCoreError) as e: - click.echo(f"❌ Failed to delete {vol_id}: {e}") + echo(f"Failed to delete {vol_id}: {e}") return True @@ -158,37 +190,46 @@ def run_cleanup_workflow( skip_snapshot: bool = False, interactive: bool = True, allow_delete_state: bool = False, + *, + echo: Callable[[str], None] = _ECHO, + confirm: Callable[[str], bool] = _CONFIRM, + prompt_int: Callable[[str, int], int] = _PROMPT_INT, ) -> bool: """Run the full pre-provision or post-destroy cleanup workflow. - Steps: Tailscale device cleanup → orphaned volume cleanup. + Steps: Tailscale device cleanup -> orphaned volume cleanup. Args: phase: Human-readable label (e.g. ``"pre-provision"``). - skip_snapshot: Unused — reserved for future snapshot-before-cleanup. + skip_snapshot: Unused -- reserved for future snapshot-before-cleanup. interactive: Prompt for confirmations when ``True``. allow_delete_state: Pass through to volume cleanup. + echo: Callable for output messages. + confirm: Callable for yes/no confirmation prompts. + prompt_int: Callable for integer prompts. Returns: ``True`` if the workflow completed, ``False`` if the user aborted. """ - click.echo("=" * 70) - click.echo(f"{phase.replace('-', ' ').title()} Cleanup") - click.echo("=" * 70) - click.echo() + echo("=" * 70) + echo(f"{phase.replace('-', ' ').title()} Cleanup") + echo("=" * 70) + echo("") # Cleanup Tailscale devices - if not cleanup_tailscale_devices(interactive=interactive): - click.echo("Aborted.") + if not cleanup_tailscale_devices(interactive=interactive, echo=echo, confirm=confirm): + echo("Aborted.") return False # Cleanup orphaned volumes mode = "interactive" if interactive else "keep" - if not cleanup_orphaned_volumes(mode=mode, allow_delete_state=allow_delete_state): - click.echo("Aborted.") + if not cleanup_orphaned_volumes( + mode=mode, allow_delete_state=allow_delete_state, echo=echo, prompt_int=prompt_int + ): + echo("Aborted.") return False - click.echo() - click.echo("=" * 70) - click.echo() + echo("") + echo("=" * 70) + echo("") return True diff --git a/edcloud/cli.py b/edcloud/cli.py index 7451282..f9aaadb 100644 --- a/edcloud/cli.py +++ b/edcloud/cli.py @@ -21,6 +21,8 @@ from edcloud.aws_check import check_aws_credentials, get_region from edcloud.aws_clients import ssm_client from edcloud.config import ( + DEFAULT_DOTFILES_BRANCH, + DEFAULT_DOTFILES_REPO, DEFAULT_SSH_USER, DEFAULT_TAILSCALE_AUTH_KEY_SSM_PARAMETER, DEFAULT_TAILSCALE_HOSTNAME, @@ -108,11 +110,6 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: return wrapper -def _fetch_tailscale_auth_key_from_ssm(parameter_name: str) -> str: - """Read a Tailscale auth key from SSM Parameter Store.""" - return ec2.fetch_tailscale_auth_key_from_ssm(parameter_name) - - def _run_checked(cmd: list[str]) -> subprocess.CompletedProcess[str]: """Run a subprocess command and raise RuntimeError on failure.""" proc = subprocess.run(cmd, capture_output=True, text=True) # nosec B603 @@ -256,6 +253,22 @@ def main() -> None: is_flag=True, help="Skip fail-fast guard for duplicate/suffixed edcloud Tailscale records.", ) +@click.option( + "--dotfiles-repo", + default=DEFAULT_DOTFILES_REPO, + envvar="EDCLOUD_DOTFILES_REPO", + show_default=True, + help=( + "Dotfiles repo URL for bootstrap ('auto' = infer https://github.com//dotfiles.git)." + ), +) +@click.option( + "--dotfiles-branch", + default=DEFAULT_DOTFILES_BRANCH, + envvar="EDCLOUD_DOTFILES_BRANCH", + show_default=True, + help="Dotfiles branch/ref to checkout during bootstrap.", +) @require_aws_creds def provision( instance_type: str, @@ -269,6 +282,8 @@ def provision( require_existing_state_volume: bool, skip_snapshot: bool, allow_tailscale_name_conflicts: bool, + dotfiles_repo: str, + dotfiles_branch: str, ) -> None: """Create the edcloud EC2 instance from scratch. @@ -301,6 +316,9 @@ def provision( "pre-provision", skip_snapshot=True, allow_delete_state=allow_delete_state_volume, + echo=click.echo, + confirm=click.confirm, + prompt_int=lambda msg, default: click.prompt(msg, type=int, default=default), ): raise SystemExit(0) @@ -350,6 +368,8 @@ def provision( state_volume_size_gb=state_volume_size, tailscale_hostname=tailscale_hostname, tailscale_auth_key_ssm_parameter=tailscale_auth_key_ssm_parameter, + dotfiles_repo=dotfiles_repo, + dotfiles_branch=dotfiles_branch, ) result = ec2.provision( cfg, @@ -388,7 +408,7 @@ def load_tailscale_env_key( ) -> None: """Load TAILSCALE_AUTH_KEY from SSM for local operator usage.""" try: - key = _fetch_tailscale_auth_key_from_ssm(tailscale_auth_key_ssm_parameter) + key = ec2.fetch_tailscale_auth_key_from_ssm(tailscale_auth_key_ssm_parameter) except ClientError as exc: click.echo( "Error: could not read Tailscale auth key from SSM parameter " @@ -766,7 +786,6 @@ def status() -> None: # destroy # --------------------------------------------------------------------------- @main.command() -@click.option("--force", is_flag=True, help="Skip confirmation prompt.") @click.option( "--confirm-instance-id", default=None, @@ -801,7 +820,6 @@ def status() -> None: ) @require_aws_creds def destroy( - force: bool, confirm_instance_id: str | None, require_fresh_snapshot: bool, fresh_snapshot_max_age_minutes: int, @@ -853,7 +871,7 @@ def destroy( continue_prompt="Continue with destroy anyway?", ) - ec2.destroy(force=force) + ec2.destroy() def _run_post_destroy_cleanup() -> None: from edcloud import cleanup as cleanup_module @@ -863,6 +881,9 @@ def _run_post_destroy_cleanup() -> None: "post-destroy", skip_snapshot=True, allow_delete_state=allow_delete_state_volume, + echo=click.echo, + confirm=click.confirm, + prompt_int=lambda msg, default: click.prompt(msg, type=int, default=default), ) maybe_run_cleanup(skip_cleanup=skip_cleanup, run_cleanup=_run_post_destroy_cleanup) @@ -1276,7 +1297,7 @@ def permissions_group() -> None: """Inspect and verify AWS permissions required by edcloud.""" -def _permission_profile_choice() -> click.Choice: +def _permission_profile_choice() -> click.Choice: # type: ignore[type-arg] return click.Choice(["all", *permissions.available_profiles()]) @@ -1410,6 +1431,22 @@ def permissions_verify_cmd(profiles: tuple[str, ...], json_output: bool) -> None is_flag=True, help="Skip fail-fast guard for duplicate/suffixed edcloud Tailscale records.", ) +@click.option( + "--dotfiles-repo", + default=DEFAULT_DOTFILES_REPO, + envvar="EDCLOUD_DOTFILES_REPO", + show_default=True, + help=( + "Dotfiles repo URL for bootstrap ('auto' = infer https://github.com//dotfiles.git)." + ), +) +@click.option( + "--dotfiles-branch", + default=DEFAULT_DOTFILES_BRANCH, + envvar="EDCLOUD_DOTFILES_BRANCH", + show_default=True, + help="Dotfiles branch/ref to checkout during bootstrap.", +) @require_aws_creds def reprovision( instance_type: str, @@ -1420,6 +1457,8 @@ def reprovision( skip_snapshot: bool, confirm_instance_id: str | None, allow_tailscale_name_conflicts: bool, + dotfiles_repo: str, + dotfiles_branch: str, ) -> None: """Atomically snapshot → destroy → provision. @@ -1452,6 +1491,8 @@ def reprovision( state_volume_size_gb=state_volume_size, tailscale_hostname=tailscale_hostname, tailscale_auth_key_ssm_parameter=tailscale_auth_key_ssm_parameter, + dotfiles_repo=dotfiles_repo, + dotfiles_branch=dotfiles_branch, ) from edcloud import cleanup as cleanup_module @@ -1461,9 +1502,9 @@ def reprovision( info=info, skip_snapshot=skip_snapshot, auto_snapshot=lambda: snapshot.snapshot_and_prune("pre-reprovision", wait=True), - destroy_instance=lambda: ec2.destroy(force=True), + destroy_instance=lambda: ec2.destroy(), cleanup_orphaned_volumes=lambda: cleanup_module.cleanup_orphaned_volumes( - mode="delete", allow_delete_state=False + mode="delete", allow_delete_state=False, echo=click.echo ), provision_replacement=lambda: ec2.provision(cfg, require_existing_state_volume=True), echo=click.echo, diff --git a/edcloud/config.py b/edcloud/config.py index b6f0751..12c59eb 100644 --- a/edcloud/config.py +++ b/edcloud/config.py @@ -42,6 +42,8 @@ DEFAULT_TAILSCALE_AUTH_KEY_SSM_PARAMETER = "/edcloud/tailscale_auth_key" GITHUB_TOKEN_SSM_PARAMETER = "/edcloud/github_token" RCLONE_CONFIG_SSM_PARAMETER = "/edcloud/rclone_config" +DEFAULT_DOTFILES_REPO = "auto" +DEFAULT_DOTFILES_BRANCH = "main" DEFAULT_SSH_USER = "ubuntu" # --------------------------------------------------------------------------- @@ -145,6 +147,8 @@ class InstanceConfig: state_volume_device_name: str = DEFAULT_STATE_VOLUME_DEVICE_NAME tailscale_hostname: str = DEFAULT_TAILSCALE_HOSTNAME tailscale_auth_key_ssm_parameter: str = DEFAULT_TAILSCALE_AUTH_KEY_SSM_PARAMETER + dotfiles_repo: str = DEFAULT_DOTFILES_REPO + dotfiles_branch: str = DEFAULT_DOTFILES_BRANCH ami_ssm_parameter: str = AMI_SSM_PARAMETER tags: dict[str, str] = field( default_factory=lambda: { diff --git a/edcloud/ec2.py b/edcloud/ec2.py index 6450550..e6c626d 100644 --- a/edcloud/ec2.py +++ b/edcloud/ec2.py @@ -17,9 +17,8 @@ from botocore.exceptions import ClientError from edcloud.aws_check import get_region -from edcloud.aws_clients import ec2_client as _shared_ec2_client -from edcloud.aws_clients import ec2_resource as _shared_ec2_resource -from edcloud.aws_clients import ssm_client as _shared_ssm_client +from edcloud.aws_clients import ec2_client as _ec2_client +from edcloud.aws_clients import ssm_client as _ssm_client from edcloud.config import ( DEFAULT_HOURS_PER_DAY, EBS_MONTHLY_RATE_PER_GB, @@ -36,6 +35,7 @@ get_volume_ids, has_managed_tag, managed_filter, + tag_value, ) from edcloud.discovery import list_instances from edcloud.iam import delete_instance_profile, ensure_instance_profile @@ -59,21 +59,6 @@ class TagDriftError(RuntimeError): # --------------------------------------------------------------------------- -def _ec2_client() -> Any: - """Return a low-level EC2 client.""" - return _shared_ec2_client() - - -def _ec2_resource() -> Any: - """Return a high-level EC2 resource.""" - return _shared_ec2_resource() - - -def _ssm_client() -> Any: - """Return a low-level SSM client.""" - return _shared_ssm_client() - - def get_ec2_client() -> Any: """Return a low-level EC2 client (public API).""" return _ec2_client() @@ -106,11 +91,6 @@ def _instance_summary(instances: list[dict[str, Any]]) -> str: return ", ".join(f"{i['InstanceId']} ({i['State']['Name']})" for i in instances) -def _list_instances(client: Any, filters: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Describe instances matching *filters*, excluding terminated ones.""" - return list_instances(client, filters) - - def _validate_instance_volume_tags(client: Any, instance: dict[str, Any]) -> None: """Ensure every volume attached to *instance* carries the managed tag. @@ -245,7 +225,7 @@ def _find_instance(client: Any) -> dict[str, Any] | None: Raises: TagDriftError: On duplicate managed instances or missing tags. """ - managed_instances = _list_instances(client, managed_filter()) + managed_instances = list_instances(client, managed_filter()) if len(managed_instances) > 1: raise TagDriftError( "Tag drift detected: multiple managed instances found: " @@ -255,7 +235,7 @@ def _find_instance(client: Any) -> dict[str, Any] | None: ) if not managed_instances: - named_instances = _list_instances(client, [{"Name": "tag:Name", "Values": [NAME_TAG]}]) + named_instances = list_instances(client, [{"Name": "tag:Name", "Values": [NAME_TAG]}]) untagged_named = [i for i in named_instances if not has_managed_tag(i.get("Tags", []))] if untagged_named: instance_ids = " ".join(i["InstanceId"] for i in untagged_named) @@ -387,6 +367,8 @@ def _validate_user_data_inputs( tailscale_auth_key: str | None = None, tailscale_auth_key_ssm_parameter: str | None = None, aws_region: str | None = None, + dotfiles_repo: str | None = None, + dotfiles_branch: str | None = None, ) -> None: """Validate user-data template inputs to prevent injection attacks. @@ -430,11 +412,39 @@ def _validate_user_data_inputs( f"Invalid aws_region: {aws_region!r}. Must match AWS region format (e.g., us-east-1)." ) + # Validate dotfiles repo selector + if dotfiles_repo is not None: + if dotfiles_repo == "auto": + pass + elif not re.match( + r"^(https://github\.com|git@github\.com:)[A-Za-z0-9._/-]+\.git$", + dotfiles_repo, + ): + raise ValueError( + f"Invalid dotfiles_repo: {dotfiles_repo!r}. " + "Use 'auto', an https GitHub URL, or an SSH GitHub URL ending in .git." + ) + + # Validate git branch/ref-ish input used for dotfiles checkout + if dotfiles_branch is not None: + if not re.match(r"^[A-Za-z0-9._/-]{1,100}$", dotfiles_branch): + raise ValueError( + f"Invalid dotfiles_branch: {dotfiles_branch!r}. " + "Use a simple branch/ref name (alphanumeric, ., _, /, -)." + ) + if ".." in dotfiles_branch or dotfiles_branch.startswith("-"): + raise ValueError( + f"Invalid dotfiles_branch: {dotfiles_branch!r}. " + "Branch cannot contain '..' or start with '-'." + ) + def _render_user_data( tailscale_auth_key_ssm_parameter: str, tailscale_hostname: str, aws_region: str, + dotfiles_repo: str, + dotfiles_branch: str, ) -> str: """Read the cloud-init template and interpolate runtime variables. @@ -450,12 +460,16 @@ def _render_user_data( tailscale_hostname=tailscale_hostname, tailscale_auth_key_ssm_parameter=tailscale_auth_key_ssm_parameter, aws_region=aws_region, + dotfiles_repo=dotfiles_repo, + dotfiles_branch=dotfiles_branch, ) template = _USER_DATA_PATH.read_text() return ( template.replace("${TAILSCALE_AUTH_KEY_SSM_PARAMETER}", tailscale_auth_key_ssm_parameter) .replace("${TAILSCALE_HOSTNAME}", tailscale_hostname) .replace("${AWS_REGION}", aws_region) + .replace("${DOTFILES_REPO}", dotfiles_repo) + .replace("${DOTFILES_BRANCH}", dotfiles_branch) ) @@ -540,6 +554,8 @@ def provision( tailscale_auth_key_ssm_parameter=cfg.tailscale_auth_key_ssm_parameter, tailscale_hostname=cfg.tailscale_hostname, aws_region=aws_region, + dotfiles_repo=cfg.dotfiles_repo, + dotfiles_branch=cfg.dotfiles_branch, ) log.info( " Tailscale auth key will be fetched from SSM: %s", @@ -840,21 +856,20 @@ def status() -> dict[str, Any]: public_ip = inst.get("PublicIpAddress") instance_type = inst.get("InstanceType", "unknown") - # Get volume info + # Get volume info (single API call for all attached volumes) + vol_ids = get_volume_ids(inst) volumes = [] - for bdm in inst.get("BlockDeviceMappings", []): - vol_id = bdm.get("Ebs", {}).get("VolumeId") - if vol_id: - vol_resp = ec2.describe_volumes(VolumeIds=[vol_id]) - for v in vol_resp.get("Volumes", []): - volumes.append( - { - "volume_id": v["VolumeId"], - "size_gb": v["Size"], - "type": v["VolumeType"], - "state": v["State"], - } - ) + if vol_ids: + vol_resp = ec2.describe_volumes(VolumeIds=vol_ids) + for v in vol_resp.get("Volumes", []): + volumes.append( + { + "volume_id": v["VolumeId"], + "size_gb": v["Size"], + "type": v["VolumeType"], + "state": v["State"], + } + ) # Orphaned managed volumes (available, not attached to this instance) orphaned_vol_resp = ec2.describe_volumes( @@ -890,15 +905,12 @@ def status() -> dict[str, Any]: return result -def destroy(force: bool = False) -> None: +def destroy() -> None: """Terminate the edcloud instance and clean up its security group and IAM. The root EBS volume is deleted automatically on termination (``DeleteOnTermination=True``). The state volume survives and is reattached on the next provision. - - Args: - force: Skip the interactive confirmation prompt. """ ec2 = _ec2_client() inst = _find_instance(ec2) @@ -943,8 +955,6 @@ def destroy(force: bool = False) -> None: # Report surviving managed volumes (state volume is expected; others are not) surviving = list_managed_volumes(ec2) for v in surviving: - from edcloud.config import tag_value - role = tag_value(v.get("Tags", []), VOLUME_ROLE_TAG_KEY) or "unknown" if role == STATE_VOLUME_ROLE: log.info("State volume preserved: %s %sGB", v["VolumeId"], v["Size"]) diff --git a/edcloud/lifecycle.py b/edcloud/lifecycle.py index 3339250..4dad5c3 100644 --- a/edcloud/lifecycle.py +++ b/edcloud/lifecycle.py @@ -79,7 +79,7 @@ def run_reprovision_flow( skip_snapshot: bool, auto_snapshot: Callable[[], list[str]], destroy_instance: Callable[[], None], - cleanup_orphaned_volumes: Callable[[], None], + cleanup_orphaned_volumes: Callable[[], object], provision_replacement: Callable[[], dict[str, Any]], echo: Callable[[str], None], echo_err: Callable[[str], None], diff --git a/edcloud/resource_audit.py b/edcloud/resource_audit.py index 2ab2db5..29a8b51 100644 --- a/edcloud/resource_audit.py +++ b/edcloud/resource_audit.py @@ -70,10 +70,6 @@ def to_dict(self) -> dict[str, Any]: return asdict(self) -def _list_instances(ec2_client: Any, filters: list[dict[str, Any]]) -> list[dict[str, Any]]: - return list_instances(ec2_client, filters) - - def _managed_addresses(ec2_client: Any) -> list[dict[str, Any]]: try: resp = ec2_client.describe_addresses(Filters=managed_filter()) @@ -98,8 +94,8 @@ def audit_resources( findings: list[AuditFinding] = [] - managed_instances = _list_instances(ec2, managed_filter()) - named_instances = _list_instances(ec2, [{"Name": "tag:Name", "Values": [NAME_TAG]}]) + managed_instances = list_instances(ec2, managed_filter()) + named_instances = list_instances(ec2, [{"Name": "tag:Name", "Values": [NAME_TAG]}]) untagged_named_instances = [ i for i in named_instances if not has_managed_tag(i.get("Tags", [])) ] diff --git a/edcloud/snapshot.py b/edcloud/snapshot.py index 44dbeb4..f70298c 100644 --- a/edcloud/snapshot.py +++ b/edcloud/snapshot.py @@ -108,7 +108,7 @@ def _find_single_state_volume(ec2: Any) -> dict[str, Any]: raise RuntimeError( f"Restore drill requires a single managed state volume, but found multiple: {ids}" ) - return volumes[0] + return dict(volumes[0]) def _latest_completed_snapshot_for_volume(ec2: Any, volume_id: str) -> dict[str, Any]: @@ -124,7 +124,7 @@ def _latest_completed_snapshot_for_volume(ec2: Any, volume_id: str) -> dict[str, if not snapshots: raise RuntimeError(f"No completed snapshots found for state volume {volume_id}.") snapshots.sort(key=lambda s: s.get("StartTime", ""), reverse=True) - return snapshots[0] + return dict(snapshots[0]) def _validated_snapshot_for_volume(ec2: Any, snapshot_id: str, volume_id: str) -> dict[str, Any]: @@ -143,7 +143,7 @@ def _validated_snapshot_for_volume(ec2: Any, snapshot_id: str, volume_id: str) - raise RuntimeError( f"Snapshot {snapshot_id} is in state {snap.get('State')}; must be completed." ) - return snap + return dict(snap) def run_restore_drill( @@ -261,31 +261,6 @@ def find_recent_prechange_snapshot(max_age_minutes: int) -> dict[str, object] | return freshest[1] if freshest else None -def auto_snapshot_before_destroy() -> list[str]: - """Snapshot all volumes of the current instance before a destructive op. - - Waits for all snapshots to reach ``completed`` state before returning, - so the caller can safely proceed with destructive operations. - - Returns: - List of snapshot IDs created, or empty list if no instance exists. - """ - ec2 = get_ec2_client() - inst = find_instance(ec2) - if not inst: - # No instance to snapshot - return [] - - ts = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M%S") - description = f"auto-pre-destroy-{ts}" - snap_ids = create_snapshot(description) - if snap_ids: - log.info("Waiting for snapshot(s) to complete before proceeding...") - wait_for_snapshot_completion(snap_ids) - log.info("Snapshot(s) completed.") - return snap_ids - - def snapshot_and_prune( description: str, keep: int = DEFAULT_SNAPSHOT_KEEP_LAST, diff --git a/edcloud/tailscale.py b/edcloud/tailscale.py index 389966e..73d47b7 100644 --- a/edcloud/tailscale.py +++ b/edcloud/tailscale.py @@ -12,8 +12,6 @@ import subprocess # nosec B404 from typing import Any -from edcloud.config import DEFAULT_SSH_USER - def _tailscale_status() -> dict[str, Any] | None: """Run ``tailscale status --json`` and return the parsed payload. @@ -267,20 +265,3 @@ def cleanup_offline_edcloud_devices() -> tuple[int, str]: "This will ensure your next provision uses 'edcloud' (no suffix)." ) return (len(offline), message) - - -def ssh_command(hostname: str, user: str = DEFAULT_SSH_USER) -> list[str]: - """Build an SSH command targeting a Tailscale hostname. - - Auto-detects the active edcloud device when *hostname* is ``"edcloud"``. - - Args: - hostname: Tailscale hostname to connect to. - user: Remote username. - - Returns: - Command list suitable for ``subprocess.run``. - """ - ip = get_tailscale_ip(hostname) - target = ip if ip else hostname - return ["ssh", f"{user}@{target}"] diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py index 485460a..e4d15de 100644 --- a/tests/test_cleanup.py +++ b/tests/test_cleanup.py @@ -110,8 +110,11 @@ def test_run_cleanup_workflow_completes_noninteractive(mock_ts_cleanup, mock_vol result = run_cleanup_workflow("post-destroy", skip_snapshot=True, interactive=False) assert result is True - mock_ts_cleanup.assert_called_once_with(interactive=False) - mock_vol_cleanup.assert_called_once_with(mode="keep", allow_delete_state=False) + mock_ts_cleanup.assert_called_once() + assert mock_ts_cleanup.call_args.kwargs["interactive"] is False + mock_vol_cleanup.assert_called_once() + assert mock_vol_cleanup.call_args.kwargs["mode"] == "keep" + assert mock_vol_cleanup.call_args.kwargs["allow_delete_state"] is False @patch("edcloud.cleanup.cleanup_orphaned_volumes") diff --git a/tests/test_cli.py b/tests/test_cli.py index 60be547..e2b4dd8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -39,6 +39,8 @@ def test_provision_reads_tailscale_key_from_ssm( # Provision now only receives config, SSM parameter is verified but not fetched cfg = mock_provision.call_args.args[0] assert cfg.tailscale_auth_key_ssm_parameter == "/edcloud/tailscale_auth_key" + assert cfg.dotfiles_repo == "auto" + assert cfg.dotfiles_branch == "main" assert mock_provision.call_args.kwargs["require_existing_state_volume"] is True # Verify SSM parameter existence is checked (not fetched with decryption) assert ssm.get_parameter.called @@ -78,6 +80,8 @@ def test_provision_reads_tailscale_key_from_ssm_envvar( assert result.exit_code == 0 cfg = mock_provision.call_args.args[0] assert cfg.tailscale_auth_key_ssm_parameter == "/edcloud/tailscale_auth_key" + assert cfg.dotfiles_repo == "auto" + assert cfg.dotfiles_branch == "main" assert mock_provision.call_args.kwargs["require_existing_state_volume"] is True call_kwargs = ssm.get_parameter.call_args.kwargs assert call_kwargs["Name"] == "/edcloud/tailscale_auth_key" @@ -212,6 +216,45 @@ def test_provision_allow_new_state_volume_disables_requirement( assert mock_provision.call_args.kwargs["require_existing_state_volume"] is False +@patch("edcloud.cli.ssm_client") +@patch("edcloud.cli.ec2.provision") +@patch("edcloud.cli.tailscale.edcloud_name_conflicts", return_value=[]) +@patch("edcloud.cli.get_region", return_value="us-east-1") +@patch("edcloud.cli.check_aws_credentials", return_value=(True, "ok")) +def test_provision_passes_dotfiles_repo_and_branch_to_instance_config( + _mock_creds, + _mock_region, + _mock_conflicts, + mock_provision, + mock_ssm_client, +): + ssm = MagicMock() + ssm.get_parameter.return_value = {"Parameter": {"Value": "exists"}} + mock_ssm_client.return_value = ssm + mock_provision.return_value = { + "instance_id": "i-abc123", + "security_group_id": "sg-abc123", + "public_ip": "none", + } + + runner = CliRunner() + result = runner.invoke( + main, + [ + "provision", + "--dotfiles-repo", + "https://github.com/example/dotfiles.git", + "--dotfiles-branch", + "linux-main", + ], + ) + + assert result.exit_code == 0 + cfg = mock_provision.call_args.args[0] + assert cfg.dotfiles_repo == "https://github.com/example/dotfiles.git" + assert cfg.dotfiles_branch == "linux-main" + + @patch("edcloud.cleanup.run_cleanup_workflow", return_value=True) @patch("edcloud.cli.ec2.provision") @patch("edcloud.cli.tailscale.edcloud_name_conflicts", return_value=[]) @@ -484,7 +527,7 @@ def test_verify_fails_when_remote_checks_fail( assert "Overall: FAIL" in result.output -@patch("edcloud.cli._fetch_tailscale_auth_key_from_ssm", return_value="tailscale-test-key") +@patch("edcloud.cli.ec2.fetch_tailscale_auth_key_from_ssm", return_value="tailscale-test-key") @patch("edcloud.cli.get_region", return_value="us-east-1") @patch("edcloud.cli.check_aws_credentials", return_value=(True, "ok")) def test_load_tailscale_env_key_shell_export( @@ -499,7 +542,7 @@ def test_load_tailscale_env_key_shell_export( assert "export TAILSCALE_AUTH_KEY=tailscale-test-key" in result.output -@patch("edcloud.cli._fetch_tailscale_auth_key_from_ssm", return_value="tailscale-test-key") +@patch("edcloud.cli.ec2.fetch_tailscale_auth_key_from_ssm", return_value="tailscale-test-key") @patch("edcloud.cli.get_region", return_value="us-east-1") @patch("edcloud.cli.check_aws_credentials", return_value=(True, "ok")) def test_load_tailscale_env_key_requires_output_mode( @@ -704,7 +747,7 @@ def test_destroy_requires_confirm_instance_id( mock_status.return_value = {"exists": True, "instance_id": "i-abc123"} runner = CliRunner() - result = runner.invoke(main, ["destroy", "--force"]) + result = runner.invoke(main, ["destroy"]) assert result.exit_code == 1 assert "requires explicit instance ID confirmation" in result.output @@ -729,7 +772,6 @@ def test_destroy_with_matching_confirm_id_calls_destroy( main, [ "destroy", - "--force", "--confirm-instance-id", "i-abc123", "--skip-snapshot", @@ -738,7 +780,7 @@ def test_destroy_with_matching_confirm_id_calls_destroy( ) assert result.exit_code == 0 - mock_destroy.assert_called_once_with(force=True) + mock_destroy.assert_called_once_with() @patch("edcloud.cli.snapshot.list_snapshots") @@ -761,7 +803,6 @@ def test_destroy_require_fresh_snapshot_fails_without_recent_snapshot( main, [ "destroy", - "--force", "--confirm-instance-id", "i-abc123", "--require-fresh-snapshot", @@ -800,7 +841,6 @@ def test_destroy_require_fresh_snapshot_passes_with_recent_snapshot( main, [ "destroy", - "--force", "--confirm-instance-id", "i-abc123", "--require-fresh-snapshot", @@ -811,7 +851,7 @@ def test_destroy_require_fresh_snapshot_passes_with_recent_snapshot( assert result.exit_code == 0 assert "Using pre-change snapshot: snap-abc123" in result.output - mock_destroy.assert_called_once_with(force=True) + mock_destroy.assert_called_once_with() @patch("edcloud.cli.tailscale.get_tailscale_ip", return_value="100.64.1.1") @@ -906,7 +946,6 @@ def test_destroy_cleanup_passes_allow_delete_state_volume( main, [ "destroy", - "--force", "--confirm-instance-id", "i-abc123", "--skip-snapshot", @@ -915,7 +954,7 @@ def test_destroy_cleanup_passes_allow_delete_state_volume( ) assert result.exit_code == 0 - mock_destroy.assert_called_once_with(force=True) + mock_destroy.assert_called_once_with() assert mock_cleanup.call_args.kwargs["allow_delete_state"] is True @@ -1011,7 +1050,7 @@ def test_reprovision_snapshots_destroys_and_provisions( assert result.exit_code == 0, result.output mock_snapshot.assert_called_once() - mock_destroy.assert_called_once_with(force=True) + mock_destroy.assert_called_once_with() mock_provision.assert_called_once() assert "Next step: run 'edc verify' to confirm rebuild health." in result.output @@ -1139,6 +1178,52 @@ def test_reprovision_delegates_to_lifecycle_flow( assert mock_flow.called +@patch("edcloud.cli.run_reprovision_flow") +@patch("edcloud.cli.ec2.status") +@patch("edcloud.cli.tailscale.edcloud_name_conflicts", return_value=[]) +@patch("edcloud.cli.tailscale.tailscale_available", return_value=True) +@patch("edcloud.cli.get_region", return_value="us-east-1") +@patch("edcloud.cli.check_aws_credentials", return_value=(True, "ok")) +def test_reprovision_passes_dotfiles_repo_and_branch_to_instance_config( + _mock_creds, + _mock_region, + _mock_available, + _mock_conflicts, + mock_status, + mock_flow, +): + mock_status.return_value = {"exists": True, "instance_id": "i-abc123"} + mock_flow.return_value = ( + ["snap-abc123"], + {"instance_id": "i-new", "security_group_id": "sg-new", "public_ip": "1.2.3.4"}, + ) + + runner = CliRunner() + result = runner.invoke( + main, + [ + "reprovision", + "--confirm-instance-id", + "i-abc123", + "--tailscale-auth-key-ssm-parameter", + "/edcloud/tailscale_auth_key", + "--dotfiles-repo", + "https://github.com/example/dotfiles.git", + "--dotfiles-branch", + "linux-main", + ], + ) + + assert result.exit_code == 0 + from edcloud.config import InstanceConfig + + provision_lambda = mock_flow.call_args.kwargs["provision_replacement"] + closure_cells = [cell.cell_contents for cell in (provision_lambda.__closure__ or ())] + cfg = next(c for c in closure_cells if isinstance(c, InstanceConfig)) + assert cfg.dotfiles_repo == "https://github.com/example/dotfiles.git" + assert cfg.dotfiles_branch == "linux-main" + + @patch("edcloud.cli.ec2.destroy") @patch("edcloud.cli.ec2.status") @patch("edcloud.cli.tailscale.edcloud_name_conflicts", return_value=[]) diff --git a/tests/test_ec2.py b/tests/test_ec2.py index b691caa..d5a8a37 100644 --- a/tests/test_ec2.py +++ b/tests/test_ec2.py @@ -136,13 +136,19 @@ def test_render_substitutes_variables(self): "/edcloud/tailscale_auth_key", "my-hostname", "us-east-1", + "https://github.com/example/dotfiles.git", + "main", ) assert "/edcloud/tailscale_auth_key" in rendered assert "my-hostname" in rendered assert "us-east-1" in rendered + assert "https://github.com/example/dotfiles.git" in rendered + assert "main" in rendered assert "${TAILSCALE_AUTH_KEY_SSM_PARAMETER}" not in rendered assert "${TAILSCALE_HOSTNAME}" not in rendered assert "${AWS_REGION}" not in rendered + assert "${DOTFILES_REPO}" not in rendered + assert "${DOTFILES_BRANCH}" not in rendered class TestInputValidation: @@ -224,6 +230,51 @@ def test_invalid_region_raises(self): with pytest.raises(ValueError, match="Invalid aws_region"): _validate_user_data_inputs("edcloud", aws_region="us-east-1; whoami") + def test_valid_dotfiles_repo_values_pass(self): + from edcloud.ec2 import _validate_user_data_inputs + + _validate_user_data_inputs("edcloud", dotfiles_repo="auto") + _validate_user_data_inputs( + "edcloud", + dotfiles_repo="https://github.com/example/dotfiles.git", + ) + _validate_user_data_inputs( + "edcloud", + dotfiles_repo="git@github.com:example/dotfiles.git", + ) + + def test_invalid_dotfiles_repo_values_raise(self): + from edcloud.ec2 import _validate_user_data_inputs + + with pytest.raises(ValueError, match="Invalid dotfiles_repo"): + _validate_user_data_inputs( + "edcloud", dotfiles_repo="https://gitlab.com/example/dotfiles" + ) + + with pytest.raises(ValueError, match="Invalid dotfiles_repo"): + _validate_user_data_inputs( + "edcloud", dotfiles_repo="https://github.com/example/dotfiles" + ) + + def test_valid_dotfiles_branch_values_pass(self): + from edcloud.ec2 import _validate_user_data_inputs + + _validate_user_data_inputs("edcloud", dotfiles_branch="main") + _validate_user_data_inputs("edcloud", dotfiles_branch="feature/linux-refresh") + _validate_user_data_inputs("edcloud", dotfiles_branch="release-2026.03") + + def test_invalid_dotfiles_branch_values_raise(self): + from edcloud.ec2 import _validate_user_data_inputs + + with pytest.raises(ValueError, match="Invalid dotfiles_branch"): + _validate_user_data_inputs("edcloud", dotfiles_branch="") + + with pytest.raises(ValueError, match="Invalid dotfiles_branch"): + _validate_user_data_inputs("edcloud", dotfiles_branch="main..prod") + + with pytest.raises(ValueError, match="Invalid dotfiles_branch"): + _validate_user_data_inputs("edcloud", dotfiles_branch="-main") + class TestProvision: @patch("edcloud.ec2._ec2_client") diff --git a/tests/test_tailscale.py b/tests/test_tailscale.py index 54e1c1e..bbaf08a 100644 --- a/tests/test_tailscale.py +++ b/tests/test_tailscale.py @@ -7,7 +7,6 @@ edcloud_name_conflicts, format_conflict_message, get_tailscale_ip, - ssh_command, ) MOCK_TS_STATUS = { @@ -46,18 +45,6 @@ def test_returns_none_for_unknown_host(self, mock_run, _mock_avail): assert get_tailscale_ip("nonexistent") is None -class TestSSHCommand: - @patch("edcloud.tailscale.get_tailscale_ip", return_value="100.64.1.42") - def test_uses_tailscale_ip(self, _mock): - cmd = ssh_command("edcloud") - assert cmd == ["ssh", "ubuntu@100.64.1.42"] - - @patch("edcloud.tailscale.get_tailscale_ip", return_value=None) - def test_falls_back_to_hostname(self, _mock): - cmd = ssh_command("edcloud") - assert cmd == ["ssh", "ubuntu@edcloud"] - - def test_edcloud_name_conflicts_detects_suffixed_dns() -> None: with patch("edcloud.tailscale.list_all_edcloud_devices") as mock_devices: mock_devices.return_value = [