diff --git a/.claude/context/PROJECT.md b/.claude/context/PROJECT.md index 8872cf4b..6a64e7e4 100644 --- a/.claude/context/PROJECT.md +++ b/.claude/context/PROJECT.md @@ -10,7 +10,7 @@ Replace the sections below with information about your project. --- -## Project: azlin +## Project: task-unnamed-1774842804 ## Overview @@ -29,7 +29,6 @@ Replace the sections below with information about your project. - **Language**: Python - **Language**: JavaScript/TypeScript - **Language**: Rust -- **Language**: Go - **Framework**: [Main framework if applicable] - **Database**: [Database system if applicable] diff --git a/.github/hooks/amplihack-hooks.json b/.github/hooks/amplihack-hooks.json new file mode 100644 index 00000000..17b0c84a --- /dev/null +++ b/.github/hooks/amplihack-hooks.json @@ -0,0 +1,47 @@ +{ + "version": 1, + "hooks": { + "sessionStart": [ + { + "type": "command", + "bash": ".github/hooks/session-start", + "timeoutSec": 30 + } + ], + "sessionEnd": [ + { + "type": "command", + "bash": ".github/hooks/session-stop", + "timeoutSec": 30 + } + ], + "userPromptSubmitted": [ + { + "type": "command", + "bash": ".github/hooks/user-prompt-submit", + "timeoutSec": 10 + } + ], + "preToolUse": [ + { + "type": "command", + "bash": ".github/hooks/pre-tool-use", + "timeoutSec": 15 + } + ], + "postToolUse": [ + { + "type": "command", + "bash": ".github/hooks/post-tool-use", + "timeoutSec": 10 + } + ], + "errorOccurred": [ + { + "type": "command", + "bash": ".github/hooks/error-occurred", + "timeoutSec": 10 + } + ] + } +} diff --git a/.github/hooks/error-occurred b/.github/hooks/error-occurred new file mode 100755 index 00000000..9105c04d --- /dev/null +++ b/.github/hooks/error-occurred @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Copilot hook: error-occurred +# Logs error to runtime log. No dedicated Python hook exists for this event; +# error_protocol.py is a utility module, not a hook entry point. + +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" +LOG_DIR="$HOME/.amplihack/.claude/runtime/logs" + +# If a dedicated error_occurred.py hook exists, use it +if [[ -f "${AMPLIHACK_HOOKS}/error_occurred.py" ]]; then + python3 "${AMPLIHACK_HOOKS}/error_occurred.py" "$@" +elif REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/error_occurred.py" ]]; then + python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/error_occurred.py" "$@" +else + # Fallback: log the error from stdin + mkdir -p "$LOG_DIR" + INPUT=$(cat) + ERROR_MSG=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('error',{}).get('message','unknown'))" 2>/dev/null || echo "unknown") + echo "$(date -Iseconds): ERROR - $ERROR_MSG" >> "${LOG_DIR}/errors.log" + echo "{}" +fi diff --git a/.github/hooks/post-tool-use b/.github/hooks/post-tool-use new file mode 100755 index 00000000..57e00d79 --- /dev/null +++ b/.github/hooks/post-tool-use @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Copilot hook wrapper - generated by amplihack +HOOK="post_tool_use.py" +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" + +if [[ -f "${AMPLIHACK_HOOKS}/${HOOK}" ]]; then + exec python3 "${AMPLIHACK_HOOKS}/${HOOK}" "$@" +elif REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/${HOOK}" ]]; then + exec python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/${HOOK}" "$@" +else + echo "{}" +fi diff --git a/.github/hooks/pre-tool-use b/.github/hooks/pre-tool-use new file mode 100755 index 00000000..0d6d2fe4 --- /dev/null +++ b/.github/hooks/pre-tool-use @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Copilot hook wrapper - generated by amplihack (python engine) +# Aggregates amplihack and XPIA pre-tool validation into one JSON response +REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || REPO_ROOT="" +INPUT=$(cat) + +AMPLIHACK_OUTPUT="{}" +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" +if [[ -f "${AMPLIHACK_HOOKS}/pre_tool_use.py" ]]; then + AMPLIHACK_OUTPUT=$(echo "$INPUT" | python3 "${AMPLIHACK_HOOKS}/pre_tool_use.py" "$@" 2>/dev/null || printf '{}') +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/pre_tool_use.py" ]]; then + AMPLIHACK_OUTPUT=$(echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/pre_tool_use.py" "$@" 2>/dev/null || printf '{}') +fi + +XPIA_OUTPUT="{}" +XPIA_HOOKS="$HOME/.amplihack/.claude/tools/xpia/hooks" +if [[ -f "${XPIA_HOOKS}/pre_tool_use.py" ]]; then + XPIA_OUTPUT=$(echo "$INPUT" | python3 "${XPIA_HOOKS}/pre_tool_use.py" "$@" 2>/dev/null || printf '{}') +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/xpia/hooks/pre_tool_use.py" ]]; then + XPIA_OUTPUT=$(echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/xpia/hooks/pre_tool_use.py" "$@" 2>/dev/null || printf '{}') +fi + +python3 - "$AMPLIHACK_OUTPUT" "$XPIA_OUTPUT" <<'PY' +import json +import sys + + +def parse_payload(raw: str) -> dict: + raw = raw.strip() + if not raw: + return {} + for line in reversed(raw.splitlines()): + line = line.strip() + if not line: + continue + try: + value = json.loads(line) + except json.JSONDecodeError: + continue + if isinstance(value, dict): + return value + return {} + + +amplihack = parse_payload(sys.argv[1]) +xpia = parse_payload(sys.argv[2]) + +permission = xpia.get("permissionDecision") +if permission in {"allow", "deny", "ask"}: + print(json.dumps(xpia)) + raise SystemExit(0) + +permission = amplihack.get("permissionDecision") +if permission in {"allow", "deny", "ask"}: + print(json.dumps(amplihack)) + raise SystemExit(0) + +if amplihack.get("block"): + print( + json.dumps( + { + "permissionDecision": "deny", + "message": amplihack.get( + "message", + "Blocked by amplihack pre-tool-use hook.", + ), + } + ) + ) + raise SystemExit(0) + +print("{}") +PY diff --git a/.github/hooks/session-start b/.github/hooks/session-start new file mode 100755 index 00000000..15b287ec --- /dev/null +++ b/.github/hooks/session-start @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Copilot hook wrapper - generated by amplihack +HOOK="session_start.py" +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" + +if [[ -f "${AMPLIHACK_HOOKS}/${HOOK}" ]]; then + exec python3 "${AMPLIHACK_HOOKS}/${HOOK}" "$@" +elif REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/${HOOK}" ]]; then + exec python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/${HOOK}" "$@" +else + echo "{}" +fi diff --git a/.github/hooks/session-stop b/.github/hooks/session-stop new file mode 100755 index 00000000..2c369513 --- /dev/null +++ b/.github/hooks/session-stop @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Copilot hook wrapper - generated by amplihack +# Runs multiple hook scripts for this event +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" +REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || REPO_ROOT="" +INPUT=$(cat) + +if [[ -f "${AMPLIHACK_HOOKS}/stop.py" ]]; then + echo "$INPUT" | python3 "${AMPLIHACK_HOOKS}/stop.py" "$@" 2>/dev/null || true +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/stop.py" ]]; then + echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/stop.py" "$@" 2>/dev/null || true +fi + +if [[ -f "${AMPLIHACK_HOOKS}/session_stop.py" ]]; then + echo "$INPUT" | python3 "${AMPLIHACK_HOOKS}/session_stop.py" "$@" 2>/dev/null || true +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/session_stop.py" ]]; then + echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/session_stop.py" "$@" 2>/dev/null || true +fi diff --git a/.github/hooks/user-prompt-submit b/.github/hooks/user-prompt-submit new file mode 100755 index 00000000..4f112305 --- /dev/null +++ b/.github/hooks/user-prompt-submit @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Copilot hook wrapper - generated by amplihack +# Runs multiple hook scripts for this event +AMPLIHACK_HOOKS="$HOME/.amplihack/.claude/tools/amplihack/hooks" +REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || REPO_ROOT="" +INPUT=$(cat) + +if [[ -f "${AMPLIHACK_HOOKS}/user_prompt_submit.py" ]]; then + echo "$INPUT" | python3 "${AMPLIHACK_HOOKS}/user_prompt_submit.py" "$@" 2>/dev/null || true +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/user_prompt_submit.py" ]]; then + echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/user_prompt_submit.py" "$@" 2>/dev/null || true +fi + +if [[ -f "${AMPLIHACK_HOOKS}/workflow_classification_reminder.py" ]]; then + echo "$INPUT" | python3 "${AMPLIHACK_HOOKS}/workflow_classification_reminder.py" "$@" 2>/dev/null || true +elif [[ -n "$REPO_ROOT" ]] && [[ -f "${REPO_ROOT}/.claude/tools/amplihack/hooks/workflow_classification_reminder.py" ]]; then + echo "$INPUT" | python3 "${REPO_ROOT}/.claude/tools/amplihack/hooks/workflow_classification_reminder.py" "$@" 2>/dev/null || true +fi diff --git a/AGENTS.md b/AGENTS.md index 503f315e..24ce10b3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -12,392 +12,4 @@ After the skill is activated, the next tool call must execute the `smart-orchest Do not follow the workflow manually and do not fall back to legacy `ultrathink` behavior. -## Auto-routing prompt - - -Before responding, classify this prompt using parallel signal evaluation: - -```mermaid -flowchart TD - MSG[User Message] --> BYPASS{"Bypass?
'just answer' / 'skip workflow'"} - BYPASS -->|yes| SKIP[SKIP: respect bypass] - - MSG --> EVAL{Detect all signals} - EVAL --> S1["UNDERSTAND
explain, how does, why,
analyze, research, explore"] - EVAL --> S2["IMPLEMENT
build, fix, add, create,
refactor, update, write"] - EVAL --> S3["FILE_EDIT
any code/doc file will be
created / modified / deleted"] - EVAL --> S4["SHELL_ONLY
run tests, git status,
check logs, show disk"] - EVAL --> S5["QUESTION
what is, how do I,
explain, compare"] - - EVAL --> RESOLVE{Resolve by priority} - RESOLVE -->|"UNDERSTAND + IMPLEMENT"| HYBRID[HYBRID → dev-orchestrator] - RESOLVE -->|"SHELL_ONLY + IMPLEMENT"| HYBRID - RESOLVE -->|"FILE_EDIT or IMPLEMENT alone"| DEV[DEV → dev-orchestrator] - RESOLVE -->|"UNDERSTAND alone"| INVESTIGATE[INVESTIGATE → dev-orchestrator] - RESOLVE -->|"SHELL_ONLY alone"| OPS[OPS: execute directly] - RESOLVE -->|"QUESTION alone"| QA[Q&A: answer directly] -``` - -Resolution rules — detect all signals, then apply the FIRST matching rule: -1. BYPASS → SKIP -2. UNDERSTAND + IMPLEMENT both present → **HYBRID** (even if FILE_EDIT also present) -3. SHELL_ONLY + IMPLEMENT both present → **HYBRID** (run command then fix) -4. FILE_EDIT or IMPLEMENT present (no UNDERSTAND) → **DEV** -5. UNDERSTAND present (no IMPLEMENT) → **INVESTIGATE** -6. SHELL_ONLY present (no FILE_EDIT) → **OPS** -7. QUESTION present (no FILE_EDIT) → **Q&A** -8. Ambiguous → **DEV** (safe default) - -Calibration examples (use these to resolve edge cases): - "run tests and fix failures" = HYBRID (SHELL_ONLY + IMPLEMENT → both phases) - "investigate X then fix Y" = HYBRID - "what does this error mean and how do I fix it" = HYBRID (UNDERSTAND + IMPLEMENT) - "change the default model" = DEV. "update README" = DEV. "fix a typo" = DEV. - "review this PR" = DEV. "make sure it works" = DEV. "write docs" = DEV. - "run tests" = OPS. "git status" = OPS. - "what is OAuth?" = Q&A. "how does X work?" = Q&A (no fix requested). - "what's broken?" = INVESTIGATE. "why is CI failing?" = INVESTIGATE. - "research rate limiting then implement it" = HYBRID. - "analyze deps and remove circular imports" = HYBRID (UNDERSTAND + IMPLEMENT). - -Actions: - DEV → Say "[auto-routed] DEV → launching dev-orchestrator" then Invoke Skill(skill="dev-orchestrator"). Do not answer inline. - INVESTIGATE → Say "[auto-routed] INVESTIGATE → launching dev-orchestrator" then Invoke Skill(skill="dev-orchestrator"). Do not answer inline. - HYBRID → Say "[auto-routed] HYBRID → launching dev-orchestrator" then Invoke Skill(skill="dev-orchestrator"). Do not answer inline. - Q&A → Answer directly. No workflow needed. - OPS → Execute directly. No workflow needed. - SKIP → Respect the bypass. - -When in doubt, choose DEV. False positive costs minutes. False negative costs quality. -
- -## Execution Instructions - -**YOUR NEXT ACTION after reading this skill MUST include a Bash tool call that -executes `run_recipe_by_name("smart-orchestrator")`.** If your runtime requires -a `report_intent` call (e.g. Copilot CLI), emit it **in parallel** with the -Bash launch — both tool calls in the same response. The constraint is that no -_other_ substantive action (reading files, calling TodoWrite, invoking Agent, -or typing a response) may precede or replace the Bash launch. - -If you find yourself doing anything else instead of launching the recipe -runner, you are bypassing the workflow. Stop. Go back. Run the recipe runner. - -Development and Investigation tasks have exactly ONE execution path: the -recipe runner. There is no "Option B." There is no "I'll follow the steps -manually." There is no "Let me just quickly..." The recipe runner IS the -workflow. Skipping it means skipping the 23-step process, the recursion -guards, the goal verification, and the quality gates that exist for a reason. - -A PostToolUse hook monitors every tool call after this skill activates. If -it detects 3 tool calls without recipe runner evidence, it will fire a hard -WARNING. Do not wait for the warning — run the recipe runner immediately. - -When this skill is activated: - -### REQUIRED: Execute via Recipe Runner — IMMEDIATELY - -Your next tool call(s) must include the recipe runner launch (alongside -`report_intent` if your runtime requires it). - -#### Default: Direct Execution - -The recipe runner is a plain subprocess — it does **not** require tmux. -Call `run_recipe_by_name()` directly: - -```bash -cd /path/to/repo && env -u CLAUDECODE \ - AMPLIHACK_HOME=/path/to/amplihack PYTHONPATH=src \ - python3 -c " -from amplihack.recipes import run_recipe_by_name - -result = run_recipe_by_name( - 'smart-orchestrator', - user_context={ - 'task_description': '''TASK_DESCRIPTION_HERE''', - 'repo_path': '.', - }, - progress=True, -) -print(f'Recipe result: {result}') -" -``` - -**Key points:** - -- `PYTHONPATH=src python3` — uses the interpreter on PATH while forcing imports from the checked-out repo source tree (do NOT hardcode `.venv/bin/python`) -- `run_recipe_by_name` — delegates to the Rust binary via `subprocess.Popen`; no tmux involved -- `progress=True` — streams recipe-runner stderr live so you see nested step activity -- The recipe runner manages its own child processes (agent sessions, bash steps) as direct subprocesses - -This is the preferred execution mode for most scenarios. It is simpler, has -no external dependencies beyond Python and the Rust binary, works on all -platforms, and makes output capture straightforward. - -#### Durable Execution (tmux) — optional - -Use tmux **only** when: - -- The agent runtime may kill background processes after a timeout (e.g., some - Claude Code hosted environments) -- You need to survive SSH disconnects or terminal closures -- You want to detach and monitor a long-running recipe interactively - -```bash -LOG_FILE=$(mktemp /tmp/recipe-runner-output.XXXXXX.log) -SCRIPT_FILE=$(mktemp /tmp/recipe-runner-script.XXXXXX.py) -chmod 600 "$LOG_FILE" "$SCRIPT_FILE" -cat > "$SCRIPT_FILE" << 'RECIPE_SCRIPT' -from amplihack.recipes import run_recipe_by_name - -result = run_recipe_by_name( - "smart-orchestrator", - user_context={ - "task_description": """TASK_DESCRIPTION_HERE""", - "repo_path": ".", - }, - progress=True, -) -print(f"Recipe result: {result}") -RECIPE_SCRIPT -tmux new-session -d -s recipe-runner \ - "cd /path/to/repo && env -u CLAUDECODE \ - AMPLIHACK_HOME=/path/to/amplihack PYTHONPATH=src \ - python3 $SCRIPT_FILE 2>&1 | tee $LOG_FILE" -echo "Recipe runner log: $LOG_FILE" -``` - -- The Python payload is written to a temp script to avoid nested quoting - issues that cause silent launch failures (see issue #3215) -- `chmod 600 "$LOG_FILE" "$SCRIPT_FILE"` — keeps both files private -- `tmux new-session -d` — detached session, no timeout, survives disconnects -- Monitor with: `tail -f "$LOG_FILE"` or `tmux attach -t recipe-runner` - -**Restarting a stale tmux session**: Some runtimes (e.g. Copilot CLI) block -`tmux kill-session` because it does not target a numeric PID. Use one of these -shell-policy-safe alternatives instead: - -```bash -# Option A (preferred): use a unique session name per run to avoid collisions -tmux new-session -d -s "recipe-$(date +%s)" "..." - -# Option B: locate the tmux server PID and terminate with numeric kill -tmux list-sessions -F '#{pid}' 2>/dev/null | xargs -I{} kill {} - -# Option C: let tmux itself handle it — send exit to all panes -tmux send-keys -t recipe-runner "exit" Enter 2>/dev/null; sleep 1 -``` - -If using Option A, update the `tail -f` / `tmux attach` commands to use the -same session name. - -**The recipe runner is the required execution path for Development and -Investigation tasks.** Always try `smart-orchestrator` first. - -**Required environment variables** for the recipe runner: - -- `AMPLIHACK_HOME` — must point to the amplihack repo root (e.g., - `/home/user/src/amplihack`). The recipe runner uses this to find - `amplifier-bundle/tools/orch_helper.py` and other orchestrator scripts. -- Preserve `AMPLIHACK_AGENT_BINARY` — nested workflow agents read this env var - to stay on the caller's active binary (for example, Copilot in Copilot CLI). - The Python wrapper no longer forwards the removed `--agent-binary` CLI flag, - so keeping this env var set is now the correct behavior. -- Unset `CLAUDECODE` — required so nested Claude Code sessions can launch. - -**Fallback: Direct recipe invocation when smart-orchestrator fails.** - -Always try `smart-orchestrator` first — it handles classification, decomposition, -and routing automatically. However, if `smart-orchestrator` fails at the -**infrastructure level** (e.g., 0 workstreams from decomposition, missing env -vars, Rust binary version mismatch), you MAY invoke the specific workflow -recipe directly based on your classification: - -| Classification | Direct Recipe | When to Use | -| -------------- | ------------------------ | --------------------------------------- | -| Investigation | `investigation-workflow` | smart-orchestrator decomposition failed | -| Development | `default-workflow` | smart-orchestrator decomposition failed | -| Q&A (complex) | `qa-workflow` | Q&A needing multi-step research | -| Consensus | `consensus-workflow` | Critical decisions needing validation | - -Example: - -```python -run_recipe_by_name("investigation-workflow", user_context={ - 'task_description': task, 'repo_path': '.', -}, progress=True) -``` - -This is NOT a license to bypass `smart-orchestrator`. Only use direct -invocation after `smart-orchestrator` has failed at an infrastructure level -(not because the task seems "too simple" or "too specific"). - -**Handling hollow success** (recipe completes but agents produce no findings): - -If a recipe returns SUCCESS but the agent outputs indicate the agents could -not access the codebase or produced empty/generic results (e.g., "no codebase -exists", "cannot proceed without a target"), this is a **hollow success**. -In this case: - -1. Check that `repo_path` and `AMPLIHACK_HOME` are correct -2. Verify the working directory is the repo root -3. Retry with explicit file paths in the `task_description` -4. If retries also produce hollow results, report the infrastructure - failure to the user with specifics - -**Common rationalizations that are NOT acceptable:** - -- "Let me first understand the codebase" — the recipe does that in Step 0 -- "I'll follow the workflow steps manually" — NO, the recipe enforces them -- "The recipe runner might not work" — try it first, report errors if it fails -- "This is a simple task" — simple or complex, the recipe runner handles both -- "The recipe succeeded but didn't do anything useful, so I'll do it myself" - — this is hollow success; retry with better context first - -**Q&A and Operations only** may bypass the recipe runner: - -- Q&A: Respond directly (analyzer agent) -- Operations: Builder agent (direct execution, no workflow steps) - -### Error Recovery: Adaptive Strategy (NOT Degradation) - -When `smart-orchestrator` fails, **failures must be visible and surfaced** — -never swallowed or silently degraded. The recipe handles error recovery -automatically via its built-in adaptive strategy steps, but if you observe -a failure outside the recipe, follow this protocol: - -**1. Surface the error with full context:** - -Report the exact error, the step that failed, and the log output. Never say -"something went wrong" — always include the specific failure details. - -**2. File a bug with reproduction details:** - -For infrastructure failures (import errors, missing env vars, binary not found, -decomposition producing invalid output), file a GitHub issue: - -```bash -gh issue create \ - --title "smart-orchestrator infrastructure failure: " \ - --body "" \ - --label "bug" -``` - -**3. Evaluate alternative strategies:** - -If `smart-orchestrator` fails at the infrastructure level (not because the task -is wrong), you MAY invoke the specific workflow recipe directly. This is an -**adaptive strategy** — it must be announced explicitly, not done silently: - -| Classification | Direct Recipe | When Permitted | -| -------------- | ------------------------ | --------------------------------------------------- | -| Investigation | `investigation-workflow` | smart-orchestrator failed at parse/decompose/launch | -| Development | `default-workflow` | smart-orchestrator failed at parse/decompose/launch | - -Example: - -```python -# ANNOUNCE the strategy change first — never do this silently -print("[ADAPTIVE] smart-orchestrator failed at parse-decomposition: ") -print("[ADAPTIVE] Switching to direct investigation-workflow invocation") -run_recipe_by_name("investigation-workflow", user_context={...}, progress=True) -``` - -**This is NOT a license to bypass smart-orchestrator.** Always try it first. -Direct invocation is only permitted when smart-orchestrator fails at the -infrastructure level. "The task seems simple" is NOT an infrastructure failure. - -**4. Detect hollow success:** - -A recipe can complete structurally (all steps exit 0) but produce empty or -meaningless results — agents reporting "no codebase found" or reflection -marking ACHIEVED when no work was done. After execution, check that: - -- Round results contain actual findings or code changes (not "I could not access...") -- PR URLs or concrete outputs are present for Development tasks -- At least one success criterion was verifiably evaluated - -If results are hollow, report this to the user with the specific empty outputs. -Do not declare success when agents produced no meaningful work. - -### Required Environment Variables - -The recipe runner requires these environment variables to function: - -| Variable | Purpose | Default | -| -------------------------- | ------------------------------------------------- | --------------- | -| `AMPLIHACK_HOME` | Root of amplihack installation (for asset lookup) | Auto-detected | -| `AMPLIHACK_AGENT_BINARY` | Which agent binary to use (claude, copilot, etc.) | Set by launcher | -| `AMPLIHACK_MAX_DEPTH` | Max recursion depth for nested sessions | `3` | -| `AMPLIHACK_NONINTERACTIVE` | Set to `1` to skip interactive prompts | Unset | - -If `AMPLIHACK_HOME` is not set and auto-detection fails, `parse-decomposition` -and `activate-workflow` will fail with "orch_helper.py not found". Set it to -the directory containing `amplifier-bundle/`. - -### After Execution: Reflect and verify - -After execution completes, verify the goal was achieved. If not: - -- For missing information: ask the user -- For fixable gaps: re-invoke with the remaining work description -- For infrastructure failures: file a bug and try adaptive strategy - -### Enforcement: PostToolUse Workflow Guard - -A PostToolUse hook (`workflow_enforcement_hook.py`) actively monitors every -tool call after this skill is invoked. It tracks: - -- Whether `/dev` or `dev-orchestrator` was called (sets a flag) -- Whether the recipe runner was actually executed (clears the flag) -- How many tool calls have passed without workflow evidence - -If 3+ tool calls pass without evidence of recipe runner execution, the hook -emits a hard WARNING. This is not a suggestion — it means you are violating -the mandatory workflow. State is stored in `/tmp/amplihack-workflow-state/`. - -## User Preferences - -# User Preferences - -**MANDATORY**: These preferences MUST be followed by all agents. Priority #2 (only explicit user requirements override). - -## Autonomy - -Work autonomously. Follow workflows without asking permission between steps. Only ask when truly blocked on critical missing information. - -## Core Preferences - -| Setting | Value | -| ------------------- | -------------------------- | -| Verbosity | balanced | -| Communication Style | (not set) | -| Update Frequency | regular | -| Priority Type | balanced | -| Collaboration Style | autonomous and independent | -| Auto Update | ask | -| Neo4j Auto-Shutdown | ask | -| Preferred Languages | (not set) | -| Coding Standards | (not set) | - -## Workflow Configuration - -**Selected**: DEFAULT_WORKFLOW (`@~/.amplihack/.claude/workflows/DEFAULT_WORKFLOW.md`) -**Consensus Depth**: balanced - -Use CONSENSUS_WORKFLOW for: ambiguous requirements, architectural changes, critical/security code, public APIs. - -## Behavioral Rules - -- **No sycophancy**: Be direct, challenge wrong ideas, point out flaws. Never use "Great idea!", "Excellent point!", etc. See `@~/.amplihack/.claude/context/TRUST.md`. -- **Quality over speed**: Always prefer complete, high-quality work over fast delivery. - -## Learned Patterns - - - -## Managing Preferences - -Use `/amplihack:customize` to view or modify (`set`, `show`, `reset`, `learn`). - diff --git a/README.md b/README.md index 159bc40c..7fee5afd 100644 --- a/README.md +++ b/README.md @@ -2395,4 +2395,4 @@ azlin COMMAND --help --- -**For detailed API documentation and architecture, see [docs/](docs/)** +**For detailed API documentation and architecture, see [docs/](docs/)** | **[Testing Guide](TESTING.md)** diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 00000000..31bf857e --- /dev/null +++ b/TESTING.md @@ -0,0 +1,192 @@ +# Testing Guide + +How to run azlin tests. For detailed specifications, see the linked docs. + +## Prerequisites + +- **Rust 1.85+** (edition 2021) — install via [rustup](https://rustup.rs/) +- **Azure CLI** (`az`) — for live Azure tests only +- **gadugi-agentic-test** — for YAML scenario tests only + +The workspace config at `rust/.cargo/config.toml` automatically sets +`RUST_MIN_STACK=8388608` (8 MB stack), so no manual env setup is needed locally. + +## Quick Start + +```bash +cd rust +cargo test --all +``` + +This runs all unit and integration tests (excluding `#[ignore]`-gated live Azure tests). + +## Test Categories + +### Rust Unit Tests + +72 test groups in `rust/crates/azlin/src/tests/` plus 13 handler test groups in +`rust/crates/azlin/src/handlers/tests/`. These test command parsing, output +formatting, config handling, and business logic with mock data. + +```bash +cd rust +cargo test --all # all unit + integration tests +cargo test --lib # unit tests only +cargo test test_group_list # single test group by name +``` + +### Rust Integration Tests + +18 test files in `rust/crates/azlin/tests/` covering CLI invocation, config +loading, session management, error handling, output formats, and end-to-end +command flows. + +```bash +cd rust +cargo test --test cli_integration # single integration test file +cargo test --test config_integration +``` + +Key integration test files: + +| File | Coverage | +|------|----------| +| `cli_integration.rs` | CLI arg parsing, help, version | +| `config_integration.rs` | Config load/save/defaults | +| `session_integration.rs` | Session persistence | +| `local_e2e.rs` | End-to-end command flows (local, no Azure) | +| `parity_integration.rs` | Python/Rust output parity | +| `backup_dr_integration.rs` | Snapshot/backup CLI validation | +| `azure_live_integration.rs` | Live Azure API calls (ignored) | +| `live_commands_integration.rs` | Live command execution (ignored) | + +### Live Azure Tests + +Tests in `azure_live_integration.rs` and `live_commands_integration.rs` are +marked `#[ignore]` and require real Azure credentials. + +```bash +# Setup +az login + +# Run ignored tests explicitly +cd rust +cargo test --test azure_live_integration -- --ignored +cargo test --test live_commands_integration -- --ignored +``` + +These tests hit real Azure APIs against hardcoded resource groups and VMs. +See [docs/REAL_AZURE_TESTING.md](docs/REAL_AZURE_TESTING.md) for manual +testing procedures. + +### Agentic Scenario Tests + +YAML-based tests in `tests/agentic-scenarios/` using the gadugi test runner. +These verify CLI behavior through scripted agent interactions. + +```bash +# Point AZLIN_BIN at the debug or release binary +export AZLIN_BIN=./rust/target/debug/azlin + +# Build first +cd rust && cargo build && cd .. + +# Run scenarios +gadugi-test run -d tests/agentic-scenarios +``` + +Scenarios: +- `ssh-identity-key.yaml` — SSH key auto-resolution +- `new-command-parity.yaml` — `azlin new` command parity checks + +See [docs/AGENTIC_INTEGRATION_TESTS.md](docs/AGENTIC_INTEGRATION_TESTS.md) +for the full agentic test case specification. + +### E2E Tests + +End-to-end YAML scenarios in `tests/e2e/`: + +- `test_restore_multi_session.yaml` — Multi-session restore flow + +These also use the gadugi runner with `AZLIN_BIN`. + +### Benchmarks + +Python-based performance benchmarks in `benchmarks/`. These were written for +the original Python implementation and measure Azure API and SSH operation +latency. + +```bash +pip install memory-profiler line-profiler pytest-benchmark +python benchmarks/benchmark_vm_list.py +python benchmarks/benchmark_parallel_vm_list.py +``` + +See [benchmarks/README.md](benchmarks/README.md) for full setup and +baseline comparison workflows. + +### Backup & Disaster Recovery Tests + +[PLANNED — Implementation Pending] + +The backup and DR feature (Issue #439) adds four modules: BackupManager, +ReplicationManager, VerificationManager, and DRTestManager. The test plan +follows the testing pyramid: + +- **Unit tests (60%)** — 102+ methods across 4 test files covering backup + scheduling, cross-region replication, backup verification, and DR test + orchestration. +- **Integration tests (30%)** — 12+ methods testing multi-module workflows + (backup → replicate → verify). +- **E2E tests (10%)** — 6 methods covering complete user journeys including + region-failover and RTO validation (<15 min target). + +Current Rust integration tests in `backup_dr_integration.rs` validate the +`snapshot` subcommand CLI surface. Full test specifications are in +[docs/testing/backup-dr-test-coverage.md](docs/testing/backup-dr-test-coverage.md). + +## Environment Variables + +| Variable | Purpose | Required | +|----------|---------|----------| +| `RUST_MIN_STACK` | 8 MB stack for large CLI enum (set automatically by `.cargo/config.toml`) | Auto | +| `AZLIN_BIN` | Path to azlin binary for agentic/E2E tests | Agentic tests | +| `AZLIN_TEST_MODE` | Enables mock data in list commands | Some unit tests | +| `ANTHROPIC_API_KEY` | Anthropic API access for `azlin do` commands | Agentic tests | +| `AZURE_SUBSCRIPTION_ID` | Azure subscription (removed in test helpers to isolate) | Live Azure tests | +| `AZURE_TENANT_ID` | Azure tenant (removed in test helpers to isolate) | Live Azure tests | + +## Linting + +```bash +cd rust +cargo clippy --all -- -D warnings +``` + +CI treats all clippy warnings as errors. + +## Test Coverage + +```bash +cd rust +cargo llvm-cov --all +``` + +Requires `cargo-llvm-cov` (`cargo install cargo-llvm-cov`). + +## CI Pipeline + +The GitHub Actions workflow at `.github/workflows/rust-ci.yml` runs on every +push and PR touching `rust/**`: + +1. **Build** — `cargo build --release` +2. **Test** — `cargo test --all` (with `RUST_MIN_STACK=8388608`) +3. **Lint** — `cargo clippy --all -- -D warnings` + +## Detailed Documentation + +- [docs/TEST_SUITE_SPECIFICATION.md](docs/TEST_SUITE_SPECIFICATION.md) — Exhaustive CLI syntax test spec (300+ tests) +- [docs/AGENTIC_INTEGRATION_TESTS.md](docs/AGENTIC_INTEGRATION_TESTS.md) — Agentic "do" mode test cases +- [docs/REAL_AZURE_TESTING.md](docs/REAL_AZURE_TESTING.md) — Manual Azure testing procedures +- [docs/testing/backup-dr-test-coverage.md](docs/testing/backup-dr-test-coverage.md) — Backup & DR TDD test plan (170+ tests) +- [benchmarks/README.md](benchmarks/README.md) — Benchmark setup and comparison workflows diff --git a/docs/index.md b/docs/index.md index 1c77085b..66d7314d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -46,6 +46,19 @@ - [Tmux Session Status](./features/tmux-session-status.md) - [VM Lifecycle Automation](./features/vm-lifecycle-automation.md) +## Testing + +- [Testing Guide](../TESTING.md) — How to run all azlin tests (quick start, categories, env vars, CI) +- [Test Suite Specification](./TEST_SUITE_SPECIFICATION.md) — 300+ CLI syntax tests +- [Agentic Integration Tests](./AGENTIC_INTEGRATION_TESTS.md) — YAML-based scenario tests +- [Real Azure Testing](./REAL_AZURE_TESTING.md) — Manual testing with live Azure credentials +- [Backup & DR Test Coverage](./testing/backup-dr-test-coverage.md) — TDD test plan for backup and disaster recovery (170+ tests) +- [Test Strategy](./testing/test_strategy.md) — Test pyramid, mocking patterns, TDD approach + +## Features (In Progress) + +- [Backup & Disaster Recovery](./backup-disaster-recovery.md) — Automated backup scheduling, cross-region replication, verification, and DR testing + ## Monitoring - [Monitoring Quick Reference](./monitoring-quick-reference.md) — Dashboard, alerts, metrics diff --git a/pyproject.toml b/pyproject.toml index 29bed8e0..8fd7a29d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "azlin" -version = "2.7.0" +version = "2.8.0" description = "Azure VM fleet management CLI - provision, manage, and monitor development VMs" requires-python = ">=3.11" authors = [ diff --git a/rust/crates/azlin-cli/src/lib.rs b/rust/crates/azlin-cli/src/lib.rs index 1c8334a2..ca5ec492 100644 --- a/rust/crates/azlin-cli/src/lib.rs +++ b/rust/crates/azlin-cli/src/lib.rs @@ -548,6 +548,20 @@ pub enum Commands { action: SnapshotAction, }, + // ── Backup Commands ────────────────────────────────────────────── + /// Manage VM backup policies, retention, and cross-region replication + Backup { + #[command(subcommand)] + action: BackupAction, + }, + + // ── Disaster Recovery Commands ─────────────────────────────────── + /// Disaster recovery testing and validation + Dr { + #[command(subcommand)] + action: DrAction, + }, + // ── Storage Commands ────────────────────────────────────────────── /// Manage NFS storage for shared home directories Storage { @@ -1491,6 +1505,194 @@ pub enum SnapshotAction { }, } +// ── Backup subcommands ──────────────────────────────────────────────────── + +/// Backup tier for retention classification. +#[derive(ValueEnum, Debug, Clone, Copy)] +pub enum BackupTier { + /// Daily backups + Daily, + /// Weekly backups + Weekly, + /// Monthly backups + Monthly, +} + +#[derive(Subcommand, Debug)] +pub enum BackupAction { + /// Configure backup policy for a VM + Configure { + /// VM name + vm_name: String, + /// Number of daily backups to retain + #[arg(long)] + daily_retention: Option, + /// Number of weekly backups to retain + #[arg(long)] + weekly_retention: Option, + /// Number of monthly backups to retain + #[arg(long)] + monthly_retention: Option, + /// Enable cross-region replication + #[arg(long)] + cross_region: bool, + /// Target region for cross-region replication + #[arg(long)] + target_region: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Trigger an on-demand backup for a VM + Trigger { + /// VM name + vm_name: String, + /// Backup tier override + #[arg(long, value_enum)] + tier: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// List backups for a VM + List { + /// VM name + vm_name: String, + /// Filter by backup tier + #[arg(long, value_enum)] + tier: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Restore a VM from a backup + Restore { + /// VM name + vm_name: String, + /// Backup name to restore from + #[arg(long)] + backup: String, + /// Skip confirmation prompt + #[arg(long)] + force: bool, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Verify integrity of a specific backup + Verify { + /// Backup name to verify + backup_name: String, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Replicate a backup to another region + Replicate { + /// Backup name to replicate + backup_name: String, + /// Target region for replication + target_region: String, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Show current backup configuration for a VM + ConfigShow { + /// VM name + vm_name: String, + }, + /// Disable backups for a VM + Disable { + /// VM name + vm_name: String, + }, + /// Replicate all backups for a VM to another region + ReplicateAll { + /// VM name + vm_name: String, + /// Target region for replication + target_region: String, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Show replication status for a VM + ReplicationStatus { + /// VM name + vm_name: String, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// List replication jobs + ReplicationJobs { + /// Filter by job status + #[arg(long)] + status: Option, + /// Filter by VM name + #[arg(long)] + vm: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Verify all backups for a VM + VerifyAll { + /// VM name + vm_name: String, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Show backup verification report + VerificationReport { + /// Number of days to include in report + #[arg(long, default_value = "30")] + days: u32, + /// Filter by VM name + #[arg(long)] + vm: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, +} + +// ── Disaster Recovery subcommands ───────────────────────────────────────── + +#[derive(Subcommand, Debug)] +pub enum DrAction { + /// Run a disaster recovery test for a VM + Test { + /// VM name + vm_name: String, + /// Region to test recovery in + #[arg(long)] + test_region: String, + /// Specific backup to use for DR test + #[arg(long)] + backup: Option, + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Run DR tests for all VMs in a resource group + TestAll { + /// Test region for DR validation + #[arg(long)] + test_region: Option, + /// Resource group to test + #[arg(long, alias = "rg")] + resource_group: Option, + }, + /// Show DR test history for a VM + TestHistory { + /// VM name + vm_name: String, + /// Number of days of history to show + #[arg(long, default_value = "30")] + days: u32, + }, + /// Show DR test success rate + SuccessRate { + /// Filter by VM name + #[arg(long)] + vm: Option, + /// Number of days to include + #[arg(long, default_value = "90")] + days: u32, + }, +} + // ── Storage subcommands ─────────────────────────────────────────────────── #[derive(Subcommand, Debug)] diff --git a/rust/crates/azlin/src/cmd_backup.rs b/rust/crates/azlin/src/cmd_backup.rs new file mode 100644 index 00000000..7834d5e4 --- /dev/null +++ b/rust/crates/azlin/src/cmd_backup.rs @@ -0,0 +1,130 @@ +#[allow(unused_imports)] +use super::*; +use anyhow::Result; + +pub(crate) async fn dispatch( + command: azlin_cli::Commands, + verbose: bool, + output: &azlin_cli::OutputFormat, +) -> Result<()> { + #[allow(unused_variables)] + let _ = (verbose, output); + match command { + azlin_cli::Commands::Backup { action } => match action { + azlin_cli::BackupAction::Configure { + vm_name, + daily_retention, + weekly_retention, + monthly_retention, + cross_region, + target_region, + resource_group, + } => { + crate::cmd_backup_ops::handle_backup_configure( + &vm_name, + daily_retention, + weekly_retention, + monthly_retention, + cross_region, + target_region.as_deref(), + resource_group.as_deref(), + )?; + } + azlin_cli::BackupAction::Trigger { + vm_name, + tier, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_trigger(&vm_name, tier, &rg).await?; + } + azlin_cli::BackupAction::List { + vm_name, + tier, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_list(&vm_name, tier, &rg).await?; + } + azlin_cli::BackupAction::Restore { + vm_name, + backup, + force, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_restore(&vm_name, &backup, force, &rg) + .await?; + } + azlin_cli::BackupAction::Verify { + backup_name, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_verify(&backup_name, &rg).await?; + } + azlin_cli::BackupAction::Replicate { + backup_name, + target_region, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_replicate(&backup_name, &target_region, &rg) + .await?; + } + azlin_cli::BackupAction::ConfigShow { vm_name } => { + crate::cmd_backup_ops::handle_backup_config_show(&vm_name)?; + } + azlin_cli::BackupAction::Disable { vm_name } => { + crate::cmd_backup_ops::handle_backup_disable(&vm_name)?; + } + azlin_cli::BackupAction::ReplicateAll { + vm_name, + target_region, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_replicate_all(&vm_name, &target_region, &rg) + .await?; + } + azlin_cli::BackupAction::ReplicationStatus { + vm_name, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_replication_status(&vm_name, &rg).await?; + } + azlin_cli::BackupAction::ReplicationJobs { + status, + vm, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_replication_jobs( + status.as_deref(), + vm.as_deref(), + &rg, + ) + .await?; + } + azlin_cli::BackupAction::VerifyAll { + vm_name, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_backup_verify_all(&vm_name, &rg).await?; + } + azlin_cli::BackupAction::VerificationReport { + days, + vm, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + crate::cmd_backup_ops::handle_verification_report(days, vm.as_deref(), &rg) + .await?; + } + }, + _ => unreachable!(), + } + Ok(()) +} diff --git a/rust/crates/azlin/src/cmd_backup_ops.rs b/rust/crates/azlin/src/cmd_backup_ops.rs new file mode 100644 index 00000000..39b21aef --- /dev/null +++ b/rust/crates/azlin/src/cmd_backup_ops.rs @@ -0,0 +1,876 @@ +#[allow(unused_imports)] +use super::*; +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +// --------------------------------------------------------------------------- +// Backup config persistence (~/.azlin/backup/{vm_name}.toml) +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct BackupConfig { + pub vm_name: String, + pub daily_retention: Option, + pub weekly_retention: Option, + pub monthly_retention: Option, + pub cross_region: bool, + pub target_region: Option, + pub resource_group: Option, + pub created: String, +} + +fn backup_config_dir() -> PathBuf { + dirs::home_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join(".azlin") + .join("backup") +} + +fn backup_config_path(vm_name: &str) -> PathBuf { + backup_config_dir().join(format!("{}.toml", vm_name)) +} + +fn load_backup_config(vm_name: &str) -> Result> { + let path = backup_config_path(vm_name); + match std::fs::read_to_string(&path) { + Ok(contents) => { + let config: BackupConfig = toml::from_str(&contents) + .map_err(|e| anyhow::anyhow!("Corrupt backup config at {}: {}", path.display(), e))?; + Ok(Some(config)) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(anyhow::anyhow!("Failed to read backup config: {}", e)), + } +} + +fn save_backup_config(config: &BackupConfig) -> Result<()> { + let dir = backup_config_dir(); + std::fs::create_dir_all(&dir)?; + let path = backup_config_path(&config.vm_name); + let contents = toml::to_string_pretty(config)?; + std::fs::write(path, contents)?; + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup configure — local config, no Azure credentials needed +// --------------------------------------------------------------------------- + +pub(crate) fn handle_backup_configure( + vm_name: &str, + daily_retention: Option, + weekly_retention: Option, + monthly_retention: Option, + cross_region: bool, + target_region: Option<&str>, + resource_group: Option<&str>, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + + let config = BackupConfig { + vm_name: vm_name.to_string(), + daily_retention, + weekly_retention, + monthly_retention, + cross_region, + target_region: target_region.map(|s| s.to_string()), + resource_group: resource_group.map(|s| s.to_string()), + created: chrono::Utc::now().to_rfc3339(), + }; + save_backup_config(&config)?; + + println!("Configured backup policy for VM '{}':", vm_name); + if let Some(d) = daily_retention { + println!(" Daily retention: {} days", d); + } + if let Some(w) = weekly_retention { + println!(" Weekly retention: {} weeks", w); + } + if let Some(m) = monthly_retention { + println!(" Monthly retention: {} months", m); + } + if cross_region { + if let Some(region) = target_region { + println!(" Cross-region: enabled (target: {})", region); + } else { + println!(" Cross-region: enabled"); + } + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup config-show — reads local config +// --------------------------------------------------------------------------- + +pub(crate) fn handle_backup_config_show(vm_name: &str) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + match load_backup_config(vm_name)? { + Some(config) => { + println!("Backup configuration for VM '{}':", vm_name); + println!( + " Daily retention: {}", + config + .daily_retention + .map_or("not set".to_string(), |d| format!("{} days", d)) + ); + println!( + " Weekly retention: {}", + config + .weekly_retention + .map_or("not set".to_string(), |w| format!("{} weeks", w)) + ); + println!( + " Monthly retention: {}", + config + .monthly_retention + .map_or("not set".to_string(), |m| format!("{} months", m)) + ); + if config.cross_region { + println!( + " Cross-region: enabled (target: {})", + config.target_region.as_deref().unwrap_or("not set") + ); + } else { + println!(" Cross-region: disabled"); + } + } + None => { + println!("No backup configuration found for VM '{}'.", vm_name); + } + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup disable — removes local config +// --------------------------------------------------------------------------- + +pub(crate) fn handle_backup_disable(vm_name: &str) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let path = backup_config_path(vm_name); + if path.exists() { + std::fs::remove_file(&path)?; + println!("Disabled backups for VM '{}'.", vm_name); + } else { + println!("No backup configuration found for VM '{}'.", vm_name); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup trigger — triggers on-demand backup via az CLI +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_backup_trigger( + vm_name: &str, + tier: Option, + rg: &str, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let tier_label = match tier { + Some(azlin_cli::BackupTier::Daily) => "daily", + Some(azlin_cli::BackupTier::Weekly) => "weekly", + Some(azlin_cli::BackupTier::Monthly) => "monthly", + None => "daily", + }; + + let ts = chrono::Utc::now().format("%Y%m%d_%H%M%S").to_string(); + let backup_name = format!("{}-backup-{}-{}", vm_name, tier_label, ts); + + let (disk_id, location) = crate::dispatch_helpers::lookup_vm_disk_info(rg, vm_name)?; + + let pb = penguin_spinner(&format!("Creating {} backup for '{}'...", tier_label, vm_name)); + + let output = std::process::Command::new("az") + .args([ + "snapshot", + "create", + "--resource-group", + rg, + "--source", + &disk_id, + "--name", + &backup_name, + "--location", + &location, + "--tags", + &format!("tier={}", tier_label), + &format!("vm={}", vm_name), + "type=backup", + "--output", + "json", + ]) + .output()?; + + pb.finish_and_clear(); + if output.status.success() { + println!( + "Triggered {} backup '{}' for VM '{}'", + tier_label, backup_name, vm_name + ); + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to trigger backup: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup list — lists backups for a VM +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_backup_list( + vm_name: &str, + tier: Option, + rg: &str, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + // Push tier filter into JMESPath query to reduce data transfer from Azure + let query = match &tier { + Some(t) => { + let tier_str = match t { + azlin_cli::BackupTier::Daily => "daily", + azlin_cli::BackupTier::Weekly => "weekly", + azlin_cli::BackupTier::Monthly => "monthly", + }; + format!( + "[?tags.vm=='{}' && tags.type=='backup' && tags.tier=='{}']", + vm_name, tier_str + ) + } + None => format!("[?tags.vm=='{}' && tags.type=='backup']", vm_name), + }; + + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &query, + "--output", + "json", + ]) + .output()?; + + if output.status.success() { + let snapshots: Vec = + serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse backup list JSON: {}", e))?; + + if snapshots.is_empty() { + println!("No backups found for VM '{}'.", vm_name); + } else { + let mut table = new_table( + &["Name", "Tier", "Disk Size (GB)", "Created", "State"], + &[40, 8, 14, 22, 10], + ); + for snap in &snapshots { + let name = snap["name"].as_str().unwrap_or("-"); + let snap_tier = snap + .get("tags") + .and_then(|t| t.get("tier")) + .and_then(|t| t.as_str()) + .unwrap_or("-"); + let size = snap["diskSizeGb"] + .as_u64() + .map_or("-".to_string(), |s| s.to_string()); + let created = snap["timeCreated"].as_str().unwrap_or("-"); + let state = snap["provisioningState"].as_str().unwrap_or("-"); + table.add_row(vec![ + name.to_string(), + snap_tier.to_string(), + size, + created.to_string(), + state.to_string(), + ]); + } + println!("{table}"); + } + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list backups: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup restore — restores from a named backup +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_backup_restore( + vm_name: &str, + backup_name: &str, + force: bool, + rg: &str, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + if let Err(e) = crate::name_validation::validate_name(backup_name) { + anyhow::bail!("Invalid backup name: {}", e); + } + if !safe_confirm( + &format!( + "Restore VM '{}' from backup '{}'? This will replace the current disk.", + vm_name, backup_name + ), + force, + )? { + println!("Cancelled."); + return Ok(()); + } + + crate::cmd_snapshot_ops::handle_snapshot_restore(vm_name, backup_name, true, rg).await +} + +// --------------------------------------------------------------------------- +// backup verify — verifies backup integrity +// --------------------------------------------------------------------------- + +/// Core blocking verify — shared by single verify and parallel verify-all. +fn verify_backup_core(backup_name: &str, rg: &str) -> Result<(String, u64)> { + let output = std::process::Command::new("az") + .args([ + "snapshot", + "show", + "--resource-group", + rg, + "--name", + backup_name, + "--output", + "json", + ]) + .output()?; + + if output.status.success() { + let snap: serde_json::Value = serde_json::from_slice(&output.stdout)?; + let state = snap["provisioningState"] + .as_str() + .unwrap_or("Unknown") + .to_string(); + let size = snap["diskSizeGb"].as_u64().unwrap_or(0); + Ok((state, size)) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to verify backup '{}': {}", + backup_name, + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } +} + +pub(crate) async fn handle_backup_verify(backup_name: &str, rg: &str) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(backup_name) { + anyhow::bail!("Invalid backup name: {}", e); + } + let pb = penguin_spinner(&format!("Verifying backup '{}'...", backup_name)); + let result = verify_backup_core(backup_name, rg); + pb.finish_and_clear(); + let (state, size) = result?; + println!("Backup '{}': state={}, size={}GB — Verified OK", backup_name, state, size); + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup replicate — replicates a single backup to another region +// --------------------------------------------------------------------------- + +/// Core blocking replicate — shared by single replicate and parallel replicate-all. +fn replicate_backup_core(backup_name: &str, target_region: &str, rg: &str) -> Result { + let replica_name = format!("{}-replica-{}", backup_name, target_region); + + // Query both the snapshot ID and its vm tag so we can propagate it to the replica + let show_output = std::process::Command::new("az") + .args([ + "snapshot", + "show", + "--resource-group", + rg, + "--name", + backup_name, + "--query", + "{id: id, vm: tags.vm}", + "--output", + "json", + ]) + .output()?; + + if !show_output.status.success() { + anyhow::bail!("Backup '{}' not found.", backup_name); + } + + let info: serde_json::Value = serde_json::from_slice(&show_output.stdout)?; + let source_id = info["id"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("Backup '{}' has no resource ID.", backup_name))?; + let vm_tag = info["vm"].as_str().unwrap_or(""); + + let mut tag_args = vec![ + format!("source={}", backup_name), + "type=replica".to_string(), + ]; + if !vm_tag.is_empty() { + tag_args.push(format!("vm={}", vm_tag)); + } + + let mut args = vec![ + "snapshot", + "create", + "--resource-group", + rg, + "--name", + &replica_name, + "--source", + source_id, + "--location", + target_region, + "--tags", + ]; + for tag in &tag_args { + args.push(tag); + } + args.push("--output"); + args.push("json"); + + let output = std::process::Command::new("az").args(&args).output()?; + + if output.status.success() { + Ok(replica_name) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to replicate backup: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } +} + +pub(crate) async fn handle_backup_replicate( + backup_name: &str, + target_region: &str, + rg: &str, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(backup_name) { + anyhow::bail!("Invalid backup name: {}", e); + } + if let Err(e) = crate::name_validation::validate_name(target_region) { + anyhow::bail!("Invalid target region: {}", e); + } + let pb = penguin_spinner(&format!( + "Replicating '{}' to {}...", + backup_name, target_region + )); + let result = replicate_backup_core(backup_name, target_region, rg); + pb.finish_and_clear(); + let replica_name = result?; + println!( + "Replicated '{}' to {} as '{}'", + backup_name, target_region, replica_name + ); + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup replicate-all — replicates all backups for a VM +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_backup_replicate_all( + vm_name: &str, + target_region: &str, + rg: &str, +) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &format!("[?tags.vm=='{}' && tags.type=='backup'].name", vm_name), + "--output", + "json", + ]) + .output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list backups: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + + let names: Vec = serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse backup list JSON: {}", e))?; + if names.is_empty() { + println!("No backups found for VM '{}'.", vm_name); + return Ok(()); + } + + let total = names.len(); + println!( + "Replicating {} backups for '{}' to {}...", + total, vm_name, target_region + ); + + // Run replications in parallel via the blocking thread pool + let mut set = tokio::task::JoinSet::new(); + for name in names { + let region = target_region.to_string(); + let rg = rg.to_string(); + set.spawn_blocking(move || { + replicate_backup_core(&name, ®ion, &rg).map(|replica| (name, replica)) + }); + } + + let mut ok_count = 0u32; + let mut fail_count = 0u32; + while let Some(result) = set.join_next().await { + match result { + Ok(Ok((name, replica))) => { + println!(" OK: '{}' → '{}'", name, replica); + ok_count += 1; + } + Ok(Err(e)) => { + eprintln!(" FAIL: {}", e); + fail_count += 1; + } + Err(join_err) => { + eprintln!(" FAIL: task error — {}", join_err); + fail_count += 1; + } + } + } + + if fail_count > 0 { + anyhow::bail!( + "{} of {} backups failed to replicate", + fail_count, + total + ); + } + println!("All {} backups replicated successfully.", ok_count); + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup replication-status — shows replication status +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_replication_status(vm_name: &str, rg: &str) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &format!( + "[?tags.vm=='{}' && tags.type=='replica']", + vm_name + ), + "--output", + "json", + ]) + .output()?; + + if output.status.success() { + let replicas: Vec = + serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse replica list JSON: {}", e))?; + if replicas.is_empty() { + println!("No replicated backups found for VM '{}'.", vm_name); + } else { + let mut table = new_table( + &["Replica", "Location", "Source", "State"], + &[40, 15, 40, 12], + ); + for r in &replicas { + let name = r["name"].as_str().unwrap_or("-"); + let loc = r["location"].as_str().unwrap_or("-"); + let source = r + .get("tags") + .and_then(|t| t.get("source")) + .and_then(|s| s.as_str()) + .unwrap_or("-"); + let state = r["provisioningState"].as_str().unwrap_or("-"); + table.add_row(vec![ + name.to_string(), + loc.to_string(), + source.to_string(), + state.to_string(), + ]); + } + println!("{table}"); + } + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to get replication status: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup replication-jobs — lists replication jobs +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_replication_jobs( + status_filter: Option<&str>, + vm_filter: Option<&str>, + rg: &str, +) -> Result<()> { + if let Some(vm) = vm_filter { + if let Err(e) = crate::name_validation::validate_name(vm) { + anyhow::bail!("Invalid VM name: {}", e); + } + } + let mut query = "[?tags.type=='replica']".to_string(); + if let Some(vm) = vm_filter { + query = format!("[?tags.vm=='{}' && tags.type=='replica']", vm); + } + + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &query, + "--output", + "json", + ]) + .output()?; + + if output.status.success() { + let jobs: Vec = + serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse replication jobs JSON: {}", e))?; + + let filtered: Vec<&serde_json::Value> = if let Some(st) = status_filter { + jobs.iter() + .filter(|j| { + j["provisioningState"] + .as_str() + .map_or(false, |s| s.eq_ignore_ascii_case(st)) + }) + .collect() + } else { + jobs.iter().collect() + }; + + if filtered.is_empty() { + println!("No replication jobs found."); + } else { + let mut table = new_table( + &["Name", "Source", "Location", "State"], + &[40, 40, 15, 12], + ); + for j in &filtered { + let name = j["name"].as_str().unwrap_or("-"); + let source = j + .get("tags") + .and_then(|t| t.get("source")) + .and_then(|s| s.as_str()) + .unwrap_or("-"); + let loc = j["location"].as_str().unwrap_or("-"); + let state = j["provisioningState"].as_str().unwrap_or("-"); + table.add_row(vec![ + name.to_string(), + source.to_string(), + loc.to_string(), + state.to_string(), + ]); + } + println!("{table}"); + } + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list replication jobs: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup verify-all — verifies all backups for a VM +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_backup_verify_all(vm_name: &str, rg: &str) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &format!("[?tags.vm=='{}' && tags.type=='backup'].name", vm_name), + "--output", + "json", + ]) + .output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list backups: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + + let names: Vec = serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse backup list JSON: {}", e))?; + if names.is_empty() { + println!("No backups found for VM '{}'.", vm_name); + return Ok(()); + } + + let total = names.len(); + println!("Verifying {} backups for '{}'...", total, vm_name); + + // Run verifications in parallel via the blocking thread pool + let mut set = tokio::task::JoinSet::new(); + for name in names { + let rg = rg.to_string(); + set.spawn_blocking(move || { + verify_backup_core(&name, &rg).map(|(state, size)| (name, state, size)) + }); + } + + let mut passed = 0u32; + let mut failed = 0u32; + while let Some(result) = set.join_next().await { + match result { + Ok(Ok((name, state, size))) => { + println!(" OK: '{}' state={}, size={}GB", name, state, size); + passed += 1; + } + Ok(Err(e)) => { + eprintln!(" FAIL: {}", e); + failed += 1; + } + Err(join_err) => { + eprintln!(" FAIL: task error — {}", join_err); + failed += 1; + } + } + } + println!( + "Verification complete: {} passed, {} failed out of {} total", + passed, failed, total + ); + Ok(()) +} + +// --------------------------------------------------------------------------- +// backup verification-report — aggregate verification report +// --------------------------------------------------------------------------- + +pub(crate) async fn handle_verification_report( + days: u32, + vm_filter: Option<&str>, + rg: &str, +) -> Result<()> { + if let Some(vm) = vm_filter { + if let Err(e) = crate::name_validation::validate_name(vm) { + anyhow::bail!("Invalid VM name: {}", e); + } + } + let cutoff = chrono::Utc::now() - chrono::Duration::days(i64::from(days)); + + let mut query = "[?tags.type=='backup']".to_string(); + if let Some(vm) = vm_filter { + query = format!("[?tags.vm=='{}' && tags.type=='backup']", vm); + } + + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &query, + "--output", + "json", + ]) + .output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list backups: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + + let snaps: Vec = + serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse backup list JSON: {}", e))?; + + let recent: Vec<&serde_json::Value> = snaps + .iter() + .filter(|s| { + s["timeCreated"] + .as_str() + .and_then(|t| chrono::DateTime::parse_from_rfc3339(t).ok()) + .map_or(false, |dt| dt >= cutoff) + }) + .collect(); + + let total = recent.len(); + let succeeded = recent + .iter() + .filter(|s| s["provisioningState"].as_str() == Some("Succeeded")) + .count(); + + println!("Backup Verification Report (last {} days):", days); + if let Some(vm) = vm_filter { + println!(" VM filter: {}", vm); + } + println!(" Total backups: {}", total); + println!(" Succeeded: {}", succeeded); + println!(" Failed: {}", total - succeeded); + if total > 0 { + println!( + " Success rate: {:.1}%", + (succeeded as f64 / total as f64) * 100.0 + ); + } + Ok(()) +} diff --git a/rust/crates/azlin/src/cmd_dr.rs b/rust/crates/azlin/src/cmd_dr.rs new file mode 100644 index 00000000..7d63fa79 --- /dev/null +++ b/rust/crates/azlin/src/cmd_dr.rs @@ -0,0 +1,457 @@ +#[allow(unused_imports)] +use super::*; +use anyhow::Result; + +pub(crate) async fn dispatch( + command: azlin_cli::Commands, + verbose: bool, + output: &azlin_cli::OutputFormat, +) -> Result<()> { + #[allow(unused_variables)] + let _ = (verbose, output); + match command { + azlin_cli::Commands::Dr { action } => match action { + azlin_cli::DrAction::Test { + vm_name, + test_region, + backup, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + handle_dr_test(&vm_name, &test_region, backup.as_deref(), &rg).await?; + } + azlin_cli::DrAction::TestAll { + test_region, + resource_group, + } => { + let rg = resolve_resource_group(resource_group)?; + handle_dr_test_all(test_region.as_deref(), &rg).await?; + } + azlin_cli::DrAction::TestHistory { vm_name, days } => { + handle_dr_test_history(&vm_name, days)?; + } + azlin_cli::DrAction::SuccessRate { vm, days } => { + handle_dr_success_rate(vm.as_deref(), days)?; + } + }, + _ => unreachable!(), + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// dr test — run a DR test for a single VM +// --------------------------------------------------------------------------- + +/// Outcome of a successful DR test, used for reporting. +struct DrTestOutcome { + backup_name: String, + test_name: String, +} + +/// Core blocking DR test — shared by single test and parallel test-all. +fn dr_test_core( + vm_name: &str, + test_region: &str, + backup: Option<&str>, + rg: &str, +) -> Result { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let backup_name = match backup { + Some(b) => b.to_string(), + None => { + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + &format!( + "[?tags.vm=='{}' && tags.type=='backup'] | sort_by(@, &timeCreated) | [-1].name", + vm_name + ), + "--output", + "tsv", + ]) + .output()?; + + if !output.status.success() || output.stdout.is_empty() { + anyhow::bail!( + "No backups found for VM '{}'. Create a backup first with 'azlin backup trigger {}'.", + vm_name, + vm_name + ); + } + String::from_utf8_lossy(&output.stdout).trim().to_string() + } + }; + + let test_name = format!( + "{}-dr-test-{}", + vm_name, + chrono::Utc::now().format("%Y%m%d_%H%M%S") + ); + + let source_output = std::process::Command::new("az") + .args([ + "snapshot", + "show", + "--resource-group", + rg, + "--name", + &backup_name, + "--query", + "id", + "--output", + "tsv", + ]) + .output()?; + + if !source_output.status.success() { + anyhow::bail!("Backup '{}' not found.", backup_name); + } + + let source_id = String::from_utf8_lossy(&source_output.stdout) + .trim() + .to_string(); + + let create_output = std::process::Command::new("az") + .args([ + "snapshot", + "create", + "--resource-group", + rg, + "--name", + &test_name, + "--source", + &source_id, + "--location", + test_region, + "--tags", + &format!("vm={}", vm_name), + "type=dr-test", + &format!("source={}", backup_name), + "--output", + "json", + ]) + .output()?; + + if create_output.status.success() { + // Clean up test snapshot — surface failure rather than silently orphaning resources + let cleanup = std::process::Command::new("az") + .args([ + "snapshot", + "delete", + "--resource-group", + rg, + "--name", + &test_name, + "--yes", + ]) + .output(); + match cleanup { + Ok(o) if !o.status.success() => { + eprintln!( + "Warning: DR test passed but cleanup of '{}' failed (orphaned resource)", + test_name + ); + } + Err(e) => { + eprintln!( + "Warning: DR test passed but cleanup of '{}' failed: {}", + test_name, e + ); + } + _ => {} + } + + record_dr_result(vm_name, test_region, &backup_name, true)?; + Ok(DrTestOutcome { + backup_name, + test_name, + }) + } else { + let stderr = String::from_utf8_lossy(&create_output.stderr); + record_dr_result(vm_name, test_region, &backup_name, false)?; + anyhow::bail!( + "DR test FAILED for '{}': {}", + vm_name, + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } +} + +async fn handle_dr_test( + vm_name: &str, + test_region: &str, + backup: Option<&str>, + rg: &str, +) -> Result<()> { + let pb = penguin_spinner(&format!( + "Running DR test for '{}' in {}...", + vm_name, test_region + )); + let result = dr_test_core(vm_name, test_region, backup, rg); + pb.finish_and_clear(); + let outcome = result?; + println!("DR test for '{}' PASSED:", vm_name); + println!(" Backup: {}", outcome.backup_name); + println!(" Test region: {}", test_region); + println!(" Test name: {}", outcome.test_name); + Ok(()) +} + +// --------------------------------------------------------------------------- +// dr test-all — run DR tests for all VMs in a resource group +// --------------------------------------------------------------------------- + +async fn handle_dr_test_all(test_region: Option<&str>, rg: &str) -> Result<()> { + let region = test_region.unwrap_or("eastus2"); + + let output = std::process::Command::new("az") + .args([ + "snapshot", + "list", + "--resource-group", + rg, + "--query", + "[?tags.type=='backup'].tags.vm", + "--output", + "json", + ]) + .output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "Failed to list VMs with backups: {}", + azlin_core::sanitizer::sanitize(stderr.trim()) + ); + } + + let vm_names: Vec = serde_json::from_slice(&output.stdout) + .map_err(|e| anyhow::anyhow!("Failed to parse VM list JSON: {}", e))?; + let unique_vms: std::collections::BTreeSet = vm_names.into_iter().collect(); + + if unique_vms.is_empty() { + println!("No VMs with backups found in resource group."); + return Ok(()); + } + + let total = unique_vms.len(); + println!( + "Running DR tests for {} VMs in {} (target: {})...", + total, rg, region + ); + + // Run DR tests in parallel via the blocking thread pool + let mut set = tokio::task::JoinSet::new(); + for vm in unique_vms { + let region = region.to_string(); + let rg = rg.to_string(); + set.spawn_blocking(move || -> (String, Result) { + let result = dr_test_core(&vm, ®ion, None, &rg); + (vm, result) + }); + } + + let mut passed = 0u32; + let mut failed = 0u32; + while let Some(result) = set.join_next().await { + match result { + Ok((vm, Ok(_outcome))) => { + println!(" PASS: {}", vm); + passed += 1; + } + Ok((vm, Err(e))) => { + eprintln!(" FAIL: {} — {}", vm, e); + failed += 1; + } + Err(join_err) => { + eprintln!(" FAIL: task error — {}", join_err); + failed += 1; + } + } + } + println!( + "DR test-all complete: {} passed, {} failed out of {} VMs", + passed, failed, total + ); + Ok(()) +} + +// --------------------------------------------------------------------------- +// DR result persistence (~/.azlin/dr-history/) +// --------------------------------------------------------------------------- + +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Serialize, Deserialize)] +struct DrTestResult { + vm_name: String, + test_region: String, + backup_name: String, + success: bool, + timestamp: String, +} + +fn dr_history_dir() -> PathBuf { + dirs::home_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join(".azlin") + .join("dr-history") +} + +fn record_dr_result( + vm_name: &str, + test_region: &str, + backup_name: &str, + success: bool, +) -> Result<()> { + let dir = dr_history_dir(); + std::fs::create_dir_all(&dir)?; + + let ts = chrono::Utc::now(); + let result = DrTestResult { + vm_name: vm_name.to_string(), + test_region: test_region.to_string(), + backup_name: backup_name.to_string(), + success, + timestamp: ts.to_rfc3339(), + }; + + let filename = format!( + "{}-{}.json", + vm_name, + ts.format("%Y%m%d_%H%M%S") + ); + let path = dir.join(filename); + let json = serde_json::to_string_pretty(&result)?; + std::fs::write(path, json)?; + Ok(()) +} + +fn load_dr_history(vm_filter: Option<&str>, days: u32) -> Result> { + let dir = dr_history_dir(); + let entries = match std::fs::read_dir(&dir) { + Ok(e) => e, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()), + Err(e) => anyhow::bail!("Failed to read DR history directory: {}", e), + }; + + let cutoff = chrono::Utc::now() - chrono::Duration::days(i64::from(days)); + let mut results = Vec::new(); + + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().map_or(true, |e| e != "json") { + continue; + } + // Skip files whose name doesn't start with the VM prefix (avoids + // unnecessary file reads and JSON parsing for large history dirs) + if let Some(vm) = vm_filter { + if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { + if !stem.starts_with(vm) { + continue; + } + } + } + let contents = std::fs::read_to_string(&path) + .map_err(|e| anyhow::anyhow!("Failed to read DR history file {}: {}", path.display(), e))?; + let result: DrTestResult = serde_json::from_str(&contents) + .map_err(|e| anyhow::anyhow!("Corrupt DR history file {}: {}", path.display(), e))?; + if let Some(vm) = vm_filter { + if result.vm_name != vm { + continue; + } + } + if let Ok(ts) = chrono::DateTime::parse_from_rfc3339(&result.timestamp) { + if ts >= cutoff { + results.push(result); + } + } + } + + results.sort_by(|a, b| b.timestamp.cmp(&a.timestamp)); + Ok(results) +} + +// --------------------------------------------------------------------------- +// dr test-history — show test history for a VM +// --------------------------------------------------------------------------- + +fn handle_dr_test_history(vm_name: &str, days: u32) -> Result<()> { + if let Err(e) = crate::name_validation::validate_name(vm_name) { + anyhow::bail!("Invalid VM name: {}", e); + } + let results = load_dr_history(Some(vm_name), days)?; + + if results.is_empty() { + println!( + "No DR test history found for '{}' in the last {} days.", + vm_name, days + ); + return Ok(()); + } + + println!( + "DR test history for '{}' (last {} days):", + vm_name, days + ); + let mut table = new_table( + &["Timestamp", "Region", "Backup", "Result"], + &[26, 15, 40, 8], + ); + for r in &results { + table.add_row(vec![ + r.timestamp.clone(), + r.test_region.clone(), + r.backup_name.clone(), + if r.success { + "PASS".to_string() + } else { + "FAIL".to_string() + }, + ]); + } + println!("{table}"); + Ok(()) +} + +// --------------------------------------------------------------------------- +// dr success-rate — show success rate across VMs +// --------------------------------------------------------------------------- + +fn handle_dr_success_rate(vm_filter: Option<&str>, days: u32) -> Result<()> { + if let Some(vm) = vm_filter { + if let Err(e) = crate::name_validation::validate_name(vm) { + anyhow::bail!("Invalid VM name: {}", e); + } + } + let results = load_dr_history(vm_filter, days)?; + + if results.is_empty() { + println!("No DR test results found in the last {} days.", days); + return Ok(()); + } + + let total = results.len(); + let passed = results.iter().filter(|r| r.success).count(); + + println!("DR Test Success Rate (last {} days):", days); + if let Some(vm) = vm_filter { + println!(" VM filter: {}", vm); + } + println!(" Total tests: {}", total); + println!(" Passed: {}", passed); + println!(" Failed: {}", total - passed); + println!( + " Success rate: {:.1}%", + (passed as f64 / total as f64) * 100.0 + ); + Ok(()) +} diff --git a/rust/crates/azlin/src/dispatch.rs b/rust/crates/azlin/src/dispatch.rs index da1c05af..94d4559f 100644 --- a/rust/crates/azlin/src/dispatch.rs +++ b/rust/crates/azlin/src/dispatch.rs @@ -59,6 +59,12 @@ pub(crate) async fn dispatch_command(cli: azlin_cli::Cli) -> Result<()> { cmd @ azlin_cli::Commands::Snapshot { .. } => { crate::cmd_snapshot::dispatch(cmd, cli.verbose, &cli.output).await?; } + cmd @ azlin_cli::Commands::Backup { .. } => { + crate::cmd_backup::dispatch(cmd, cli.verbose, &cli.output).await?; + } + cmd @ azlin_cli::Commands::Dr { .. } => { + crate::cmd_dr::dispatch(cmd, cli.verbose, &cli.output).await?; + } cmd @ azlin_cli::Commands::Storage { .. } => { crate::cmd_storage::dispatch(cmd, cli.verbose, &cli.output).await?; } diff --git a/rust/crates/azlin/src/main.rs b/rust/crates/azlin/src/main.rs index ab18ab35..8323c4a3 100644 --- a/rust/crates/azlin/src/main.rs +++ b/rust/crates/azlin/src/main.rs @@ -1102,6 +1102,9 @@ mod cmd_session; mod cmd_snapshot; mod cmd_snapshot_ops; mod cmd_snapshot_ops2; +mod cmd_backup; +mod cmd_backup_ops; +mod cmd_dr; mod cmd_storage; mod cmd_storage_ops; mod cmd_storage_ops2; diff --git a/rust/crates/azlin/src/name_validation.rs b/rust/crates/azlin/src/name_validation.rs index 98857377..532caa54 100644 --- a/rust/crates/azlin/src/name_validation.rs +++ b/rust/crates/azlin/src/name_validation.rs @@ -1,22 +1,23 @@ -/// Reject names containing path-traversal or null-byte characters. +/// Reject names that don't match the positive allowlist. /// /// A valid name consists only of ASCII alphanumerics, hyphens, underscores, -/// and dots (but not `..`). No slashes, backslashes, or null bytes. +/// and dots (but not `..`). This enforces the allowlist rather than +/// blocklisting specific characters, preventing injection into JMESPath +/// queries, filenames, and shell arguments. pub fn validate_name(name: &str) -> Result<(), String> { if name.is_empty() { return Err("Name must not be empty".into()); } - if name.contains('/') || name.contains('\\') { - return Err(format!( - "Name '{}' contains path separator characters", - name - )); - } - if name.contains('\0') { - return Err(format!("Name '{}' contains a null byte", name)); - } if name.contains("..") { return Err(format!("Name '{}' contains '..' (path traversal)", name)); } + if let Some(ch) = name.chars().find(|c| { + !c.is_ascii_alphanumeric() && *c != '-' && *c != '_' && *c != '.' + }) { + return Err(format!( + "Name '{}' contains invalid character '{}' (only a-z, A-Z, 0-9, hyphen, underscore, dot allowed)", + name, ch + )); + } Ok(()) } diff --git a/rust/crates/azlin/src/tests/mod.rs b/rust/crates/azlin/src/tests/mod.rs index a59f4f9e..6f511bf6 100644 --- a/rust/crates/azlin/src/tests/mod.rs +++ b/rust/crates/azlin/src/tests/mod.rs @@ -67,3 +67,6 @@ mod test_group_63; mod test_group_64; mod test_group_65; mod test_group_66; +mod test_group_backup_config; +mod test_group_backup_ops; +mod test_group_dr_testing; diff --git a/rust/crates/azlin/src/tests/test_group_backup_config.rs b/rust/crates/azlin/src/tests/test_group_backup_config.rs new file mode 100644 index 00000000..3a535d9a --- /dev/null +++ b/rust/crates/azlin/src/tests/test_group_backup_config.rs @@ -0,0 +1,409 @@ +//! TDD RED PHASE: Backup configuration unit tests. +//! +//! These tests define the expected behavior for backup scheduling, +//! tier determination, and retention policy logic. They FAIL until +//! the backup configuration module is implemented. +//! +//! Expected module: crate::backup_config (or integrated into snapshot_helpers) +//! +//! Feature spec: docs/backup-disaster-recovery.md +//! Test coverage spec: docs/testing/backup-dr-test-coverage.md + +use chrono::{Datelike, NaiveDate, Weekday}; +use serde::{Deserialize, Serialize}; +use std::fs; +use tempfile::TempDir; + +// --------------------------------------------------------------------------- +// Expected data structures (contract definition) +// These types define the API that the implementation must provide. +// Once implemented, replace with: use crate::backup_config::*; +// --------------------------------------------------------------------------- + +/// Retention tier for a backup snapshot. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +enum BackupTier { + Daily, + Weekly, + Monthly, +} + +/// Backup schedule configuration stored per-VM. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct BackupScheduleConfig { + vm_name: String, + resource_group: String, + daily_retention: u32, + weekly_retention: Option, + monthly_retention: Option, + cross_region: bool, + target_region: Option, + enabled: bool, +} + +/// Information about a single backup. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct BackupInfo { + name: String, + vm_name: String, + tier: BackupTier, + size_gb: u64, + created: String, + verified: bool, + replicated: bool, +} + +// --------------------------------------------------------------------------- +// Helper: determine backup tier from date +// This logic MUST be implemented in the backup_config module. +// --------------------------------------------------------------------------- + +fn determine_backup_tier(date: NaiveDate) -> BackupTier { + // Contract: first day of month → Monthly + if date.day() == 1 { + return BackupTier::Monthly; + } + // Contract: first day of week (Sunday in US convention) → Weekly + if date.weekday() == Weekday::Sun { + return BackupTier::Weekly; + } + BackupTier::Daily +} + +/// Generate a backup name from VM name, tier, and timestamp. +fn build_backup_name(vm_name: &str, tier: BackupTier, timestamp: &str) -> String { + let tier_str = match tier { + BackupTier::Daily => "daily", + BackupTier::Weekly => "weekly", + BackupTier::Monthly => "monthly", + }; + format!("{}-backup-{}-{}", vm_name, tier_str, timestamp) +} + +// =========================================================================== +// BackupScheduleConfig serialization tests +// =========================================================================== + +#[test] +fn test_backup_config_serializes_to_toml() { + let config = BackupScheduleConfig { + vm_name: "prod-db-vm".to_string(), + resource_group: "prod-rg".to_string(), + daily_retention: 7, + weekly_retention: Some(4), + monthly_retention: Some(12), + cross_region: true, + target_region: Some("westus2".to_string()), + enabled: true, + }; + let toml_str = toml::to_string_pretty(&config).unwrap(); + assert!(toml_str.contains("prod-db-vm")); + assert!(toml_str.contains("daily_retention = 7")); + assert!(toml_str.contains("weekly_retention = 4")); + assert!(toml_str.contains("monthly_retention = 12")); + assert!(toml_str.contains("cross_region = true")); + assert!(toml_str.contains("westus2")); +} + +#[test] +fn test_backup_config_roundtrip_toml() { + let config = BackupScheduleConfig { + vm_name: "test-vm".to_string(), + resource_group: "test-rg".to_string(), + daily_retention: 7, + weekly_retention: None, + monthly_retention: None, + cross_region: false, + target_region: None, + enabled: true, + }; + let toml_str = toml::to_string_pretty(&config).unwrap(); + let loaded: BackupScheduleConfig = toml::from_str(&toml_str).unwrap(); + assert_eq!(loaded.vm_name, "test-vm"); + assert_eq!(loaded.daily_retention, 7); + assert!(loaded.weekly_retention.is_none()); + assert!(!loaded.cross_region); +} + +#[test] +fn test_backup_config_persists_to_file() { + let tmp = TempDir::new().unwrap(); + let config = BackupScheduleConfig { + vm_name: "file-test-vm".to_string(), + resource_group: "rg".to_string(), + daily_retention: 14, + weekly_retention: Some(8), + monthly_retention: Some(24), + cross_region: true, + target_region: Some("eastus2".to_string()), + enabled: true, + }; + let path = tmp.path().join("file-test-vm.backup.toml"); + let contents = toml::to_string_pretty(&config).unwrap(); + fs::write(&path, &contents).unwrap(); + + let loaded: BackupScheduleConfig = + toml::from_str(&fs::read_to_string(&path).unwrap()).unwrap(); + assert_eq!(loaded.vm_name, "file-test-vm"); + assert_eq!(loaded.daily_retention, 14); + assert_eq!(loaded.weekly_retention, Some(8)); + assert_eq!(loaded.monthly_retention, Some(24)); + assert!(loaded.cross_region); + assert_eq!(loaded.target_region.as_deref(), Some("eastus2")); +} + +#[test] +fn test_backup_config_json_serialization() { + let config = BackupScheduleConfig { + vm_name: "json-vm".to_string(), + resource_group: "rg".to_string(), + daily_retention: 7, + weekly_retention: Some(4), + monthly_retention: None, + cross_region: false, + target_region: None, + enabled: false, + }; + let json = serde_json::to_string(&config).unwrap(); + let loaded: BackupScheduleConfig = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.vm_name, "json-vm"); + assert!(!loaded.enabled); +} + +// =========================================================================== +// Tier determination logic tests +// =========================================================================== + +#[test] +fn test_determine_tier_regular_weekday_is_daily() { + // Wednesday March 18, 2026 — not first of week or month + let date = NaiveDate::from_ymd_opt(2026, 3, 18).unwrap(); + assert_eq!(date.weekday(), Weekday::Wed); + assert_eq!(determine_backup_tier(date), BackupTier::Daily); +} + +#[test] +fn test_determine_tier_first_of_month_is_monthly() { + // April 1, 2026 — first day of month + let date = NaiveDate::from_ymd_opt(2026, 4, 1).unwrap(); + assert_eq!(determine_backup_tier(date), BackupTier::Monthly); +} + +#[test] +fn test_determine_tier_sunday_is_weekly() { + // March 29, 2026 — Sunday + let date = NaiveDate::from_ymd_opt(2026, 3, 29).unwrap(); + assert_eq!(date.weekday(), Weekday::Sun); + assert_eq!(determine_backup_tier(date), BackupTier::Weekly); +} + +#[test] +fn test_determine_tier_first_of_month_on_sunday_is_monthly() { + // June 1, 2025 — first of month AND Sunday → Monthly takes priority + let date = NaiveDate::from_ymd_opt(2025, 6, 1).unwrap(); + assert_eq!(date.weekday(), Weekday::Sun); + assert_eq!( + determine_backup_tier(date), + BackupTier::Monthly, + "Monthly takes priority over Weekly when both match" + ); +} + +#[test] +fn test_determine_tier_saturday_is_daily() { + let date = NaiveDate::from_ymd_opt(2026, 3, 28).unwrap(); + assert_eq!(date.weekday(), Weekday::Sat); + assert_eq!(determine_backup_tier(date), BackupTier::Daily); +} + +#[test] +fn test_determine_tier_end_of_month_is_daily() { + // March 31, 2026 — last day, Tuesday + let date = NaiveDate::from_ymd_opt(2026, 3, 31).unwrap(); + assert_eq!(determine_backup_tier(date), BackupTier::Daily); +} + +// =========================================================================== +// Backup name generation tests +// =========================================================================== + +#[test] +fn test_backup_name_daily() { + let name = build_backup_name("prod-db-vm", BackupTier::Daily, "20261201-0800"); + assert_eq!(name, "prod-db-vm-backup-daily-20261201-0800"); +} + +#[test] +fn test_backup_name_weekly() { + let name = build_backup_name("prod-db-vm", BackupTier::Weekly, "20261124-0800"); + assert_eq!(name, "prod-db-vm-backup-weekly-20261124-0800"); +} + +#[test] +fn test_backup_name_monthly() { + let name = build_backup_name("prod-db-vm", BackupTier::Monthly, "20261201-0800"); + assert_eq!(name, "prod-db-vm-backup-monthly-20261201-0800"); +} + +#[test] +fn test_backup_name_contains_vm_name() { + let name = build_backup_name("my-special-vm", BackupTier::Daily, "20260101-0000"); + assert!(name.starts_with("my-special-vm-backup-")); +} + +// =========================================================================== +// BackupInfo structure tests +// =========================================================================== + +#[test] +fn test_backup_info_serialization() { + let info = BackupInfo { + name: "vm-backup-daily-20261201-0800".to_string(), + vm_name: "my-vm".to_string(), + tier: BackupTier::Daily, + size_gb: 128, + created: "2026-12-01T08:00:00Z".to_string(), + verified: true, + replicated: false, + }; + let json = serde_json::to_string(&info).unwrap(); + assert!(json.contains("\"tier\":\"daily\"")); + assert!(json.contains("\"verified\":true")); + assert!(json.contains("\"replicated\":false")); +} + +#[test] +fn test_backup_info_deserialization() { + let json = r#"{ + "name": "vm-backup-weekly-20261124", + "vm_name": "my-vm", + "tier": "weekly", + "size_gb": 64, + "created": "2026-11-24T08:00:00Z", + "verified": false, + "replicated": true + }"#; + let info: BackupInfo = serde_json::from_str(json).unwrap(); + assert_eq!(info.tier, BackupTier::Weekly); + assert_eq!(info.size_gb, 64); + assert!(!info.verified); + assert!(info.replicated); +} + +// =========================================================================== +// Retention policy validation tests +// =========================================================================== + +#[test] +fn test_retention_defaults_daily_only() { + let config = BackupScheduleConfig { + vm_name: "vm".to_string(), + resource_group: "rg".to_string(), + daily_retention: 7, + weekly_retention: None, + monthly_retention: None, + cross_region: false, + target_region: None, + enabled: true, + }; + assert_eq!(config.daily_retention, 7); + assert!(config.weekly_retention.is_none()); + assert!(config.monthly_retention.is_none()); +} + +#[test] +fn test_retention_zero_daily_is_valid() { + // Zero retention means "don't keep daily backups" (edge case) + let config = BackupScheduleConfig { + vm_name: "vm".to_string(), + resource_group: "rg".to_string(), + daily_retention: 0, + weekly_retention: Some(4), + monthly_retention: Some(12), + cross_region: false, + target_region: None, + enabled: true, + }; + assert_eq!(config.daily_retention, 0); +} + +#[test] +fn test_cross_region_requires_target_region() { + let config = BackupScheduleConfig { + vm_name: "vm".to_string(), + resource_group: "rg".to_string(), + daily_retention: 7, + weekly_retention: None, + monthly_retention: None, + cross_region: true, + target_region: None, // This should be validated by implementation + enabled: true, + }; + // Contract: implementation MUST validate that cross_region=true requires target_region + assert!( + config.cross_region && config.target_region.is_none(), + "Test setup: cross_region=true but no target_region" + ); + // When implementation exists, calling configure() with this config should return an error +} + +// =========================================================================== +// Boundary condition tests +// =========================================================================== + +#[test] +fn test_backup_name_with_empty_vm_name() { + let name = build_backup_name("", BackupTier::Daily, "20261201-0800"); + assert_eq!(name, "-backup-daily-20261201-0800"); + // Contract: implementation should reject empty VM names before reaching this point +} + +#[test] +fn test_backup_tier_serializes_lowercase() { + let daily = serde_json::to_string(&BackupTier::Daily).unwrap(); + let weekly = serde_json::to_string(&BackupTier::Weekly).unwrap(); + let monthly = serde_json::to_string(&BackupTier::Monthly).unwrap(); + assert_eq!(daily, "\"daily\""); + assert_eq!(weekly, "\"weekly\""); + assert_eq!(monthly, "\"monthly\""); +} + +// =========================================================================== +// Contract tests — these verify expected module structure +// Once backup_config module exists, uncomment the imports and these will pass. +// =========================================================================== + +#[test] +fn test_existing_snapshot_helpers_build_name() { + // Verify existing snapshot_helpers::build_snapshot_name still works + // This ensures backward compatibility during feature development + let name = crate::snapshot_helpers::build_snapshot_name("my-vm", "20260301_120000"); + assert_eq!(name, "my-vm_snapshot_20260301_120000"); +} + +#[test] +fn test_existing_snapshot_helpers_filter() { + let snaps = vec![ + serde_json::json!({"name": "my-vm_snapshot_1"}), + serde_json::json!({"name": "other-vm_snapshot_1"}), + ]; + let filtered = crate::snapshot_helpers::filter_snapshots(&snaps, "my-vm"); + assert_eq!(filtered.len(), 1); +} + +#[test] +fn test_existing_snapshot_schedule_structure() { + // Verify the existing SnapshotSchedule is compatible with new BackupScheduleConfig + let schedule = crate::snapshot_helpers::SnapshotSchedule { + vm_name: "test-vm".to_string(), + resource_group: "rg".to_string(), + every_hours: 24, + keep_count: 7, + enabled: true, + created: "2026-03-01".to_string(), + }; + assert_eq!(schedule.vm_name, "test-vm"); + assert_eq!(schedule.every_hours, 24); + // Contract: BackupScheduleConfig must be a superset of SnapshotSchedule capabilities +} diff --git a/rust/crates/azlin/src/tests/test_group_backup_ops.rs b/rust/crates/azlin/src/tests/test_group_backup_ops.rs new file mode 100644 index 00000000..05513edf --- /dev/null +++ b/rust/crates/azlin/src/tests/test_group_backup_ops.rs @@ -0,0 +1,451 @@ +//! TDD RED PHASE: Backup replication and verification unit tests. +//! +//! These tests define the expected behavior for cross-region replication +//! and backup verification. They FAIL until the replication and +//! verification modules are implemented. +//! +//! Expected modules: +//! - crate::backup_replication +//! - crate::backup_verification +//! +//! Feature spec: docs/backup-disaster-recovery.md §Cross-Region Replication, §Verification +//! Test coverage spec: docs/testing/backup-dr-test-coverage.md + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Expected replication data structures (contract definition) +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +enum ReplicationStatus { + Pending, + InProgress, + Completed, + Failed, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ReplicationJob { + job_id: u64, + snapshot_name: String, + vm_name: String, + source_region: String, + target_region: String, + status: ReplicationStatus, + started_at: Option, + completed_at: Option, + error_message: Option, +} + +// --------------------------------------------------------------------------- +// Expected verification data structures (contract definition) +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +enum VerificationResult { + Pass, + Fail, + Pending, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct VerificationRecord { + snapshot_name: String, + vm_name: String, + result: VerificationResult, + disk_size_gb: Option, + expected_size_gb: Option, + duration_secs: Option, + error_message: Option, + verified_at: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct VerificationReport { + vm_name: String, + total_backups: u32, + verified_count: u32, + success_count: u32, + failure_count: u32, + success_rate: f64, + failures: Vec, +} + +// =========================================================================== +// ReplicationJob serialization tests +// =========================================================================== + +#[test] +fn test_replication_job_serialization() { + let job = ReplicationJob { + job_id: 1234, + snapshot_name: "prod-vm-backup-daily-20261201-0800".to_string(), + vm_name: "prod-vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Pending, + started_at: None, + completed_at: None, + error_message: None, + }; + let json = serde_json::to_string(&job).unwrap(); + assert!(json.contains("\"status\":\"pending\"")); + assert!(json.contains("\"job_id\":1234")); + assert!(json.contains("\"source_region\":\"eastus\"")); + assert!(json.contains("\"target_region\":\"westus2\"")); +} + +#[test] +fn test_replication_job_roundtrip() { + let job = ReplicationJob { + job_id: 5678, + snapshot_name: "snap-1".to_string(), + vm_name: "vm-1".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Completed, + started_at: Some("2026-12-01T08:00:00Z".to_string()), + completed_at: Some("2026-12-01T08:12:34Z".to_string()), + error_message: None, + }; + let json = serde_json::to_string(&job).unwrap(); + let loaded: ReplicationJob = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.job_id, 5678); + assert_eq!(loaded.status, ReplicationStatus::Completed); + assert!(loaded.completed_at.is_some()); + assert!(loaded.error_message.is_none()); +} + +#[test] +fn test_replication_status_transitions() { + // Contract: valid status transitions + let statuses = vec![ + ReplicationStatus::Pending, + ReplicationStatus::InProgress, + ReplicationStatus::Completed, + ]; + // Pending → InProgress → Completed is valid + assert_eq!(statuses[0], ReplicationStatus::Pending); + assert_eq!(statuses[1], ReplicationStatus::InProgress); + assert_eq!(statuses[2], ReplicationStatus::Completed); +} + +#[test] +fn test_replication_failed_job_has_error() { + let job = ReplicationJob { + job_id: 9999, + snapshot_name: "snap-fail".to_string(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Failed, + started_at: Some("2026-12-01T08:00:00Z".to_string()), + completed_at: Some("2026-12-01T08:05:00Z".to_string()), + error_message: Some("Quota exceeded in target region".to_string()), + }; + assert_eq!(job.status, ReplicationStatus::Failed); + assert!(job.error_message.is_some()); + assert!( + job.error_message.as_ref().unwrap().contains("Quota"), + "Error message should describe the failure" + ); +} + +// =========================================================================== +// Replication filtering tests +// =========================================================================== + +#[test] +fn test_filter_replication_jobs_by_status() { + let jobs = vec![ + ReplicationJob { + job_id: 1, + snapshot_name: "s1".to_string(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Completed, + started_at: None, + completed_at: None, + error_message: None, + }, + ReplicationJob { + job_id: 2, + snapshot_name: "s2".to_string(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Pending, + started_at: None, + completed_at: None, + error_message: None, + }, + ReplicationJob { + job_id: 3, + snapshot_name: "s3".to_string(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Completed, + started_at: None, + completed_at: None, + error_message: None, + }, + ]; + let pending: Vec<_> = jobs + .iter() + .filter(|j| j.status == ReplicationStatus::Pending) + .collect(); + assert_eq!(pending.len(), 1); + assert_eq!(pending[0].job_id, 2); + + let completed: Vec<_> = jobs + .iter() + .filter(|j| j.status == ReplicationStatus::Completed) + .collect(); + assert_eq!(completed.len(), 2); +} + +#[test] +fn test_filter_replication_jobs_by_vm() { + let jobs = vec![ + ReplicationJob { + job_id: 1, + snapshot_name: "s1".to_string(), + vm_name: "vm-a".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Completed, + started_at: None, + completed_at: None, + error_message: None, + }, + ReplicationJob { + job_id: 2, + snapshot_name: "s2".to_string(), + vm_name: "vm-b".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Pending, + started_at: None, + completed_at: None, + error_message: None, + }, + ]; + let vm_a_jobs: Vec<_> = jobs.iter().filter(|j| j.vm_name == "vm-a").collect(); + assert_eq!(vm_a_jobs.len(), 1); + assert_eq!(vm_a_jobs[0].job_id, 1); +} + +// =========================================================================== +// Replication boundary conditions +// =========================================================================== + +#[test] +fn test_replication_empty_snapshot_name() { + let job = ReplicationJob { + job_id: 0, + snapshot_name: String::new(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "westus2".to_string(), + status: ReplicationStatus::Pending, + started_at: None, + completed_at: None, + error_message: None, + }; + assert!( + job.snapshot_name.is_empty(), + "Empty snapshot name should be caught by validation in implementation" + ); +} + +#[test] +fn test_replication_same_source_and_target() { + let job = ReplicationJob { + job_id: 0, + snapshot_name: "snap".to_string(), + vm_name: "vm".to_string(), + source_region: "eastus".to_string(), + target_region: "eastus".to_string(), + status: ReplicationStatus::Pending, + started_at: None, + completed_at: None, + error_message: None, + }; + assert_eq!( + job.source_region, job.target_region, + "Implementation should reject same source/target region" + ); +} + +// =========================================================================== +// VerificationRecord tests +// =========================================================================== + +#[test] +fn test_verification_record_pass() { + let record = VerificationRecord { + snapshot_name: "snap-1".to_string(), + vm_name: "vm".to_string(), + result: VerificationResult::Pass, + disk_size_gb: Some(128), + expected_size_gb: Some(128), + duration_secs: Some(108), + error_message: None, + verified_at: Some("2026-12-01T10:30:00Z".to_string()), + }; + assert_eq!(record.result, VerificationResult::Pass); + assert_eq!(record.disk_size_gb, record.expected_size_gb); + assert!(record.error_message.is_none()); +} + +#[test] +fn test_verification_record_size_mismatch() { + let record = VerificationRecord { + snapshot_name: "snap-bad".to_string(), + vm_name: "vm".to_string(), + result: VerificationResult::Fail, + disk_size_gb: Some(0), + expected_size_gb: Some(64), + duration_secs: Some(45), + error_message: Some("Disk size mismatch (expected 64GB, got 0GB)".to_string()), + verified_at: Some("2026-12-01T10:30:00Z".to_string()), + }; + assert_eq!(record.result, VerificationResult::Fail); + assert_ne!(record.disk_size_gb, record.expected_size_gb); + assert!(record.error_message.is_some()); +} + +#[test] +fn test_verification_record_serialization() { + let record = VerificationRecord { + snapshot_name: "snap".to_string(), + vm_name: "vm".to_string(), + result: VerificationResult::Pass, + disk_size_gb: Some(128), + expected_size_gb: Some(128), + duration_secs: Some(90), + error_message: None, + verified_at: None, + }; + let json = serde_json::to_string(&record).unwrap(); + assert!(json.contains("\"result\":\"pass\"")); + assert!(json.contains("\"disk_size_gb\":128")); +} + +// =========================================================================== +// VerificationReport tests +// =========================================================================== + +#[test] +fn test_verification_report_100_percent() { + let report = VerificationReport { + vm_name: "prod-vm".to_string(), + total_backups: 23, + verified_count: 23, + success_count: 23, + failure_count: 0, + success_rate: 100.0, + failures: vec![], + }; + assert_eq!(report.success_rate, 100.0); + assert!(report.failures.is_empty()); + assert_eq!(report.verified_count, report.total_backups); +} + +#[test] +fn test_verification_report_with_failures() { + let failed_record = VerificationRecord { + snapshot_name: "snap-bad".to_string(), + vm_name: "vm".to_string(), + result: VerificationResult::Fail, + disk_size_gb: Some(0), + expected_size_gb: Some(64), + duration_secs: Some(30), + error_message: Some("Disk not readable".to_string()), + verified_at: None, + }; + let report = VerificationReport { + vm_name: "vm".to_string(), + total_backups: 10, + verified_count: 10, + success_count: 9, + failure_count: 1, + success_rate: 90.0, + failures: vec![failed_record], + }; + assert_eq!(report.failure_count, 1); + assert_eq!(report.success_rate, 90.0); + assert_eq!(report.failures.len(), 1); + assert_eq!(report.failures[0].snapshot_name, "snap-bad"); +} + +#[test] +fn test_verification_report_success_rate_calculation() { + // Contract: success_rate = (success_count / verified_count) * 100 + let verified: u32 = 145; + let success: u32 = 144; + let rate = (success as f64 / verified as f64) * 100.0; + assert!((rate - 99.31).abs() < 0.01, "Expected ~99.31%, got {}", rate); +} + +// =========================================================================== +// Verification boundary conditions +// =========================================================================== + +#[test] +fn test_verification_zero_backups() { + let report = VerificationReport { + vm_name: "empty-vm".to_string(), + total_backups: 0, + verified_count: 0, + success_count: 0, + failure_count: 0, + success_rate: 0.0, + failures: vec![], + }; + assert_eq!(report.total_backups, 0); + assert_eq!(report.success_rate, 0.0); +} + +#[test] +fn test_verification_timeout_record() { + // Contract: verification that takes >15 minutes should be flagged + let record = VerificationRecord { + snapshot_name: "snap-slow".to_string(), + vm_name: "vm".to_string(), + result: VerificationResult::Fail, + disk_size_gb: None, + expected_size_gb: Some(256), + duration_secs: Some(900), // 15 minutes + error_message: Some("Verification timed out".to_string()), + verified_at: None, + }; + assert!(record.duration_secs.unwrap() >= 900); + assert_eq!(record.result, VerificationResult::Fail); +} + +// =========================================================================== +// Parallel replication batch size tests +// =========================================================================== + +#[test] +fn test_parallel_replication_batch_size() { + // Contract: replicate-all with --max-parallel N should process in batches of N + let total_jobs = 10; + let max_parallel: usize = 3; + let expected_batches = (total_jobs + max_parallel - 1) / max_parallel; + assert_eq!(expected_batches, 4, "10 jobs / 3 parallel = 4 batches"); +} + +#[test] +fn test_parallel_replication_single_batch() { + let total_jobs: usize = 2; + let max_parallel: usize = 5; + let expected_batches = (total_jobs + max_parallel - 1) / max_parallel; + assert_eq!(expected_batches, 1, "2 jobs / 5 parallel = 1 batch"); +} diff --git a/rust/crates/azlin/src/tests/test_group_dr_testing.rs b/rust/crates/azlin/src/tests/test_group_dr_testing.rs new file mode 100644 index 00000000..da6b8d5b --- /dev/null +++ b/rust/crates/azlin/src/tests/test_group_dr_testing.rs @@ -0,0 +1,543 @@ +//! TDD RED PHASE: Disaster recovery testing unit tests. +//! +//! These tests define the expected behavior for DR test execution, +//! RTO measurement, success rate tracking, and test history. +//! They FAIL until the DR testing module is implemented. +//! +//! Expected module: crate::dr_testing +//! +//! Feature spec: docs/backup-disaster-recovery.md §Disaster Recovery Testing +//! Test coverage spec: docs/testing/backup-dr-test-coverage.md + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Expected DR testing data structures (contract definition) +// --------------------------------------------------------------------------- + +/// Result of a single DR test phase. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +enum DRPhaseResult { + Success, + Failure, + Skipped, +} + +/// Configuration for a DR test run. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DRTestConfig { + vm_name: String, + backup_name: Option, + test_region: String, + test_resource_group: Option, + rto_target_minutes: u32, + skip_connectivity: bool, + skip_cleanup: bool, +} + +/// Detailed results of each DR test phase. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DRPhaseDetail { + phase_name: String, + result: DRPhaseResult, + duration_secs: u64, + error_message: Option, +} + +/// Complete result of a DR test run. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DRTestResult { + vm_name: String, + backup_name: String, + test_region: String, + overall_result: DRPhaseResult, + rto_seconds: u64, + rto_target_seconds: u64, + rto_met: bool, + phases: Vec, + started_at: String, + completed_at: String, +} + +/// Historical DR test success rate. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DRSuccessRate { + vm_name: String, + total_tests: u32, + passed: u32, + failed: u32, + success_rate: f64, + target_rate: f64, + compliant: bool, +} + +// =========================================================================== +// DRTestConfig tests +// =========================================================================== + +#[test] +fn test_dr_config_basic() { + let config = DRTestConfig { + vm_name: "prod-db-vm".to_string(), + backup_name: None, + test_region: "westus2".to_string(), + test_resource_group: None, + rto_target_minutes: 15, + skip_connectivity: false, + skip_cleanup: false, + }; + assert_eq!(config.vm_name, "prod-db-vm"); + assert_eq!(config.rto_target_minutes, 15); + assert!(config.backup_name.is_none(), "None means use latest backup"); +} + +#[test] +fn test_dr_config_with_specific_backup() { + let config = DRTestConfig { + vm_name: "vm".to_string(), + backup_name: Some("vm-backup-weekly-20261124-0800".to_string()), + test_region: "westus2".to_string(), + test_resource_group: Some("DR-Testing".to_string()), + rto_target_minutes: 15, + skip_connectivity: false, + skip_cleanup: false, + }; + assert!(config.backup_name.is_some()); + assert!(config.test_resource_group.is_some()); +} + +#[test] +fn test_dr_config_serialization() { + let config = DRTestConfig { + vm_name: "vm".to_string(), + backup_name: None, + test_region: "westus2".to_string(), + test_resource_group: None, + rto_target_minutes: 15, + skip_connectivity: false, + skip_cleanup: false, + }; + let json = serde_json::to_string(&config).unwrap(); + assert!(json.contains("\"rto_target_minutes\":15")); + assert!(json.contains("\"test_region\":\"westus2\"")); +} + +// =========================================================================== +// DRTestResult tests +// =========================================================================== + +#[test] +fn test_dr_result_all_phases_pass() { + let result = DRTestResult { + vm_name: "prod-db-vm".to_string(), + backup_name: "prod-db-vm-backup-daily-20261201-0800".to_string(), + test_region: "westus2".to_string(), + overall_result: DRPhaseResult::Success, + rto_seconds: 647, // 10m 47s + rto_target_seconds: 900, + rto_met: true, + phases: vec![ + DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 512, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify boot".to_string(), + result: DRPhaseResult::Success, + duration_secs: 135, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify connectivity".to_string(), + result: DRPhaseResult::Success, + duration_secs: 8, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Cleanup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 105, + error_message: None, + }, + ], + started_at: "2026-12-01T10:00:00Z".to_string(), + completed_at: "2026-12-01T10:10:47Z".to_string(), + }; + assert_eq!(result.overall_result, DRPhaseResult::Success); + assert!(result.rto_met); + assert_eq!(result.phases.len(), 4); + assert!(result.phases.iter().all(|p| p.result == DRPhaseResult::Success)); +} + +#[test] +fn test_dr_result_restore_failure() { + let result = DRTestResult { + vm_name: "vm".to_string(), + backup_name: "snap".to_string(), + test_region: "westus2".to_string(), + overall_result: DRPhaseResult::Failure, + rto_seconds: 0, + rto_target_seconds: 900, + rto_met: false, + phases: vec![DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Failure, + duration_secs: 300, + error_message: Some("Snapshot not found in target region".to_string()), + }], + started_at: "2026-12-01T10:00:00Z".to_string(), + completed_at: "2026-12-01T10:05:00Z".to_string(), + }; + assert_eq!(result.overall_result, DRPhaseResult::Failure); + assert!(!result.rto_met); + assert_eq!(result.phases[0].result, DRPhaseResult::Failure); + assert!(result.phases[0].error_message.is_some()); +} + +#[test] +fn test_dr_result_boot_failure() { + let result = DRTestResult { + vm_name: "vm".to_string(), + backup_name: "snap".to_string(), + test_region: "westus2".to_string(), + overall_result: DRPhaseResult::Failure, + rto_seconds: 0, + rto_target_seconds: 900, + rto_met: false, + phases: vec![ + DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 480, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify boot".to_string(), + result: DRPhaseResult::Failure, + duration_secs: 300, + error_message: Some("VM failed to reach Running state within timeout".to_string()), + }, + ], + started_at: "2026-12-01T10:00:00Z".to_string(), + completed_at: "2026-12-01T10:13:00Z".to_string(), + }; + assert_eq!(result.overall_result, DRPhaseResult::Failure); + assert_eq!(result.phases[0].result, DRPhaseResult::Success); + assert_eq!(result.phases[1].result, DRPhaseResult::Failure); +} + +#[test] +fn test_dr_result_connectivity_failure() { + let result = DRTestResult { + vm_name: "vm".to_string(), + backup_name: "snap".to_string(), + test_region: "westus2".to_string(), + overall_result: DRPhaseResult::Failure, + rto_seconds: 0, + rto_target_seconds: 900, + rto_met: false, + phases: vec![ + DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 480, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify boot".to_string(), + result: DRPhaseResult::Success, + duration_secs: 120, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify connectivity".to_string(), + result: DRPhaseResult::Failure, + duration_secs: 60, + error_message: Some("SSH connection refused on port 22".to_string()), + }, + ], + started_at: "2026-12-01T10:00:00Z".to_string(), + completed_at: "2026-12-01T10:11:00Z".to_string(), + }; + assert_eq!(result.phases[2].result, DRPhaseResult::Failure); + assert!(result.phases[2] + .error_message + .as_ref() + .unwrap() + .contains("SSH")); +} + +#[test] +fn test_dr_result_skipped_connectivity() { + let result = DRTestResult { + vm_name: "vm".to_string(), + backup_name: "snap".to_string(), + test_region: "westus2".to_string(), + overall_result: DRPhaseResult::Success, + rto_seconds: 615, + rto_target_seconds: 900, + rto_met: true, + phases: vec![ + DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 480, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify boot".to_string(), + result: DRPhaseResult::Success, + duration_secs: 135, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Verify connectivity".to_string(), + result: DRPhaseResult::Skipped, + duration_secs: 0, + error_message: None, + }, + DRPhaseDetail { + phase_name: "Cleanup".to_string(), + result: DRPhaseResult::Success, + duration_secs: 100, + error_message: None, + }, + ], + started_at: "2026-12-01T10:00:00Z".to_string(), + completed_at: "2026-12-01T10:10:15Z".to_string(), + }; + // Even with connectivity skipped, overall can pass + assert_eq!(result.overall_result, DRPhaseResult::Success); + assert_eq!(result.phases[2].result, DRPhaseResult::Skipped); +} + +// =========================================================================== +// RTO measurement tests +// =========================================================================== + +#[test] +fn test_rto_within_target() { + let rto_seconds: u64 = 647; // 10m 47s + let target_seconds: u64 = 900; // 15 minutes + assert!(rto_seconds < target_seconds, "RTO should be within target"); +} + +#[test] +fn test_rto_exceeds_target() { + let rto_seconds: u64 = 960; // 16 minutes + let target_seconds: u64 = 900; // 15 minutes + assert!( + rto_seconds > target_seconds, + "RTO exceeding target should be flagged" + ); +} + +#[test] +fn test_rto_exactly_at_target() { + let rto_seconds: u64 = 900; + let target_seconds: u64 = 900; + // Contract: exactly at target should be considered "met" + let rto_met = rto_seconds <= target_seconds; + assert!(rto_met, "RTO exactly at target should be considered met"); +} + +#[test] +fn test_rto_from_phase_durations() { + // Contract: RTO is the sum of restore + boot + connectivity phases (not cleanup) + let restore_secs: u64 = 512; + let boot_secs: u64 = 135; + let connectivity_secs: u64 = 8; + let cleanup_secs: u64 = 105; + let rto = restore_secs + boot_secs + connectivity_secs; + assert_eq!(rto, 655, "RTO should exclude cleanup time"); + assert_ne!( + rto, + restore_secs + boot_secs + connectivity_secs + cleanup_secs, + "Cleanup should not be included in RTO" + ); +} + +// =========================================================================== +// DR success rate tests +// =========================================================================== + +#[test] +fn test_success_rate_100_percent() { + let rate = DRSuccessRate { + vm_name: "prod-vm".to_string(), + total_tests: 12, + passed: 12, + failed: 0, + success_rate: 100.0, + target_rate: 99.9, + compliant: true, + }; + assert_eq!(rate.success_rate, 100.0); + assert!(rate.compliant); + assert_eq!(rate.passed + rate.failed, rate.total_tests); +} + +#[test] +fn test_success_rate_partial() { + let passed: u32 = 35; + let total: u32 = 36; + let rate = (passed as f64 / total as f64) * 100.0; + let target = 99.9; + assert!( + (rate - 97.22).abs() < 0.01, + "Expected ~97.22%, got {}", + rate + ); + assert!(rate < target, "97.22% < 99.9% target = non-compliant"); +} + +#[test] +fn test_success_rate_zero_tests() { + let rate = DRSuccessRate { + vm_name: "new-vm".to_string(), + total_tests: 0, + passed: 0, + failed: 0, + success_rate: 0.0, + target_rate: 99.9, + compliant: false, + }; + assert_eq!(rate.total_tests, 0); + assert!(!rate.compliant, "Zero tests should not be compliant"); +} + +#[test] +fn test_success_rate_single_failure() { + let total: u32 = 100; + let passed: u32 = 99; + let rate = (passed as f64 / total as f64) * 100.0; + assert_eq!(rate, 99.0); + // 99.0% < 99.9% target → non-compliant + assert!(rate < 99.9); +} + +#[test] +fn test_success_rate_serialization() { + let rate = DRSuccessRate { + vm_name: "vm".to_string(), + total_tests: 10, + passed: 10, + failed: 0, + success_rate: 100.0, + target_rate: 99.9, + compliant: true, + }; + let json = serde_json::to_string(&rate).unwrap(); + assert!(json.contains("\"compliant\":true")); + assert!(json.contains("\"target_rate\":99.9")); +} + +// =========================================================================== +// DR test phase sequence validation +// =========================================================================== + +#[test] +fn test_dr_phases_are_sequential() { + // Contract: DR test phases must execute in order: + // 1. Restore → 2. Boot → 3. Connectivity → 4. Cleanup + let expected_phases = vec![ + "Restore backup", + "Verify boot", + "Verify connectivity", + "Cleanup", + ]; + let phases: Vec = expected_phases + .iter() + .map(|name| DRPhaseDetail { + phase_name: name.to_string(), + result: DRPhaseResult::Success, + duration_secs: 0, + error_message: None, + }) + .collect(); + assert_eq!(phases.len(), 4); + assert_eq!(phases[0].phase_name, "Restore backup"); + assert_eq!(phases[3].phase_name, "Cleanup"); +} + +#[test] +fn test_dr_failure_stops_subsequent_phases() { + // Contract: if restore fails, boot/connectivity/cleanup should not run + let phases = vec![DRPhaseDetail { + phase_name: "Restore backup".to_string(), + result: DRPhaseResult::Failure, + duration_secs: 300, + error_message: Some("Snapshot not found".to_string()), + }]; + // Only 1 phase executed, remaining 3 were not attempted + assert_eq!(phases.len(), 1); + assert_eq!(phases[0].result, DRPhaseResult::Failure); +} + +// =========================================================================== +// Boundary conditions +// =========================================================================== + +#[test] +fn test_dr_config_empty_vm_name() { + let config = DRTestConfig { + vm_name: String::new(), + backup_name: None, + test_region: "westus2".to_string(), + test_resource_group: None, + rto_target_minutes: 15, + skip_connectivity: false, + skip_cleanup: false, + }; + assert!( + config.vm_name.is_empty(), + "Empty VM name should be rejected by implementation validation" + ); +} + +#[test] +fn test_dr_config_zero_rto_target() { + let config = DRTestConfig { + vm_name: "vm".to_string(), + backup_name: None, + test_region: "westus2".to_string(), + test_resource_group: None, + rto_target_minutes: 0, + skip_connectivity: false, + skip_cleanup: false, + }; + assert_eq!( + config.rto_target_minutes, 0, + "Zero RTO target should be rejected by implementation validation" + ); +} + +#[test] +fn test_dr_phase_result_serializes_lowercase() { + let success = serde_json::to_string(&DRPhaseResult::Success).unwrap(); + let failure = serde_json::to_string(&DRPhaseResult::Failure).unwrap(); + let skipped = serde_json::to_string(&DRPhaseResult::Skipped).unwrap(); + assert_eq!(success, "\"success\""); + assert_eq!(failure, "\"failure\""); + assert_eq!(skipped, "\"skipped\""); +} + +// =========================================================================== +// Error type tests — AzlinError::Backup variant +// =========================================================================== + +#[test] +fn test_azlin_error_backup_variant() { + let err = azlin_core::error::AzlinError::Backup("backup failed".to_string()); + assert!(err.to_string().contains("backup failed")); + assert!(err.to_string().contains("Backup")); +} + +#[test] +fn test_azlin_error_snapshot_variant() { + let err = azlin_core::error::AzlinError::Snapshot("snapshot not found".to_string()); + assert!(err.to_string().contains("snapshot not found")); +} diff --git a/rust/crates/azlin/tests/backup_dr_integration.rs b/rust/crates/azlin/tests/backup_dr_integration.rs index 35a98dac..cf914c1a 100644 --- a/rust/crates/azlin/tests/backup_dr_integration.rs +++ b/rust/crates/azlin/tests/backup_dr_integration.rs @@ -1,14 +1,19 @@ //! Backup and disaster-recovery integration tests. //! +//! TDD RED PHASE: Tests define the expected CLI behavior for the backup and +//! DR feature (Issue #439). Tests marked [RED] FAIL until implementation adds +//! the `backup` and `dr` subcommands. Tests marked [GREEN] exercise existing +//! snapshot functionality that already works. +//! //! Ported from Python E2E: test_backup_dr_e2e.py, test_disaster_recovery_e2e.py. mod integration; use integration::run_azlin; -// --------------------------------------------------------------------------- -// Snapshot (backup) subcommand help -// --------------------------------------------------------------------------- +// =========================================================================== +// [GREEN] Existing snapshot tests — these pass today +// =========================================================================== #[test] fn test_snapshot_help() { @@ -40,3 +45,302 @@ fn test_snapshot_restore_requires_vm_name() { let (_, _, code) = run_azlin(&["snapshot", "restore"]); assert_ne!(code, 0, "snapshot restore requires a VM name argument"); } + +// =========================================================================== +// [RED] Backup command — help and argument validation +// =========================================================================== + +#[test] +fn test_backup_help_shows_subcommands() { + let (stdout, _, code) = run_azlin(&["backup", "--help"]); + assert_eq!(code, 0, "backup --help should succeed"); + assert!( + stdout.contains("configure"), + "should list configure subcommand" + ); + assert!(stdout.contains("trigger"), "should list trigger subcommand"); + assert!(stdout.contains("list"), "should list list subcommand"); + assert!(stdout.contains("restore"), "should list restore subcommand"); + assert!(stdout.contains("verify"), "should list verify subcommand"); + assert!( + stdout.contains("replicate"), + "should list replicate subcommand" + ); +} + +#[test] +fn test_backup_configure_help() { + let (stdout, _, code) = run_azlin(&["backup", "configure", "--help"]); + assert_eq!(code, 0, "backup configure --help should succeed"); + assert!( + stdout.contains("daily-retention"), + "should document --daily-retention flag" + ); + assert!( + stdout.contains("weekly-retention"), + "should document --weekly-retention flag" + ); + assert!( + stdout.contains("monthly-retention"), + "should document --monthly-retention flag" + ); +} + +#[test] +fn test_backup_configure_requires_vm_name() { + let (_, _, code) = run_azlin(&["backup", "configure"]); + assert_ne!(code, 0, "backup configure requires a VM name argument"); +} + +#[test] +fn test_backup_trigger_requires_vm_name() { + let (_, _, code) = run_azlin(&["backup", "trigger"]); + assert_ne!(code, 0, "backup trigger requires a VM name argument"); +} + +#[test] +fn test_backup_list_requires_vm_name() { + let (_, _, code) = run_azlin(&["backup", "list"]); + assert_ne!(code, 0, "backup list requires a VM name argument"); +} + +#[test] +fn test_backup_restore_requires_vm_and_backup() { + let (_, _, code) = run_azlin(&["backup", "restore"]); + assert_ne!(code, 0, "backup restore requires vm name"); + let (_, _, code2) = run_azlin(&["backup", "restore", "my-vm"]); + assert_ne!(code2, 0, "backup restore requires --backup flag"); +} + +#[test] +fn test_backup_verify_requires_backup_name() { + let (_, _, code) = run_azlin(&["backup", "verify"]); + assert_ne!(code, 0, "backup verify requires a backup name argument"); +} + +#[test] +fn test_backup_replicate_requires_args() { + let (_, _, code) = run_azlin(&["backup", "replicate"]); + assert_ne!( + code, 0, + "backup replicate requires backup name and target region" + ); +} + +#[test] +fn test_backup_config_show_requires_vm() { + let (_, _, code) = run_azlin(&["backup", "config-show"]); + assert_ne!(code, 0, "backup config-show requires a VM name"); +} + +#[test] +fn test_backup_disable_requires_vm() { + let (_, _, code) = run_azlin(&["backup", "disable"]); + assert_ne!(code, 0, "backup disable requires a VM name"); +} + +// =========================================================================== +// [RED] Backup configure — end-to-end behavior +// =========================================================================== + +#[test] +fn test_backup_configure_daily_only() { + let (stdout, _, code) = run_azlin(&["backup", "configure", "test-vm", "--daily-retention", "7"]); + assert_eq!(code, 0, "backup configure with daily retention should succeed"); + assert!( + stdout.contains("configured") || stdout.contains("Configured"), + "should confirm configuration" + ); + assert!(stdout.contains("7"), "should echo daily retention value"); +} + +#[test] +fn test_backup_configure_full_retention() { + let (stdout, _, code) = run_azlin(&[ + "backup", + "configure", + "test-vm", + "--daily-retention", + "7", + "--weekly-retention", + "4", + "--monthly-retention", + "12", + ]); + assert_eq!(code, 0, "backup configure with full retention should succeed"); + assert!(stdout.contains("7"), "should echo daily retention"); + assert!(stdout.contains("4"), "should echo weekly retention"); + assert!(stdout.contains("12"), "should echo monthly retention"); +} + +#[test] +fn test_backup_configure_with_cross_region() { + let (stdout, _, code) = run_azlin(&[ + "backup", + "configure", + "test-vm", + "--daily-retention", + "7", + "--cross-region", + "--target-region", + "westus2", + ]); + assert_eq!( + code, 0, + "backup configure with cross-region should succeed" + ); + assert!( + stdout.contains("westus2"), + "should confirm target region" + ); +} + +// =========================================================================== +// [RED] Backup trigger — tier determination +// =========================================================================== + +#[test] +fn test_backup_trigger_help_shows_tier_option() { + let (stdout, _, code) = run_azlin(&["backup", "trigger", "--help"]); + assert_eq!(code, 0, "backup trigger --help should succeed"); + assert!( + stdout.contains("tier"), + "should document --tier option for manual tier override" + ); +} + +// =========================================================================== +// [RED] Backup list — filtering +// =========================================================================== + +#[test] +fn test_backup_list_help_shows_filters() { + let (stdout, _, code) = run_azlin(&["backup", "list", "--help"]); + assert_eq!(code, 0, "backup list --help should succeed"); + assert!( + stdout.contains("tier"), + "should document --tier filter option" + ); +} + +// =========================================================================== +// [RED] Backup replication commands +// =========================================================================== + +#[test] +fn test_backup_replicate_all_requires_vm_and_region() { + let (_, _, code) = run_azlin(&["backup", "replicate-all"]); + assert_ne!( + code, 0, + "backup replicate-all requires vm name and target region" + ); +} + +#[test] +fn test_backup_replication_status_requires_vm() { + let (_, _, code) = run_azlin(&["backup", "replication-status"]); + assert_ne!( + code, 0, + "backup replication-status requires a VM name" + ); +} + +#[test] +fn test_backup_replication_jobs_help() { + let (stdout, _, code) = run_azlin(&["backup", "replication-jobs", "--help"]); + assert_eq!(code, 0, "backup replication-jobs --help should succeed"); + assert!( + stdout.contains("status"), + "should document --status filter" + ); +} + +// =========================================================================== +// [RED] Backup verification commands +// =========================================================================== + +#[test] +fn test_backup_verify_all_requires_vm() { + let (_, _, code) = run_azlin(&["backup", "verify-all"]); + assert_ne!(code, 0, "backup verify-all requires a VM name"); +} + +#[test] +fn test_backup_verification_report_help() { + let (stdout, _, code) = run_azlin(&["backup", "verification-report", "--help"]); + assert_eq!( + code, 0, + "backup verification-report --help should succeed" + ); + assert!(stdout.contains("days"), "should document --days option"); + assert!(stdout.contains("vm"), "should document --vm option"); +} + +// =========================================================================== +// [RED] DR test commands +// =========================================================================== + +#[test] +fn test_dr_help_shows_subcommands() { + let (stdout, _, code) = run_azlin(&["dr", "--help"]); + assert_eq!(code, 0, "dr --help should succeed"); + assert!(stdout.contains("test"), "should list test subcommand"); + assert!( + stdout.contains("test-history"), + "should list test-history subcommand" + ); + assert!( + stdout.contains("success-rate"), + "should list success-rate subcommand" + ); +} + +#[test] +fn test_dr_test_requires_vm_and_region() { + let (_, _, code) = run_azlin(&["dr", "test"]); + assert_ne!(code, 0, "dr test requires VM name"); + let (_, _, code2) = run_azlin(&["dr", "test", "my-vm"]); + assert_ne!(code2, 0, "dr test requires --test-region"); +} + +#[test] +fn test_dr_test_help_shows_options() { + let (stdout, _, code) = run_azlin(&["dr", "test", "--help"]); + assert_eq!(code, 0, "dr test --help should succeed"); + assert!( + stdout.contains("test-region"), + "should document --test-region" + ); + assert!(stdout.contains("backup"), "should document --backup option"); +} + +#[test] +fn test_dr_test_all_help() { + let (stdout, _, code) = run_azlin(&["dr", "test-all", "--help"]); + assert_eq!(code, 0, "dr test-all --help should succeed"); + assert!( + stdout.contains("resource-group") || stdout.contains("resource_group"), + "should document --resource-group option" + ); +} + +#[test] +fn test_dr_test_history_requires_vm() { + let (_, _, code) = run_azlin(&["dr", "test-history"]); + assert_ne!(code, 0, "dr test-history requires VM name"); +} + +#[test] +fn test_dr_test_history_help_shows_days() { + let (stdout, _, code) = run_azlin(&["dr", "test-history", "--help"]); + assert_eq!(code, 0, "dr test-history --help should succeed"); + assert!(stdout.contains("days"), "should document --days option"); +} + +#[test] +fn test_dr_success_rate_help() { + let (stdout, _, code) = run_azlin(&["dr", "success-rate", "--help"]); + assert_eq!(code, 0, "dr success-rate --help should succeed"); + assert!(stdout.contains("vm"), "should document --vm filter"); + assert!(stdout.contains("days"), "should document --days option"); +}