diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 3e9c13f..6073270 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -80,65 +80,34 @@ jobs:
           cache-from: type=gha
           cache-to: type=gha,mode=max
 
+      - name: Copy deploy script to VPS
+        uses: appleboy/scp-action@v1
+        with:
+          host: ${{ secrets.VPS_HOST }}
+          username: ${{ secrets.VPS_USER }}
+          key: ${{ secrets.VPS_SSH_KEY }}
+          source: scripts/deploy-with-rollback.sh
+          target: ~/app
+          strip_components: 1
+
       - name: Deploy to VPS via SSH
         uses: appleboy/ssh-action@v1
+        env:
+          IMAGE: ghcr.io/${{ github.repository }}:${{ steps.version.outputs.version }}
+          PORT: ${{ secrets.APP_PORT || '3000' }}
         with:
           host: ${{ secrets.VPS_HOST }}
           username: ${{ secrets.VPS_USER }}
           key: ${{ secrets.VPS_SSH_KEY }}
+          envs: IMAGE,PORT
           script: |
             set -e
-            IMAGE="ghcr.io/${{ github.repository }}:${{ steps.version.outputs.version }}"
-            PORT="${{ secrets.APP_PORT || '3000' }}"
-            mkdir -p ~/app
-            cd ~/app
-
-            # Capture currently running image for potential rollback
-            PREV_IMAGE=""
-            if docker compose ps -q app >/dev/null 2>&1; then
-              CID=$(docker compose ps -q app || true)
-              if [ -n "$CID" ]; then
-                PREV_IMAGE=$(docker inspect --format '{{.Config.Image}}' "$CID" 2>/dev/null || true)
-              fi
-            fi
-            echo "Previous image: ${PREV_IMAGE:-}"
-
-            write_compose() {
-              local img="$1"
-              cat > ~/app/docker-compose.yml << EOF
-            services:
-              app:
-                image: $img
-                env_file: $HOME/.env.app
-                ports:
-                  - "${PORT}:${PORT}"
-                restart: unless-stopped
-                healthcheck:
-                  test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:${PORT}/health"]
-                  interval: 10s
-                  timeout: 5s
-                  retries: 3
-                  start_period: 10s
-            EOF
-            }
-
-            write_compose "$IMAGE"
-            docker compose pull
-
-            if docker compose up -d --wait; then
-              echo "Deploy succeeded."
-              docker image prune -f
-            else
-              echo "::error::Deploy health check failed."
-              if [ -n "$PREV_IMAGE" ] && [ "$PREV_IMAGE" != "$IMAGE" ]; then
-                echo "Rolling back to $PREV_IMAGE"
-                write_compose "$PREV_IMAGE"
-                docker compose up -d --wait || echo "::error::Rollback also failed — manual intervention required."
-              else
-                echo "No previous image available to roll back to."
-              fi
-              exit 1
-            fi
+            chmod +x ~/app/deploy-with-rollback.sh
+            IMAGE="$IMAGE" \
+            PORT="$PORT" \
+            DEPLOY_DIR="$HOME/app" \
+            ENV_FILE="$HOME/.env.app" \
+            ~/app/deploy-with-rollback.sh
 
       - name: Clean up old GHCR images
         uses: actions/delete-package-versions@v5
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ec88166..dea012a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -68,3 +68,14 @@ jobs:
             echo "::error::Docker image exceeds 500 MB (${SIZE_MB} MB) — consider multi-stage build"
             exit 1
           fi
+
+  rollback-integration-test:
+    # Regression test for scripts/deploy-with-rollback.sh. Exercises the
+    # same script the CD workflow calls over SSH, but with local images
+    # and local docker compose so no secrets/registry are required.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Run rollback integration test
+        run: bash tests/rollback-integration.sh
diff --git a/scripts/deploy-with-rollback.sh b/scripts/deploy-with-rollback.sh
new file mode 100755
index 0000000..3a30125
--- /dev/null
+++ b/scripts/deploy-with-rollback.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Deploy a container image via docker compose with automatic rollback on
+# health-check failure.
+#
+# This script is shared between:
+#   - .github/workflows/cd.yml  (runs on the VPS over SSH during production deploy)
+#   - .github/workflows/ci.yml  (runs locally on the CI runner as a regression test)
+#
+# Responsibilities:
+#   1. Detect the image currently running for the "app" service (if any) and
+#      remember it as the rollback target.
+#   2. Write a docker-compose.yml that points at the new image.
+#   3. `docker compose up -d --wait` — Docker blocks until the container is
+#      healthy. If unhealthy, the command fails.
+#   4. On failure, rewrite the compose file with the previous image and
+#      restart. The script exits non-zero whenever the new image fails its
+#      health check, whether or not a rollback was possible or succeeded.
+#
+# Required env vars:
+#   IMAGE       Container image to deploy (e.g. ghcr.io/org/app:1.2.3)
+#   PORT        Port the app listens on (host and container)
+#   DEPLOY_DIR  Directory that holds docker-compose.yml (created if missing)
+#
+# Optional env vars:
+#   ENV_FILE    Path to an env_file for the compose service. Empty disables it.
+#   SKIP_PULL   If "1", skip `docker compose pull` (useful when the test has
+#               already loaded a local image that is not in a registry).
+
+set -euo pipefail
+
+: "${IMAGE:?IMAGE is required}"
+: "${PORT:?PORT is required}"
+: "${DEPLOY_DIR:?DEPLOY_DIR is required}"
+ENV_FILE="${ENV_FILE:-}"
+SKIP_PULL="${SKIP_PULL:-0}"
+
+mkdir -p "$DEPLOY_DIR"
+cd "$DEPLOY_DIR"
+
+# Write docker-compose.yml in the current directory for the image given as $1.
+write_compose() {
+  local img="$1"
+  {
+    echo "services:"
+    echo "  app:"
+    echo "    image: $img"
+    if [ -n "$ENV_FILE" ]; then
+      echo "    env_file: $ENV_FILE"
+    fi
+    echo "    environment:"
+    echo "      PORT: \"${PORT}\""
+    echo "    ports:"
+    echo "      - \"${PORT}:${PORT}\""
+    echo "    restart: unless-stopped"
+    echo "    healthcheck:"
+    echo "      test: [\"CMD-SHELL\", \"wget -qO /dev/null http://localhost:${PORT}/health || exit 1\"]"
+    echo "      interval: 5s"
+    echo "      timeout: 3s"
+    echo "      retries: 3"
+    echo "      start_period: 5s"
+  } > docker-compose.yml
+}
+
+# Capture currently running image for potential rollback.
+PREV_IMAGE=""
+if docker compose ps -q app >/dev/null 2>&1; then
+  CID="$(docker compose ps -q app || true)"
+  if [ -n "$CID" ]; then
+    PREV_IMAGE="$(docker inspect --format '{{.Config.Image}}' "$CID" 2>/dev/null || true)"
+  fi
+fi
+echo "Previous image: ${PREV_IMAGE:-(none)}"
+echo "Target image:   ${IMAGE}"
+
+write_compose "$IMAGE"
+
+if [ "$SKIP_PULL" != "1" ]; then
+  docker compose pull
+fi
+
+if docker compose up -d --wait; then
+  echo "Deploy succeeded."
+  docker image prune -f >/dev/null 2>&1 || true
+  exit 0
+fi
+
+echo "::error::Deploy health check failed."
+if [ -n "$PREV_IMAGE" ] && [ "$PREV_IMAGE" != "$IMAGE" ]; then
+  echo "Rolling back to $PREV_IMAGE"
+  write_compose "$PREV_IMAGE"
+  if docker compose up -d --wait; then
+    echo "Rollback succeeded."
+  else
+    echo "::error::Rollback also failed — manual intervention required."
+  fi
+else
+  echo "No previous image available to roll back to."
+fi
+# The requested image never went live, so report failure even after a rollback.
+exit 1
diff --git a/tests/fixtures/bad-app/Dockerfile b/tests/fixtures/bad-app/Dockerfile
new file mode 100644
index 0000000..87fc357
--- /dev/null
+++ b/tests/fixtures/bad-app/Dockerfile
@@ -0,0 +1,7 @@
+FROM node:20-alpine
+WORKDIR /app
+COPY server.js .
+EXPOSE 3000
+HEALTHCHECK --interval=5s --timeout=3s --retries=3 --start-period=5s \
+  CMD wget -qO /dev/null http://localhost:${PORT:-3000}/health || exit 1
+CMD ["node", "server.js"]
diff --git a/tests/fixtures/bad-app/server.js b/tests/fixtures/bad-app/server.js
new file mode 100644
index 0000000..7e07f2f
--- /dev/null
+++ b/tests/fixtures/bad-app/server.js
@@ -0,0 +1,14 @@
+// Broken app fixture: /health always returns 500.
+// Used by the rollback integration test to simulate a failed deploy that
+// should trigger rollback to the previous (good) image.
+const http = require('http');
+const port = process.env.PORT || 3000;
+http.createServer((req, res) => {
+  if (req.url === '/health') {
+    res.writeHead(500, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ status: 'broken', build: 'bad' }));
+    return;
+  }
+  res.writeHead(500);
+  res.end('bad');
+}).listen(port, () => console.log(`bad app on ${port}`));
diff --git a/tests/fixtures/good-app/Dockerfile b/tests/fixtures/good-app/Dockerfile
new file mode 100644
index 0000000..87fc357
--- /dev/null
+++ b/tests/fixtures/good-app/Dockerfile
@@ -0,0 +1,7 @@
+FROM node:20-alpine
+WORKDIR /app
+COPY server.js .
+EXPOSE 3000
+HEALTHCHECK --interval=5s --timeout=3s --retries=3 --start-period=5s \
+  CMD wget -qO /dev/null http://localhost:${PORT:-3000}/health || exit 1
+CMD ["node", "server.js"]
diff --git a/tests/fixtures/good-app/server.js b/tests/fixtures/good-app/server.js
new file mode 100644
index 0000000..673f734
--- /dev/null
+++ b/tests/fixtures/good-app/server.js
@@ -0,0 +1,13 @@
+// Healthy app fixture: responds 200 on /health.
+// Used by the rollback integration test to simulate a successful deploy.
+const http = require('http');
+const port = process.env.PORT || 3000;
+http.createServer((req, res) => {
+  if (req.url === '/health') {
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify({ status: 'ok', build: 'good' }));
+    return;
+  }
+  res.writeHead(200);
+  res.end('good');
+}).listen(port, () => console.log(`good app on ${port}`));
diff --git a/tests/rollback-integration.sh b/tests/rollback-integration.sh
new file mode 100755
index 0000000..92f0cac
--- /dev/null
+++ b/tests/rollback-integration.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# Integration test for scripts/deploy-with-rollback.sh.
+#
+# Runs entirely on the CI runner using local `docker compose` — no SSH, no
+# registry. Builds two local images (good, bad) and exercises the full
+# deploy flow the CD workflow uses in production.
+#
+# Scenarios covered:
+#   1. First deploy of the good image succeeds.
+#   2. Attempting to deploy the bad image over the good one fails AND
+#      rollback restores the good image. After the attempt, /health must
+#      still return 200 from the good image.
+#   3. First deploy of a bad image (no previous image) fails with a
+#      non-zero exit and leaves nothing healthy — the script must not
+#      silently swallow the failure.
+
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+SCRIPT="$REPO_ROOT/scripts/deploy-with-rollback.sh"
+GOOD_IMAGE="rollback-test/good:1"
+BAD_IMAGE="rollback-test/bad:1"
+PORT="${PORT:-38080}"
+WORK_DIR="$(mktemp -d)"
+
+pass() { echo "PASS: $1"; }
+fail() { echo "FAIL: $1" >&2; exit 1; }
+
+cleanup() {
+  if [ -f "$WORK_DIR/docker-compose.yml" ]; then
+    (cd "$WORK_DIR" && docker compose down -v --remove-orphans >/dev/null 2>&1 || true)
+  fi
+  rm -rf "$WORK_DIR"
+  docker rmi -f "$GOOD_IMAGE" "$BAD_IMAGE" >/dev/null 2>&1 || true
+}
+trap cleanup EXIT
+
+check_health() {
+  local expected_build="$1" body=""
+  for _ in 1 2 3 4 5 6 7 8 9 10; do
+    body="$(curl -sf "http://127.0.0.1:${PORT}/health" 2>/dev/null || true)"
+    if echo "$body" | grep -q "\"build\":\"${expected_build}\""; then
+      return 0
+    fi
+    sleep 1
+  done
+  echo "health check did not return build=${expected_build} (last body: ${body:-(empty)})" >&2
+  return 1
+}
+
+echo "==> Building fixture images"
+docker build -t "$GOOD_IMAGE" "$REPO_ROOT/tests/fixtures/good-app" >/dev/null
+docker build -t "$BAD_IMAGE" "$REPO_ROOT/tests/fixtures/bad-app" >/dev/null
+
+# ---------------------------------------------------------------------------
+# Scenario 1: first deploy of the good image must succeed.
+# ---------------------------------------------------------------------------
+echo "==> Scenario 1: deploy good image (first deploy)"
+if IMAGE="$GOOD_IMAGE" PORT="$PORT" DEPLOY_DIR="$WORK_DIR" SKIP_PULL=1 \
+    bash "$SCRIPT"; then
+  pass "good image deploy returned 0"
+else
+  fail "good image deploy returned non-zero"
+fi
+check_health good || fail "good image not serving /health"
+pass "good image /health responds with build=good"
+
+# ---------------------------------------------------------------------------
+# Scenario 2: deploying the bad image on top must fail AND rollback must
+# restore the good image.
+# ---------------------------------------------------------------------------
+echo "==> Scenario 2: deploy bad image, expect rollback to good"
+set +e
+IMAGE="$BAD_IMAGE" PORT="$PORT" DEPLOY_DIR="$WORK_DIR" SKIP_PULL=1 \
+  bash "$SCRIPT"
+deploy_rc=$?
+set -e
+if [ "$deploy_rc" -eq 0 ]; then
+  fail "bad image deploy returned 0 — rollback script failed to detect failure"
+fi
+pass "bad image deploy returned non-zero ($deploy_rc)"
+
+# After rollback, /health must still respond build=good.
+check_health good || fail "rollback did not restore the good image"
+pass "rollback restored good image — /health still serves build=good"
+
+# The compose file on disk should now point back at the good image.
+if grep -q "image: $GOOD_IMAGE" "$WORK_DIR/docker-compose.yml"; then
+  pass "compose file was rewritten to the good image after rollback"
+else
+  fail "compose file does not point to good image after rollback"
+fi
+
+# Tear down before scenario 3 (we want a truly fresh state with no PREV_IMAGE).
+(cd "$WORK_DIR" && docker compose down -v --remove-orphans >/dev/null 2>&1 || true)
+rm -f "$WORK_DIR/docker-compose.yml"
+
+# ---------------------------------------------------------------------------
+# Scenario 3: first deploy of a bad image (no previous) must fail loudly.
+# ---------------------------------------------------------------------------
+echo "==> Scenario 3: first deploy of bad image (no previous) — expect failure"
+set +e
+IMAGE="$BAD_IMAGE" PORT="$PORT" DEPLOY_DIR="$WORK_DIR" SKIP_PULL=1 \
+  bash "$SCRIPT"
+rc=$?
+set -e
+if [ "$rc" -eq 0 ]; then
+  fail "bad image first deploy returned 0 — failure was swallowed"
+fi
+pass "bad image first deploy returned non-zero ($rc)"
+
+# The failed first deploy must not leave a healthy endpoint behind.
+if curl -sf "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
+  fail "bad image first deploy left a healthy /health endpoint"
+fi
+pass "no healthy /health endpoint after failed first deploy"
+
+echo ""
+echo "All rollback integration scenarios passed."