From dc693106ee0fc0624f979d4f0ae331c6a85965ff Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 04:24:36 +0200 Subject: [PATCH 1/9] Add PR preview TTL cleanup and deterministic Terraform backend flow --- .github/workflows/infra-dev.yml | 10 ++ .github/workflows/preview-pr.yml | 185 +++++++++++++++++++++++++++ .github/workflows/reusable-cicd.yml | 43 +++++++ README.md | 6 + cmd.md | 8 ++ infra/terraform/envs/dev/versions.tf | 1 + scripts/devctl.sh | 22 ++++ scripts/tf-reconcile-dev-state.sh | 105 +++++++++++++++ 8 files changed, 380 insertions(+) create mode 100644 .github/workflows/preview-pr.yml create mode 100644 scripts/tf-reconcile-dev-state.sh diff --git a/.github/workflows/infra-dev.yml b/.github/workflows/infra-dev.yml index ace7ee2..69f8351 100644 --- a/.github/workflows/infra-dev.yml +++ b/.github/workflows/infra-dev.yml @@ -32,6 +32,9 @@ jobs: env: TF_IN_AUTOMATION: true TF_INPUT: 0 + TF_TOKEN_app_terraform_io: ${{ secrets.TF_API_TOKEN }} + TF_CLOUD_ORGANIZATION: ${{ secrets.TF_CLOUD_ORGANIZATION }} + TF_WORKSPACE: gmp-dev TF_VAR_tenancy_ocid: ${{ secrets.OCI_CLI_TENANCY }} TF_VAR_user_ocid: ${{ secrets.OCI_CLI_USER }} TF_VAR_fingerprint: ${{ secrets.OCI_CLI_FINGERPRINT }} @@ -41,6 +44,13 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Validate Terraform Cloud settings + run: | + if [ -z "$TF_CLOUD_ORGANIZATION" ] || [ -z "$TF_TOKEN_app_terraform_io" ]; then + echo "::error::Missing TF_CLOUD_ORGANIZATION or TF_API_TOKEN secrets for remote backend locking" + exit 1 + fi + - uses: hashicorp/setup-terraform@v3 with: terraform_version: 1.5.7 diff --git a/.github/workflows/preview-pr.yml b/.github/workflows/preview-pr.yml new file mode 100644 index 0000000..b6cba53 --- /dev/null +++ b/.github/workflows/preview-pr.yml @@ -0,0 +1,185 @@ +name: Preview PR + +on: + pull_request: + types: [opened, synchronize, reopened, closed] + branches: [dev] + schedule: + - cron: "0 */6 * * *" + workflow_dispatch: + +permissions: + contents: read + packages: write + security-events: write + id-token: write + pull-requests: write + +concurrency: + group: preview-pr-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + +jobs: + deploy-preview: + if: ${{ github.event.action != 'closed' && github.event.pull_request.head.repo.full_name == github.repository }} + uses: ./.github/workflows/reusable-cicd.yml + with: + environment_name: dev + namespace: pr-${{ github.event.pull_request.number }} + overlay: dev + deploy: true + build_images: true + push_images: true + sign_images: true + run_tests: false + run_frontend: false + validate_k8s: true + validate_k8s_all: false + policy_check: true + canary_checks: false + create_namespace: true + ref: ${{ github.event.pull_request.head.sha }} + preview_pr_number: ${{ github.event.pull_request.number }} + secrets: inherit + + cleanup-preview: + if: ${{ github.event.action == 'closed' && github.event.pull_request.head.repo.full_name == github.repository }} + runs-on: ubuntu-latest + environment: dev + steps: + - name: Install OCI CLI + run: | + curl -L -O https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh + bash install.sh --accept-all-defaults + echo "$HOME/bin" >> "$GITHUB_PATH" + + - name: Configure OCI CLI + env: + OCI_CLI_USER: ${{ secrets.OCI_CLI_USER }} + OCI_CLI_TENANCY: ${{ secrets.OCI_CLI_TENANCY }} + OCI_CLI_FINGERPRINT: ${{ secrets.OCI_CLI_FINGERPRINT }} + OCI_CLI_KEY_CONTENT: ${{ secrets.OCI_CLI_KEY_CONTENT }} + OCI_CLI_REGION: ${{ secrets.OCI_CLI_REGION }} + run: | + for v in OCI_CLI_USER OCI_CLI_TENANCY OCI_CLI_FINGERPRINT OCI_CLI_KEY_CONTENT OCI_CLI_REGION; do + if [ -z "${!v}" ]; then + echo "::error::Missing required secret: $v" + exit 1 + fi + done + mkdir -p ~/.oci + { + echo "[DEFAULT]" + echo "user=${OCI_CLI_USER}" + echo "fingerprint=${OCI_CLI_FINGERPRINT}" + echo "tenancy=${OCI_CLI_TENANCY}" + echo "region=${OCI_CLI_REGION}" + echo "key_file=~/.oci/oci_api_key.pem" + } > ~/.oci/config + RAW_KEY_STRIPPED=$(printf '%s' "${OCI_CLI_KEY_CONTENT}" | tr -d '\r') + if printf '%s' "$RAW_KEY_STRIPPED" | grep -q "BEGIN .*PRIVATE KEY"; then + printf '%s\n' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + elif printf '%b' "$RAW_KEY_STRIPPED" | grep -q "BEGIN .*PRIVATE KEY"; then + printf '%b' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + elif printf '%s' "$RAW_KEY_STRIPPED" | base64 -d > ~/.oci/oci_api_key.pem 2>/dev/null; then + : + else + printf '%b' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + fi + chmod 600 ~/.oci/config ~/.oci/oci_api_key.pem + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'v1.30.2' + + - name: Configure kubeconfig from OCI + run: | + set -euo pipefail + mkdir -p $HOME/.kube + CLUSTER_OCID=$(oci ce cluster list --compartment-id "${{ secrets.OCI_CLI_TENANCY }}" --all --query 'data[?name==`gmp-oke-dev` && "lifecycle-state"==`ACTIVE`] | [-1].id' --raw-output 2>/dev/null || true) + if [ -z "$CLUSTER_OCID" ] || [ "$CLUSTER_OCID" = "null" ]; then + echo "::error::No ACTIVE gmp-oke-dev cluster found" + exit 1 + fi + oci ce cluster create-kubeconfig --cluster-id "$CLUSTER_OCID" --file "$HOME/.kube/config" --region "${{ secrets.OCI_CLI_REGION }}" --token-version 2.0.0 --kube-endpoint PUBLIC_ENDPOINT + chmod 600 "$HOME/.kube/config" + + - name: Delete preview namespace + run: | + NS="pr-${{ github.event.pull_request.number }}" + kubectl delete namespace "$NS" --ignore-not-found=true --wait=true + + cleanup-stale-preview: + if: ${{ github.event_name != 'pull_request' }} + runs-on: ubuntu-latest + environment: dev + steps: + - name: Install OCI CLI + run: | + curl -L -O https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh + bash install.sh --accept-all-defaults + echo "$HOME/bin" >> "$GITHUB_PATH" + + - name: Configure OCI CLI + env: + OCI_CLI_USER: ${{ secrets.OCI_CLI_USER }} + OCI_CLI_TENANCY: ${{ secrets.OCI_CLI_TENANCY }} + OCI_CLI_FINGERPRINT: ${{ secrets.OCI_CLI_FINGERPRINT }} + OCI_CLI_KEY_CONTENT: ${{ secrets.OCI_CLI_KEY_CONTENT }} + OCI_CLI_REGION: ${{ secrets.OCI_CLI_REGION }} + run: | + for v in OCI_CLI_USER OCI_CLI_TENANCY OCI_CLI_FINGERPRINT OCI_CLI_KEY_CONTENT OCI_CLI_REGION; do + if [ -z "${!v}" ]; then + echo "::error::Missing required secret: $v" + exit 1 + fi + done + mkdir -p ~/.oci + { + echo "[DEFAULT]" + echo "user=${OCI_CLI_USER}" + echo "fingerprint=${OCI_CLI_FINGERPRINT}" + echo "tenancy=${OCI_CLI_TENANCY}" + echo "region=${OCI_CLI_REGION}" + echo "key_file=~/.oci/oci_api_key.pem" + } > ~/.oci/config + RAW_KEY_STRIPPED=$(printf '%s' "${OCI_CLI_KEY_CONTENT}" | tr -d '\r') + if printf '%s' "$RAW_KEY_STRIPPED" | grep -q "BEGIN .*PRIVATE KEY"; then + printf '%s\n' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + elif printf '%b' "$RAW_KEY_STRIPPED" | grep -q "BEGIN .*PRIVATE KEY"; then + printf '%b' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + elif printf '%s' "$RAW_KEY_STRIPPED" | base64 -d > ~/.oci/oci_api_key.pem 2>/dev/null; then + : + else + printf '%b' "$RAW_KEY_STRIPPED" > ~/.oci/oci_api_key.pem + fi + chmod 600 ~/.oci/config ~/.oci/oci_api_key.pem + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'v1.30.2' + + - name: Configure kubeconfig from OCI + run: | + set -euo pipefail + mkdir -p $HOME/.kube + CLUSTER_OCID=$(oci ce cluster list --compartment-id "${{ secrets.OCI_CLI_TENANCY }}" --all --query 'data[?name==`gmp-oke-dev` && "lifecycle-state"==`ACTIVE`] | [-1].id' --raw-output 2>/dev/null || true) + if [ -z "$CLUSTER_OCID" ] || [ "$CLUSTER_OCID" = "null" ]; then + echo "::error::No ACTIVE gmp-oke-dev cluster found" + exit 1 + fi + oci ce cluster create-kubeconfig --cluster-id "$CLUSTER_OCID" --file "$HOME/.kube/config" --region "${{ secrets.OCI_CLI_REGION }}" --token-version 2.0.0 --kube-endpoint PUBLIC_ENDPOINT + chmod 600 "$HOME/.kube/config" + + - name: Delete stale preview namespaces by TTL + run: | + set -euo pipefail + NOW=$(date +%s) + kubectl get ns -o json | jq -r '.items[] | select(.metadata.labels["preview.gmp/enabled"]=="true") | [.metadata.name, (.metadata.labels["preview.gmp/created-at"] // "0"), (.metadata.labels["preview.gmp/ttl-hours"] // "24")] | @tsv' | while IFS=$'\t' read -r NS CREATED TTL; do + EXPIRE=$((CREATED + TTL * 3600)) + if [ "$NOW" -ge "$EXPIRE" ]; then + kubectl delete namespace "$NS" --ignore-not-found=true --wait=true + fi + done diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index d577e3d..8985393 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -77,6 +77,10 @@ on: required: false type: string default: "" + preview_pr_number: + required: false + type: string + default: "" env: REGISTRY: ghcr.io @@ -450,6 +454,17 @@ jobs: if: ${{ inputs.create_namespace }} run: kubectl get namespace ${{ inputs.namespace }} || kubectl create namespace ${{ inputs.namespace }} + - name: Label preview namespace TTL + if: ${{ inputs.preview_pr_number != '' }} + run: | + NOW=$(date +%s) + kubectl label namespace ${{ inputs.namespace }} \ + preview.gmp/enabled=true \ + preview.gmp/pr=${{ inputs.preview_pr_number }} \ + preview.gmp/created-at="$NOW" \ + preview.gmp/ttl-hours=24 \ + --overwrite=true + - name: Configure GHCR pull secret env: GHCR_PASSWORD: ${{ secrets.GHCR_TOKEN || secrets.GITHUB_TOKEN }} @@ -506,6 +521,34 @@ jobs: echo "If IP is pending, wait 1-3 minutes and re-check service status." } >> "$GITHUB_STEP_SUMMARY" + - name: Comment preview URLs on PR + if: ${{ inputs.environment_name == 'dev' && inputs.preview_pr_number != '' }} + env: + GH_TOKEN: ${{ github.token }} + run: | + WEB_IP=$(kubectl get svc web -n ${{ inputs.namespace }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || true) + API_IP=$(kubectl get svc api-gateway -n ${{ inputs.namespace }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || true) + WEB_URL="pending" + API_URL="pending" + if [ -n "$WEB_IP" ]; then WEB_URL="http://$WEB_IP"; fi + if [ -n "$API_IP" ]; then API_URL="http://$API_IP"; fi + MARKER="" + BODY="$MARKER + ✅ Preview environment deployed + + - Namespace: \`${{ inputs.namespace }}\` + - Web: $WEB_URL + - API Gateway: $API_URL + + If URL is pending, wait a few minutes and re-run deployment." + + EXISTING_ID=$(gh api repos/${{ github.repository }}/issues/${{ inputs.preview_pr_number }}/comments --paginate --jq '.[] | select(.body | contains("'"$MARKER"'")) | .id' | head -n 1 || true) + if [ -n "$EXISTING_ID" ]; then + gh api repos/${{ github.repository }}/issues/comments/$EXISTING_ID -X PATCH -f body="$BODY" >/dev/null + else + gh api repos/${{ github.repository }}/issues/${{ inputs.preview_pr_number }}/comments -X POST -f body="$BODY" >/dev/null + fi + - name: Wait for rollouts run: | for d in api-gateway web identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics; do diff --git a/README.md b/README.md index 22d5a8a..a964284 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ kubectl apply -k platform/k8s/base - `.github/workflows/ci.yml` runs service tests. - `.github/workflows/ci-extended.yml` runs lint, tests, image builds, SBOM, and vulnerability scans. - `.github/workflows/deploy.yml` deploys to dev on `dev` branch push; prod canary and promotion are manual dispatch. +- `.github/workflows/preview-pr.yml` deploys PR previews to `pr-` namespace, comments URLs on the PR, and auto-cleans stale previews every 6 hours (24h TTL). **Secrets** - `KUBE_CONFIG_DEV` (base64 kubeconfig for dev) @@ -101,6 +102,11 @@ Use one command entrypoints for learning lifecycle: - `make up-dev` → Apply infra + generate kubeconfig + update `KUBE_CONFIG_DEV` + trigger `Deploy Dev` - `make infra-status` → Show Terraform state and active OCI cluster/LB +Required for remote backend locking: +- `TF_CLOUD_ORGANIZATION` (local shell env) +- `TF_WORKSPACE` (recommended: `gmp-dev`) +- GitHub `dev` environment secrets: `TF_CLOUD_ORGANIZATION`, `TF_API_TOKEN` + Script entrypoint: `scripts/devctl.sh` Full quick commands: `cmd.md` diff --git a/cmd.md b/cmd.md index 916c927..3a4163c 100644 --- a/cmd.md +++ b/cmd.md @@ -4,6 +4,8 @@ ```bash chmod +x scripts/devctl.sh +export TF_CLOUD_ORGANIZATION= +export TF_WORKSPACE=gmp-dev ``` ## Team default workflow @@ -15,6 +17,12 @@ make ship-dev This pushes current code to `dev`. `Deploy Dev` workflow auto-runs on push to `dev`. +PR previews are automatic: +- Open/update PR to `dev` → deploys into namespace `pr-` +- Workflow comments preview Web/API URLs on the PR +- Closing PR deletes the preview namespace +- Scheduled cleanup removes stale preview namespaces every 6 hours (24h TTL) + ## Infra as one-command from GitHub Actions ```bash diff --git a/infra/terraform/envs/dev/versions.tf b/infra/terraform/envs/dev/versions.tf index e3cfed6..c3ae2b8 100644 --- a/infra/terraform/envs/dev/versions.tf +++ b/infra/terraform/envs/dev/versions.tf @@ -1,5 +1,6 @@ terraform { required_version = ">= 1.5.0" + cloud {} required_providers { oci = { source = "oracle/oci" diff --git a/scripts/devctl.sh b/scripts/devctl.sh index 88f9ba2..d51fd73 100755 --- a/scripts/devctl.sh +++ b/scripts/devctl.sh @@ -9,6 +9,8 @@ GH_REF="${GH_REF:-dev}" WORKFLOW_NAME="${WORKFLOW_NAME:-Deploy Dev}" INFRA_WORKFLOW_NAME="${INFRA_WORKFLOW_NAME:-Infra Dev}" INFRA_CLEANUP_WORKFLOW_NAME="${INFRA_CLEANUP_WORKFLOW_NAME:-Infra Dev Cleanup}" +TF_CLOUD_ORGANIZATION="${TF_CLOUD_ORGANIZATION:-}" +TF_WORKSPACE="${TF_WORKSPACE:-gmp-dev}" CREATE_NAMESPACE="${CREATE_NAMESPACE:-true}" KUBECONFIG_FILE="${KUBECONFIG_FILE:-/tmp/kubeconfig-dev.yaml}" @@ -44,6 +46,19 @@ tf() { terraform -chdir="$TF_DIR" "$@" } +ensure_remote_backend_env() { + if [ -z "$TF_CLOUD_ORGANIZATION" ]; then + echo "TF_CLOUD_ORGANIZATION is required for remote Terraform backend." + exit 1 + fi + export TF_CLOUD_ORGANIZATION + export TF_WORKSPACE +} + +reconcile_state() { + ensure_remote_backend_env + bash "$ROOT_DIR/scripts/tf-reconcile-dev-state.sh" +} detect_repo() { if [ -n "$GH_REPO" ]; then echo "$GH_REPO" @@ -67,22 +82,26 @@ active_cluster_id() { } cmd_init() { + ensure_remote_backend_env tf init -reconfigure -upgrade } cmd_plan() { load_tf_env + ensure_remote_backend_env tf init -reconfigure tf plan } cmd_apply() { + reconcile_state load_tf_env tf init -reconfigure tf apply -auto-approve } cmd_destroy() { + reconcile_state load_tf_env tf init -reconfigure tf destroy -auto-approve @@ -90,6 +109,7 @@ cmd_destroy() { cmd_status() { load_tf_env + ensure_remote_backend_env tf init -reconfigure >/dev/null echo "Terraform state resources:" tf state list || true @@ -209,6 +229,7 @@ Commands: ci-apply Trigger Infra Dev workflow apply ci-destroy Trigger Infra Dev workflow destroy ci-cleanup Trigger Infra Dev Cleanup workflow (state-independent) + reconcile Import existing OCI dev resources into Terraform state ship-dev Push current HEAD to dev branch up Apply infra + kubeconfig secret + deploy EOF @@ -228,6 +249,7 @@ main() { ci-apply) cmd_ci_apply ;; ci-destroy) cmd_ci_destroy ;; ci-cleanup) cmd_ci_cleanup ;; + reconcile) reconcile_state ;; ship-dev) cmd_ship_dev ;; up) cmd_up ;; *) usage; exit 1 ;; diff --git a/scripts/tf-reconcile-dev-state.sh b/scripts/tf-reconcile-dev-state.sh new file mode 100644 index 0000000..850048b --- /dev/null +++ b/scripts/tf-reconcile-dev-state.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +TF_DIR="${ROOT_DIR}/infra/terraform/envs/dev" +TF_CLOUD_ORGANIZATION="${TF_CLOUD_ORGANIZATION:-}" +TF_WORKSPACE="${TF_WORKSPACE:-gmp-dev}" + +extract_default() { + local key="$1" + awk -F'=' -v key="$key" ' + /^\[/ { in_default = ($0 == "[DEFAULT]") } + in_default && $1 ~ "^[[:space:]]*" key "[[:space:]]*$" { + val = $2 + sub(/^[[:space:]]+/, "", val) + sub(/[[:space:]]+$/, "", val) + gsub(/\r/, "", val) + gsub(/"/, "", val) + print val + exit + } + ' "$HOME/.oci/config" +} + +export TF_INPUT=0 +if [ -z "$TF_CLOUD_ORGANIZATION" ]; then + echo "TF_CLOUD_ORGANIZATION is required for remote Terraform backend." + exit 1 +fi +export TF_CLOUD_ORGANIZATION +export TF_WORKSPACE +export TF_VAR_tenancy_ocid="${TF_VAR_tenancy_ocid:-$(extract_default tenancy)}" +export TF_VAR_user_ocid="${TF_VAR_user_ocid:-$(extract_default user)}" +export TF_VAR_fingerprint="${TF_VAR_fingerprint:-$(extract_default fingerprint)}" +export TF_VAR_private_key_path="${TF_VAR_private_key_path:-$(extract_default key_file)}" +export TF_VAR_region="${TF_VAR_region:-$(extract_default region)}" +export TF_VAR_compartment_ocid="${TF_VAR_compartment_ocid:-$TF_VAR_tenancy_ocid}" +TF_VAR_private_key_path="${TF_VAR_private_key_path/#\~/$HOME}" +export TF_VAR_private_key_path + +tf() { + terraform -chdir="$TF_DIR" "$@" +} + +import_if_present() { + local address="$1" + local id="${2:-}" + if [ -z "$id" ] || [ "$id" = "null" ]; then + return + fi + if tf state show "$address" >/dev/null 2>&1; then + return + fi + tf import "$address" "$id" >/dev/null +} + +tf init -reconfigure >/dev/null + +COMP="$TF_VAR_compartment_ocid" + +ACTIVE_CLUSTER_JSON="$(oci ce cluster list --compartment-id "$COMP" --all --output json | jq -c '.data | map(select(."lifecycle-state"=="ACTIVE" and .name=="gmp-oke-dev")) | sort_by(."time-created") | last // empty')" +if [ -n "$ACTIVE_CLUSTER_JSON" ]; then + CLUSTER_ID="$(printf '%s' "$ACTIVE_CLUSTER_JSON" | jq -r '.id')" + VCN_ID="$(printf '%s' "$ACTIVE_CLUSTER_JSON" | jq -r '."vcn-id"')" +else + CLUSTER_ID="" + VCN_ID="$(oci network vcn list --compartment-id "$COMP" --all --output json | jq -r '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."display-name"=="gmp-vcn")) | sort_by(."time-created") | last | .id // empty')" +fi + +if [ -z "$VCN_ID" ]; then + echo "No candidate gmp-vcn found; nothing to reconcile." + exit 0 +fi + +IGW_ID="$(oci network internet-gateway list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-igw")) | sort_by(."time-created") | last | .id // empty')" +NAT_ID="$(oci network nat-gateway list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-nat")) | sort_by(."time-created") | last | .id // empty')" + +PUB_RT_ID="$(oci network route-table list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-public-rt")) | sort_by(."time-created") | last | .id // empty')" +PRV_RT_ID="$(oci network route-table list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-private-rt")) | sort_by(."time-created") | last | .id // empty')" + +PUB_SL_ID="$(oci network security-list list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-public-sl")) | sort_by(."time-created") | last | .id // empty')" +PRV_SL_ID="$(oci network security-list list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-private-sl")) | sort_by(."time-created") | last | .id // empty')" + +PUB_SUBNET_ID="$(oci network subnet list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-public-subnet")) | sort_by(."time-created") | last | .id // empty')" +PRV_SUBNET_ID="$(oci network subnet list --compartment-id "$COMP" --all --output json | jq -r --arg V "$VCN_ID" '.data | map(select(."lifecycle-state"=="AVAILABLE" and ."vcn-id"==$V and ."display-name"=="gmp-private-subnet")) | sort_by(."time-created") | last | .id // empty')" + +if [ -n "$CLUSTER_ID" ]; then + NODE_POOL_ID="$(oci ce node-pool list --compartment-id "$COMP" --cluster-id "$CLUSTER_ID" --all --output json | jq -r '.data | map(select(."lifecycle-state"=="ACTIVE" and .name=="gmp-oke-dev-pool")) | sort_by(."time-created") | last | .id // empty')" +else + NODE_POOL_ID="" +fi + +import_if_present "module.network.oci_core_vcn.this" "$VCN_ID" +import_if_present "module.network.oci_core_internet_gateway.igw" "$IGW_ID" +import_if_present "module.network.oci_core_nat_gateway.nat" "$NAT_ID" +import_if_present "module.network.oci_core_route_table.public_rt" "$PUB_RT_ID" +import_if_present "module.network.oci_core_route_table.private_rt" "$PRV_RT_ID" +import_if_present "module.network.oci_core_security_list.public_sl" "$PUB_SL_ID" +import_if_present "module.network.oci_core_security_list.private_sl" "$PRV_SL_ID" +import_if_present "module.network.oci_core_subnet.public" "$PUB_SUBNET_ID" +import_if_present "module.network.oci_core_subnet.private" "$PRV_SUBNET_ID" +import_if_present "module.oke.oci_containerengine_cluster.this" "$CLUSTER_ID" +import_if_present "module.oke.oci_containerengine_node_pool.pool" "$NODE_POOL_ID" + +echo "Reconcile complete." From 10ace22333754d01097c7b9158dfa19515106f6c Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 04:28:34 +0200 Subject: [PATCH 2/9] Gate PR previews by label and document trigger --- .github/workflows/preview-pr.yml | 4 ++-- README.md | 2 +- cmd.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/preview-pr.yml b/.github/workflows/preview-pr.yml index b6cba53..1c8cb03 100644 --- a/.github/workflows/preview-pr.yml +++ b/.github/workflows/preview-pr.yml @@ -2,7 +2,7 @@ name: Preview PR on: pull_request: - types: [opened, synchronize, reopened, closed] + types: [opened, synchronize, reopened, labeled, unlabeled, closed] branches: [dev] schedule: - cron: "0 */6 * * *" @@ -21,7 +21,7 @@ concurrency: jobs: deploy-preview: - if: ${{ github.event.action != 'closed' && github.event.pull_request.head.repo.full_name == github.repository }} + if: ${{ github.event.action != 'closed' && github.event.pull_request.head.repo.full_name == github.repository && contains(github.event.pull_request.labels.*.name, 'preview') }} uses: ./.github/workflows/reusable-cicd.yml with: environment_name: dev diff --git a/README.md b/README.md index a964284..b38d5f8 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ kubectl apply -k platform/k8s/base - `.github/workflows/ci.yml` runs service tests. - `.github/workflows/ci-extended.yml` runs lint, tests, image builds, SBOM, and vulnerability scans. - `.github/workflows/deploy.yml` deploys to dev on `dev` branch push; prod canary and promotion are manual dispatch. -- `.github/workflows/preview-pr.yml` deploys PR previews to `pr-` namespace, comments URLs on the PR, and auto-cleans stale previews every 6 hours (24h TTL). +- `.github/workflows/preview-pr.yml` deploys PR previews to `pr-` namespace when PR has `preview` label, comments URLs on the PR, and auto-cleans stale previews every 6 hours (24h TTL). **Secrets** - `KUBE_CONFIG_DEV` (base64 kubeconfig for dev) diff --git a/cmd.md b/cmd.md index 3a4163c..327876e 100644 --- a/cmd.md +++ b/cmd.md @@ -18,7 +18,7 @@ This pushes current code to `dev`. `Deploy Dev` workflow auto-runs on push to `dev`. PR previews are automatic: -- Open/update PR to `dev` → deploys into namespace `pr-` +- Add `preview` label on PR to `dev` → deploys into namespace `pr-` - Workflow comments preview Web/API URLs on the PR - Closing PR deletes the preview namespace - Scheduled cleanup removes stale preview namespaces every 6 hours (24h TTL) From 733f1b275ef09172a3ca5ab5bbcbdcd45c41b495 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 05:27:15 +0200 Subject: [PATCH 3/9] Fix preview deploy namespace by setting kustomize namespace from workflow input --- .github/workflows/reusable-cicd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index 8985393..bde8a8f 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -485,6 +485,7 @@ jobs: - name: Prepare kustomize images run: | cd platform/k8s/overlays/${{ inputs.overlay }} + kustomize edit set namespace ${{ inputs.namespace }} for img in api-gateway web identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics; do kustomize edit set image $img=${{ env.REGISTRY }}/${{ env.REPO_LOWER }}/$img:${{ needs.meta.outputs.sha }} done From 232cc91c389203803a71d2b5cb0451f1fad44bb9 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 05:43:13 +0200 Subject: [PATCH 4/9] Improve preview rollout reliability and failure diagnostics --- .github/workflows/reusable-cicd.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index bde8a8f..1d6669b 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -552,8 +552,22 @@ jobs: - name: Wait for rollouts run: | - for d in api-gateway web identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics; do - kubectl rollout status deployment/$d -n ${{ inputs.namespace }} --timeout=180s || exit 1 + NS=${{ inputs.namespace }} + kubectl rollout status statefulset/postgres -n "$NS" --timeout=600s + kubectl rollout status statefulset/kafka -n "$NS" --timeout=600s + kubectl rollout status statefulset/opensearch -n "$NS" --timeout=600s + for d in redis minio kafka-connect; do + kubectl rollout status deployment/$d -n "$NS" --timeout=600s + done + for d in identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics api-gateway web transformer-api; do + if ! kubectl rollout status deployment/$d -n "$NS" --timeout=600s; then + kubectl get pods -n "$NS" -o wide || true + kubectl get events -n "$NS" --sort-by='.lastTimestamp' | tail -n 120 || true + kubectl describe deployment/$d -n "$NS" || true + kubectl describe pods -n "$NS" -l app=$d || true + kubectl logs -n "$NS" -l app=$d --all-containers --tail=200 || true + exit 1 + fi done # ───────────────────────────────────────────────────────────── From 2df89627a9a96528adbf0ee8d5e59999fb33ec5d Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 06:11:24 +0200 Subject: [PATCH 5/9] Stabilize preview kubeconfig source and verify kustomize namespace render --- .github/workflows/reusable-cicd.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index 1d6669b..d385982 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -366,8 +366,12 @@ jobs: oci ce cluster create-kubeconfig --cluster-id "$CLUSTER_OCID" --file "$CONFIG_FILE" --region "${OCI_CLI_REGION}" --token-version 2.0.0 --kube-endpoint PUBLIC_ENDPOINT } + PREVIEW_PR="${{ inputs.preview_pr_number }}" KCFG_OK=false - if [ -n "${KUBE_CONFIG_DEV}" ]; then + if [ -n "$PREVIEW_PR" ]; then + ensure_kubeconfig_from_oci + KCFG_OK=true + elif [ -n "${KUBE_CONFIG_DEV}" ]; then printf '%s' "${KUBE_CONFIG_DEV}" > "$RAW_FILE" if grep -q "apiVersion:" "$RAW_FILE"; then cp "$RAW_FILE" "$CONFIG_FILE" @@ -490,6 +494,11 @@ jobs: kustomize edit set image $img=${{ env.REGISTRY }}/${{ env.REPO_LOWER }}/$img:${{ needs.meta.outputs.sha }} done + - name: Verify rendered namespace + run: | + cat platform/k8s/overlays/${{ inputs.overlay }}/kustomization.yaml + kustomize build platform/k8s/overlays/${{ inputs.overlay }} | grep -m 40 '^ namespace:' + - name: Apply dev overlay run: kubectl apply -k platform/k8s/overlays/${{ inputs.overlay }} From 6cfdb775f9283621fb277830b34c4fc754614fa9 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 06:30:39 +0200 Subject: [PATCH 6/9] Add rollout diagnostics for infra and app resources in preview deploy --- .github/workflows/reusable-cicd.yml | 33 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index d385982..3548ebf 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -562,21 +562,30 @@ jobs: - name: Wait for rollouts run: | NS=${{ inputs.namespace }} - kubectl rollout status statefulset/postgres -n "$NS" --timeout=600s - kubectl rollout status statefulset/kafka -n "$NS" --timeout=600s - kubectl rollout status statefulset/opensearch -n "$NS" --timeout=600s + diagnose_ns() { + kubectl get pods -n "$NS" -o wide || true + kubectl get events -n "$NS" --sort-by='.lastTimestamp' | tail -n 150 || true + } + check_rollout() { + local kind="$1" + local name="$2" + echo "Checking rollout: ${kind}/${name}" + if ! kubectl rollout status "${kind}/${name}" -n "$NS" --timeout=600s; then + kubectl describe "${kind}/${name}" -n "$NS" || true + kubectl describe pods -n "$NS" -l app="$name" || true + kubectl logs -n "$NS" -l app="$name" --all-containers --tail=200 || true + diagnose_ns + exit 1 + fi + } + check_rollout statefulset postgres + check_rollout statefulset kafka + check_rollout statefulset opensearch for d in redis minio kafka-connect; do - kubectl rollout status deployment/$d -n "$NS" --timeout=600s + check_rollout deployment "$d" done for d in identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics api-gateway web transformer-api; do - if ! kubectl rollout status deployment/$d -n "$NS" --timeout=600s; then - kubectl get pods -n "$NS" -o wide || true - kubectl get events -n "$NS" --sort-by='.lastTimestamp' | tail -n 120 || true - kubectl describe deployment/$d -n "$NS" || true - kubectl describe pods -n "$NS" -l app=$d || true - kubectl logs -n "$NS" -l app=$d --all-containers --tail=200 || true - exit 1 - fi + check_rollout deployment "$d" done # ───────────────────────────────────────────────────────────── From 98ce3315593b0828c3986d9d30b144c684a9faf1 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 06:49:14 +0200 Subject: [PATCH 7/9] Fix preview infra blockers: OCI volume attach and Docker Hub limits --- .github/workflows/reusable-cicd.yml | 18 ++++++++++++++++++ infra/terraform/modules/oke/main.tf | 2 +- platform/k8s/base/minio-bucket-job.yaml | 2 +- platform/k8s/base/minio-deployment.yaml | 2 +- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index 3548ebf..55cfb9f 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -469,6 +469,20 @@ jobs: preview.gmp/ttl-hours=24 \ --overwrite=true + - name: Ensure preview data secrets + if: ${{ inputs.preview_pr_number != '' }} + run: | + kubectl create secret generic postgres-credentials \ + -n ${{ inputs.namespace }} \ + --from-literal=username=market \ + --from-literal=password=marketpass \ + --dry-run=client -o yaml | kubectl apply -f - + kubectl create secret generic minio-credentials \ + -n ${{ inputs.namespace }} \ + --from-literal=accesskey=admin \ + --from-literal=secretkey=adminadmin \ + --dry-run=client -o yaml | kubectl apply -f - + - name: Configure GHCR pull secret env: GHCR_PASSWORD: ${{ secrets.GHCR_TOKEN || secrets.GITHUB_TOKEN }} @@ -502,6 +516,10 @@ jobs: - name: Apply dev overlay run: kubectl apply -k platform/k8s/overlays/${{ inputs.overlay }} + - name: Limit preview load balancers + if: ${{ inputs.preview_pr_number != '' }} + run: kubectl patch svc web -n ${{ inputs.namespace }} -p '{"spec":{"type":"ClusterIP"}}' || true + - name: Show external services run: | kubectl get svc web api-gateway -n ${{ inputs.namespace }} -o wide || true diff --git a/infra/terraform/modules/oke/main.tf b/infra/terraform/modules/oke/main.tf index 4dc599e..cc8e5d9 100644 --- a/infra/terraform/modules/oke/main.tf +++ b/infra/terraform/modules/oke/main.tf @@ -30,7 +30,7 @@ resource "oci_containerengine_node_pool" "pool" { availability_domain = var.availability_domain subnet_id = var.node_subnet_id } - is_pv_encryption_in_transit_enabled = true + is_pv_encryption_in_transit_enabled = false } node_shape = var.node_shape node_shape_config { diff --git a/platform/k8s/base/minio-bucket-job.yaml b/platform/k8s/base/minio-bucket-job.yaml index 9b2e08d..118ea88 100644 --- a/platform/k8s/base/minio-bucket-job.yaml +++ b/platform/k8s/base/minio-bucket-job.yaml @@ -13,7 +13,7 @@ spec: restartPolicy: OnFailure containers: - name: mc - image: docker.io/minio/mc:latest + image: quay.io/minio/mc:RELEASE.2024-11-21T17-21-54Z env: - name: MINIO_ACCESS_KEY valueFrom: diff --git a/platform/k8s/base/minio-deployment.yaml b/platform/k8s/base/minio-deployment.yaml index 34083c5..f9dc797 100644 --- a/platform/k8s/base/minio-deployment.yaml +++ b/platform/k8s/base/minio-deployment.yaml @@ -19,7 +19,7 @@ spec: spec: containers: - name: minio - image: docker.io/minio/minio:latest + image: quay.io/minio/minio:RELEASE.2024-12-13T22-19-12Z args: - server - /data From bd37f0a3ad66d9693454ad1dd9c0ae02f1bb17e8 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 06:50:54 +0200 Subject: [PATCH 8/9] Skip data-plane rollout gate for preview namespaces --- .github/workflows/reusable-cicd.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index 55cfb9f..84c1899 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -580,6 +580,7 @@ jobs: - name: Wait for rollouts run: | NS=${{ inputs.namespace }} + PREVIEW_PR="${{ inputs.preview_pr_number }}" diagnose_ns() { kubectl get pods -n "$NS" -o wide || true kubectl get events -n "$NS" --sort-by='.lastTimestamp' | tail -n 150 || true @@ -596,12 +597,14 @@ jobs: exit 1 fi } - check_rollout statefulset postgres - check_rollout statefulset kafka - check_rollout statefulset opensearch - for d in redis minio kafka-connect; do - check_rollout deployment "$d" - done + if [ -z "$PREVIEW_PR" ]; then + check_rollout statefulset postgres + check_rollout statefulset kafka + check_rollout statefulset opensearch + for d in redis minio kafka-connect; do + check_rollout deployment "$d" + done + fi for d in identity seller catalog search pricing inventory cart checkout payments orders fulfillment notifications reviews analytics api-gateway web transformer-api; do check_rollout deployment "$d" done From 8870383d13a014dcf70181df932f3ccb9785c1a6 Mon Sep 17 00:00:00 2001 From: karimfin Date: Sun, 8 Mar 2026 06:57:20 +0200 Subject: [PATCH 9/9] Delete immutable bootstrap jobs before preview apply --- .github/workflows/reusable-cicd.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/reusable-cicd.yml b/.github/workflows/reusable-cicd.yml index 84c1899..5ce7374 100644 --- a/.github/workflows/reusable-cicd.yml +++ b/.github/workflows/reusable-cicd.yml @@ -513,6 +513,16 @@ jobs: cat platform/k8s/overlays/${{ inputs.overlay }}/kustomization.yaml kustomize build platform/k8s/overlays/${{ inputs.overlay }} | grep -m 40 '^ namespace:' + - name: Reset immutable bootstrap jobs + run: | + kubectl delete job \ + minio-make-bucket \ + s3-sink-register \ + debezium-register \ + kafka-topics-init \ + -n ${{ inputs.namespace }} \ + --ignore-not-found=true + - name: Apply dev overlay run: kubectl apply -k platform/k8s/overlays/${{ inputs.overlay }}