diff --git a/.github/scripts/dsh-pod-test.sh b/.github/scripts/dsh-pod-test.sh
index 7fadce89..26202986 100644
--- a/.github/scripts/dsh-pod-test.sh
+++ b/.github/scripts/dsh-pod-test.sh
@@ -1,4 +1,4 @@
-echo "Checking pods in namespace: devzero"
+echo "Checking pods in namespace: devzero"
 
 PODS=$(kubectl get pods -n devzero --no-headers)
 if [[ -z "$PODS" ]]; then
diff --git a/.github/workflows/dsh-testing.yaml b/.github/workflows/dsh-testing.yaml
index adf693b6..dd0b041d 100644
--- a/.github/workflows/dsh-testing.yaml
+++ b/.github/workflows/dsh-testing.yaml
@@ -47,17 +47,30 @@ jobs:
         uses: hashicorp/setup-terraform@v3
         with:
           terraform_version: 1.5.7
+
+      - name: Install yq
+        run: |
+          sudo wget https://github.com/mikefarah/yq/releases/download/v4.15.1/yq_linux_amd64 -O /usr/local/bin/yq
+          sudo chmod +x /usr/local/bin/yq
+
+      - name: Add SHORT_SHA Environment Variable
+        id: short-sha
+        shell: bash
+        run: echo "SHORT_SHA=`git rev-parse --short HEAD`" >> $GITHUB_ENV
+
+      - name: Generate unique job identifier
+        id: job-identifier
+        shell: bash
+        run: echo "JOB_IDENTIFIER=gh-ci-${{ github.event.inputs.base_image }}-${SHORT_SHA}" >> $GITHUB_ENV
 
       - name: Add Backend Override (Base Cluster)
-        env:
-          BASE_IMAGE: ${{ github.event.inputs.base_image }}
         run: |
           cd self-hosted/terraform/examples/aws/base-cluster
           cat <<EOT > backend_override.tf
           terraform {
             backend "s3" {
               bucket = "dsh-tf-state"
-              key    = "${BASE_IMAGE}/base-cluster/terraform.tfstate"
+              key    = "${JOB_IDENTIFIER}/base-cluster/terraform.tfstate"
               region = "us-west-1"
             }
           }
@@ -68,36 +81,28 @@ jobs:
           cd self-hosted/terraform/examples/aws/base-cluster
           terraform init
           if [ "${{ github.event.inputs.base_image }}" == "al2023" ]; then
-            terraform apply -auto-approve -var="cluster_name=dz-dsh-testing-al2023"
+            terraform apply -auto-approve -var="cluster_name=$JOB_IDENTIFIER"
           else
-            terraform apply -auto-approve -var="cluster_name=dz-dsh-testing-ubuntu" -var="base_image=ubuntu"
+            terraform apply -auto-approve -var="cluster_name=$JOB_IDENTIFIER" -var="base_image=ubuntu"
           fi
-          echo "CLUSTER_NAME=$(terraform output -raw cluster_name)" >> $GITHUB_ENV
-          echo "VPC_ID=$(terraform output -raw vpc_id)" >> $GITHUB_ENV
-          echo "PUBLIC_SUBNET_IDS=$(terraform output -json public_subnet_ids | jq -c .)" >> $GITHUB_ENV
-          echo "PRIVATE_SUBNET_IDS=$(terraform output -json private_subnet_ids | jq -c .)" >> $GITHUB_ENV
 
       - name: Update Cluster-Extensions tfvars
         run: |
           cat <<EOT > self-hosted/terraform/examples/aws/cluster-extensions/terraform.tfvars
           region = "us-west-1"
-          vpc_id = "$VPC_ID"
-          public_subnet_ids = $PUBLIC_SUBNET_IDS
-          private_subnet_ids = $PRIVATE_SUBNET_IDS
           enable_cluster_autoscaler = false
-          cluster_name = "$CLUSTER_NAME"
+          cluster_name = "$JOB_IDENTIFIER"
+          domain = "$JOB_IDENTIFIER.ci.selfzero.net"
           EOT
 
       - name: Add Backend Override (Cluster Extensions)
-        env:
-          BASE_IMAGE: ${{ github.event.inputs.base_image }}
         run: |
           cd self-hosted/terraform/examples/aws/cluster-extensions
           cat <<EOT > backend_override.tf
           terraform {
             backend "s3" {
               bucket = "dsh-tf-state"
-              key    = "${BASE_IMAGE}/cluster-extensions/terraform.tfstate"
+              key    = "${JOB_IDENTIFIER}/cluster-extensions/terraform.tfstate"
               region = "us-west-1"
             }
           }
@@ -111,39 +116,32 @@ jobs:
 
       - name: Configure Kubernetes Access
         run: |
-          aws eks update-kubeconfig --region us-west-1 --name $CLUSTER_NAME
+          aws eks update-kubeconfig --region us-west-1 --name $JOB_IDENTIFIER
 
-      - name: Deploy Control Plane Dependencies
+      - name: Deploy Control Plane Dependencies (and modify domains)
         run: |
           cd self-hosted/charts/dz-control-plane-deps
+          find values -type f -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find values -name "*.bak" -delete
           make install
 
-      - name: Install yq
-        run: |
-          sudo wget https://github.com/mikefarah/yq/releases/download/v4.15.1/yq_linux_amd64 -O /usr/local/bin/yq
-          sudo chmod +x /usr/local/bin/yq
-
-      - name: Update values.yaml for Control Plane
+      - name: Update values.yaml for dz-control-plane
         env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
-          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
           BACKEND_LICENSE_KEY: ${{ secrets.BACKEND_LICENSE_KEY }}
         run: |
-          yq e -i '
-            .credentials.registry = "docker.io" |
-            .credentials.username = strenv(DOCKERHUB_USERNAME) |
-            .credentials.password = strenv(DOCKERHUB_TOKEN) |
-            .credentials.email = "garvit@devzero.io" |
-            .backend.licenseKey = strenv(BACKEND_LICENSE_KEY)
-          ' self-hosted/charts/dz-control-plane/values.yaml
+          # setting credentials.enable to false since we will explicitly feed the Docker Hub creds to the Kubernetes API
+          # also setting image.pullSecrets to empty to make sure that the deployments don't try to pull their relevant OCI images from this registry
+          # backend license key is ... needed
+
+          yq e '.credentials.enable = false | .backend.licenseKey = strenv(BACKEND_LICENSE_KEY) | .image.pullSecrets = []' -i self-hosted/charts/dz-control-plane/values.yaml
 
-      - name: Deploy DevZero Control Plane
+      - name: Deploy DevZero Control Plane (after configuring Kubernetes to use Docker Hub creds and patching all the deployments to point to the right domain)
         env:
           DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
           DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
           cd self-hosted/charts/dz-control-plane
           make add-docker-creds
+          find . -name "values.yaml" -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find . -name "values.yaml.bak" -delete
           make install
 
       - name: Validate Control Plane
@@ -154,118 +152,64 @@ jobs:
           self-hosted/.github/scripts/dsh-pod-test.sh
           echo -e "\nIngress in namespace devzero:"
           kubectl get ingress -n devzero
-
-      - name: Update values.yaml for Data Plane
-        run: |
-          cat <<EOT > self-hosted/charts/dz-data-plane/values.yaml
-          ## @section Devzero
-          devzero:
-            teamId: ""
-            region: "us-west-1"
-            vault:
-              server: "https://csi.devzero.io"
-          ## @section Node Labeler
-          nodeLabeler:
-            enabled: true
-          ## @section Credentials Configuration
-          credentials:
-            registry: "docker.io"
-            username: "${{ secrets.DOCKERHUB_USERNAME }}"
-            password: "${{ secrets.DOCKERHUB_TOKEN }}"
-            email: "garvit@devzero.io"
-          EOT
-
+
       - name: Deploy Data Plane Dependencies
         run: |
           cd self-hosted/charts/dz-data-plane-deps
+          find values -type f -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find values -name "*.bak" -delete
           make install
 
       - name: Deploy DevZero Data Plane
-        env:
-          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
-          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
         run: |
           cd self-hosted/charts/dz-data-plane
+          find . -name "values.yaml" -exec sed -i'.bak' "s/example\.com/$JOB_IDENTIFIER\.ci\.selfzero\.net/g" {} \; && find . -name "values.yaml.bak" -delete
           make install
 
       - name: Validate Data Plane
         run: |
           kubectl get pods -n devzero-self-hosted
           kubectl get ingress -n devzero-self-hosted
-
-      - name: Run Destroy cluster-extensions
+
+      - name: '[helm] Destroy data-plane'
         if: always()
         run: |
-          for lb in $(kubectl get svc -A -o json | jq -r '.items[] | select(.spec.type=="LoadBalancer") | "\(.metadata.namespace)|\(.metadata.name)"'); do
-            [[ -z "$lb" ]] && continue
-            NS=$(echo "$lb" | awk -F '|' '{print $1}')
-            SVC=$(echo "$lb" | awk -F '|' '{print $2}')
-            [[ -z "$NS" || -z "$SVC" ]] && continue
-            echo "Deleting Kubernetes LoadBalancer service: $SVC in namespace: $NS"
-            kubectl delete svc "$SVC" -n "$NS" || true
-          done
+          cd self-hosted/charts/dz-data-plane
+          make delete
+
+      - name: '[helm] Destroy data-plane-deps'
+        if: always()
+        run: |
+          cd self-hosted/charts/dz-data-plane-deps
+          make delete
 
+      - name: '[helm] Destroy control-plane'
+        if: always()
+        run: |
+          cd self-hosted/charts/dz-control-plane
+          make delete
+
+      - name: '[helm] Destroy control-plane-deps'
+        if: always()
+        run: |
+          cd self-hosted/charts/dz-control-plane-deps
+          make delete
+
+      - name: '[terraform] Destroy cluster-extensions'
+        if: always()
+        run: |
           cd self-hosted/terraform/examples/aws/cluster-extensions
           terraform destroy -auto-approve
-
-      - name: Destroy AWS Dependencies
+
+      - name: '[terraform] Destroy base-cluster'
         if: always()
         run: |
           cd self-hosted/terraform/examples/aws/base-cluster
-          terraform destroy -target=module.kata_node_group -auto-approve
-          terraform destroy -target=module.eks -auto-approve
-
-          IGW_ID=$(aws ec2 describe-internet-gateways --filters "Name=attachment.vpc-id,Values=$VPC_ID" --query 'InternetGateways[0].InternetGatewayId' --output text)
-          if [ "$IGW_ID" != "None" ]; then
-            for eip in $(aws ec2 describe-addresses --filters "Name=domain,Values=vpc" --query 'Addresses[*].AllocationId' --output text); do
-              aws ec2 release-address --allocation-id $eip || true
-            done
-
-            aws ec2 detach-internet-gateway --internet-gateway-id $IGW_ID --vpc-id $VPC_ID || true
-            aws ec2 delete-internet-gateway --internet-gateway-id $IGW_ID || true
-          fi
-
-          for nat in $(aws ec2 describe-nat-gateways --filter "Name=vpc-id,Values=$VPC_ID" --query 'NatGateways[*].NatGatewayId' --output text); do
-            ALLOC_ID=$(aws ec2 describe-nat-gateways --nat-gateway-ids $nat --query 'NatGateways[0].NatGatewayAddresses[0].AllocationId' --output text)
-            aws ec2 delete-nat-gateway --nat-gateway-id $nat || true
-            aws ec2 release-address --allocation-id $ALLOC_ID || true
-          done
-
-          for instance in $(aws ec2 describe-instances --filters "Name=vpc-id,Values=$VPC_ID" --query 'Reservations[*].Instances[*].InstanceId' --output text); do
-            aws ec2 terminate-instances --instance-ids $instance || true
-            aws ec2 wait instance-terminated --instance-ids $instance || true
-          done
-
-          for rtb in $(aws ec2 describe-route-tables --filters "Name=vpc-id,Values=$VPC_ID" --query 'RouteTables[*].RouteTableId' --output text); do
-            for assoc in $(aws ec2 describe-route-tables --route-table-ids $rtb --query 'RouteTables[0].Associations[*].RouteTableAssociationId' --output text); do
-              aws ec2 disassociate-route-table --association-id $assoc || true
-            done
-            aws ec2 delete-route-table --route-table-id $rtb || true
-          done
-
-          for lb in $(aws elbv2 describe-load-balancers --query 'LoadBalancers[*].LoadBalancerArn' --output text); do
-            aws elbv2 delete-load-balancer --load-balancer-arn $lb || true
-          done
-
-          for subnet in $(aws ec2 describe-subnets --filters "Name=vpc-id,Values=$VPC_ID" --query 'Subnets[*].SubnetId' --output text); do
-            aws ec2 delete-subnet --subnet-id $subnet || true
-          done
-
-          for sg in $(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$VPC_ID" --query 'SecurityGroups[?GroupName!=`default`].GroupId' --output text); do
-            aws ec2 delete-security-group --group-id $sg || true
-          done
-
-          for eni in $(aws ec2 describe-network-interfaces --filters "Name=vpc-id,Values=$VPC_ID" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
-            aws ec2 delete-network-interface --network-interface-id $eni || true
-          done
-
-      - name: Run Destroy base-cluster
+          terraform destroy -auto-approve
+
+      - name: '[aws-cli] clean up volumes explicitly'
         if: always()
         run: |
-          cd self-hosted/terraform/examples/aws/base-cluster
-          terraform destroy -auto-approve
-
-          for volume_id in $(aws ec2 describe-volumes --filters "Name=status,Values=available" "Name=tag:Name,Values=${CLUSTER_NAME}*" --query "Volumes[].VolumeId" --output text); do
+          for volume_id in $(aws ec2 describe-volumes --filters "Name=status,Values=available" "Name=tag:Name,Values=${JOB_IDENTIFIER}*" --query "Volumes[].VolumeId" --output text); do
             echo "Deleting volume: $volume_id"
             aws ec2 delete-volume --volume-id $volume_id
           done