Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 37 additions & 26 deletions src/jobs/e2e/collect-e2e-logs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ steps:
echo "Contents of << parameters.env-name-path >>:"
cat << parameters.env-name-path >>
if [ -f << parameters.env-name-path >> ] && [ "$(cat << parameters.env-name-path >> )" != "null" ]; then
DEV_ENV_NAME=$(cat << parameters.env-name-path >> )
else
DEV_ENV_NAME=<< parameters.e2e-env-name >>
fi
DEV_ENV_NAME=$(cat << parameters.env-name-path >> )
else
DEV_ENV_NAME=<< parameters.e2e-env-name >>
fi
# Gather summary state of all pods in the namespace
echo "Gathering Kubernetes state before run for env $DEV_ENV_NAME"
kubectl get pods -n $DEV_ENV_NAME >> "${LOG_DIR:?}/${KUBE_STATE_DIR:?}/pods-summary-state-before-run.log"
Expand All @@ -59,8 +59,8 @@ steps:
background: true
command: |
function capture_logs() {
if [ -f << parameters.env-name-path >> ] && [ "$(cat << parameters.env-name-path >> )" != "null" ]; then
DEV_ENV_NAME=$(cat << parameters.env-name-path >> )
if [ -f << parameters.env-name-path >> ] && [ "$(cat << parameters.env-name-path >>)" != "null" ]; then
DEV_ENV_NAME=$(cat << parameters.env-name-path >>)
else
DEV_ENV_NAME=<< parameters.e2e-env-name >>
fi
Expand All @@ -71,49 +71,60 @@ steps:
for ((i = 0; i < ${#components[@]} - 1; i++)); do
component=${components[$i]}
echo "Capturing logs for component $component"
stern -n "${DEV_ENV_NAME:?}" -l "app.kubernetes.io/name=$component" >>"${LOG_DIR:?}/${COMPONENT_LOG_DIR:?}/$component.log" --since 5m &
stern -n "${DEV_ENV_NAME:?}" -l "app.kubernetes.io/name=$component" --since 5m >>"${LOG_DIR:?}/${COMPONENT_LOG_DIR:?}/$component.log" &
done
# Handle the last component separately to introduce blocking. If every component's log collection ran as a non-blocking task,
# the CircleCI step would terminate. Keeping the last component blocking ensures the collect-logs step continues executing.
last_component=${components[${#components[@]}-1]}
last_component=${components[${#components[@]} - 1]}
echo "Capturing logs for last component $last_component"
stern -n "${DEV_ENV_NAME:?}" -l "app.kubernetes.io/name=$last_component" >>"${LOG_DIR:?}/${COMPONENT_LOG_DIR:?}/$last_component.log" --since 5m
stern -n "${DEV_ENV_NAME:?}" -l "app.kubernetes.io/name=$last_component" --since 5m >>"${LOG_DIR:?}/${COMPONENT_LOG_DIR:?}/$last_component.log"
}
capture_logs

- run:
name: Wait for smoke test jobs to complete
command: |
# Loop through to check if each of the jobs have been completed
for job in $(echo "<< parameters.smoke-test-jobs >>" | tr "," "\n"); do
while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CIRCLE_TOKEN"| jq -r ".items[]|select(.name == \"$job\")|.status" | grep -c "running") -gt 0 ]]
do
sleep 5
done
JOBS=$(paste -sd '|' \<<<"<< parameters.smoke-test-jobs >>")
# Report whether any of the watched smoke-test jobs is still running.
# Globals:
#   JOBS                (read)    watched job names, separated by "|" or ","
#   CIRCLE_WORKFLOW_ID  (read)    workflow whose jobs are listed
#   CIRCLE_TOKEN        (read)    token sent in the Circle-Token header
#   DATE, FILTERED_JOBS (written) timestamp and matching job lines of this poll
# Outputs:
#   The timestamp followed by one compact JSON line per watched job (stdout).
# Returns:
#   0 while at least one watched job has status "running"; non-zero otherwise
#   (this includes a failed API call or no job matching JOBS, so the caller's
#   wait loop stops in those cases as well).
function running_jobs() {
  # Accept a comma-separated list too: turn it into a regex alternation.
  local job_pattern=${JOBS//,/|}

  DATE=$(date -u)
  # jq -c prints compact JSON ("name":"x"), so the patterns must not require
  # a space after the colon; [[:space:]]* tolerates either layout.
  FILTERED_JOBS=$(
    curl "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" \
      --silent \
      --location \
      --request GET \
      --header "Circle-Token: $CIRCLE_TOKEN" \
      | jq -cr ".items[] | { status, started_at, name }" \
      | grep -E "\"name\":[[:space:]]*\"(${job_pattern})\""
  )

  # printf, because bash's echo would print the "\n" literally.
  printf '%s -\n%s\n' "$DATE" "$FILTERED_JOBS"

  # The status of this pipeline is the function's return status. `return`
  # only accepts a number, so it cannot be handed a command to run.
  # A pipe is used instead of a here-string so nothing here needs the
  # CircleCI "\<<" escape.
  printf '%s\n' "$FILTERED_JOBS" | grep -qE '"status":[[:space:]]*"running"'
}
while running_jobs ; do
sleep 5
done

- run: echo "All required jobs have now completed"
- run:
name: Gather Kubernetes state after run
environment:
LOG_DIR: *log_dir
KUBE_STATE_DIR: *kube_state_dir
command: |
if [ -f << parameters.env-name-path >> ] && [ "$(cat << parameters.env-name-path >> )" != "null" ]; then
DEV_ENV_NAME=$(cat << parameters.env-name-path >> )
else
DEV_ENV_NAME=<< parameters.e2e-env-name >>
fi
if [ -f << parameters.env-name-path >> ] && [ "$(cat << parameters.env-name-path >>)" != "null" ]; then
DEV_ENV_NAME=$(cat << parameters.env-name-path >>)
else
DEV_ENV_NAME=<< parameters.e2e-env-name >>
fi
# Read components into an array directly from the command output
components=($(vfcli component list -n "${DEV_ENV_NAME:?}" | awk 'NR>3 {print $1}'))
# Gather summary state of all pods in the namespace
kubectl get pods -n $DEV_ENV_NAME >> "${LOG_DIR:?}/${KUBE_STATE_DIR:?}/pods-summary-state-after-run.log"
kubectl get pods -n $DEV_ENV_NAME >>"${LOG_DIR:?}/${KUBE_STATE_DIR:?}/pods-summary-state-after-run.log"
# Gather detailed state of all pods in the namespace.
for ((i = 0; i < ${#components[@]}; i++)); do
component=${components[$i]}
for component in "${components[@]}"; do
echo "Capturing logs for component $component"
kubectl describe pod $component -n $DEV_ENV_NAME >> "${LOG_DIR:?}/${KUBE_STATE_DIR:?}/${component}-k8-state.log" &
done
wait
kubectl describe pod $component -n $DEV_ENV_NAME >>"${LOG_DIR:?}/${KUBE_STATE_DIR:?}/${component}-k8-state.log" &
done
wait
- run:
name: Gather Kubernetes events
environment:
Expand Down
75 changes: 48 additions & 27 deletions src/jobs/e2e/waiter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,79 @@ steps:
- run:
name: Check if all jobs have completed
command: |
# Global variable to store the status code
check_jobs_status=0
## Function to check if the API request was successful and process the response
check_jobs() {
date -u
echo "Fetching jobs from CircleCI API..."
response=$(curl --silent --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CIRCLECI_API_TOKEN")
response=$(curl "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" \
--silent \
--location \
--request GET \
--header "Circle-Token: $CIRCLECI_API_TOKEN")

if [[ $? -ne 0 ]]; then
echo "Error: Failed to fetch jobs from CircleCI API"
check_jobs_status=1
return
return 1
fi
echo "API response received:" >&2
echo "$response" >&2
if echo "$response" | jq -e . >/dev/null 2>&1; then
statuses=$(echo "$response" | jq -r '.items[] | select(.name != "vfcommon/waiter") | .status')
if [[ -z "$statuses" ]]; then
echo "No job statuses found or unexpected response format"
check_jobs_status=1
return
fi
echo "Job statuses extracted:"
echo "$statuses"
else

echo "API response received" >&2
echo "$response" >responses.log

if ! jq -e . >/dev/null 2>&1 \<<<"$response" ; then
echo "Error: Malformed JSON response"
check_jobs_status=1
return
cat responses.log
return 1
fi

STATUS_FILTER=$(cat \<<EOF
.items
| sort_by(.status, .started_at, .name)[]
| select(.name != "vfcommon/waiter")
| { status, started_at, name }
EOF
)

statuses=$(jq -cr "$STATUS_FILTER" \<<<"$response" )
if [[ -z "$statuses" ]]; then
echo "No job statuses found or unexpected response format"
return 1
fi

echo "Job statuses extracted:"
echo "$statuses"

## Check if any job is still running
if echo "$statuses" | grep -q "running"; then
## We likely need to consider blocked too
if grep -q "running" \<<<"$statuses" ; then
echo "There are still running jobs."
check_jobs_status=0 # jobs are still running
# jobs are still running
return 0
else
echo "No jobs are running."
check_jobs_status=2 # all jobs are completed successfully
# nothing is running any more (this alone does not prove the jobs succeeded)
return 2
fi
}

## The waiter job keeps looping through to check if all running jobs have been completed
while true; do
echo "Running check_jobs function..."

set +e
check_jobs
status=$check_jobs_status
status=$?
set -e

echo "Status returned from check_jobs: $status"
if [[ $status -eq 1 ]]; then
echo "Retrying in << parameters.wait-duration >> seconds due to error..."
sleep << parameters.wait-duration >>
continue
elif [[ $status -eq 2 ]]; then
echo "All jobs completed successfully."
exit 0 # success
exit 0 # success
else
echo "Jobs are still running, checking again in << parameters.wait-duration >> seconds..."
sleep << parameters.wait-duration >>
fi
sleep << parameters.wait-duration >>
done

- run: echo "All required jobs have now completed"