diff --git a/01_install_requirements.sh b/01_install_requirements.sh
index 9a4c343a4..0952bce5a 100755
--- a/01_install_requirements.sh
+++ b/01_install_requirements.sh
@@ -163,10 +163,29 @@ sudo python -m pip install netaddr lxml
 sudo python -m pip install ansible=="${ANSIBLE_VERSION}"
 
 pushd ${METAL3_DEV_ENV_PATH}
-ansible-galaxy install -r vm-setup/requirements.yml
+
+# Check if requirements.yml exists before attempting installation
+if [[ ! -f vm-setup/requirements.yml ]]; then
+  echo "ERROR: requirements.yml file not found in vm-setup directory." >&2
+  exit 1
+fi
+
+# Install roles from requirements.yml with retry logic.
+# Retries default to 5; the explicit 0 timeout argument disables the time limit.
+RETRY_DELAY="${ANSIBLE_GALAXY_RETRY_DELAY:-15}" \
+  EXPONENTIAL_BACKOFF=true \
+  retry_with_timeout "${ANSIBLE_GALAXY_MAX_RETRIES:-5}" 0 \
+  "ansible-galaxy install -r vm-setup/requirements.yml"
+
 # Let's temporarily pin these collections to the latest compatible with ansible-2.15
 #ansible-galaxy collection install --upgrade ansible.netcommon ansible.posix ansible.utils community.general
-ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general
+# Install collections with retry logic.
+# Retries default to 5; the explicit 0 timeout argument disables the time limit.
+RETRY_DELAY="${ANSIBLE_GALAXY_RETRY_DELAY:-15}" \
+  EXPONENTIAL_BACKOFF=true \
+  retry_with_timeout "${ANSIBLE_GALAXY_MAX_RETRIES:-5}" 0 \
+  "ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general"
+
 ANSIBLE_FORCE_COLOR=true ansible-playbook \
   -e "working_dir=$WORKING_DIR" \
   -e "virthost=$HOSTNAME" \
diff --git a/utils.sh b/utils.sh
index 5e4c4f75c..ff5d168ad 100755
--- a/utils.sh
+++ b/utils.sh
@@ -13,16 +13,71 @@ function default_installer_cmd() {
 }
 
+# Run a command, retrying on failure.
+#
+# Usage: retry_with_timeout RETRIES TIMEOUT COMMAND...
+#   RETRIES  Maximum number of attempts (non-negative integer).
+#   TIMEOUT  Per-attempt limit in timeout(1) duration syntax, e.g. 30
+#            or 5m; 0 disables the limit.
+#   COMMAND  Command line; it is executed with "bash -c".
+# Environment:
+#   RETRY_TIMEOUT        Overrides TIMEOUT when set.
+#   RETRY_DELAY          Seconds to wait between attempts (default 0).
+#   EXPONENTIAL_BACKOFF  "true" doubles the wait after each failure.
+# Returns 0 on the first success, the last exit code once all attempts
+# are used up, or 2 when the arguments are invalid.
 function retry_with_timeout() {
-    retries=$1
-    timeout_duration=$2
-    command=${*:3}
+    local retries=$1
+    local timeout_duration=$2
+    local command=${*:3}
+    local retry_delay=${RETRY_DELAY:-0}
+    local exponential_backoff=${EXPONENTIAL_BACKOFF:-false}
+    # Use RETRY_TIMEOUT env var if set, otherwise use timeout_duration parameter
+    local timeout=${RETRY_TIMEOUT:-${timeout_duration:-0}}
+    local attempt=1
+    local exit_code=0
+    local sleep_time
+
+    # Fail loudly on malformed arguments. Without this check a call that
+    # omits TIMEOUT would use the command as the timeout and run nothing.
+    if [[ ! "$retries" =~ ^[0-9]+$ || ! "$retry_delay" =~ ^[0-9]+$ ]]; then
+        echo "ERROR: retry_with_timeout: retries and RETRY_DELAY must be non-negative integers" >&2
+        return 2
+    fi
+    if [[ ! "$timeout" =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
+        echo "ERROR: retry_with_timeout: invalid timeout '$timeout'" >&2
+        return 2
+    fi
+    if [[ -z "$command" ]]; then
+        echo "ERROR: retry_with_timeout: no command given" >&2
+        return 2
+    fi
 
     for _ in $(seq "$retries"); do
         exit_code=0
-        timeout "$timeout_duration" bash -c "$command" || exit_code=$?
+
+        # A zero duration means "no time limit"; skip timeout(1) then.
+        if [[ "$timeout" =~ ^0+([.]0+)?[smhd]?$ ]]; then
+            bash -c "$command" || exit_code=$?
+        else
+            timeout "$timeout" bash -c "$command" || exit_code=$?
+        fi
+
         if (( exit_code == 0 )); then
             return 0
         fi
+
+        # Wait before the next attempt; never after the last one.
+        if (( attempt < retries && retry_delay > 0 )); then
+            sleep_time=$retry_delay
+            if [[ "$exponential_backoff" == "true" ]]; then
+                # Double per failed attempt: delay, 2*delay, 4*delay, ...
+                sleep_time=$(( retry_delay * (1 << (attempt - 1)) ))
+            fi
+            echo "Command failed (attempt $attempt/$retries). Retrying in ${sleep_time}s..." >&2
+            sleep "$sleep_time"
+        fi
+
+        attempt=$(( attempt + 1 ))
     done
 
     return $(( exit_code ))