Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions ci-operator/step-registry/ipi/conf/gcp/ipi-conf-gcp-commands.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ if [[ -z "${COMPUTE_NODE_TYPE}" ]]; then
fi
fi

#######################################
# Pick a random set of standard (non-AI) zones from ${GCP_REGION}.
# This prevents control plane nodes from being placed in AI zones when zones
# aren't explicitly set.
# Globals:
#   GCP_REGION          (read)    region whose zones are listed
#   AVAILABILITY_ZONES  (written) every UP, non-AI zone of the region, shuffled
# Arguments:
#   $1 - number of zones to return (default: 3); fewer are returned when the
#        region does not have that many matching zones
# Outputs:
#   A YAML flow sequence on stdout, e.g. "[zone1, zone2, zone3]";
#   "[]" when no zone could be determined.
#######################################
function get_zones_from_region() {
  local zone_count=${1:-3}

  # Zone names containing "-ai<digit>" (e.g. us-central1-ai1a) are dropped.
  # gcloud's stderr is discarded on purpose: a failed lookup simply produces
  # an empty list, which the function reports as "[]".
  mapfile -t AVAILABILITY_ZONES < <(
    gcloud compute zones list \
      --filter="region:${GCP_REGION} AND status:UP" \
      --format='value(name)' 2>/dev/null \
      | grep -v '\-ai[0-9]' \
      | shuf
  )

  # Keep only the first zone_count entries of the shuffled list.
  local zones=("${AVAILABILITY_ZONES[@]:0:${zone_count}}")

  # Join with ", ". All loop state is local so nothing leaks into the
  # caller's scope.
  local zones_str=""
  local sep=""
  local zone
  for zone in "${zones[@]}"; do
    zones_str+="${sep}${zone}"
    sep=", "
  done

  echo "[${zones_str}]"
}

cat >> "${CONFIG}" << EOF
baseDomain: ${GCP_BASE_DOMAIN}
Expand All @@ -87,6 +107,43 @@ compute:
type: ${COMPUTE_NODE_TYPE}
EOF

# Set zones for control plane and compute in regions with AI zones to avoid AI zones.
# AI zones (e.g., us-central1-ai1a, us-south1-ai1b) are optimized for GPU/AI machine types
# and should not be used for standard machine types like control plane nodes.
# Only us-central1 and us-south1 are handled; every other region skips this block.
if [[ "${GCP_REGION}" == "us-central1" ]] || [[ "${GCP_REGION}" == "us-south1" ]]; then
# Service-account key file from the cluster profile, exported for child processes.
export GCP_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/gce.json"
# Best-effort read: if jq fails, the value is empty and zone selection is skipped entirely.
# NOTE(review): the file path is unquoted in both jq calls, so a path containing whitespace would break them.
# NOTE(review): jq -r presumably prints the literal string "null" for a missing key, which would pass the -n test below; confirm the key is always present.
GOOGLE_PROJECT_ID=$(jq -r .project_id ${GCP_SHARED_CREDENTIALS_FILE} 2>/dev/null || echo "")
if [[ -n "${GOOGLE_PROJECT_ID}" ]]; then
sa_email=$(jq -r .client_email ${GCP_SHARED_CREDENTIALS_FILE} 2>/dev/null || echo "")
# Activate the service account only when `gcloud auth list` has no line matching "* <sa_email>".
# NOTE(review): sa_email is interpolated into an extended regex, so its dots match any character.
if [[ -n "${sa_email}" ]] && ! gcloud auth list 2>/dev/null | grep -qE "\*\s+${sa_email}"; then
# Both gcloud calls are best-effort: errors are hidden and never abort the script.
gcloud auth activate-service-account --key-file="${GCP_SHARED_CREDENTIALS_FILE}" 2>/dev/null || true
gcloud config set project "${GOOGLE_PROJECT_ID}" 2>/dev/null || true
fi

# Get zones for control plane (3 zones for HA); the result is a "[a, b, c]" style string.
CONTROL_PLANE_ZONES_STR=$(get_zones_from_region 3)
# Compute reuses exactly the same zone list as the control plane (for consistency).
COMPUTE_ZONES_STR="${CONTROL_PLANE_ZONES_STR}"

# Apply zones via patch only if we got valid zones: an empty string or "[]" means
# no zone could be determined, and the install config is then left unchanged.
if [[ -n "${CONTROL_PLANE_ZONES_STR}" ]] && [[ "${CONTROL_PLANE_ZONES_STR}" != "[]" ]]; then
# Temporary patch file; it is removed again right after the merge.
PATCH="${SHARED_DIR}/install-config-zones.yaml.patch"
# NOTE(review): the YAML body of this heredoc shows no indentation in this view; verify the nesting in the real file.
cat > "${PATCH}" << ZONESPATCH
controlPlane:
platform:
gcp:
zones: ${CONTROL_PLANE_ZONES_STR}
compute:
- platform:
gcp:
zones: ${COMPUTE_ZONES_STR}
ZONESPATCH
# Merge the patch into ${CONFIG} (presumably -x = overwrite, -i = in place; confirm against the yq-go version in use).
yq-go m -x -i "${CONFIG}" "${PATCH}"
rm "${PATCH}"
fi
fi
fi

if [ ${RT_ENABLED} = "true" ]; then
cat > "${SHARED_DIR}/manifest_mc-kernel-rt.yml" << EOF
apiVersion: machineconfiguration.openshift.io/v1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,41 @@ ZONES_COUNT=3

# Join items with a separator and print the result on stdout.
# Usage: join_by SEP [ITEM...]
# SEP is installed as a function-local IFS, so "$*" joins the remaining
# arguments with its first character.
join_by() {
  local IFS="$1"
  shift
  echo "$*"
}

#######################################
# Select up to ZONES_COUNT random standard (non-AI) zones of ${GCP_REGION}.
# Globals:
#   GCP_REGION          (read)    region to list zones for
#   ZONES_COUNT         (read)    maximum number of zones to keep
#   AVAILABILITY_ZONES  (written) all UP, non-AI zones of the region, shuffled
#   ZONES               (written) the first ZONES_COUNT entries of that list
#   ZONES_STR           (written) "[ z1,z2,z3 ]" rendering of ZONES
# Outputs:
#   One log line on stdout describing the region and the chosen zones.
#######################################
function get_zones_from_region() {
  local region_filter="region:${GCP_REGION} AND status:UP"

  # Zone names containing "-ai<digit>" are dropped before shuffling.
  mapfile -t AVAILABILITY_ZONES < <(
    gcloud compute zones list --filter="${region_filter}" --format='value(name)' \
      | grep -v '\-ai[0-9]' \
      | shuf
  )

  # Results are published through globals; callers read ZONES_STR afterwards.
  ZONES=("${AVAILABILITY_ZONES[@]:0:${ZONES_COUNT}}")
  ZONES_STR="[ $(join_by , "${ZONES[@]}") ]"

  echo "GCP region: ${GCP_REGION} (zones: ${ZONES_STR})"
}

function get_zones_by_machine_type() {
local machine_type=$1

mapfile -t AVAILABILITY_ZONES < <(gcloud compute machine-types list --filter="zone~${GCP_REGION} AND name=${machine_type}" --format='value(zone)' | sort)
# Get all zones that support this machine type
mapfile -t AVAILABILITY_ZONES < <(gcloud compute machine-types list --filter="zone~${GCP_REGION} AND name=${machine_type}" --format='value(zone)')

# Filter out AI zones if this is not an AI machine type (AI types start with "a2-")
if [[ ! "${machine_type}" =~ ^a2- ]]; then
# Filter out zones containing "-ai" followed by a digit (e.g., us-central1-ai1a)
local filtered_zones=()
for zone in "${AVAILABILITY_ZONES[@]}"; do
if [[ ! "${zone}" =~ -ai[0-9] ]]; then
filtered_zones+=("${zone}")
fi
done
# Only use filtered zones if we found non-AI zones, otherwise use all zones
if [[ ${#filtered_zones[@]} -gt 0 ]]; then
AVAILABILITY_ZONES=("${filtered_zones[@]}")
fi
fi

# Shuffle zones randomly to spread load across zones instead of always picking alphabetically first
mapfile -t AVAILABILITY_ZONES < <(printf '%s\n' "${AVAILABILITY_ZONES[@]}" | shuf)

# Take the first ZONES_COUNT zones
ZONES=("${AVAILABILITY_ZONES[@]:0:${ZONES_COUNT}}")
ZONES_STR="[ $(join_by , "${ZONES[@]}") ]"
echo "[${machine_type}] GCP region: ${GCP_REGION} (zones: ${ZONES_STR})"
Expand Down Expand Up @@ -51,6 +82,9 @@ if [[ -n "${CONTROL_PLANE_ZONES}" ]]; then
ZONES_STR="${CONTROL_PLANE_ZONES}"
elif [[ -n "${CONTROL_PLANE_NODE_TYPE}" ]]; then
get_zones_by_machine_type "${CONTROL_PLANE_NODE_TYPE}"
else
# If no zones are set, get standard zones from the region (excluding AI zones)
get_zones_from_region
fi
if [[ -n "${ZONES_STR}" ]]; then
cat >> "${PATCH}" << EOF
Expand Down