From 094e8b7f5aeccd2ab6d1a6536321df5c620f374f Mon Sep 17 00:00:00 2001
From: Pablo Fontanilla
Date: Thu, 15 Jan 2026 11:41:54 +0100
Subject: [PATCH] Add pre-shutdown certificate refresh to maximize cluster restart window
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenShift uses short-lived (24h) intermediate signing certificates that
automatically rotate. Leaf certificates are capped by the remaining validity
of their signer. If you shut down a cluster when signers are close to
expiration, the leaf certs may have very short remaining validity, causing
startup failures if the cluster is stopped for too long.

This change adds:
- helpers/refresh-certificates.sh: Standalone script that forces regeneration
  of all API server signer certificates with fresh 24-hour validity
- Integration with shutdown-cluster.sh to automatically refresh certs before
  shutting down cluster VMs

The helper script can also be run independently when needed. It exits
non-zero when the refresh is incomplete, so shutdown-cluster.sh can print
its warning and proceed with the shutdown.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
Review note: helpers/refresh-certificates.sh was revised after this patch was
first generated, so the blob id on its "index" line below is stale.
Regenerate the patch with git format-patch if an exact blob id is needed.

 .../scripts/shutdown-cluster.sh |  14 ++
 helpers/refresh-certificates.sh | 214 ++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100755 helpers/refresh-certificates.sh

diff --git a/deploy/openshift-clusters/scripts/shutdown-cluster.sh b/deploy/openshift-clusters/scripts/shutdown-cluster.sh
index 2e0cd07..da19573 100755
--- a/deploy/openshift-clusters/scripts/shutdown-cluster.sh
+++ b/deploy/openshift-clusters/scripts/shutdown-cluster.sh
@@ -59,6 +59,20 @@ fi
 
 echo "Found dev-scripts directory. Performing orderly shutdown of cluster VMs..."
 
+# Refresh certificates before shutdown to maximize validity window on next startup
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+REFRESH_CERTS_SCRIPT="${REPO_ROOT}/helpers/refresh-certificates.sh"
+PROXY_ENV="${SCRIPT_DIR}/../proxy.env"
+
+if [[ -x "${REFRESH_CERTS_SCRIPT}" && -f "${PROXY_ENV}" ]]; then
+    echo ""
+    echo "Refreshing certificates before shutdown..."
+    "${REFRESH_CERTS_SCRIPT}" --proxy-env "${PROXY_ENV}" || echo "Warning: Certificate refresh failed. Proceeding with shutdown."
+    echo ""
+else
+    echo "Skipping certificate refresh (script or proxy.env not found)."
+fi
+
 # Perform orderly shutdown of the cluster VMs
 ssh "$(cat "${SHARED_DIR}/ssh_user")@${HOST_PUBLIC_IP}" << 'EOF'
 set -e
diff --git a/helpers/refresh-certificates.sh b/helpers/refresh-certificates.sh
new file mode 100755
index 0000000..7c09907
--- /dev/null
+++ b/helpers/refresh-certificates.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+#
+# refresh-certificates.sh - Force renewal of OpenShift API server certificates
+#
+# This script forces the kube-apiserver-operator to regenerate all short-lived
+# signer certificates with fresh 24-hour validity. This is useful before shutting
+# down a cluster for an extended period to maximize the certificate validity
+# window on the next startup.
+#
+# Background:
+#   OpenShift uses short-lived (24h) intermediate signing certificates that
+#   automatically rotate. Leaf certificates (like API server serving certs)
+#   are capped by the remaining validity of their signer. If you shut down
+#   a cluster when signers are close to expiration, the leaf certs may have
+#   very short remaining validity, causing startup failures if the cluster
+#   is stopped for too long.
+#
+# Usage:
+#   ./refresh-certificates.sh [--proxy-env /path/to/proxy.env]
+#
+# If --proxy-env is not specified, the script will look for proxy.env in
+# the standard location relative to the two-node-toolbox deploy directory.
+#
+# Exit status:
+#   0  no delete failed and every signer secret exists again before the
+#      timeout (also returned for --help)
+#   1  bad arguments, missing proxy.env or oc, unreachable API, a failed
+#      delete, or a secret that was not recreated before the timeout
+#
+
+set -o nounset
+set -o pipefail
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Default proxy.env location (relative to helpers/)
+DEFAULT_PROXY_ENV="${SCRIPT_DIR}/../deploy/openshift-clusters/proxy.env"
+
+# Parse arguments
+PROXY_ENV=""
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --proxy-env)
+            # Under nounset a missing value would otherwise abort with a bare
+            # "unbound variable" error instead of a usage hint.
+            if [[ $# -lt 2 ]]; then
+                echo "Error: --proxy-env requires a path argument" >&2
+                echo "Use --help for usage information" >&2
+                exit 1
+            fi
+            PROXY_ENV="$2"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Usage: $0 [--proxy-env /path/to/proxy.env]"
+            echo ""
+            echo "Force renewal of OpenShift API server certificates to maximize"
+            echo "validity window before cluster shutdown."
+            echo ""
+            echo "Options:"
+            echo "  --proxy-env PATH    Path to proxy.env file (default: deploy/openshift-clusters/proxy.env)"
+            echo "  -h, --help          Show this help message"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            echo "Use --help for usage information" >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Use default if not specified
+if [[ -z "${PROXY_ENV}" ]]; then
+    PROXY_ENV="${DEFAULT_PROXY_ENV}"
+fi
+
+echo "========================================"
+echo "OpenShift Certificate Refresh"
+echo "========================================"
+echo ""
+
+# Check if proxy.env exists
+if [[ ! -f "${PROXY_ENV}" ]]; then
+    {
+        echo "Error: proxy.env not found at ${PROXY_ENV}"
+        echo ""
+        echo "Please specify the correct path with --proxy-env or ensure"
+        echo "the cluster has been deployed and proxy.env exists."
+    } >&2
+    exit 1
+fi
+
+echo "Loading proxy environment from: ${PROXY_ENV}"
+# shellcheck source=/dev/null
+source "${PROXY_ENV}"
+
+# Checked after sourcing proxy.env in case that file adjusts PATH. Without
+# this, a missing client would be misreported below as an unreachable API.
+if ! command -v oc &>/dev/null; then
+    echo "Error: 'oc' command not found in PATH" >&2
+    exit 1
+fi
+
+# Verify we can reach the API
+echo "Checking cluster API accessibility..."
+if ! oc get nodes --request-timeout=10s &>/dev/null; then
+    {
+        echo ""
+        echo "Error: Cannot reach the cluster API."
+        echo ""
+        echo "Possible causes:"
+        echo "  - Cluster is not running"
+        echo "  - Proxy is not accessible"
+        echo "  - Certificates have already expired"
+        echo ""
+        echo "If the cluster is running, check that the proxy (squid) is accessible"
+        echo "at ${HTTP_PROXY:-}"
+    } >&2
+    exit 1
+fi
+
+echo "Cluster API is accessible."
+echo ""
+
+# Namespace holding the signer secrets
+SIGNER_NAMESPACE="openshift-kube-apiserver-operator"
+
+# List of short-lived signer secrets to refresh
+SIGNERS=(
+    "aggregator-client-signer"
+    "loadbalancer-serving-signer"
+    "localhost-serving-signer"
+    "service-network-serving-signer"
+)
+
+echo "Forcing renewal of API server signer certificates..."
+echo ""
+
+# Print each signer's certificate-not-after annotation, or "not found" when
+# the secret cannot be read.
+show_cert_expiry() {
+    local signer expiry
+    for signer in "${SIGNERS[@]}"; do
+        expiry=$(oc get secret "${signer}" -n "${SIGNER_NAMESPACE}" \
+            -o jsonpath='{.metadata.annotations.auth\.openshift\.io/certificate-not-after}' 2>/dev/null || echo "not found")
+        echo "  ${signer}: ${expiry}"
+    done
+}
+
+echo "Current certificate expiry times:"
+show_cert_expiry
+echo ""
+
+# Delete signer secrets to trigger regeneration
+echo "Deleting signer secrets to trigger regeneration..."
+DELETE_FAILED=false
+for signer in "${SIGNERS[@]}"; do
+    echo "  Deleting ${signer}..."
+    # A failed delete leaves the old secret in place, which the existence
+    # poll below would otherwise count as a regenerated one.
+    if ! oc delete secret "${signer}" -n "${SIGNER_NAMESPACE}" --ignore-not-found=true; then
+        echo "  Warning: could not delete ${signer}" >&2
+        DELETE_FAILED=true
+    fi
+done
+
+# Polling budget for secret regeneration, in seconds
+TIMEOUT=60
+POLL_INTERVAL=5
+
+echo ""
+echo "Waiting for certificate regeneration (up to ${TIMEOUT}s)..."
+
+# Wait for all secrets to be recreated (active polling instead of fixed sleep)
+ELAPSED=0
+ALL_EXIST=false
+while [[ ${ELAPSED} -lt ${TIMEOUT} ]]; do
+    ALL_EXIST=true
+    for signer in "${SIGNERS[@]}"; do
+        if ! oc get secret "${signer}" -n "${SIGNER_NAMESPACE}" &>/dev/null; then
+            ALL_EXIST=false
+            break
+        fi
+    done
+    if [[ "${ALL_EXIST}" == "true" ]]; then
+        break
+    fi
+    sleep "${POLL_INTERVAL}"
+    ELAPSED=$((ELAPSED + POLL_INTERVAL))
+    echo -n "."
+done
+echo ""
+echo ""
+
+echo "New certificate expiry times:"
+show_cert_expiry
+echo ""
+
+# Exit non-zero on any incomplete refresh so callers can react to it.
+if [[ "${DELETE_FAILED}" == "true" ]]; then
+    echo "Error: Some signer secrets could not be deleted and were not refreshed." >&2
+    exit 1
+fi
+
+if [[ "${ALL_EXIST}" == "true" ]]; then
+    echo "Certificate refresh completed successfully!"
+    echo "All signers renewed with fresh 24-hour validity."
+else
+    echo "Warning: Some certificates may still be regenerating." >&2
+    echo "Check kube-apiserver-operator logs if issues persist." >&2
+    exit 1
+fi