From 8636ffd525f4594a72a37cd32ed477dadac82603 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 9 Sep 2025 14:33:49 +0000 Subject: [PATCH 1/3] delete build VMs in CI nightly cleanup --- .github/workflows/nightly-cleanup.yml | 48 ++++----------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml index 897d3572d..783beb9fb 100644 --- a/.github/workflows/nightly-cleanup.yml +++ b/.github/workflows/nightly-cleanup.yml @@ -40,53 +40,17 @@ jobs: echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml shell: bash - - name: Find CI clusters + - name: Delete all CI clusters run: | . venv/bin/activate - CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true) - echo "DEBUG: Raw CI clusters: $CI_CLUSTERS" - - if [[ -z "$CI_CLUSTERS" ]]; then - echo "No matching CI clusters found." - else - # Flatten multiline value so can be passed as env var - CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//') - echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED" - echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> $GITHUB_ENV - fi + ./dev/delete-cluster.py slurmci-RL --force shell: bash - - - name: Delete CI clusters + + - name: Delete all CI build VMs run: | . venv/bin/activate - if [[ -z ${ci_clusters} ]]; then - echo "No clusters to delete." - exit 0 - fi - - for cluster_prefix in ${ci_clusters} + for build_vm in $(openstack server list -c Name -f value | grep openhpc-extra-RL) do - echo "Processing cluster: $cluster_prefix" - - # Get all servers with the matching name for control node - CONTROL_SERVERS=$(openstack server list --name ${cluster_prefix}-control --format json) - - # Get unique server names to avoid duplicate cleanup - UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq) - for name in $UNIQUE_NAMES; do - echo "Deleting cluster with control node: $name" - - # Get the first matching server ID by name - server=$(echo "$CONTROL_SERVERS" | jq -r '.[] | select(.Name=="'"$name"'") | .ID' | head -n1) - - # Make sure server still exists (wasn't deleted earlier) - if ! openstack server show "$server" &>/dev/null; then - echo "Server $server no longer exists, skipping $name." - continue - fi - - echo "Deleting cluster $cluster_prefix (server $server)..." - ./dev/delete-cluster.py $cluster_prefix --force - done + openstack server delete $build_vm done shell: bash From 1b96add8a659531c2bd042688ee9ab8df1d36652 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 6 Oct 2025 15:04:40 +0000 Subject: [PATCH 2/3] name build volumes and include in nightly cleanup --- .github/workflows/nightly-cleanup.yml | 9 +++++++++ packer/openstack.pkr.hcl | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml index 783beb9fb..30ce84bb8 100644 --- a/.github/workflows/nightly-cleanup.yml +++ b/.github/workflows/nightly-cleanup.yml @@ -54,3 +54,12 @@ jobs: openstack server delete $build_vm done shell: bash + + - name: Delete all CI build volumes + run: | + . venv/bin/activate + for build_vol in $(openstack volume list -c Name -f value | grep openhpc-extra-RL) + do + openstack volume delete $build_vol + done + shell: bash diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 3f93d50a8..a1c5f2727 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -128,6 +128,11 @@ variable "volume_size" { default = 15 } +variable "volume_name" { + type = string + default = null +} + variable "image_disk_format" { type = string default = "raw" @@ -162,6 +167,7 @@ source "openstack" "openhpc" { use_blockstorage_volume = var.use_blockstorage_volume volume_type = var.volume_type volume_size = var.volume_size + volume_name = "${var.image_name}${local.image_name_version}" metadata = var.metadata instance_metadata = { ansible_init_disable = "true" From 5691326f1f89a74b54725b2c1f2381666d35d371 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 7 Oct 2025 08:41:54 +0000 Subject: [PATCH 3/3] simplify cleanup of volumes and include fatimage build VMs --- .github/workflows/nightly-cleanup.yml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml index 9cf74d11a..4c2fd0164 100644 --- a/.github/workflows/nightly-cleanup.yml +++ b/.github/workflows/nightly-cleanup.yml @@ -52,20 +52,14 @@ jobs: ./dev/delete-cluster.py slurmci-RL --force shell: bash - - name: Delete all CI build VMs + - name: Delete all CI extra build VMs and volumes run: | . venv/bin/activate - for build_vm in $(openstack server list -c Name -f value | grep openhpc-extra-RL) - do - openstack server delete $build_vm - done + ./dev/delete-cluster.py openhpc-extra-RL --force shell: bash - - name: Delete all CI build volumes + - name: Delete all fatimage build VMs and volumes run: | . venv/bin/activate - for build_vol in $(openstack volume list -c Name -f value | grep openhpc-extra-RL) - do - openstack volume delete $build_vol - done + ./dev/delete-cluster.py openhpc-RL --force shell: bash