From f7efa6c4e335725bd289c4ed49ae163b198654ce Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 4 Nov 2025 11:20:39 +0000 Subject: [PATCH 01/16] Add protected environment checks hook --- environments/site/hooks/pre.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 environments/site/hooks/pre.yml diff --git a/environments/site/hooks/pre.yml b/environments/site/hooks/pre.yml new file mode 100644 index 000000000..cd25e9217 --- /dev/null +++ b/environments/site/hooks/pre.yml @@ -0,0 +1,20 @@ +--- + +- hosts: localhost + gather_facts: no + become: no + tasks: + - name: Confirm continuing if using production environment + ansible.builtin.pause: + prompt: | + ************************************* + * WARNING: PROTECTED ENVIRONMENT! * + ************************************* + + Current environment: {{ appliances_environment_name }} + Do you really want to continue (yes/no)? + register: env_confirm_safe + when: + - appliances_environment_name in protected_environments + - not (prd_continue | default(false) | bool) + failed_when: not (env_confirm_safe.user_input | bool) \ No newline at end of file From ca475780c660cdd19e6e840d5165e9859c21ca3f Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 4 Nov 2025 11:23:16 +0000 Subject: [PATCH 02/16] populate protected_environments list --- environments/site/hooks/pre.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environments/site/hooks/pre.yml b/environments/site/hooks/pre.yml index cd25e9217..cc0a6c6a9 100644 --- a/environments/site/hooks/pre.yml +++ b/environments/site/hooks/pre.yml @@ -3,6 +3,9 @@ - hosts: localhost gather_facts: no become: no + vars: + protected_environments: + - prd tasks: - name: Confirm continuing if using production environment ansible.builtin.pause: From ceaba175873f02c595e235c1693a788be5663f7b Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 4 Nov 2025 16:24:48 +0000 Subject: [PATCH 03/16] unlock instances before rebuild-via-slurm --- ansible/adhoc/lock_unlock_instances.yml | 10 ++++++++++ ansible/adhoc/rebuild-via-slurm.yml | 10 ++++++++++ ansible/site.yml | 6 ++++++ 3 files changed, 26 insertions(+) create mode 100644 ansible/adhoc/lock_unlock_instances.yml diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml new file mode 100644 index 000000000..81ec547ab --- /dev/null +++ b/ansible/adhoc/lock_unlock_instances.yml @@ -0,0 +1,10 @@ +--- + +- hosts: "{{ target_hosts | default('all') }}" + gather_facts: no + become: no + tasks: + - name: Lock/Unlock instances + openstack.cloud.server_action: + action: "{{ server_action | default('lock') }}" + server: "{{ inventory_hostname }}" \ No newline at end of file diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index 33cbe5cc7..bf26e0323 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -8,6 +8,16 @@ # See docs/slurm-controlled-rebuild.md. 
+- hosts: localhost + gather_facts: false + vars: + server_action: unlock + target_hosts: compute + tasks: + - name: Unlock compute instances for rebuild + ansible.builtin.include_playbook: + file: adhoc/lock_unlock_instances.yml + - hosts: login run_once: true gather_facts: false diff --git a/ansible/site.yml b/ansible/site.yml index 79b71e10a..fa229800a 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,4 +1,10 @@ --- +- name: Lock all instances + vars: + server_action: lock + target_hosts: all + ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml + - name: Run pre.yml hook vars: # hostvars not available here, so have to recalculate environment root: From 36a10e776faec49ec9974120dbb1b3de97994a11 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 09:40:08 +0000 Subject: [PATCH 04/16] fix rebuild unlocking --- ansible/adhoc/lock_unlock_instances.yml | 3 ++- ansible/adhoc/rebuild-via-slurm.yml | 8 ++------ ansible/safe-env.yml | 22 ++++++++++++++++++++++ ansible/site.yml | 3 +++ environments/site/hooks/pre.yml | 22 ---------------------- 5 files changed, 29 insertions(+), 29 deletions(-) create mode 100644 ansible/safe-env.yml diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml index 81ec547ab..db9464cae 100644 --- a/ansible/adhoc/lock_unlock_instances.yml +++ b/ansible/adhoc/lock_unlock_instances.yml @@ -7,4 +7,5 @@ - name: Lock/Unlock instances openstack.cloud.server_action: action: "{{ server_action | default('lock') }}" - server: "{{ inventory_hostname }}" \ No newline at end of file + server: "{{ inventory_hostname }}" + delegate_to: localhost \ No newline at end of file diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index bf26e0323..fca4258a8 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -8,15 +8,11 @@ # See docs/slurm-controlled-rebuild.md. -- hosts: localhost - gather_facts: false +- name: Unlock compute instances for rebuild vars: server_action: unlock target_hosts: compute - tasks: - - name: Unlock compute instances for rebuild - ansible.builtin.include_playbook: - file: adhoc/lock_unlock_instances.yml + ansible.builtin.import_playbook: lock_unlock_instances.yml - hosts: login run_once: true diff --git a/ansible/safe-env.yml b/ansible/safe-env.yml new file mode 100644 index 000000000..8479a298b --- /dev/null +++ b/ansible/safe-env.yml @@ -0,0 +1,22 @@ +--- +- hosts: localhost + gather_facts: no + become: no + vars: + protected_environments: + - prd + tasks: + - name: Confirm continuing if using production environment + ansible.builtin.pause: + prompt: | + ************************************* + * WARNING: PROTECTED ENVIRONMENT! * + ************************************* + + Current environment: {{ appliances_environment_name }} + Do you really want to continue (yes/no)? 
+ register: env_confirm_safe + when: + - appliances_environment_name in protected_environments + - not (prd_continue | default(false) | bool) + failed_when: not (env_confirm_safe.user_input | bool) \ No newline at end of file diff --git a/ansible/site.yml b/ansible/site.yml index fa229800a..4cafa71c2 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,4 +1,7 @@ --- + +- ansible.builtin.import_playbook: safe-env.yml + - name: Lock all instances vars: server_action: lock diff --git a/environments/site/hooks/pre.yml b/environments/site/hooks/pre.yml index cc0a6c6a9..ed97d539c 100644 --- a/environments/site/hooks/pre.yml +++ b/environments/site/hooks/pre.yml @@ -1,23 +1 @@ --- - -- hosts: localhost - gather_facts: no - become: no - vars: - protected_environments: - - prd - tasks: - - name: Confirm continuing if using production environment - ansible.builtin.pause: - prompt: | - ************************************* - * WARNING: PROTECTED ENVIRONMENT! * - ************************************* - - Current environment: {{ appliances_environment_name }} - Do you really want to continue (yes/no)? - register: env_confirm_safe - when: - - appliances_environment_name in protected_environments - - not (prd_continue | default(false) | bool) - failed_when: not (env_confirm_safe.user_input | bool) \ No newline at end of file From 675d3ba294eafcdeda0783d163783ab341778684 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 7 Nov 2025 09:42:13 +0000 Subject: [PATCH 05/16] remove site hook --- environments/site/hooks/pre.yml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 environments/site/hooks/pre.yml diff --git a/environments/site/hooks/pre.yml b/environments/site/hooks/pre.yml deleted file mode 100644 index ed97d539c..000000000 --- a/environments/site/hooks/pre.yml +++ /dev/null @@ -1 +0,0 @@ ---- From 0a4988d3e08537e868464237ffdcc869e3256fae Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 11 Nov 2025 19:30:30 +0000 Subject: [PATCH 06/16] define protected envs in common vars, improve lock_unlock_instances --- ansible/adhoc/lock_unlock_instances.yml | 4 ++-- ansible/adhoc/rebuild-via-slurm.yml | 5 ++--- ansible/safe-env.yml | 7 ++----- ansible/site.yml | 7 ++----- environments/common/inventory/group_vars/all/defaults.yml | 2 ++ 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml index db9464cae..80e3404a4 100644 --- a/ansible/adhoc/lock_unlock_instances.yml +++ b/ansible/adhoc/lock_unlock_instances.yml @@ -1,11 +1,11 @@ --- -- hosts: "{{ target_hosts | default('all') }}" +- hosts: cluster gather_facts: no become: no tasks: - name: Lock/Unlock instances openstack.cloud.server_action: - action: "{{ server_action | default('lock') }}" + action: "{{ appliances_server_action | default('lock') }}" server: "{{ inventory_hostname }}" delegate_to: localhost \ No newline at end of file diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index fca4258a8..fbe96c700 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -10,9 +10,8 @@ - name: Unlock compute instances for rebuild vars: - server_action: unlock - target_hosts: compute - ansible.builtin.import_playbook: lock_unlock_instances.yml + appliances_server_action: unlock + ansible.builtin.command: ansible-playbook --limit compute adhoc/lock_unlock_instances.yml - hosts: login run_once: true diff --git a/ansible/safe-env.yml b/ansible/safe-env.yml index 8479a298b..7aab7c8da 
100644 --- a/ansible/safe-env.yml +++ b/ansible/safe-env.yml @@ -2,9 +2,6 @@ - hosts: localhost gather_facts: no become: no - vars: - protected_environments: - - prd tasks: - name: Confirm continuing if using production environment ansible.builtin.pause: @@ -17,6 +14,6 @@ Do you really want to continue (yes/no)? register: env_confirm_safe when: - - appliances_environment_name in protected_environments - - not (prd_continue | default(false) | bool) + - appliances_environment_name in appliances_protected_environments + - not (appliances_protected_environment_autoapprove | default(false) | bool) failed_when: not (env_confirm_safe.user_input | bool) \ No newline at end of file diff --git a/ansible/site.yml b/ansible/site.yml index 4cafa71c2..191aa3cd6 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -2,11 +2,8 @@ - ansible.builtin.import_playbook: safe-env.yml -- name: Lock all instances - vars: - server_action: lock - target_hosts: all - ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml +- name: Lock cluster instances + ansible.builtin.command: ansible-playbook adhoc/lock_unlock_instances.yml - name: Run pre.yml hook vars: diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index e9852afd6..6cc02ff59 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -4,6 +4,8 @@ ansible_user: rocky appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only +appliances_protected_environments: + - prd appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it # appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure From 2576d250b23a39f469a19a8c61848141a752f552 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 11:36:05 +0000 Subject: [PATCH 07/16] fix locking instances play --- ansible/adhoc/lock_unlock_instances.yml | 4 ++-- ansible/adhoc/rebuild-via-slurm.yml | 10 ++++++---- ansible/site.yml | 8 ++++++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml index 80e3404a4..6aafce1e3 100644 --- a/ansible/adhoc/lock_unlock_instances.yml +++ b/ansible/adhoc/lock_unlock_instances.yml @@ -1,8 +1,8 @@ --- - hosts: cluster - gather_facts: no - become: no + gather_facts: false + become: false tasks: - name: Lock/Unlock instances openstack.cloud.server_action: diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index fbe96c700..4f1fc5fe9 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -8,10 +8,12 @@ # See docs/slurm-controlled-rebuild.md. 
-- name: Unlock compute instances for rebuild - vars: - appliances_server_action: unlock - ansible.builtin.command: ansible-playbook --limit compute adhoc/lock_unlock_instances.yml +- hosts: localhost + gather_facts: false + tasks: + - name: Unlock compute nodes to ready rebuild + ansible.builtin.command: + cmd: ansible-playbook --limit compute adhoc/lock_unlock_instances.yml -e "appliances_server_action=unlock" - hosts: login run_once: true diff --git a/ansible/site.yml b/ansible/site.yml index 191aa3cd6..748933a50 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -2,8 +2,12 @@ - ansible.builtin.import_playbook: safe-env.yml -- name: Lock cluster instances - ansible.builtin.command: ansible-playbook adhoc/lock_unlock_instances.yml +- hosts: localhost + gather_facts: false + tasks: + - name: Lock all cluster instances + ansible.builtin.command: + cmd: ansible-playbook adhoc/lock_unlock_instances.yml - name: Run pre.yml hook vars: From e2d2a933137e635538f028d65cba0669e4b25d8a Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 11:37:45 +0000 Subject: [PATCH 08/16] linting fix --- ansible/safe-env.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/safe-env.yml b/ansible/safe-env.yml index 7aab7c8da..81a688709 100644 --- a/ansible/safe-env.yml +++ b/ansible/safe-env.yml @@ -1,7 +1,7 @@ --- - hosts: localhost - gather_facts: no - become: no + gather_facts: false + become: false tasks: - name: Confirm continuing if using production environment ansible.builtin.pause: From f8424099f2c66d703165c712de3751ae48e61334 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 14:28:18 +0000 Subject: [PATCH 09/16] linter error --- ansible/adhoc/lock_unlock_instances.yml | 4 ++-- ansible/adhoc/rebuild-via-slurm.yml | 11 +++++------ ansible/safe-env.yml | 2 +- ansible/site.yml | 8 ++------ 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml index 6aafce1e3..64886cd71 100644 --- a/ansible/adhoc/lock_unlock_instances.yml +++ b/ansible/adhoc/lock_unlock_instances.yml @@ -1,6 +1,6 @@ --- -- hosts: cluster +- hosts: "{{ target_hosts | default('cluster') }}" gather_facts: false become: false tasks: @@ -8,4 +8,4 @@ openstack.cloud.server_action: action: "{{ appliances_server_action | default('lock') }}" server: "{{ inventory_hostname }}" - delegate_to: localhost \ No newline at end of file + delegate_to: localhost diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index 4f1fc5fe9..5f22d7764 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -8,12 +8,11 @@ # See docs/slurm-controlled-rebuild.md. 
-- hosts: localhost - gather_facts: false - tasks: - - name: Unlock compute nodes to ready rebuild - ansible.builtin.command: - cmd: ansible-playbook --limit compute adhoc/lock_unlock_instances.yml -e "appliances_server_action=unlock" +- name: Unlock compute instances for rebuild + vars: + appliances_server_action: unlock + target_hosts: compute + ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml - hosts: login run_once: true diff --git a/ansible/safe-env.yml b/ansible/safe-env.yml index 81a688709..b32b5d86e 100644 --- a/ansible/safe-env.yml +++ b/ansible/safe-env.yml @@ -16,4 +16,4 @@ when: - appliances_environment_name in appliances_protected_environments - not (appliances_protected_environment_autoapprove | default(false) | bool) - failed_when: not (env_confirm_safe.user_input | bool) \ No newline at end of file + failed_when: not (env_confirm_safe.user_input | bool) diff --git a/ansible/site.yml b/ansible/site.yml index 748933a50..8adc8cf13 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -2,12 +2,8 @@ - ansible.builtin.import_playbook: safe-env.yml -- hosts: localhost - gather_facts: false - tasks: - - name: Lock all cluster instances - ansible.builtin.command: - cmd: ansible-playbook adhoc/lock_unlock_instances.yml +- name: Lock cluster instances + ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml - name: Run pre.yml hook vars: From 0fa67e5a21b6544aff955d65dbd800002be7f1fb Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 14:48:09 +0000 Subject: [PATCH 10/16] lock playbook path fix --- ansible/adhoc/rebuild-via-slurm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index 5f22d7764..f00062423 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -12,7 +12,7 @@ vars: appliances_server_action: unlock target_hosts: compute - ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml + ansible.builtin.import_playbook: lock_unlock_instances.yml - hosts: login run_once: true From af27191a355685b08c571b80544578b6514ac2ae Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 18:33:34 +0000 Subject: [PATCH 11/16] document locking/unlocking instances --- .github/workflows/stackhpc.yml | 1 + docs/experimental/compute-init.md | 2 +- docs/experimental/slurm-controlled-rebuild.md | 10 ++++++---- docs/sequence.md | 1 + 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index 60c05389e..cf987d5bf 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -154,6 +154,7 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate + ansible-playbook --limit login,control ansible/adhoc/lock_unlock_instances.yml -e "appliances_server_action=unlock" cd "$STACKHPC_TF_DIR" tofu init tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" diff --git a/docs/experimental/compute-init.md b/docs/experimental/compute-init.md index dfad27bcf..e0f548aff 100644 --- a/docs/experimental/compute-init.md +++ b/docs/experimental/compute-init.md @@ -22,7 +22,7 @@ login and control nodes. The process follows 1. Compute nodes are reimaged: ```shell -ansible-playbook -v --limit compute ansible/adhoc/rebuild.yml +ansible-playbook -v ansible/adhoc/rebuild-via-slurm.yml ``` 2. 
Ansible-init runs against newly reimaged compute nodes diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index fc654d354..4c8caefbb 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -12,14 +12,16 @@ In summary, the way this functionality works is as follows: 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. +2. `lock_unlock_instances.yml --limit control,login -e "appliances_server_action=unlock"` + is run to unlock the control and login nodes for reimaging. 2. `tofu apply` is run which rebuilds the login and control nodes to the new image(s). The new image reference for compute nodes is ignored, but is written into the hosts inventory file (and is therefore available as an Ansible hostvar). -3. The `site.yml` playbook is run which reconfigures the cluster as normal. At - this point the cluster is functional, but using a new image for the login - and control nodes and the old image for the compute nodes. This playbook - also: +3. The `site.yml` playbook is run which locks the instances again and reconfigures + the cluster as normal. At this point the cluster is functional, but using a new + image for the login and control nodes and the old image for the compute nodes. + This playbook also: - Writes cluster configuration to the control node, using the [compute_init](../../ansible/roles/compute_init/README.md) role. - Configures an application credential and helper programs on the control diff --git a/docs/sequence.md b/docs/sequence.md index 6f3b77922..96a2333f2 100644 --- a/docs/sequence.md +++ b/docs/sequence.md @@ -100,6 +100,7 @@ sequenceDiagram participant cloud as Cloud participant nodes as Cluster Instances note over ansible: Update OpenTofu cluster_image variable [1] + ansible->>cloud: Unlock control and and login nodes rect rgb(204, 232, 250) note over ansible: $ tofu apply .... ansible<<->>cloud: Check login/compute current vs desired images From 5192f60dd7081ac98bf2935599146ac1ac05114e Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 18:45:27 +0000 Subject: [PATCH 12/16] linting docs --- docs/experimental/slurm-controlled-rebuild.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index 4c8caefbb..3d16fabf5 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -12,7 +12,9 @@ In summary, the way this functionality works is as follows: 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. -2. `lock_unlock_instances.yml --limit control,login -e "appliances_server_action=unlock"` +2. ``` ansible-playbook lock_unlock_instances.yml + --limit control,login -e "appliances_server_action=unlock" + ``` is run to unlock the control and login nodes for reimaging. 2. `tofu apply` is run which rebuilds the login and control nodes to the new image(s). 
The new image reference for compute nodes is ignored, but is From 6119c227aa7fb0ea123b34d1fec5b8ae581ba78d Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 18:46:37 +0000 Subject: [PATCH 13/16] fix --- docs/experimental/slurm-controlled-rebuild.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index 3d16fabf5..1f924edf0 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -12,15 +12,13 @@ In summary, the way this functionality works is as follows: 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. -2. ``` ansible-playbook lock_unlock_instances.yml - --limit control,login -e "appliances_server_action=unlock" - ``` +2. `ansible-playbook lock_unlock_instances.yml --limit control,login -e "appliances_server_action=unlock"` is run to unlock the control and login nodes for reimaging. -2. `tofu apply` is run which rebuilds the login and control nodes to the new +3. `tofu apply` is run which rebuilds the login and control nodes to the new image(s). The new image reference for compute nodes is ignored, but is written into the hosts inventory file (and is therefore available as an Ansible hostvar). -3. The `site.yml` playbook is run which locks the instances again and reconfigures +4. The `site.yml` playbook is run which locks the instances again and reconfigures the cluster as normal. At this point the cluster is functional, but using a new image for the login and control nodes and the old image for the compute nodes. This playbook also: @@ -28,22 +26,22 @@ In summary, the way this functionality works is as follows: [compute_init](../../ansible/roles/compute_init/README.md) role. - Configures an application credential and helper programs on the control node, using the [rebuild](../../ansible/roles/rebuild/README.md) role. -4. An admin submits Slurm jobs, one for each node, to a special "rebuild" +5. An admin submits Slurm jobs, one for each node, to a special "rebuild" partition using an Ansible playbook. Because this partition has higher priority than the partitions normal users can use, these rebuild jobs become the next job in the queue for every node (although any jobs currently running will complete as normal). -5. Because these rebuild jobs have the `--reboot` flag set, before launching them +6. Because these rebuild jobs have the `--reboot` flag set, before launching them the Slurm control node runs a [RebootProgram](https://slurm.schedmd.com/slurm.conf.html#OPT_RebootProgram) which compares the current image for the node to the one in the cluster configuration, and if it does not match, uses OpenStack to rebuild the node to the desired (updated) image. TODO: Describe the logic if they DO match -6. After a rebuild, the compute node runs various Ansible tasks during boot, +7. After a rebuild, the compute node runs various Ansible tasks during boot, controlled by the [compute_init](../../ansible/roles/compute_init/README.md) role, to fully configure the node again. It retrieves the required cluster configuration information from the control node via an NFS mount. -7. Once the `slurmd` daemon starts on a compute node, the slurm controller +8. Once the `slurmd` daemon starts on a compute node, the slurm controller registers the node as having finished rebooting. It then launches the actual job, which does not do anything. 
From 7b832c3a90741dfcd0822d46bc09592e31a2f1c2 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 12 Nov 2025 18:52:01 +0000 Subject: [PATCH 14/16] improve docs style --- docs/experimental/slurm-controlled-rebuild.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index 1f924edf0..d2ee4df95 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -12,8 +12,8 @@ In summary, the way this functionality works is as follows: 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. -2. `ansible-playbook lock_unlock_instances.yml --limit control,login -e "appliances_server_action=unlock"` - is run to unlock the control and login nodes for reimaging. +2. The `lock_unlock_instances.yml` playbook is run against control and login with + `unlock` to allow for reimaging the nodes. 3. `tofu apply` is run which rebuilds the login and control nodes to the new image(s). The new image reference for compute nodes is ignored, but is written into the hosts inventory file (and is therefore available as an From 5bb6d98f3c9f9c3626236cf280b0de1500b4eb84 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 13 Nov 2025 14:02:05 +0000 Subject: [PATCH 15/16] Review changes / unlock instances before CI cleanup --- .github/workflows/stackhpc.yml | 3 ++- README.md | 4 ++- ansible/adhoc/lock-unlock-instances.yml | 27 +++++++++++++++++++ ansible/adhoc/lock_unlock_instances.yml | 11 -------- ansible/adhoc/rebuild-via-slurm.yml | 6 ++--- ansible/adhoc/rebuild.yml | 4 +++ ansible/site.yml | 2 +- docs/experimental/slurm-controlled-rebuild.md | 12 ++++----- docs/operations.md | 2 ++ .../inventory/group_vars/all/defaults.yml | 2 +- 10 files changed, 49 insertions(+), 24 deletions(-) create mode 100644 ansible/adhoc/lock-unlock-instances.yml delete mode 100644 ansible/adhoc/lock_unlock_instances.yml diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index cf987d5bf..dc981d892 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -154,7 +154,7 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate - ansible-playbook --limit login,control ansible/adhoc/lock_unlock_instances.yml -e "appliances_server_action=unlock" + ansible-playbook --limit login,control ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock" cd "$STACKHPC_TF_DIR" tofu init tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" @@ -238,6 +238,7 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate + ansible-playbook ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock" cd "$STACKHPC_TF_DIR" tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" || echo "tofu failed in $STACKHPC_TF_DIR" if: ${{ success() || cancelled() }} diff --git a/README.md b/README.md index 8acd424fa..093d70a08 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,10 @@ To configure the appliance, ensure the venv and the environment are [activated]( ```shell ansible-playbook ansible/site.yml ``` +To prevent the cluster instances from being changed or `tofu destroy`ed, this playbook begins by locking the OpenStack instances. 
Any subsequent desired changes to the OpenTofu state require +running an unlocking playbook as detailed in the adhoc command section of [docs/operations.md](docs/operations.md) -Once it completes you can log in to the cluster using: +Once `site.yml` completes you can log in to the cluster using: ```shell ssh rocky@$login_ip diff --git a/ansible/adhoc/lock-unlock-instances.yml b/ansible/adhoc/lock-unlock-instances.yml new file mode 100644 index 000000000..72194b4d5 --- /dev/null +++ b/ansible/adhoc/lock-unlock-instances.yml @@ -0,0 +1,27 @@ +--- +# Lock or unlock cluster instances + +# Used for site.yml / rebuild-via-slurm.yml +# Run required for rebuild.yml / tofu destroy / changes to tofu state etc. + +# Examples: + +# ansible-playbook --limit login,control ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock" + +# ansansible-playbook ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock" -e "lock_unlock_hosts=compute" + +# - name: Unlock compute instances +# vars: +# lock_unlock_action: unlock +# lock_unlock_hosts: compute +# ansible.builtin.import_playbook: lock-unlock-instances.yml + +- hosts: "{{ lock_unlock_hosts | default('cluster') }}" + gather_facts: false + become: false + tasks: + - name: Lock/Unlock instances + openstack.cloud.server_action: + action: "{{ lock_unlock_action | default('lock') }}" + server: "{{ inventory_hostname }}" + delegate_to: localhost diff --git a/ansible/adhoc/lock_unlock_instances.yml b/ansible/adhoc/lock_unlock_instances.yml deleted file mode 100644 index 64886cd71..000000000 --- a/ansible/adhoc/lock_unlock_instances.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- - -- hosts: "{{ target_hosts | default('cluster') }}" - gather_facts: false - become: false - tasks: - - name: Lock/Unlock instances - openstack.cloud.server_action: - action: "{{ appliances_server_action | default('lock') }}" - server: "{{ inventory_hostname }}" - delegate_to: localhost diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index f00062423..8597521fe 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -10,9 +10,9 @@ - name: Unlock compute instances for rebuild vars: - appliances_server_action: unlock - target_hosts: compute - ansible.builtin.import_playbook: lock_unlock_instances.yml + lock_unlock_action: unlock + lock_unlock_hosts: compute + ansible.builtin.import_playbook: lock-unlock-instances.yml - hosts: login run_once: true diff --git a/ansible/adhoc/rebuild.yml b/ansible/adhoc/rebuild.yml index b6033e43c..1db17d26b 100644 --- a/ansible/adhoc/rebuild.yml +++ b/ansible/adhoc/rebuild.yml @@ -5,6 +5,10 @@ # Use --limit to control which hosts to rebuild (either specific hosts or the _ groups defining partitions). # Optionally, supply `-e rebuild_image=` to define a specific image, otherwise the current image is reused. # +# After running site.yml, all instances are locked, so to run the rebuild.yml, the unlock playbook must be run: +# ansible-playbook ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock" +# Similarly to rebuild, --limit can be used to control which hosts to unlock. +# # NOTE: If a hostvar `instance_id` is defined this is used to select hosts. # Otherwise the hostname is used and this must be unique, which may not be the case e.g. if using identically-named staging and production hosts. 
# diff --git a/ansible/site.yml b/ansible/site.yml index 8adc8cf13..5d61be819 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -3,7 +3,7 @@ - ansible.builtin.import_playbook: safe-env.yml - name: Lock cluster instances - ansible.builtin.import_playbook: adhoc/lock_unlock_instances.yml + ansible.builtin.import_playbook: adhoc/lock-unlock-instances.yml - name: Run pre.yml hook vars: diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index d2ee4df95..6aab761ce 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -12,8 +12,8 @@ In summary, the way this functionality works is as follows: 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. -2. The `lock_unlock_instances.yml` playbook is run against control and login with - `unlock` to allow for reimaging the nodes. +2. The adhoc playbook `lock-unlock-instances.yml` is run limited to control and login + nodes, with `lock_unlock_action=unlock` to allow the nodes to be rebuilt. 3. `tofu apply` is run which rebuilds the login and control nodes to the new image(s). The new image reference for compute nodes is ignored, but is written into the hosts inventory file (and is therefore available as an @@ -27,10 +27,10 @@ In summary, the way this functionality works is as follows: - Configures an application credential and helper programs on the control node, using the [rebuild](../../ansible/roles/rebuild/README.md) role. 5. An admin submits Slurm jobs, one for each node, to a special "rebuild" - partition using an Ansible playbook. Because this partition has higher - priority than the partitions normal users can use, these rebuild jobs become - the next job in the queue for every node (although any jobs currently - running will complete as normal). + partition using the adhoc playbook `rebuild-via-slurm.yml`. Because this partition + has higher priority than the partitions normal users can use, these rebuild jobs + become the next job in the queue for every node (although any jobs currently running + will complete as normal). 6. Because these rebuild jobs have the `--reboot` flag set, before launching them the Slurm control node runs a [RebootProgram](https://slurm.schedmd.com/slurm.conf.html#OPT_RebootProgram) which compares the current image for the node to the one in the cluster diff --git a/docs/operations.md b/docs/operations.md index 525a3e01c..5b70fc46d 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -212,7 +212,9 @@ ansible-playbook ansible/adhoc/$PLAYBOOK Currently they include the following (see each playbook for links to documentation): - `hpctests.yml`: MPI-based cluster tests for latency, bandwidth and floating point performance. +- `lock-unlock-instances.yml`: Lock cluster instances for preventing tofu changes, or unlock to allow changes. - `rebuild.yml`: Rebuild nodes with existing or new images (NB: this is intended for development not for re-imaging nodes on an in-production cluster). +Requires `lock-unlock-instances.yml` be run first. - `restart-slurm.yml`: Restart all Slurm daemons in the correct order. - `update-packages.yml`: Update specified packages on cluster nodes (NB: not recommended for routine use). 
diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 6cc02ff59..622390bc1 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -5,7 +5,7 @@ appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only appliances_protected_environments: - - prd + - production appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it # appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure From f003db80fecd549b585485acb4eafe4862068e06 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 13 Nov 2025 14:18:32 +0000 Subject: [PATCH 16/16] prettier linting --- README.md | 4 ++-- docs/operations.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 093d70a08..27d9ef0a9 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,8 @@ To configure the appliance, ensure the venv and the environment are [activated]( ```shell ansible-playbook ansible/site.yml ``` -To prevent the cluster instances from being changed or `tofu destroy`ed, this playbook begins by locking the OpenStack instances. Any subsequent desired changes to the OpenTofu state require -running an unlocking playbook as detailed in the adhoc command section of [docs/operations.md](docs/operations.md) + +To prevent the cluster instances from being changed or `tofu destroy` running, this playbook begins by locking the OpenStack instances. Any subsequent desired changes to the OpenTofu state require running an unlocking playbook as detailed in the adhoc command section of [docs/operations.md](docs/operations.md). Once `site.yml` completes you can log in to the cluster using: diff --git a/docs/operations.md b/docs/operations.md index 5b70fc46d..0ae41d9a7 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -214,7 +214,7 @@ Currently they include the following (see each playbook for links to documentati - `hpctests.yml`: MPI-based cluster tests for latency, bandwidth and floating point performance. - `lock-unlock-instances.yml`: Lock cluster instances for preventing tofu changes, or unlock to allow changes. - `rebuild.yml`: Rebuild nodes with existing or new images (NB: this is intended for development not for re-imaging nodes on an in-production cluster). -Requires `lock-unlock-instances.yml` be run first. + Requires `lock-unlock-instances.yml` be run first. - `restart-slurm.yml`: Restart all Slurm daemons in the correct order. - `update-packages.yml`: Update specified packages on cluster nodes (NB: not recommended for routine use).
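
---

For reference, a minimal sketch of the operator workflow this series ends up with, assuming the venv and environment are activated as described in the README and using only the invocations documented in the patches above (nothing here is prescriptive; it simply strings the documented commands together):

# unlock control and login nodes so `tofu apply` can rebuild them to new images
ansible-playbook --limit login,control ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock"
tofu apply

# site.yml re-locks every host in the `cluster` group (lock_unlock_action defaults to `lock`,
# lock_unlock_hosts to `cluster`); for environments listed in appliances_protected_environments
# it first prompts for confirmation unless appliances_protected_environment_autoapprove=true
ansible-playbook ansible/site.yml

# compute nodes are unlocked automatically by the import at the top of rebuild-via-slurm.yml
ansible-playbook ansible/adhoc/rebuild-via-slurm.yml

# before `tofu destroy` or adhoc/rebuild.yml, unlock instances again (optionally with --limit)
ansible-playbook ansible/adhoc/lock-unlock-instances.yml -e "lock_unlock_action=unlock"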