From ba9699267449fba58cd9c04c451759a914fd7144 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:55:03 +0100 Subject: [PATCH 01/50] Delete environments/.caas/ansible.cfg (#766) --- environments/.caas/ansible.cfg | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 environments/.caas/ansible.cfg diff --git a/environments/.caas/ansible.cfg b/environments/.caas/ansible.cfg deleted file mode 100644 index 922f086..0000000 --- a/environments/.caas/ansible.cfg +++ /dev/null @@ -1,19 +0,0 @@ -[defaults] -any_errors_fatal = True -stdout_callback = debug -stderr_callback = debug -gathering = smart -forks = 30 -host_key_checking = False -inventory = ../common/inventory,inventory -collections_path = ../../ansible/collections -roles_path = ../../ansible/roles -filter_plugins = ../../ansible/filter_plugins - -[ssh_connection] -ssh_args = -o ControlMaster=auto ControlPath=~/.ssh/%r@%h-%p -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null -pipelining = True - -[inventory] -# Fail when any inventory source cannot be parsed. -any_unparsed_is_failed = True From 73f614abb51e5b277a9611759491aceae61afae3 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Fri, 29 Aug 2025 15:06:57 +0100 Subject: [PATCH 02/50] Add filesystems docs (#710) * Add filesystems docs * Apply suggestions from code review Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * Update Ceph instructions for Manila integrations * Update overview * Update docs/filesystems.md Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * Update image build instructions for Manila --------- Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> --- docs/filesystems.md | 88 +++++++++++++++++++ .../inventory/group_vars/all/manila.yml | 6 +- .../group_vars/all/os-manila-mount.yml | 3 - 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 docs/filesystems.md delete mode 100644 environments/common/inventory/group_vars/all/os-manila-mount.yml diff --git a/docs/filesystems.md b/docs/filesystems.md new file mode 100644 index 0000000..5509aef --- /dev/null +++ b/docs/filesystems.md @@ -0,0 +1,88 @@ +# Overview + +The Slurm appliance supports multiple ways of configuring shared filesystems, including: + +- Configuring the control node as an NFS server. (Default) + +- CephFS via Manila + +- Lustre + +# Manila + +The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/). This section explains: + +- How to create the shares in OpenStack Manila. + +- How to configure the Slurm Appliance to mount these Manila shares. + +- How to switch to a Manila share for a shared home directory. + +## Creating shares in OpenStack + +The Slurm appliance requires that the Manila shares already exist on the system. Follow the instructions below to do this. + +If this is the first time Manila is being used on the system, a CephFS share type will need to be created. You will need admin credentials to do this. + + ```bash + openstack share type create cephfs-type false --extra-specs storage_protocol=CEPHFS vendor_name=Ceph + ``` + +Once this exists, create a share using credentials for the Slurm project. An access rule also needs to be created, where the `access_to` argument (`openstack share access create `) is a user that will be created in Ceph. 
This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. Ideally, this share should include your environment name. In this example, the name is "production". + + ```bash + openstack share create CephFS 300 --description 'Scratch dir for Slurm prod' --name slurm-production-scratch --share-type cephfs-type --wait + openstack share access create slurm-production-scratch cephx slurm-production + ``` + +## Configuring the Slurm Appliance for Manila + +To mount shares onto hosts in a group, add them to the `manila` group. + + ```ini + # environments/site/inventory/groups: + [manila:children]: + login + compute + ``` + +If you are running a different version of Ceph from the defaults in the [os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount/blob/master/defaults/main.yml), you will need to update the package version by setting: + + ```yaml + # environments/site/inventory/group_vars/manila.yml: + os_manila_mount_ceph_version: "18.2.4" + ``` + +A [site-specific image](image-build.md) should be built which includes this package; add ``manila`` to the Packer ``inventory_groups`` variable. + +Define the list of shares to be mounted, and the paths to mount them to. The example below parameterises the share name using the environment name. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. + + ```yaml + # environments/site/inventory/group_vars/manila.yml: + os_manila_mount_shares: + - share_name: "slurm-{{ appliances_environment_name }}-scratch" + mount_path: /scratch + ``` + +### Shared home directory + +By default, the Slurm appliance configures the control node as an NFS server and exports a directory which is mounted on the other cluster nodes as `/home`. When using Manila + CephFS for the home directory instead, this will need to be disabled. To do this, set the tf var `home_volume_provisioning` to `None`. + +Some `basic_users_homedir_*` parameters need overriding as the provided defaults are only satisfactory for the default root-squashed NFS share: + + ```yaml + # environments/site/inventory/group_vars/all/basic_users.yml: + basic_users_homedir_server: "{{ groups['login'] | first }}" # if not mounting /home on control node + basic_users_homedir_server_path: /home + ``` + +Finally, add the home directory to the list of shares (the share should be already created in OpenStack). 
+ + ```yaml + # environments/site/inventory/group_vars/all/manila.yml: + os_manila_mount_shares: + - share_name: "slurm-{{ appliances_environment_name }}-scratch" + mount_path: /scratch + - share_name: "slurm-{{ appliances_environment_name }}-home" + mount_path: /home + ``` diff --git a/environments/common/inventory/group_vars/all/manila.yml b/environments/common/inventory/group_vars/all/manila.yml index baccd44..cb015f9 100644 --- a/environments/common/inventory/group_vars/all/manila.yml +++ b/environments/common/inventory/group_vars/all/manila.yml @@ -10,4 +10,8 @@ os_manila_mount_shares: [] # mount_group: # mount_mode: -# os_manila_mount_ceph_version: nautilus # role default for RockyLinux 8 +# os_manila_mount_ceph_version: + +# Empty repo lists from stackhpc.ansible-role-os-manila-mount role defaults, as these repofiles are +# now generated by dnf_repos to allow injecting Ark creds: +os_manila_mount_ceph_rpm_repos: [] diff --git a/environments/common/inventory/group_vars/all/os-manila-mount.yml b/environments/common/inventory/group_vars/all/os-manila-mount.yml deleted file mode 100644 index 6b25d62..0000000 --- a/environments/common/inventory/group_vars/all/os-manila-mount.yml +++ /dev/null @@ -1,3 +0,0 @@ -# Empty repo lists from stackhpc.ansible-role-os-manila-mount role defaults, as these repofiles are -# now generated by dnf_repos to allow injecting Ark creds: -os_manila_mount_ceph_rpm_repos: [] From cb4ca3c69d6627115ede3b0c9ae52c2a9eb3c00d Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Fri, 29 Aug 2025 15:28:07 +0100 Subject: [PATCH 03/50] CaaS pre-hook fix for galaxy requirements validation (#767) * pre-hook to copy requirements.yml.last * remove mention of CI in comments --- environments/.caas/hooks/pre.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/environments/.caas/hooks/pre.yml b/environments/.caas/hooks/pre.yml index 8c99e59..8924dca 100644 --- a/environments/.caas/hooks/pre.yml +++ b/environments/.caas/hooks/pre.yml @@ -63,3 +63,14 @@ - dnf_repos loop: "{{ groups['cluster'] }}" when: dnf_repos_enabled | default(false) | bool + +# Workaround for setup-env.sh not running in CaaS environment, so: +# https://github.com/stackhpc/ansible-slurm-appliance/blob/ba9699267449fba58cd9c04c451759a914fd7144/ansible/validate.yml#L16 +# doesn't break CaaS platforms +- hosts: localhost + gather_facts: no + tasks: + - name: Prepare requirements.yml.last for galaxy validation + copy: + src: "{{ appliances_repository_root }}/requirements.yml" + dest: "{{ appliances_repository_root }}/requirements.yml.last" From 21ef880bf9116fd88754cc7a5e380a4dcc41b2d4 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Fri, 29 Aug 2025 17:03:03 +0100 Subject: [PATCH 04/50] Production end to end deployment docs (#678) * First draft of production end-to-end docs * Ubuntu Jammy is also supported * Add TODOs * Accomplish TODOs * Mention networks docs * NFS * Clarify image * Formatting changes * Apply suggestions from code review Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * Suggestions from code review * Update docs/production.md Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * Add git remote instructions * Update cookiecutter info * Link filesystems docs * Move tofu into define and deploy infra section * Reorganise configuration * Move tofu note --------- Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> --- docs/production.md | 499 
+++++++++++++++++++++++++++++++++------------ 1 file changed, 370 insertions(+), 129 deletions(-) diff --git a/docs/production.md b/docs/production.md index bcf4925..8808a56 100644 --- a/docs/production.md +++ b/docs/production.md @@ -1,158 +1,399 @@ # Production Deployments -This page contains some brief notes about differences between the default/demo -configuration (as described in the main [README.md](../README.md)) and -production-ready deployments. - -- Get it agreed up front what the cluster names will be. Changing this later - requires instance deletion/recreation. - -- At least two environments should be created using cookiecutter, which will derive from the `site` base environment: - - `production`: production environment - - `staging`: staging environment - - A `dev` environment should also be created if considered required, or this - can be left until later. - - In general only the `inventory/groups` file in the `site` environment is needed - - it can be modified as required to - enable features for all environments at the site. - -- To avoid divergence of configuration all possible overrides for group/role -vars should be placed in `environments/site/inventory/group_vars/all/*.yml` -unless the value really is environment-specific (e.g. DNS names for -`openondemand_servername`). - -- Where possible hooks should also be placed in `environments/site/hooks/` -and referenced from the `site` and `production` environments, e.g.: - - ```yaml - # environments/production/hooks/pre.yml: - - name: Import parent hook - import_playbook: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/../site/hooks/pre.yml" - ``` - -- When setting OpenTofu configurations: - - - Environment-specific variables (`cluster_name`) should be hardcoded - as arguments into the cluster module block at `environments/$ENV/tofu/main.tf`. - - Environment-independent variables (e.g. maybe `cluster_net` if the - same is used for staging and production) should be set as *defaults* - in `environments/site/tofu/variables.tf`, and then don't need to - be passed in to the module. +This page will guide you on how to create production-ready deployments. While +you can start right away with this guide, you may find it useful to try with a +demo deployment first, as described in the [main README](../README.md). + +## Prerequisites + +Before starting ensure that: + + - You have root access on the deploy host. + + - You can create instances from the [latest Slurm appliance + image](https://github.com/stackhpc/ansible-slurm-appliance/releases), + which already contains the required packages. This is built and tested in + StackHPC's CI. + + - You have an SSH keypair defined in OpenStack, with the private key + available on the deploy host. + + - Created instances have access to internet (note proxies can be setup + through the appliance if necessary). + + - Created instances have accurate/synchronised time (for VM instances this is + usually provided by the hypervisor; if not or for bare metal instances it + may be necessary to configure a time service via the appliance). + + - Three security groups are present: ``default`` allowing intra-cluster + communication, ``SSH`` allowing external access via SSH and ``HTTPS`` + allowing access for Open OnDemand. + + - Usually, you'll want to deploy the Slurm Appliance into its own dedicated + project. 
It's recommended that your OpenStack credentials are defined in a + [clouds.yaml](https://docs.openstack.org/python-openstackclient/latest/configuration/index.html#clouds-yaml) + file in a default location with the default cloud name of `openstack`. + +### Setup deploy host + +The following operating systems are supported for the deploy host: + + - Rocky Linux 9 + + - Rocky Linux 8 + +These instructions assume the deployment host is running Rocky Linux 8: + +```bash +sudo yum install -y git python38 +git clone https://github.com/stackhpc/ansible-slurm-appliance +cd ansible-slurm-appliance +git checkout ${latest-release-tag} +./dev/setup-env.sh +``` + +You will also need to install +[OpenTofu](https://opentofu.org/docs/intro/install/rpm/). + +## Version control + +A production deployment should be set up under version control, so you should +create a fork of this repo. + +First make an empty Git repository using your service of choice (e.g. GitHub or +GitLab), then execute the following commands to turn the new empty repository +into a copy of the ansible-slurm-appliance repository. + + ```bash + git clone https://github.com/stackhpc/ansible-slurm-appliance.git + cd ansible-slurm-appliance + ``` + +Maintain the existing origin remote as upstream, and create a new origin remote +for the repository location. + + ```bash + git remote rename origin upstream + git remote add origin git@/ansible-slurm-appliance.git + ``` + +You should use the [latest tagged +release](https://github.com/stackhpc/ansible-slurm-appliance/releases). v1.161 +has been used as an example here, make sure to change this. Do not use the +default main branch, as this may have features that are still works in +progress. + + ```bash + git checkout v1.161 + git checkout -b site/main + git push -u origin site/main + ``` + +## Environment setup + +Get it agreed up front what the cluster names will be. Changing this later +requires instance deletion/recreation. + +### Environments structure + +At least two environments should be created using cookiecutter, which will +derive from the `site` base environment: + - `production`: production environment + - `staging`: staging environment + +A `dev` environment should also be created if considered required, or this can +be left until later. + +In general only the `inventory/groups` file in the `site` environment is +needed; it can be modified as required to enable features for all environments +at the site. + +To ensure the `staging` environment provides a good test of the `production` +environment, wherever possible group/role vars should be placed in +`environments/site/inventory/group_vars/all/*.yml` unless the value really is +environment-specific (e.g. DNS names for `openondemand_servername`). + +Where possible hooks should also be placed in `environments/site/hooks/` +and referenced from the `production` and `staging` environments, e.g.: + + ```yaml + # environments/production/hooks/pre.yml: + - name: Import parent hook + import_playbook: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/../site/hooks/pre.yml" + ``` + +OpenTofu configurations are defined in the `site` environment and referenced +as a module by the site-specific cookie-cutter generated configurations. This +will have been generated for you already under +``environments/$ENV/tofu/main.tf``. + +### Cookiecutter instructions + +- Run the following from the repository root to activate the venv: + + ```bash + . 
venv/bin/activate + ``` + +- Use the `cookiecutter` template to create a new environment to hold your + configuration: + + ```bash + cd environments + cookiecutter ../cookiecutter + ``` + + and follow the prompts to complete the environment name and description. + + **NB:** In subsequent sections this new environment is referred to as `$ENV`. + +- Go back to the root folder and activate the new environment: + + ```bash + cd .. + . environments/$ENV/activate + ``` + + And generate secrets for it: + + ```bash + ansible-playbook ansible/adhoc/generate-passwords.yml + ``` + +## Define and deploy infrastructure + +Create an OpenTofu variables file to define the required infrastructure, e.g.: + + ``` + # environments/$ENV/tofu/terraform.tfvars + cluster_name = "mycluster" + cluster_networks = [ + { + network = "some_network" # * + subnet = "some_subnet" # * + } + ] + key_pair = "my_key" # * + control_node_flavor = "some_flavor_name" + login = { + # Arbitrary group name for these login nodes + interactive = { + nodes: ["login-0"] + flavor: "login_flavor_name" # * + } + } + cluster_image_id = "rocky_linux_9_image_uuid" + compute = { + # Group name used for compute node partition definition + general = { + nodes: ["compute-0", "compute-1"] + flavor: "compute_flavor_name" # * + } + } + ``` + +Variables marked `*` refer to OpenStack resources which must already exist. + +The above is a minimal configuration - for all variables and descriptions see +`environments/site/tofu/variables.tf`. + +Note that: + + - Environment-specific variables (`cluster_name`) should be hardcoded into + the cluster module block. + + - Environment-independent variables (e.g. maybe `cluster_net` if the same + is used for staging and production) should be set as *defaults* in + `environments/site/tofu/variables.tf`, and then don't need to be passed + in to the module. + +The cluster image used should match the release which you are deploying with. +Published images are described in the release notes +[here](https://github.com/stackhpc/ansible-slurm-appliance/releases). + +By default, the site OpenTofu configuration provisions two volumes and attaches +them to the control node: + - "$cluster_name-home" for NFS-shared home directories + - "$cluster_name-state" for monitoring and Slurm data +The volumes mean this data is persisted when the control node is rebuilt. +However if the cluster is destroyed with `tofu destroy` then the volumes will +also be deleted. This is undesirable for production environments and usually +also for staging environments. Therefore the volumes should be manually +created, e.g. via the CLI: + + ``` + openstack volume create --size 200 mycluster-home # size in GB + openstack volume create --size 100 mycluster-state + ``` + +and OpenTofu configured to use those volumes instead of managing them itself by +setting: + + ``` + home_volume_provisioning = "attach" + state_volume_provisioning = "attach" + ``` + +either for a specific environment within the cluster module block in +`environments/$ENV/tofu/main.tf`, or as the site default by changing the +default in `environments/site/tofu/variables.tf`. + +For a development environment allowing OpenTofu to manage the volumes using the +default value of `"manage"` for those varibles is usually appropriate, as it +allows for multiple clusters to be created with this environment. + +If no home volume at all is required because the home directories are provided +by a parallel filesystem (e.g. 
Manila) set + + ``` + home_volume_provisioning = "none" + ``` + +In this case the NFS share for home directories is automatically disabled. + +**NB:** To apply "attach" options to existing clusters, first remove the +volume(s) from the tofu state, e.g.: + + ``` + tofu state list # find the volume(s) + tofu state rm 'module.cluster.openstack_blockstorage_volume_v3.state[0]' + ``` + +This leaves the volume itself intact, but means OpenTofu "forgets" it. Then set +the "attach" options and run `tofu apply` again - this should show there are no +changes planned. + +A production deployment may have a more complex networking requirements than +just a simple network. See the [networks docs](networks.md) for details. + +If floating IPs are required for login nodes, create these in OpenStack and add +the IPs into the OpenTofu `login` definition. + +Consider enabling topology aware scheduling. This is currently only supported +if your cluster does not include any baremetal nodes. This can be enabled by: + 1. Creating Availability Zones in your OpenStack project for each physical + rack + 2. Setting the `availability_zone` fields of compute groups in your OpenTofu + configuration + 3. Adding the `compute` group as a child of `topology` in + `environments/$ENV/inventory/groups` + 4. (Optional) If you are aware of the physical topology of switches above the + rack-level, override `topology_above_rack_topology` in your groups vars + (see [topology docs](../ansible/roles/topology/README.md) for more detail) + +Consider whether mapping of baremetal nodes to ironic nodes is required. See +[PR 485](https://github.com/stackhpc/ansible-slurm-appliance/pull/485). + +To deploy this infrastructure, ensure the venv and the environment are +[activated](#cookiecutter-instructions) and run: + + ```bash + export OS_CLOUD=openstack + cd environments/$ENV/tofu/ + tofu init + tofu apply + ``` + +and follow the prompts. Note the OS_CLOUD environment variable assumes that +OpenStack credentials are defined using a +[clouds.yaml](https://docs.openstack.org/python-openstackclient/latest/configuration/index.html#clouds-yaml) +file in a default location with the default cloud name of `openstack`. + +By default, OpenTofu (and Terraform) +[limits](https://opentofu.org/docs/cli/commands/apply/#apply-options) the +number of concurrent operations to 10. This means that for example only 10 +ports or 10 instances can be deployed at once. This should be raised by +modifying `environments/$ENV/activate` to add a line like: + + ```bash + export TF_CLI_ARGS_apply="-parallelism=25" + ``` + +The value chosen should be the highest value demonstrated during testing. Note +that any time spent blocked due to this parallelism limit does not count +against the (un-overridable) internal OpenTofu timeout of 30 minutes + +## Configure appliance + +### Production configuration to consider - Vault-encrypt secrets. Running the `generate-passwords.yml` playbook creates a secrets file at `environments/$ENV/inventory/group_vars/all/secrets.yml`. - To ensure staging environments are a good model for production this should - generally be moved into the `site` environment. It should be encrypted - using [Ansible vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html) - and then committed to the repository. + These should be created for each environment, and then be encrypted using + [Ansible vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html) + and committed to the repository. 
- Ensure created instances have accurate/synchronised time. For VM instances this is usually provided by the hypervisor, but if not (or for bare metal - instances) it may be necessary to configure or proxy `chronyd` via an - environment hook. - -- By default, the site OpenTofu configuration provisions two - volumes and attaches them to the control node: - - "$cluster_name-home" for NFS-shared home directories - - "$cluster_name-state" for monitoring and Slurm data - The volumes mean this data is persisted when the control node is rebuilt. - However if the cluster is destroyed with `tofu destroy` then the volumes will - also be deleted. This is undesirable for production environments and usually - also for staging environments. Therefore the volumes should be manually - created, e.g. via the CLI: - - openstack volume create --size 200 mycluster-home # size in GB - openstack volume create --size 100 mycluster-state - - and OpenTofu configured to use those volumes instead of managing them itself - by setting: - - home_volume_provisioning = "attach" - state_volume_provisioning = "attach" - - either for a specific environment within the cluster module block in - `environments/$ENV/tofu/main.tf`, or as the site default by changing the - default in `environments/site/tofu/variables.tf`. - - For a development environment allowing OpenTofu to manage the volumes using - the default value of `"manage"` for those varibles is usually appropriate, as - it allows for multiple clusters to be created with this environment. - - If no home volume at all is required because the home directories are provided - by a parallel filesystem (e.g. manila) set - - home_volume_provisioning = "none" - - In this case the NFS share for home directories is automatically disabled. - - **NB:** To apply "attach" options to existing clusters, first remove the - volume(s) from the tofu state, e.g.: - - tofu state list # find the volume(s) - tofu state rm 'module.cluster.openstack_blockstorage_volume_v3.state[0]' - - This leaves the volume itself intact, but means OpenTofu "forgets" it. Then - set the "attach" options and run `tofu apply` again - this should show there - are no changes planned. + instances) it may be necessary to [configure chrony](./chrony.md). - Consider whether Prometheus storage configuration is required. By default: - A 200GB state volume is provisioned (but see above) - - The common environment [sets](../environments/common/inventory/group_vars/all/prometheus.yml) - a maximum retention of 100 GB and 31 days + - The common environment + [sets](../environments/common/inventory/group_vars/all/prometheus.yml) a + maximum retention of 100 GB and 31 days. These may or may not be appropriate depending on the number of nodes, the scrape interval, and other uses of the state volume (primarily the `slurmctld` - state and the `slurmdbd` database). See [docs/monitoring-and-logging](./monitoring-and-logging.md) - for more options. + state and the `slurmdbd` database). See + [docs/monitoring-and-logging](./monitoring-and-logging.md) for more options. - Configure Open OnDemand - see [specific documentation](openondemand.md) which notes specific variables required. -- Remove the `demo_user` user from `environments/$ENV/inventory/group_vars/all/basic_users.yml`. - Replace the `hpctests_user` in `environments/$ENV/inventory/group_vars/all/hpctests.yml` with - an appropriately configured user. +- Remove the `demo_user` user from + `environments/$ENV/inventory/group_vars/all/basic_users.yml`. 
Replace the + `hpctests_user` in `environments/$ENV/inventory/group_vars/all/hpctests.yml` + with an appropriately configured user. -- Consider whether having (read-only) access to Grafana without login is OK. If not, remove `grafana_auth_anonymous` in `environments/$ENV/inventory/group_vars/all/grafana.yml` +- Consider whether having (read-only) access to Grafana without login is OK. If + not, remove `grafana_auth_anonymous` in + `environments/$ENV/inventory/group_vars/all/grafana.yml` -- If floating IPs are required for login nodes, create these in OpenStack and add the IPs into - the OpenTofu `login` definition. +- See the [hpctests docs](../ansible/roles/hpctests/README.md) for advice on + raising `hpctests_hpl_mem_frac` during tests. -- Consider enabling topology aware scheduling. This is currently only supported if your cluster does not include any baremetal nodes. This can be enabled by: - 1. Creating Availability Zones in your OpenStack project for each physical rack - 2. Setting the `availability_zone` fields of compute groups in your OpenTofu configuration - 3. Adding the `compute` group as a child of `topology` in `environments/$ENV/inventory/groups` - 4. (Optional) If you are aware of the physical topology of switches above the rack-level, override `topology_above_rack_topology` in your groups vars - (see [topology docs](../ansible/roles/topology/README.md) for more detail) +- By default, OpenStack Nova + [limits](https://docs.openstack.org/nova/latest/configuration/config.html#DEFAULT.max_concurrent_builds) + the number of concurrent instance builds to 10. This is per Nova controller, + so 10x virtual machines per hypervisor. For baremetal nodes it is 10 per + cloud if the OpenStack version is earlier than Caracel, else this limit can + be raised using + [shards](https://specs.openstack.org/openstack/nova-specs/specs/2024.1/implemented/ironic-shards.html). + In general it should be possible to raise this value to 50-100 if the cloud + is properly tuned, again, demonstrated through testing. -- Consider whether mapping of baremetal nodes to ironic nodes is required. See - [PR 485](https://github.com/stackhpc/ansible-slurm-appliance/pull/485). +- Enable alertmanager if Slack is available - see + [docs/alerting.md](./alerting.md). -- Note [PR 473](https://github.com/stackhpc/ansible-slurm-appliance/pull/473) - may help identify any site-specific configuration. +- Enable node health checks - see + [ansible/roles/nhc/README.md](../ansible/roles/nhc/README.md). -- See the [hpctests docs](../ansible/roles/hpctests/README.md) for advice on - raising `hpctests_hpl_mem_frac` during tests. +- By default, the appliance uses a built-in NFS share backed by an OpenStack + volume for the cluster home directories. You may find that you want to change + this. The following alternatives are supported: -- By default, OpenTofu (and Terraform) [limits](https://opentofu.org/docs/cli/commands/apply/#apply-options) - the number of concurrent operations to 10. This means that for example only - 10 ports or 10 instances can be deployed at once. This should be raised by - modifying `environments/$ENV/activate` to add a line like: + - [CephFS via OpenStack Manila](./filesystems.md) + - [Lustre](../roles/lustre/README.md) - export TF_CLI_ARGS_apply="-parallelism=25" +- For some features, such as installing [DOCA-OFED](../roles/doca/README.md) or + [CUDA](../roles/cuda/README.md), you will need to build a custom image. It is + recommended that you build this on top of the latest existing openhpc image. 
+ See the [image-build docs](image-build.md) for details. - The value chosen should be the highest value demonstrated during testing. - Note that any time spent blocked due to this parallelism limit does not count - against the (un-overridable) internal OpenTofu timeout of 30 minutes +### Applying configuration -- By default, OpenStack Nova also [limits](https://docs.openstack.org/nova/latest/configuration/config.html#DEFAULT.max_concurrent_builds) - the number of concurrent instance builds to 10. This is per Nova controller, - so 10x virtual machines per hypervisor. For baremetal nodes it is 10 per cloud - if the OpenStack version is earlier than Caracel, else this limit can be - raised using [shards](https://specs.openstack.org/openstack/nova-specs/specs/2024.1/implemented/ironic-shards.html). - In general it should be possible to raise this value to 50-100 if the cloud - is properly tuned, again, demonstrated through testing. +To configure the appliance, ensure the venv and the environment are +[activated](#create-a-new-environment) and run: + + ```bash + ansible-playbook ansible/site.yml + ``` + +Once it completes you can log in to the cluster using: -- Enable alertmanager if Slack is available - see [docs/alerting.md](./alerting.md). + ```bash + ./dev/ansible-ssh login + ``` -- Enable node health checks - see [ansible/roles/nhc/README.md](../ansible/roles/nhc/README.md). +For further information, including additional configuration guides and +operations instructions, see the [docs](README.md) directory. From cbf990a8118c882af9429b3edfa290387ac45d28 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Thu, 4 Sep 2025 10:07:42 +0100 Subject: [PATCH 05/50] Fix inventory parsing of cookiecutter env (#768) Without any top-level inventory file, Ansible will fail with: ``` ERROR! 
Completely failed to parse inventory source /home/ubuntu/ansible-slurm-appliance/environments/$ENV/inventory ``` --- cookiecutter/{{cookiecutter.environment}}/inventory/hosts | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cookiecutter/{{cookiecutter.environment}}/inventory/hosts diff --git a/cookiecutter/{{cookiecutter.environment}}/inventory/hosts b/cookiecutter/{{cookiecutter.environment}}/inventory/hosts new file mode 100644 index 0000000..e69de29 From 275da838fa3727cfd745d733573d04527df70198 Mon Sep 17 00:00:00 2001 From: wtripp180901 <78219569+wtripp180901@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:47:32 +0100 Subject: [PATCH 06/50] Refactor Pulp repo definitions and add more Pulp documentation (#760) * WIP: refactor repos definitions * add more repos and cope with CRB/PowerTools oddness * add epel * use pulp_server as a group * add epel default * wip: get pulp sync working * fixed sync * autodetect latest in adhoc script, refactored timestamps to allow gated ohpc repos, fixed pulp site * fixed distributions + ohpc repos * updated timestamps script + bumped rocky 9 timestamps * removed pulp_repo_name fields * updated docs, added gpg checks, simplified filters * Added pulp systemd file + removed unused vars * added READMEs + updated variable names * disabled gpg checks for dnf_repos * typo * fixed disable repos task * bump images * remove dnf_repos extra index/key and make epel/openhpc special-cases simpler * clarify pulp distro selection * fixup sync vars * fixup grafana vars * revert latest timestamp changes for extra key level * review suggestions Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * updated README * docs tweaks * regularised group names * updated operations guide for functionality requiring additional installs * review changes from docs Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> * renamed timestamps.yml to dnf_repos_timestamps.yml --------- Co-authored-by: Steve Brasier Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> --- ansible/adhoc/deploy-pulp.yml | 17 +-- ansible/adhoc/sync-pulp.yml | 4 +- ansible/ci/update_timestamps.yml | 6 +- ansible/fatimage.yml | 2 +- ansible/filter_plugins/utils.py | 6 - ansible/library/latest_timestamps.py | 5 +- ansible/roles/dnf_repos/README.md | 40 ++++++ ansible/roles/dnf_repos/defaults/main.yml | 52 +------- .../roles/dnf_repos/tasks/disable_repos.yml | 25 ++-- ansible/roles/dnf_repos/tasks/set_repos.yml | 43 +++++-- ansible/roles/pulp_site/README.md | 36 ++++++ ansible/roles/pulp_site/defaults/main.yml | 47 +++---- ansible/roles/pulp_site/files/pulp.service | 12 ++ .../filter_plugins/pulp-list-filters.py | 68 +++++++--- ansible/roles/pulp_site/tasks/install.yml | 22 +++- ansible/roles/pulp_site/tasks/sync.yml | 22 ++-- ansible/roles/pulp_site/templates/cli.toml.j2 | 1 - docs/environments.md | 5 +- docs/experimental/pulp.md | 42 ++++++- docs/operations.md | 20 ++- .../tofu/cluster_image.auto.tfvars.json | 4 +- .../common/files/grafana/grafana.repo.j2 | 2 +- .../group_vars/all/dnf_repo_timestamps.yml | 116 ++++++++++++++++++ .../inventory/group_vars/all/dnf_repos.yml | 8 ++ .../inventory/group_vars/all/timestamps.yml | 88 ------------- environments/common/inventory/groups | 11 +- environments/site/inventory/groups | 9 ++ 27 files changed, 445 insertions(+), 268 deletions(-) create mode 100644 ansible/roles/dnf_repos/README.md create mode 100644 ansible/roles/pulp_site/README.md create mode 100644 
ansible/roles/pulp_site/files/pulp.service create mode 100644 environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml create mode 100644 environments/common/inventory/group_vars/all/dnf_repos.yml delete mode 100644 environments/common/inventory/group_vars/all/timestamps.yml diff --git a/ansible/adhoc/deploy-pulp.yml b/ansible/adhoc/deploy-pulp.yml index 2858d03..f7bafc3 100644 --- a/ansible/adhoc/deploy-pulp.yml +++ b/ansible/adhoc/deploy-pulp.yml @@ -1,15 +1,6 @@ -# Usage: ansible-playbook ansible/adhoc/deploy-pulp.yml -e "pulp_server=" - -- name: Add temporary pulp server host - hosts: localhost - tasks: - - ansible.builtin.add_host: - name: "{{ pulp_server }}" - group: "_pulp_host" - -- name: Install pulp on server and add to config +- name: Install pulp on server become: yes - hosts: _pulp_host + hosts: pulp_server tasks: - name: Install pulp ansible.builtin.include_role: @@ -22,5 +13,5 @@ debug: msg: | Server configured, override 'appliances_pulp_url' with - appliances_pulp_url: "http://{{ pulp_server }}:{{ pulp_site_port }}" - in your environments + appliances_pulp_url: "http://{{ hostvars[groups['pulp_server'] | first].ansible_host }}:{{ pulp_site_port }}" + (or the correct IP if multi-homed) in your environments diff --git a/ansible/adhoc/sync-pulp.yml b/ansible/adhoc/sync-pulp.yml index b2cd9a8..373f3ab 100644 --- a/ansible/adhoc/sync-pulp.yml +++ b/ansible/adhoc/sync-pulp.yml @@ -6,5 +6,5 @@ vars: pulp_site_target_arch: "x86_64" pulp_site_target_distribution: "rocky" - pulp_site_target_distribution_version: "9.5" - pulp_site_target_distribution_version_major: "9" + # default distribution to *latest* specified for baseos repo: + pulp_site_target_distribution_version: "{{ dnf_repos_repos['baseos'].keys() | map('float') | sort | last }}" diff --git a/ansible/ci/update_timestamps.yml b/ansible/ci/update_timestamps.yml index e9a455a..8db4757 100644 --- a/ansible/ci/update_timestamps.yml +++ b/ansible/ci/update_timestamps.yml @@ -2,15 +2,15 @@ tasks: - name: Get latest timestamps from sources latest_timestamps: - repos_dict: "{{ appliances_pulp_repos }}" + repos_dict: "{{ dnf_repos_default }}" content_url: "https://ark.stackhpc.com/pulp/content" register: _result - name: Overwrite repo timestamps with latest ansible.builtin.copy: - dest: "{{ appliances_repository_root }}/environments/common/inventory/group_vars/all/timestamps.yml" + dest: "{{ appliances_repository_root }}/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml" content: "{{ repo_template | to_nice_yaml(indent=2) }}" backup: true vars: repo_template: - appliances_pulp_repos: "{{ _result.timestamps }}" + dnf_repos_default: "{{ _result.timestamps }}" diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 839c8dc..46a99bc 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -18,7 +18,7 @@ when: hook_path | exists - name: Sync pulp repos with upstream - hosts: pulp + hosts: pulp_site tasks: - ansible.builtin.include_role: name: pulp_site diff --git a/ansible/filter_plugins/utils.py b/ansible/filter_plugins/utils.py index b5b92ed..42b7107 100644 --- a/ansible/filter_plugins/utils.py +++ b/ansible/filter_plugins/utils.py @@ -61,11 +61,6 @@ def to_ood_regex(items): r = ['(%s)' % v for v in r] return '|'.join(r) -def appliances_repo_to_subpath(repo_entry): - """ Take an element from appliances_pulp_repos and convert it to a pulp path. 
This assumes that the remote and local pulp structures are the same - """ - return repo_entry['path'] + '/' + repo_entry['timestamp'] - class FilterModule(object): ''' Ansible core jinja2 filters ''' @@ -81,5 +76,4 @@ def filters(self): 'exists': exists, 'warn': self.warn, 'to_ood_regex': to_ood_regex, - 'appliances_repo_to_subpath': appliances_repo_to_subpath } diff --git a/ansible/library/latest_timestamps.py b/ansible/library/latest_timestamps.py index 6407ef0..0de3883 100644 --- a/ansible/library/latest_timestamps.py +++ b/ansible/library/latest_timestamps.py @@ -56,13 +56,12 @@ def run_module(): for version in timestamps[repo]: html_txt = requests.get( - url= module.params['content_url'] + '/' + timestamps[repo][version]['path'] + url= module.params['content_url'] + '/' + timestamps[repo][version]['pulp_path'] ).text timestamp_link_list = BeautifulSoup(html_txt,features="html.parser").body.find('pre').find_all() # getting raw list of timestamps from html timestamp_link_list = map(lambda x: x.string,timestamp_link_list) # stripping xml tags latest_timestamp = list(timestamp_link_list)[-1][:-1] # last timestamp in list with trailing / removed - timestamps[repo][version]['timestamp'] = latest_timestamp - + timestamps[repo][version]['pulp_timestamp'] = latest_timestamp result['timestamps'] = dict(sorted(timestamps.items())) module.exit_json(**result) diff --git a/ansible/roles/dnf_repos/README.md b/ansible/roles/dnf_repos/README.md new file mode 100644 index 0000000..ff22c79 --- /dev/null +++ b/ansible/roles/dnf_repos/README.md @@ -0,0 +1,40 @@ +dnf_repos +========= + +Modifies repo definitions for repofiles in `/etc/yum.repos.d` to point to snapshots in StackHPC's Ark Pulp server or mirrors of them +on a local Pulp server. + +Requirements +------------ + +Requires Ark credentials if using StackHPC's upstream Ark server. + +Role Variables +-------------- + +Variables in this role are also required by `pulp_site` so set in +`environments/common/inventory/groups_vars/all/dnf_repos.yml`. See that file for detailed default values. + +- `dnf_repos_repos`: Dict of dicts containing information to construct URLs for Ark snapshots from the target Pulp server for each Rocky version. For example: + ``` + dnf_repos_repos: + appstream: # ansible.builtin.yum_repository:name + '8.10': # ansible_distribution_version or ansible_distribution_major_version + repo_file: Rocky-AppStream # yum_repository: file + # repo_name: # optional, override yum_repository:name + pulp_path: rocky/8.10/AppStream/x86_64/os # The subpath of the the upstream Ark server's content endpoint URL for the repo's snapshots, see https://ark.stackhpc.com/pulp/content/ + pulp_timestamp: 20250614T013846 + # pulp_content_url: # optional, dnf_repos_pulp_content_url + '9.6': + ... + ``` +- `dnf_repos_default`: Appliance default repos to use Ark snapshots for. Following same format as `dnf_repos_repos`. + See for appliance default repo list `environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml`. +- `dnf_repos_extra`: Additional repos to use Ark snapshots for. Follows same format as + `dnf_repos_repos`. Defaults to `{}` +- `dnf_repos_pulp_content_url`: Optional str. Content URL of Pulp server to use Ark snapshots from. + Defaults to `{{ appliances_pulp_url }}/pulp/content` +- `dnf_repos_username`: Optional str. Username for Ark. Should be set if using upstream StackHPC Ark + Pulp server, but omitted if using local Pulp server (see `ansible/roles/pulp_site`) +- `dnf_repos_password`: Optional str. Password for Ark. 
Should be set if using upstream StackHPC Ark + Pulp server, but omitted if using local Pulp server (see `ansible/roles/pulp_site`) diff --git a/ansible/roles/dnf_repos/defaults/main.yml b/ansible/roles/dnf_repos/defaults/main.yml index 9302eff..fe3c44e 100644 --- a/ansible/roles/dnf_repos/defaults/main.yml +++ b/ansible/roles/dnf_repos/defaults/main.yml @@ -1,54 +1,4 @@ +dnf_repos_repos: {} # see environments/common/inventory/group_vars/all/{dnf_repos,timestamps}.yml dnf_repos_pulp_content_url: "{{ appliances_pulp_url }}/pulp/content" dnf_repos_username: "{{ omit }}" dnf_repos_password: "{{ omit }}" - -dnf_repos_filenames: - '8': - baseos: 'Rocky-BaseOS' - appstream: 'Rocky-AppStream' - crb: 'Rocky-PowerTools' - extras: 'Rocky-Extras' - grafana: 'grafana' - '9': - baseos: 'rocky' - appstream: 'rocky' - crb: 'rocky' - extras: 'rocky-extras' - grafana: 'grafana' - -dnf_repos_version_filenames: "{{ dnf_repos_filenames[ansible_distribution_major_version] }}" - -# epel installed separately -dnf_repos_default_repolist: -- file: "{{ dnf_repos_version_filenames.baseos }}" - name: baseos - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.baseos[ansible_distribution_version] | appliances_repo_to_subpath }}" -- file: "{{ dnf_repos_version_filenames.appstream }}" - name: appstream - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.appstream[ansible_distribution_version] | appliances_repo_to_subpath }}" -- file: "{{ dnf_repos_version_filenames.crb }}" - name: "{{ 'powertools' if ansible_distribution_major_version == '8' else 'crb' }}" - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.crb[ansible_distribution_version] | appliances_repo_to_subpath }}" -- file: "{{ dnf_repos_version_filenames.extras }}" - name: extras - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.extras[ansible_distribution_version] | appliances_repo_to_subpath }}" -- file: ceph - name: Ceph - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.ceph[ansible_distribution_major_version] | appliances_repo_to_subpath }}" -- file: "{{ dnf_repos_version_filenames.grafana }}" - name: grafana - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.grafana[ansible_distribution_major_version] | appliances_repo_to_subpath }}" - -dnf_repos_openhpc_repolist: -- name: OpenHPC - file: OpenHPC - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.openhpc_base[ansible_distribution_major_version] | appliances_repo_to_subpath }}" -- name: OpenHPC-updates - file: OpenHPC - base_url: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.openhpc_updates[ansible_distribution_major_version] | appliances_repo_to_subpath }}" - -dnf_repos_extra_repolist: [] -dnf_repos_repolist: "{{ dnf_repos_default_repolist + (dnf_repos_openhpc_repolist if (openhpc_install_type | default('ohpc')) == 'ohpc' else []) + dnf_repos_extra_repolist }}" - -dnf_repos_epel_baseurl: "{{ dnf_repos_pulp_content_url }}/{{ appliances_pulp_repos.epel[ansible_distribution_major_version] | appliances_repo_to_subpath }}" -dnf_repos_epel_description: "epel" diff --git a/ansible/roles/dnf_repos/tasks/disable_repos.yml b/ansible/roles/dnf_repos/tasks/disable_repos.yml index 9f8abe6..4db073b 100644 --- a/ansible/roles/dnf_repos/tasks/disable_repos.yml +++ b/ansible/roles/dnf_repos/tasks/disable_repos.yml @@ -1,21 +1,20 @@ --- - name: Remove password and disable Pulp repos ansible.builtin.yum_repository: - file: "{{ item.file }}" - name: "{{ item.name }}" - baseurl: 
"{{ item.base_url }}" - description: "{{ item.name }}" + file: "{{ repo_values.repo_file }}" + name: "{{ repo_name }}" + baseurl: "{{ repo_content_url }}/{{ repo_values.pulp_path }}/{{ repo_values.pulp_timestamp }}" + description: "{{ repo_name }}" enabled: false - loop: "{{ dnf_repos_repolist }}" - -- name: Remove password and disable EPEL repo - ansible.builtin.yum_repository: - name: epel - file: epel - description: "{{ dnf_repos_epel_description }}" - baseurl: "{{ dnf_repos_epel_baseurl }}" gpgcheck: false - enabled: false + loop: "{{ dnf_repos_repos | dict2items }}" + loop_control: + label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}" + vars: + repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}" + repo_values: "{{ item.value[repo_os] }}" + repo_name: "{{ repo_values.repo_name | default(item.key) }}" + repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}" - name: Get all repo files ansible.builtin.find: diff --git a/ansible/roles/dnf_repos/tasks/set_repos.yml b/ansible/roles/dnf_repos/tasks/set_repos.yml index c9fcb0c..2db4de9 100644 --- a/ansible/roles/dnf_repos/tasks/set_repos.yml +++ b/ansible/roles/dnf_repos/tasks/set_repos.yml @@ -1,27 +1,44 @@ --- -- name: Replace system repos with Pulp repos +- name: Replace non-epel repos with Pulp repos ansible.builtin.yum_repository: - file: "{{ item.file }}" - name: "{{ item.name }}" - baseurl: "{{ item.base_url }}" - description: "{{ item.name }}" + file: "{{ repo_values.repo_file }}" + name: "{{ repo_name }}" + baseurl: "{{ repo_content_url }}/{{ repo_values.pulp_path }}/{{ repo_values.pulp_timestamp }}" + description: "{{ repo_name }}" username: "{{ dnf_repos_username }}" password: "{{ dnf_repos_password }}" gpgcheck: false - loop: "{{ dnf_repos_repolist }}" + loop: "{{ dnf_repos_repos | dict2items }}" + loop_control: + label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}" + when: repo_name != 'epel' + vars: + repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}" + repo_values: "{{ item.value[repo_os] }}" + repo_name: "{{ repo_values.repo_name | default(item.key) }}" + repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}" - name: Install epel-release - # done so that roles installing epel via epel-release don't over-write our changes to the epel repo + # So roles installing epel via epel-release don't overwrite changes to the epel repo below ansible.builtin.dnf: name: epel-release -- name: Use Pulp EPEL repo +- name: Replace epel repo with Pulp repo ansible.builtin.yum_repository: - name: epel - file: epel - description: "{{ dnf_repos_epel_description }}" - gpgcheck: false - baseurl: "{{ dnf_repos_epel_baseurl }}" + file: "{{ repo_values.repo_file }}" + name: "{{ repo_name }}" + baseurl: "{{ repo_content_url }}/{{ repo_values.pulp_path }}/{{ repo_values.pulp_timestamp }}" + description: "{{ repo_name }}" username: "{{ dnf_repos_username }}" password: "{{ dnf_repos_password }}" + gpgcheck: false + loop: "{{ dnf_repos_repos | dict2items }}" + loop_control: + label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}" + when: repo_name == 'epel' + vars: + repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}" + repo_values: "{{ item.value[repo_os] }}" + repo_name: "{{ repo_values.repo_name | default(item.key) }}" + 
repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}" diff --git a/ansible/roles/pulp_site/README.md b/ansible/roles/pulp_site/README.md new file mode 100644 index 0000000..3af801c --- /dev/null +++ b/ansible/roles/pulp_site/README.md @@ -0,0 +1,36 @@ +pulp_site +========= + +Contains playbooks to deploy a Pulp server and sync its content with repo snapshots in +StackHPC's Ark Pulp server + +Requirements +------------ + +Requires Ark credentials. The VM you are deploying Pulp on must allow ingress on `pulp_site_port` +and not be externally accessible (as the Pulp server's content is unauthenticated). Rocky Linux 9 has been +tested as the target VM for deploying Pulp. + +Role Variables +-------------- + +- `pulp_site_url`: Required str. The base url from which Pulp content will be hosted. Defaults to `{{ appliances_pulp_url }}`. + Value to set for ``appliances_pulp_url` will be generated and output by the deploy.yml playbook. +- `pulp_site_port`: Optional str. Port to serve Pulp server on. Defaults to `8080`. +- `pulp_site_username`: Optional str. Admin username for the Pulp server. Defaults to `admin`. +- `pulp_site_password`: Required str. Admin password for the Pulp server. Defaults to `{{ vault_pulp_admin_password }}`. +- `pulp_site_upstream_username`: Required str. Username for accessing content from the upstream Ark Pulp server. +- `pulp_site_upstream_password`: Required str. Password for upstream Ark Pulp server. +- `pulp_site_upstream_content_url`: Optional str. Content URL of upstream Ark Pulp. Defaults to `https://ark.stackhpc.com/pulp/content`. +- `pulp_site_install_dir`: Optional str. Directory on Pulp host to install config and persistent state to be mounted into Pulp container. Defaults to `/home/rocky/pulp`. +- `pulp_site_target_facts`: Optional str. The `ansible_facts` of a host which will be pulling from your Pulp server, allowing the role to auto-discover the necessary repos to pull. + defaults to `{{ hostvars[groups['pulp'][0]]['ansible_facts'] }}`. +- `pulp_site_target_distribution_version`: Optional str. The Rocky Linux minor release to sync repos from Ark for. Defaults to `{{ pulp_site_target_facts['distribution_version'] }}`. +- `pulp_site_rpm_repo_defaults`: Optional dict. Contains key value pairs for fields which are common to all repo definition in `pulp_site_rpm_repos`. Includes values for `remote_username`, + `remote_password` and `policy` by default. +- `pulp_site_rpm_repos`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_repository`. Defaults to modified versions of repos defined in + `dnf_repos_all`. +- `pulp_site_rpm_publications`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_publication`. Defaults to list of publications for repos defined in + `dnf_repos_all`. +- `pulp_site_rpm_distributions`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_distribution`. Defaults to list of distributions for repos defined in + `dnf_repos_all`. 
diff --git a/ansible/roles/pulp_site/defaults/main.yml b/ansible/roles/pulp_site/defaults/main.yml index d30d1bd..3d2bce7 100644 --- a/ansible/roles/pulp_site/defaults/main.yml +++ b/ansible/roles/pulp_site/defaults/main.yml @@ -2,43 +2,30 @@ pulp_site_url: "{{ appliances_pulp_url }}" pulp_site_port: 8080 pulp_site_username: admin # shouldn't be changed pulp_site_password: "{{ vault_pulp_admin_password }}" +# See environments/common/inventory/groups_vars/all/pulp.yml +# pulp_site_upstream_username: +# pulp_site_upstream_password: pulp_site_upstream_content_url: https://ark.stackhpc.com/pulp/content -pulp_site_default_upstream_suffix: "{{ pulp_site_target_arch }}/os" -pulp_site_validate_certs: false pulp_site_install_dir: '/home/rocky/pulp' -pulp_site_selinux_suffix: "{{ ':Z' if ansible_selinux.status == 'enabled' else '' }}" +_pulp_site_selinux_suffix: "{{ ':Z' if ansible_selinux.status == 'enabled' else '' }}" pulp_site_target_facts: "{{ hostvars[groups['pulp'][0]]['ansible_facts'] }}" -pulp_site_target_distribution_version: "{{ pulp_site_target_facts['distribution_version'] }}" -pulp_site_target_distribution_version_major: "{{ pulp_site_target_facts['distribution_major_version'] }}" - -pulp_site_rpm_info: -- name: "baseos-{{ pulp_site_target_distribution_version }}-{{ appliances_pulp_repos.baseos[pulp_site_target_distribution_version].timestamp }}" - subpath: "{{ appliances_pulp_repos.baseos[pulp_site_target_distribution_version] | appliances_repo_to_subpath }}" -- name: "appstream-{{ pulp_site_target_distribution_version }}-{{ appliances_pulp_repos.appstream[pulp_site_target_distribution_version].timestamp }}" - subpath: "{{ appliances_pulp_repos.appstream[pulp_site_target_distribution_version] | appliances_repo_to_subpath }}" -- name: "crb-{{ pulp_site_target_distribution_version }}-{{ appliances_pulp_repos.crb[pulp_site_target_distribution_version].timestamp }}" - subpath: "{{ appliances_pulp_repos.crb[pulp_site_target_distribution_version] | appliances_repo_to_subpath }}" -- name: "extras-{{ pulp_site_target_distribution_version }}-{{ appliances_pulp_repos.extras[pulp_site_target_distribution_version].timestamp }}" - subpath: "{{ appliances_pulp_repos.extras[pulp_site_target_distribution_version] | appliances_repo_to_subpath }}" -- name: "epel-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.epel[pulp_site_target_distribution_version_major].timestamp }}" - subpath: "{{ appliances_pulp_repos.epel[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" -- name: "ohpc-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.openhpc_base[pulp_site_target_distribution_version_major].timestamp }}" - subpath: "{{ appliances_pulp_repos.openhpc_base[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" -- name: "ohpc-updates-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.openhpc_updates[pulp_site_target_distribution_version_major].timestamp }}" - subpath: "{{ appliances_pulp_repos.openhpc_updates[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" -- name: "ceph-{{ pulp_site_target_distribution_version_major }}-{{ appliances_pulp_repos.ceph[pulp_site_target_distribution_version_major].timestamp }}" - subpath: "{{ appliances_pulp_repos.ceph[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" -- name: "grafana-{{ pulp_site_target_distribution_version_major }}-{{ 
appliances_pulp_repos.grafana.timestamp[pulp_site_target_distribution_version_major].timestamp }} - subpath: "{{ appliances_pulp_repos.grafana[pulp_site_target_distribution_version_major] | appliances_repo_to_subpath }}" +pulp_site_target_distribution_version: "{{ pulp_site_target_facts['distribution_version'] }}" # TODO: how to set automatically? pulp_site_rpm_repo_defaults: remote_username: "{{ pulp_site_upstream_username }}" remote_password: "{{ pulp_site_upstream_password }}" policy: on_demand - state: present -_pulp_site_rpm_info_all: "{{ pulp_site_rpm_info | map('combine', pulp_site_rpm_repo_defaults) }}" +_pulp_site_rpm_info: | + {{ + dnf_repos_repos | + select_repos(pulp_site_target_distribution_version) + }} +pulp_site_rpm_repos: | + {{ + _pulp_site_rpm_info | + to_rpm_repos(pulp_site_upstream_content_url, pulp_site_rpm_repo_defaults) + }} -pulp_site_rpm_repos: "{{ _pulp_site_rpm_info_all | to_rpm_repos(pulp_site_upstream_content_url) }}" -pulp_site_rpm_publications: "{{ _pulp_site_rpm_info_all | to_rpm_pubs }}" -pulp_site_rpm_distributions: "{{ _pulp_site_rpm_info_all | to_rpm_distros }}" +pulp_site_rpm_publications: "{{ _pulp_site_rpm_info | to_rpm_pubs }}" +pulp_site_rpm_distributions: "{{ _pulp_site_rpm_info | to_rpm_distros }}" diff --git a/ansible/roles/pulp_site/files/pulp.service b/ansible/roles/pulp_site/files/pulp.service new file mode 100644 index 0000000..464961d --- /dev/null +++ b/ansible/roles/pulp_site/files/pulp.service @@ -0,0 +1,12 @@ +# Adapted from https://grimoire.carcano.ch/blog/installing-pulp3-as-a-container/ +[Unit] +Description=Pulp +Wants=syslog.service + +[Service] +Restart=always +ExecStart=/usr/bin/podman start -a pulp +ExecStop=/usr/bin/podman stop -t 15 pulp + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py b/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py index 50e9126..41e995c 100644 --- a/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py +++ b/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py @@ -3,29 +3,61 @@ def filters(self): return { 'to_rpm_repos': self.to_rpm_repos, 'to_rpm_pubs': self.to_rpm_pubs, - 'to_rpm_distros': self.to_rpm_distros + 'to_rpm_distros': self.to_rpm_distros, + 'select_repos': self.select_repos, } - - def to_rpm_repos(self, list, pulp_url): - repo_list = map(lambda x: { - 'name': x['name'], - 'url': pulp_url+'/'+x['subpath'], - 'remote_username': x['remote_username'], - 'remote_password': x['remote_password'], - 'policy': x['policy'], - 'state': x['state'] }, list) - return repo_list + def select_repos(self, dnf_repos, target_distro_ver): + """ Filter dnf_repos to only those for a relevant distribution version (M.m or M). Returns a list of dicts. 
+ Also adds pulp_repo_name field to give the repository a unique name in Pulp to be referenced by subsequent + filters + """ + + target_distro_ver_major = target_distro_ver.split('.')[0] + + rpm_repos = [] + for repokey in dnf_repos: + # select either the matching major.minor or major version: + if target_distro_ver in dnf_repos[repokey]: + selected_ver = target_distro_ver + elif target_distro_ver_major in dnf_repos[repokey]: + selected_ver = target_distro_ver_major + else: + raise ValueError(f'No key matching {target_distro_ver_major} or {target_distro_ver} found in f{repokey}') + repo_data = dnf_repos[repokey][selected_ver] + repo_data['pulp_repo_name'] = f"{repokey}-{selected_ver}-{dnf_repos[repokey][selected_ver]['pulp_timestamp']}" + rpm_repos.append(repo_data) + return rpm_repos + + def to_rpm_repos(self, rpm_info, content_url, repo_defaults): + """ Filter repo object list given by select_repos into dict required by the pulp_repository_rpm_repos variable + from stackhpc.pulp.pulp_repository role + """ + rpm_repos = [] + for repo_data in rpm_info: + rpm_data = repo_defaults.copy() # NB: this changes behaviour vs before, so now defaults can correctly be overriden + rpm_data['name'] = repo_data['pulp_repo_name'] + rpm_data['url'] = '/'.join([content_url, repo_data['pulp_path'], repo_data['pulp_timestamp']]) + rpm_data['state'] = 'present' + rpm_repos.append(rpm_data) + return rpm_repos + def to_rpm_pubs(self, list): + """ Filter repo object list given by select_repos into dict required by the pulp_publication_rpm variable + from stackhpc.pulp.pulp_publication role + """ pub_list = map(lambda x: { - 'repository': x['name'], - 'state': x['state'] }, list) + 'repository': x['pulp_repo_name'], + 'state': 'present' }, list) return pub_list def to_rpm_distros(self, list): + """ Filter repo object list given by select_repos into dict required by the pulp_distirubtion_rpm variable + from stackhpc.pulp.pulp_distribution role + """ distro_list = map(lambda x: { - 'name': x['name'], - 'repository': x['name'], - 'base_path': x['subpath'], - 'state': x['state'] }, list) - return distro_list \ No newline at end of file + 'name': x['pulp_repo_name'], + 'repository': x['pulp_repo_name'], + 'base_path': '/'.join([x['pulp_path'],x['pulp_timestamp']]), + 'state': 'present' }, list) + return distro_list diff --git a/ansible/roles/pulp_site/tasks/install.yml b/ansible/roles/pulp_site/tasks/install.yml index 39b4fcd..75b0f66 100644 --- a/ansible/roles/pulp_site/tasks/install.yml +++ b/ansible/roles/pulp_site/tasks/install.yml @@ -26,13 +26,27 @@ publish: - "{{ pulp_site_port }}:80" volume: - - "{{ pulp_site_install_dir }}/settings:/etc/pulp{{ pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/pulp_storage:/var/lib/pulp{{ pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/pgsql:/var/lib/pgsql{{ pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/containers:/var/lib/containers{{ pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/settings:/etc/pulp{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/pulp_storage:/var/lib/pulp{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/pgsql:/var/lib/pgsql{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/containers:/var/lib/containers{{ _pulp_site_selinux_suffix }}" device: /dev/fuse image: docker.io/pulp/pulp:3.68.1 + state: present +- name: Create systemd file + copy: + src: pulp.service + dest: /etc/systemd/system/pulp.service + register: _pulp_service + +- name: Start Pulp service + 
systemd: + name: pulp + state: "{{ 'started' if _pulp_service.changed else 'restarted' }}" + daemon_reload: "{{ _pulp_service.changed }}" + enabled: true + - name: Reset admin password once container has initialised no_log: true ansible.builtin.shell: diff --git a/ansible/roles/pulp_site/tasks/sync.yml b/ansible/roles/pulp_site/tasks/sync.yml index 5ef2bc5..9a2a932 100644 --- a/ansible/roles/pulp_site/tasks/sync.yml +++ b/ansible/roles/pulp_site/tasks/sync.yml @@ -3,17 +3,7 @@ - ansible.builtin.assert: that: pulp_site_upstream_password != '' quiet: true - fail_msg: "Upstream password not set. Either set env var ARK_PASSWORD or override pulp_site_upstream_password." - -- name: Wait for Pulp server - pulp.squeezer.status: - pulp_url: "{{ pulp_site_url }}" - username: "{{ pulp_site_username }}" - password: "{{ pulp_site_password }}" - register: _pulp_status - until: _pulp_status.failed == false - retries: 30 - delay: 20 + fail_msg: "Upstream password not set. Ensure `pulp_site_upstream_username` and `pulp_site_upstream_password` are overriden to your Ark credentials." - name: Ensure Pulp CLI config directory exists ansible.builtin.file: @@ -27,6 +17,16 @@ dest: ~/.config/pulp/cli.toml mode: '0644' +- name: Wait for Pulp server + pulp.squeezer.status: + pulp_url: "{{ pulp_site_url }}" + username: "{{ pulp_site_username }}" + password: "{{ pulp_site_password }}" + register: _pulp_status + until: _pulp_status.failed == false + retries: 30 + delay: 20 + - block: - name: Ensure squeezer cache exists ansible.builtin.file: diff --git a/ansible/roles/pulp_site/templates/cli.toml.j2 b/ansible/roles/pulp_site/templates/cli.toml.j2 index 0686790..c67dcf3 100644 --- a/ansible/roles/pulp_site/templates/cli.toml.j2 +++ b/ansible/roles/pulp_site/templates/cli.toml.j2 @@ -4,7 +4,6 @@ username = "{{ pulp_site_username }}" password = "{{ pulp_site_password }}" api_root = "/pulp/" domain = "default" -headers = [] cert = "" key = "" verify_ssl = true diff --git a/docs/environments.md b/docs/environments.md index d1c4923..183b775 100644 --- a/docs/environments.md +++ b/docs/environments.md @@ -14,7 +14,10 @@ All environments load the inventory from the `common` environment first, with th The ansible inventory for the environment is in `environments//inventory/`. It should generally contain: - A `hosts` file. This defines the hosts in the appliance. Generally it should be templated out by the deployment automation so it is also a convenient place to define variables which depend on the deployed hosts such as connection variables, IP addresses, ssh proxy arguments etc. -- A `groups` file defining ansible groups, which essentially controls which features of the appliance are enabled and where they are deployed. This repository generally follows a convention where functionality is defined using ansible roles applied to a group of the same name, e.g. `openhpc` or `grafana`. The meaning and use of each group is described in comments in `environments/common/inventory/groups`. As the groups defined there for the common environment are empty, functionality is disabled by default and must be enabled in a specific environment's `groups` file. Two template examples are provided in `environments/commmon/layouts/` demonstrating a minimal appliance with only the Slurm cluster itself, and an appliance with all functionality. +- A `groups` file defining ansible groups, which essentially controls which features of the appliance are enabled and where they are deployed. 
This repository generally follows a convention where functionality is defined using ansible roles applied to a group +of the same name, e.g. `openhpc` or `grafana`. The meaning and use of each group is described in comments in `environments/common/inventory/groups`. As the groups defined there for the common environment are empty, functionality is disabled by default and must be +enabled in a specific environment's `groups` file. The `site` environment contains an ini file at `environments/site/inventory/groups` which enables groups for default appliance functionality across all environments. Additional groups should generally also be +enabled in this file to avoid divergence between staging and production environments. Note that enabling some groups may require a site-specific image build and Ark credentials (see [operations guide](operations.md)). - Optionally, group variable files in `group_vars//overrides.yml`, where the group names match the functional groups described above. These can be used to override the default configuration for each functionality, which are defined in `environments/common/inventory/group_vars/all/.yml` (the use of `all` here is due to ansible's precedence rules). Although most of the inventory uses the group convention described above there are a few special cases: diff --git a/docs/experimental/pulp.md b/docs/experimental/pulp.md index c6b437d..582eec9 100644 --- a/docs/experimental/pulp.md +++ b/docs/experimental/pulp.md @@ -5,13 +5,47 @@ In order to ensure reproducible builds, the appliance can build images using rep ## Deploying/configuring Pulp Server ### Deploying a Pulp server -A playbook is provided to install and configure a Pulp server on a given host. Admin credentials for this server are automatically generated through the `ansible/adhoc/generate-passwords.yml` playbook. This can be run with -`ansible-playbook ansible/adhoc/deploy-pulp.yml -e "pulp_server="` -where `target_host` is any resolvable host. This will print a Pulp URL which can be copied to your environments as appropriate. Ensure that the server is accessible on the specified port. Note access to this server's content isn't authenticated so assumes the server is deployed behind a secure network. +A playbook is provided to install and configure a Pulp server on a given host. Admin credentials for this server are automatically generated through the `ansible/adhoc/generate-passwords.yml` playbook. To use this, create an inventory file +defining a group `pulp_server` containing a single host, which requires at least 2 vCPUs and 4GB RAM. The group should be defined in your `site` environment's inventory so that a single Pulp server is shared between all environments and +the same snapshots are tested in staging and production. +Deploying and syncing Pulp has been tested on an RL9 host. The hostvar `ansible_host` should be defined, giving the IP address Ansible should use for ssh. For example, you can create an ini file at `environments/site/inventory/pulp` with the contents: + +``` +[pulp_server] +pulp_host ansible_host= +``` + +> [!WARNING] +> The inventory hostname cannot conflict with group names i.e can't be called `pulp_site` or `pulp_server`. + +Once complete, it will print a message giving a value to set for `appliances_pulp_url` (see example config below), assuming the `ansible_host` address is also the address the cluster +should use to reach the Pulp server. 
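+
+As a minimal sketch of the deployment step itself (assuming the pre-existing `ansible/adhoc/deploy-pulp.yml` adhoc playbook remains the entry point, and that it now picks up its target from the `pulp_server` inventory group rather than a `pulp_server` extra variable):
+
+```
+ansible-playbook ansible/adhoc/deploy-pulp.yml
+```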
+ +Note access to this server's content isn't authenticated so this assumes the `pulp_server` host is not externally reachable. ### Using an existing Pulp server An existing Pulp server can be used to host Ark repos by overriding `pulp_site_password` and `appliances_pulp_url` in the target environment. Note that this assumes the same configuration as the appliance deployed Pulp i.e no content authentication. ## Syncing Pulp content with Ark -If the `pulp` group is added to the Packer build groups, the local Pulp server will be synced with Ark on build. You must authenticate with Ark by overriding `pulp_site_upstream_username` and `pulp_site_upstream_password` with your vault encrypted Ark dev credentials. `dnf_repos_username` and `dnf_repos_password` must remain unset to access content from the local Pulp. Content can also be synced by running `ansible/adhoc/sync-pulp.yml`. By default this syncs repositories for Rocky 9.5 with x86_64 architecture, but can be overridden by setting extra variables for `pulp_site_target_arch`, `pulp_site_target_distribution`, `pulp_site_target_distribution_version` and `pulp_site_target_distribution_version_major`. +If the `pulp_site` group is added to the Packer build groups, the local Pulp server will be synced with Ark on build. You must authenticate with Ark by overriding `pulp_site_upstream_username` and `pulp_site_upstream_password` with your vault encrypted Ark dev credentials. `dnf_repos_username` and `dnf_repos_password` must remain unset to access content from the local Pulp. + +Content can also be synced by running `ansible/adhoc/sync-pulp.yml`. By default this syncs repositories for the latest version of Rocky supported by the appliance but this can be overridden by setting extra variables for `pulp_site_target_arch`, `pulp_site_target_distribution` and `pulp_site_target_distribution_version`. + +## Example config in site variables + +``` +# environments/site/inventory/group_vars/all/pulp_site.yml: +appliances_pulp_url: "http://:8080" +pulp_site_upstream_username: +pulp_site_upstream_password: +``` + +## Installing packages from Pulp at runtime +By default, system repos are overwritten to point at Pulp repos during [image builds,](../image-build.md) so using a site Pulp server will require a new fatimage. If you instead wish to install packages at runtime, +you will need to add all host groups on which you will be installing packages to the `dnf_repos` group in `environments/site/inventory/groups` e.g: + +``` +[dnf_repos:children] +cluster +``` diff --git a/docs/operations.md b/docs/operations.md index 4f7bc5c..4c5c640 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -9,7 +9,7 @@ All subsequent sections assume that: - Appropriate OpenStack credentials are available. - Any non-appliance controlled infrastructure is available (e.g. networks, volumes, etc.). - `$ENV` is your current, activated environment, as defined by e.g. `environments/production/`. -- `$SITE_ENV` is the base site-specific environment, as defined by e.g. `environments/mysite/`. +- `$SITE_ENV` is the base site-specific environment, as defined by `environments/site/`. - A string `some/path/to/file.yml:myvar` defines a path relative to the repository root and an Ansible variable in that file. - Configuration is generally common to all environments at a site, i.e. is made in `environments/$SITE_ENV` not `environments/$ENV`. 
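(Relating to the `ansible/adhoc/sync-pulp.yml` adhoc playbook described in the `docs/experimental/pulp.md` changes above, a hedged example invocation is sketched below; the extra-variable names are taken from that document, while the architecture and distribution values shown are purely illustrative.)

```
ansible-playbook ansible/adhoc/sync-pulp.yml \
  -e pulp_site_target_arch=x86_64 \
  -e pulp_site_target_distribution=rocky \
  -e pulp_site_target_distribution_version=9.6
```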
@@ -62,6 +62,24 @@ This is a usually a two-step process: Deploying the additional nodes and applying these changes requires rerunning both OpenTofu and the Ansible site.yml playbook - follow [Deploying a Cluster](#Deploying-a-Cluster). +# Enabling additional functionality +Roles in the appliance which are disabled by default can be enabled by adding the appropriate groups as children of the role's corresponding group in `environments/site/inventory/groups`. For example, +to install a Squid proxy on nodes in the login group, you would modify the `squid` group definition in `environments/site/inventory/groups` to: + +``` +[squid:children] +# Hosts to run squid proxy +login +``` + +Note that many non-default roles include package installations from repositories which the appliance overwrites to point at snapshotted mirrors on a Pulp server (by default StackHPC's Ark server), which are +disabled during runtime to prevent Ark credentials from being leaked. To enable this functionality, you must therefore either: + +- Create a site-specific fatimage (see [image build docs](image-build.md)) with the appropriate group added to the `inventory_groups` Packer variables. +- If you instead wish roles to perform their installations during runtime, deploy a site Pulp server and sync it with with mirrors of the snapshots from the upstream Ark server (see [Pulp docs](experimental/pulp.md)). + +In both cases, Ark credentials will be required. + # Adding Additional Packages By default, the following utility packages are installed during the StackHPC image build: - htop diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index af14839..88cdb42 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250808-1727-faa44755", - "RL9": "openhpc-RL9-250808-1727-faa44755" + "RL8": "openhpc-RL8-250820-0800-767addd8", + "RL9": "openhpc-RL9-250820-0800-767addd8" } } diff --git a/environments/common/files/grafana/grafana.repo.j2 b/environments/common/files/grafana/grafana.repo.j2 index 8f1aef5..6ce2581 100644 --- a/environments/common/files/grafana/grafana.repo.j2 +++ b/environments/common/files/grafana/grafana.repo.j2 @@ -1,6 +1,6 @@ {{ ansible_managed | comment }} [grafana] -baseurl = {{ appliances_pulp_url }}/pulp/content/{{ appliances_pulp_repos.grafana[ansible_distribution_major_version] | appliances_repo_to_subpath }} +baseurl = {{ appliances_pulp_url }}/pulp/content/{{ dnf_repos_repos['grafana'][ansible_distribution_major_version]['pulp_path'] }}/{{ dnf_repos_repos['grafana'][ansible_distribution_major_version]['pulp_timestamp'] }} enabled = 0 name = grafana async = 1 diff --git a/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml new file mode 100644 index 0000000..d2df041 --- /dev/null +++ b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml @@ -0,0 +1,116 @@ +dnf_repos_default: + Ceph: + '8': + pulp_path: centos/8-stream/storage/x86_64/ceph-quincy + pulp_timestamp: 20231104T015751 + repo_file: ceph + '9': + pulp_path: centos/9-stream/storage/x86_64/ceph-reef + pulp_timestamp: 20250617T023108 + repo_file: ceph + appstream: + '8.10': + pulp_path: rocky/8.10/AppStream/x86_64/os + pulp_timestamp: 20250614T013846 + repo_file: Rocky-AppStream + '9.4': + pulp_path: rocky/9.4/AppStream/x86_64/os + 
pulp_timestamp: 20241112T003151 + repo_file: rocky + '9.5': + pulp_path: rocky/9.5/AppStream/x86_64/os + pulp_timestamp: 20250514T014704 + repo_file: rocky + '9.6': + pulp_path: rocky/9.6/AppStream/x86_64/os + pulp_timestamp: 20250816T020215 + repo_file: rocky + baseos: + '8.10': + pulp_path: rocky/8.10/BaseOS/x86_64/os + pulp_timestamp: 20250614T013846 + repo_file: Rocky-BaseOS + '9.4': + pulp_path: rocky/9.4/BaseOS/x86_64/os + pulp_timestamp: 20241115T011711 + repo_file: rocky + '9.5': + pulp_path: rocky/9.5/BaseOS/x86_64/os + pulp_timestamp: 20250513T031844 + repo_file: rocky + '9.6': + pulp_path: rocky/9.6/BaseOS/x86_64/os + pulp_timestamp: 20250815T050653 + repo_file: rocky + crb: + '8.10': + pulp_path: rocky/8.10/PowerTools/x86_64/os + pulp_timestamp: 20250614T013846 + repo_file: Rocky-PowerTools + repo_name: powertools + '9.4': + pulp_path: rocky/9.4/CRB/x86_64/os + pulp_timestamp: 20241115T003133 + repo_file: rocky + '9.5': + pulp_path: rocky/9.5/CRB/x86_64/os + pulp_timestamp: 20250514T014704 + repo_file: rocky + '9.6': + pulp_path: rocky/9.6/CRB/x86_64/os + pulp_timestamp: 20250815T034418 + repo_file: rocky + extras: + '8.10': + pulp_path: rocky/8.10/extras/x86_64/os + pulp_timestamp: 20250510T032327 + repo_file: Rocky-Extras + '9.4': + pulp_path: rocky/9.4/extras/x86_64/os + pulp_timestamp: 20241118T002802 + repo_file: rocky-extras + '9.5': + pulp_path: rocky/9.5/extras/x86_64/os + pulp_timestamp: 20250506T032818 + repo_file: rocky-extras + '9.6': + pulp_path: rocky/9.6/extras/x86_64/os + pulp_timestamp: 20250726T040613 + repo_file: rocky-extras + grafana: + '8': + pulp_path: grafana/oss/rpm + pulp_timestamp: 20250730T011314 + repo_file: grafana + timestamp: 20250615T005738 + '9': + pulp_path: grafana/oss/rpm + pulp_timestamp: 20250730T011314 + repo_file: grafana + epel: + '8': + pulp_path: epel/8/Everything/x86_64 + pulp_timestamp: 20250615T234151 + repo_file: epel + '9': + pulp_path: epel/9/Everything/x86_64 + pulp_timestamp: 20250817T000753 + repo_file: epel + OpenHPC: + '8': + pulp_path: OpenHPC/2/EL_8 + pulp_timestamp: 20241218T154614 + repo_file: OpenHPC + '9': + pulp_path: OpenHPC/3/EL_9 + pulp_timestamp: 20241218T154614 + repo_file: OpenHPC + OpenHPC-updates: + '8': + pulp_path: OpenHPC/2/updates/EL_8 + pulp_timestamp: 20250512T003315 + repo_file: OpenHPC + '9': + pulp_path: OpenHPC/3/updates/EL_9 + pulp_timestamp: 20250510T003301 + repo_file: OpenHPC diff --git a/environments/common/inventory/group_vars/all/dnf_repos.yml b/environments/common/inventory/group_vars/all/dnf_repos.yml new file mode 100644 index 0000000..e7a8ace --- /dev/null +++ b/environments/common/inventory/group_vars/all/dnf_repos.yml @@ -0,0 +1,8 @@ +# dnf_repos_default: see role ansible/roles/dnf_repos/README.md for format and dnf_repo_timestamps.yml for default definition + +# override this in environments/site/inventory/group_vars/dnf_repos.yml to add repos: +dnf_repos_extra: {} + +# indirection to skip openhpc repos if using alternative slurm: +dnf_repos_skip: "{{ [] if ((openhpc_install_type | default('ohpc') == 'ohpc')) else ['OpenHPC', 'OpenHPC-updates'] }}" +dnf_repos_repos: "{{ dnf_repos_default | combine(dnf_repos_extra) | dict2items | rejectattr('key', 'in', dnf_repos_skip) | items2dict }}" diff --git a/environments/common/inventory/group_vars/all/timestamps.yml b/environments/common/inventory/group_vars/all/timestamps.yml deleted file mode 100644 index 455c260..0000000 --- a/environments/common/inventory/group_vars/all/timestamps.yml +++ /dev/null @@ -1,88 +0,0 @@ -appliances_pulp_repos: - 
appstream: - '8.10': - path: rocky/8.10/AppStream/x86_64/os - timestamp: 20250614T013846 - '9.4': - path: rocky/9.4/AppStream/x86_64/os - timestamp: 20241112T003151 - '9.5': - path: rocky/9.5/AppStream/x86_64/os - timestamp: 20250514T014704 - '9.6': - path: rocky/9.6/AppStream/x86_64/os - timestamp: 20250726T040613 - baseos: - '8.10': - path: rocky/8.10/BaseOS/x86_64/os - timestamp: 20250614T013846 - '9.4': - path: rocky/9.4/BaseOS/x86_64/os - timestamp: 20241115T011711 - '9.5': - path: rocky/9.5/BaseOS/x86_64/os - timestamp: 20250513T031844 - '9.6': - path: rocky/9.6/BaseOS/x86_64/os - timestamp: 20250726T052250 - ceph: - '8': - path: centos/8-stream/storage/x86_64/ceph-quincy - timestamp: 20231104T015751 - '9': - path: centos/9-stream/storage/x86_64/ceph-reef - timestamp: 20250617T023108 - crb: - '8.10': - path: rocky/8.10/PowerTools/x86_64/os - timestamp: 20250614T013846 - '9.4': - path: rocky/9.4/CRB/x86_64/os - timestamp: 20241115T003133 - '9.5': - path: rocky/9.5/CRB/x86_64/os - timestamp: 20250514T014704 - '9.6': - path: rocky/9.6/CRB/x86_64/os - timestamp: 20250726T040613 - epel: - '8': - path: epel/8/Everything/x86_64 - timestamp: 20250615T234151 - '9': - path: epel/9/Everything/x86_64 - timestamp: 20250729T235750 - extras: - '8.10': - path: rocky/8.10/extras/x86_64/os - timestamp: 20250510T032327 - '9.4': - path: rocky/9.4/extras/x86_64/os - timestamp: 20241118T002802 - '9.5': - path: rocky/9.5/extras/x86_64/os - timestamp: 20250506T032818 - '9.6': - path: rocky/9.6/extras/x86_64/os - timestamp: 20250726T040613 - grafana: - '8': - path: grafana/oss/rpm - timestamp: 20250615T005738 - '9': - path: grafana/oss/rpm - timestamp: 20250730T011314 - openhpc_base: - '8': - path: OpenHPC/2/EL_8 - timestamp: 20241218T154614 - '9': - path: OpenHPC/3/EL_9 - timestamp: 20241218T154614 - openhpc_updates: - '8': - path: OpenHPC/2/updates/EL_8 - timestamp: 20250512T003315 - '9': - path: OpenHPC/3/updates/EL_9 - timestamp: 20250510T003301 diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 57b6441..6926355 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -197,8 +197,8 @@ k3s_agent builder extra_packages -[pulp] -# Add builder to this group to enable automatically syncing of pulp during image build +[pulp_site] +# Add builder to this group to automatically sync pulp during image build [cacerts] # Hosts to configure CA certificates and trusts on @@ -211,3 +211,10 @@ extra_packages [nhc] # Hosts to configure for node health checks - either entire 'compute' group or empty + +[pulp_server] +# Host to deploy a Pulp server on and sync with mirrors of upstream Ark repositories. Should be a group containing a single VM provisioned +# separately from the appliance. e.g +# pulp_host ansible_host= +# Note the host name can't conflict with group names i.e can't be called `pulp` or `pulp_server` + diff --git a/environments/site/inventory/groups b/environments/site/inventory/groups index 9df61dc..b78197d 100644 --- a/environments/site/inventory/groups +++ b/environments/site/inventory/groups @@ -157,3 +157,12 @@ compute # Should be set to `compute` if enabled # Note that this feature currently assumes all compute nodes are VMs, enabling # when the cluster contains baremetal compute nodes may lead to unexpected scheduling behaviour + +[pulp_site] +# Add builder to this group to automatically sync pulp during image build + +[pulp_server] +# Host to deploy a Pulp server on and sync with mirrors of upstream Ark repositories. 
Should be a group containing a single VM provisioned +# separately from the appliance. e.g +# pulp_host ansible_host= +# Note inventory host name cannot conflict with group names i.e can't be called `pulp` or `pulp_server`. From 2984292dca33d4fa2b76af289c41cc7254e17fe9 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 5 Sep 2025 09:18:26 +0000 Subject: [PATCH 07/50] temp fix: add alertmanager passwd to persist_openhpc_secrets template --- .../roles/persist_openhpc_secrets/templates/openhpc_secrets.fact | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact index 9d6de37..ca1742c 100644 --- a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact +++ b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact @@ -6,4 +6,5 @@ "vault_mysql_root_password": "{{ lookup('password', '/dev/null') }}", "vault_mysql_slurm_password": "{{ lookup('password', '/dev/null') }}", "vault_openhpc_mungekey": "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') | regex_replace('\s+', '') }}" + "vault_alertmanager_admin_password": "{{ lookup('password', '/dev/null') }}" } From a3be3c9474977f6f818d5edcac15725e9c385fb3 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 5 Sep 2025 12:10:13 +0000 Subject: [PATCH 08/50] missing ',' --- .../persist_openhpc_secrets/templates/openhpc_secrets.fact | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact index ca1742c..e049951 100644 --- a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact +++ b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact @@ -5,6 +5,6 @@ "vault_elasticsearch_kibana_password": "{{ lookup('password', '/dev/null') }}", "vault_mysql_root_password": "{{ lookup('password', '/dev/null') }}", "vault_mysql_slurm_password": "{{ lookup('password', '/dev/null') }}", - "vault_openhpc_mungekey": "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') | regex_replace('\s+', '') }}" + "vault_openhpc_mungekey": "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') | regex_replace('\s+', '') }}", "vault_alertmanager_admin_password": "{{ lookup('password', '/dev/null') }}" } From 32e983803598386609fa2cb7f286663d2ae311dc Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 5 Sep 2025 12:52:14 +0000 Subject: [PATCH 09/50] alertmanager admin passwd group_var --- environments/.caas/inventory/group_vars/all/cluster.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environments/.caas/inventory/group_vars/all/cluster.yml b/environments/.caas/inventory/group_vars/all/cluster.yml index b06314c..14633c8 100644 --- a/environments/.caas/inventory/group_vars/all/cluster.yml +++ b/environments/.caas/inventory/group_vars/all/cluster.yml @@ -11,6 +11,7 @@ vault_elasticsearch_kibana_password: "{{ hostvars[groups['control'][0]].ansible_ vault_mysql_root_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_mysql_root_password }}" vault_mysql_slurm_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_mysql_slurm_password }}" vault_openhpc_mungekey: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_openhpc_mungekey }}" +vault_alertmanager_admin_password: "{{ 
hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_alertmanager_admin_password }}" # Override this to cope with the case where the podman group just doesn't exist appliances_local_users_podman_enable: "{{ groups.get('podman', []) | length > 0 }}" From 6e05021c310cbabc3f082b0661dca4c77b6b85de Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Fri, 5 Sep 2025 15:49:35 +0100 Subject: [PATCH 10/50] fix incorrect use of partition in nodegroup variable definitions (#771) --- environments/site/tofu/node_group/variables.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/site/tofu/node_group/variables.tf b/environments/site/tofu/node_group/variables.tf index 35c1b6b..4ef3407 100644 --- a/environments/site/tofu/node_group/variables.tf +++ b/environments/site/tofu/node_group/variables.tf @@ -1,11 +1,11 @@ variable "nodes" { type = list(string) - description = "list of node names for partition" + description = "List of node names for node group" } variable "flavor" { type = string - description = "Name of flavor for partition" + description = "Name of flavor for node group" } variable "cluster_name" { @@ -24,7 +24,7 @@ variable "key_pair" { variable "image_id" { type = string - description = "ID of image for the partition" + description = "ID of image for the node group" } variable "environment_root" { From 109f58497141adc031608fe68930699cc6c24c98 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 8 Sep 2025 13:47:48 +0000 Subject: [PATCH 11/50] make caas persist secrets idempotent --- .../roles/persist_openhpc_secrets/tasks/main.yml | 10 +++++++--- .../templates/openhpc_secrets.fact | 16 ++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/ansible/roles/persist_openhpc_secrets/tasks/main.yml b/ansible/roles/persist_openhpc_secrets/tasks/main.yml index 6ae9bcd..e0f5865 100644 --- a/ansible/roles/persist_openhpc_secrets/tasks/main.yml +++ b/ansible/roles/persist_openhpc_secrets/tasks/main.yml @@ -14,14 +14,18 @@ loop: - "{{ appliances_state_dir }}/ansible.facts.d" - "/etc/ansible/facts.d" - + +- name: Load existing OpenHPC secrets if present + ansible.builtin.setup: + filter: ansible_local + when: openhpc_secrets_stat.stat.exists + - name: Write OpenHPC secrets template: src: openhpc_secrets.fact dest: "{{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact" owner: root mode: 0600 - when: "not openhpc_secrets_stat.stat.exists" - name: Symlink persistent facts to facts_path file: @@ -30,6 +34,6 @@ dest: /etc/ansible/facts.d/openhpc_secrets.fact owner: root -- name: Read facts +- name: Refresh facts to pick up any new secrets ansible.builtin.setup: filter: ansible_local diff --git a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact index e049951..5c6c5e6 100644 --- a/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact +++ b/ansible/roles/persist_openhpc_secrets/templates/openhpc_secrets.fact @@ -1,10 +1,10 @@ { - "vault_azimuth_user_password": "{{ lookup('password', '/dev/null') }}", - "vault_grafana_admin_password": "{{ lookup('password', '/dev/null') }}", - "vault_elasticsearch_admin_password": "{{ lookup('password', '/dev/null') }}", - "vault_elasticsearch_kibana_password": "{{ lookup('password', '/dev/null') }}", - "vault_mysql_root_password": "{{ lookup('password', '/dev/null') }}", - "vault_mysql_slurm_password": "{{ lookup('password', '/dev/null') }}", - 
"vault_openhpc_mungekey": "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') | regex_replace('\s+', '') }}", - "vault_alertmanager_admin_password": "{{ lookup('password', '/dev/null') }}" + "vault_azimuth_user_password": "{{ ansible_local.openhpc_secrets.vault_azimuth_user_password | default(lookup('password', '/dev/null')) }}", + "vault_grafana_admin_password": "{{ ansible_local.openhpc_secrets.vault_grafana_admin_password | default(lookup('password', '/dev/null')) }}", + "vault_elasticsearch_admin_password": "{{ ansible_local.openhpc_secrets.vault_elasticsearch_admin_password | default(lookup('password', '/dev/null')) }}", + "vault_elasticsearch_kibana_password": "{{ ansible_local.openhpc_secrets.vault_elasticsearch_kibana_password | default(lookup('password', '/dev/null')) }}", + "vault_mysql_root_password": "{{ ansible_local.openhpc_secrets.vault_mysql_root_password | default(lookup('password', '/dev/null')) }}", + "vault_mysql_slurm_password": "{{ ansible_local.openhpc_secrets.vault_mysql_slurm_password | default(lookup('password', '/dev/null')) }}", + "vault_openhpc_mungekey": "{{ ansible_local.openhpc_secrets.vault_openhpc_mungekey | default(lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') | regex_replace('\\s+', '')) }}", + "vault_alertmanager_admin_password": "{{ ansible_local.openhpc_secrets.vault_alertmanager_admin_password | default(lookup('password', '/dev/null')) }}" } From 60d531d15ba5fd3bd79450ce33edad7f6ebb3c23 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 9 Sep 2025 10:38:50 +0200 Subject: [PATCH 12/50] Bump Pulp snapshots for RL 9.6 (#772) * Reorder repositories alphabetically * Bump Pulp snapshots for RL 9.6 * Bump CI image (RL9 only) --- .../tofu/cluster_image.auto.tfvars.json | 2 +- .../group_vars/all/dnf_repo_timestamps.yml | 62 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 88cdb42..6b294d1 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { "RL8": "openhpc-RL8-250820-0800-767addd8", - "RL9": "openhpc-RL9-250820-0800-767addd8" + "RL9": "openhpc-RL9-250908-2047-d90ebd0e" } } diff --git a/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml index d2df041..c80a85a 100644 --- a/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml +++ b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml @@ -8,6 +8,24 @@ dnf_repos_default: pulp_path: centos/9-stream/storage/x86_64/ceph-reef pulp_timestamp: 20250617T023108 repo_file: ceph + OpenHPC: + '8': + pulp_path: OpenHPC/2/EL_8 + pulp_timestamp: 20241218T154614 + repo_file: OpenHPC + '9': + pulp_path: OpenHPC/3/EL_9 + pulp_timestamp: 20241218T154614 + repo_file: OpenHPC + OpenHPC-updates: + '8': + pulp_path: OpenHPC/2/updates/EL_8 + pulp_timestamp: 20250512T003315 + repo_file: OpenHPC + '9': + pulp_path: OpenHPC/3/updates/EL_9 + pulp_timestamp: 20250510T003301 + repo_file: OpenHPC appstream: '8.10': pulp_path: rocky/8.10/AppStream/x86_64/os @@ -23,7 +41,7 @@ dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/AppStream/x86_64/os - pulp_timestamp: 20250816T020215 + pulp_timestamp: 20250902T060015 repo_file: rocky baseos: '8.10': @@ -40,7 +58,7 @@ 
dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/BaseOS/x86_64/os - pulp_timestamp: 20250815T050653 + pulp_timestamp: 20250902T094855 repo_file: rocky crb: '8.10': @@ -58,8 +76,17 @@ dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/CRB/x86_64/os - pulp_timestamp: 20250815T034418 + pulp_timestamp: 20250902T060015 repo_file: rocky + epel: + '8': + pulp_path: epel/8/Everything/x86_64 + pulp_timestamp: 20250615T234151 + repo_file: epel + '9': + pulp_path: epel/9/Everything/x86_64 + pulp_timestamp: 20250908T001730 + repo_file: epel extras: '8.10': pulp_path: rocky/8.10/extras/x86_64/os @@ -85,32 +112,5 @@ dnf_repos_default: timestamp: 20250615T005738 '9': pulp_path: grafana/oss/rpm - pulp_timestamp: 20250730T011314 + pulp_timestamp: 20250906T025340 repo_file: grafana - epel: - '8': - pulp_path: epel/8/Everything/x86_64 - pulp_timestamp: 20250615T234151 - repo_file: epel - '9': - pulp_path: epel/9/Everything/x86_64 - pulp_timestamp: 20250817T000753 - repo_file: epel - OpenHPC: - '8': - pulp_path: OpenHPC/2/EL_8 - pulp_timestamp: 20241218T154614 - repo_file: OpenHPC - '9': - pulp_path: OpenHPC/3/EL_9 - pulp_timestamp: 20241218T154614 - repo_file: OpenHPC - OpenHPC-updates: - '8': - pulp_path: OpenHPC/2/updates/EL_8 - pulp_timestamp: 20250512T003315 - repo_file: OpenHPC - '9': - pulp_path: OpenHPC/3/updates/EL_9 - pulp_timestamp: 20250510T003301 - repo_file: OpenHPC From c94d134930a25f5c21467ff61e904caadaae7c6d Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:34:23 +0100 Subject: [PATCH 13/50] add support for setting server group (#773) --- environments/site/tofu/additional.tf | 4 +++- environments/site/tofu/compute.tf | 4 +++- environments/site/tofu/control.tf | 7 +++++++ environments/site/tofu/login.tf | 4 +++- environments/site/tofu/node_group/nodes.tf | 14 ++++++++++++++ environments/site/tofu/node_group/variables.tf | 5 +++++ environments/site/tofu/variables.tf | 8 ++++++++ 7 files changed, 43 insertions(+), 3 deletions(-) diff --git a/environments/site/tofu/additional.tf b/environments/site/tofu/additional.tf index 863e160..872f957 100644 --- a/environments/site/tofu/additional.tf +++ b/environments/site/tofu/additional.tf @@ -35,6 +35,7 @@ module "additional" { security_group_ids = lookup(each.value, "security_group_ids", [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id]) additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) additional_cloud_config_vars = lookup(each.value, "additional_cloud_config_vars", var.additional_cloud_config_vars) + server_group_id = lookup(each.value, "server_group_id", null) # can't be set for additional nodes compute_init_enable = [] @@ -68,6 +69,7 @@ module "additional" { "nodename_template", "security_group_ids", "additional_cloud_config", - "additional_cloud_config_vars" + "additional_cloud_config_vars", + "server_group_id" ] } diff --git a/environments/site/tofu/compute.tf b/environments/site/tofu/compute.tf index 9187f66..35d62c6 100644 --- a/environments/site/tofu/compute.tf +++ b/environments/site/tofu/compute.tf @@ -34,6 +34,7 @@ module "compute" { match_ironic_node = lookup(each.value, "match_ironic_node", null) availability_zone = lookup(each.value, "availability_zone", null) ip_addresses = lookup(each.value, "ip_addresses", null) + server_group_id = lookup(each.value, "server_group_id", null) # computed # not using openstack_compute_instance_v2.control.access_ip_v4 to avoid @@ -63,7 +64,8 @@ module 
"compute" { "gateway_ip", "nodename_template", "additional_cloud_config", - "additional_cloud_config_vars" + "additional_cloud_config_vars", + "server_group_id" ] } diff --git a/environments/site/tofu/control.tf b/environments/site/tofu/control.tf index 722e89d..19a41ae 100644 --- a/environments/site/tofu/control.tf +++ b/environments/site/tofu/control.tf @@ -72,6 +72,13 @@ resource "openstack_compute_instance_v2" "control" { } } + dynamic "scheduler_hints" { + for_each = var.control_server_group_id != null ? [true] : [] + content { + group = var.control_server_group_id + } + } + metadata = { environment_root = var.environment_root access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] diff --git a/environments/site/tofu/login.tf b/environments/site/tofu/login.tf index 7a5b3f8..5ecc033 100644 --- a/environments/site/tofu/login.tf +++ b/environments/site/tofu/login.tf @@ -34,6 +34,7 @@ module "login" { match_ironic_node = lookup(each.value, "match_ironic_node", null) availability_zone = lookup(each.value, "availability_zone", null) ip_addresses = lookup(each.value, "ip_addresses", null) + server_group_id = lookup(each.value, "server_group_id", null) # can't be set for login compute_init_enable = [] @@ -68,7 +69,8 @@ module "login" { "nodename_template", "additional_cloud_config", "additional_cloud_config_vars", - "security_group_ids" + "security_group_ids", + "server_group_id" ] } diff --git a/environments/site/tofu/node_group/nodes.tf b/environments/site/tofu/node_group/nodes.tf index 7c3fe21..45cd449 100644 --- a/environments/site/tofu/node_group/nodes.tf +++ b/environments/site/tofu/node_group/nodes.tf @@ -103,6 +103,13 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { } } + dynamic "scheduler_hints" { + for_each = var.server_group_id != null ? [true] : [] + content { + group = var.server_group_id + } + } + metadata = merge( { environment_root = var.environment_root @@ -164,6 +171,13 @@ resource "openstack_compute_instance_v2" "compute" { } } + dynamic "scheduler_hints" { + for_each = var.server_group_id != null ? 
[true] : [] + content { + group = var.server_group_id + } + } + metadata = merge( { environment_root = var.environment_root diff --git a/environments/site/tofu/node_group/variables.tf b/environments/site/tofu/node_group/variables.tf index 4ef3407..0a129ab 100644 --- a/environments/site/tofu/node_group/variables.tf +++ b/environments/site/tofu/node_group/variables.tf @@ -208,3 +208,8 @@ variable "additional_cloud_config_vars" { default = {} nullable = false } + +variable "server_group_id" { + type = string + default = null +} diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index f0451b3..af0b112 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -84,6 +84,7 @@ variable "login" { if match_ironic_node is true, defered to OpenStack otherwise gateway_ip: Address to add default route via nodename_template: Overrides variable cluster_nodename_template + server_group_id: String ID of server group to use for scheduler hint EOF type = any @@ -129,6 +130,7 @@ variable "compute" { if match_ironic_node is true, defered to OpenStack otherwise gateway_ip: Address to add default route via nodename_template: Overrides variable cluster_nodename_template + server_group_id: String ID of server group to use for scheduler hint Nodes are added to the following inventory groups: - $group_name @@ -340,3 +342,9 @@ variable "additional_cloud_config_vars" { type = map(any) default = {} } + +variable "control_server_group_id" { + description = "ID of server group to use for control node scheduler hint" + type = string + default = null +} From 82897d4f55a18f978efdd257e5c7d1aec6ab0d49 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 9 Sep 2025 13:30:48 +0200 Subject: [PATCH 14/50] Bump CUDA to 13.0.1 and NVIDIA driver to 580.82.07 --- ansible/roles/cuda/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml index 0f5ad9a..e4e785b 100644 --- a/ansible/roles/cuda/defaults/main.yml +++ b/ansible/roles/cuda/defaults/main.yml @@ -1,7 +1,7 @@ cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo" cuda_nvidia_driver_stream: '580-open' -cuda_nvidia_driver_pkg: "nvidia-open-3:580.65.06-1.el{{ ansible_distribution_major_version }}" -cuda_package_version: '13.0.0-1' +cuda_nvidia_driver_pkg: "nvidia-open-3:580.82.07-1.el{{ ansible_distribution_major_version }}" +cuda_package_version: '13.0.1-1' cuda_version_short: "{{ (cuda_package_version | split('.'))[0:2] | join('.') }}" # major.minor cuda_packages: - "cuda-toolkit-{{ cuda_package_version }}" From b42c2f8fd7ee2d5d794bdb43d54b2b97502877d0 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 11 Sep 2025 11:29:19 +0100 Subject: [PATCH 15/50] Add validation for tofu-templated vars (#775) * add validation for tofu-templated vars * update error message iaw review --- ansible/validate.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ansible/validate.yml b/ansible/validate.yml index 3341275..034f469 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -43,6 +43,37 @@ # below produced by dev/setup-env.sh - gives empty list if file is missing: _requirements_installed: "{{ ((lookup('file', _requirements_path + '.last', errors='ignore') or '{}') | from_yaml 
).values() | flatten }}" +- name: Validate OpenTofu templated inventory is appropriate + # This "documents" the assumptions that Ansible makes about the + # OpenTofu-provided inventory + hosts: localhost + gather_facts: false + tags: + - validate + - opentofu + tasks: + - name: Check templated groups + assert: + that: + - item in groups + - groups[item] | length > 0 + fail_msg: > + Expected inventory group '{{ item }}' is missing or empty: + - Check OpenTofu inventory template is up to date + - Check OpenTofu configuration defines 'login' and 'compute' variables properly + loop: + - control + - compute + - login + - name: Check templated 'all' vars + assert: + that: + - openhpc_cluster_name is defined + - cluster_domain_suffix is defined + - cluster_home_volume is defined + - cluster_compute_groups is defined + fail_msg: "One or more expected variables are missing: is OpenTofu inventory template up to date?" + - name: Ensure control node is in inventory hosts: all gather_facts: false From 919a7e2c6bc29e6d31885a20050e0d0e268281c0 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 11 Sep 2025 17:30:08 +0100 Subject: [PATCH 16/50] Fix error message for state volume provisioning (#780) --- environments/site/tofu/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index af0b112..3402c3a 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -203,7 +203,7 @@ variable "state_volume_provisioning" { validation { condition = contains(["manage", "attach"], var.state_volume_provisioning) error_message = <<-EOT - home_volume_provisioning must be "manage" or "attach" + state_volume_provisioning must be "manage" or "attach" EOT } } From c12ec99bcc1335ae5ecf31a5914fd4dbd21b167f Mon Sep 17 00:00:00 2001 From: Max Norton Date: Thu, 18 Sep 2025 12:16:06 +0100 Subject: [PATCH 17/50] Enable linting (#732) * Add Github Actions for running code linters * Fix linting issues. The super-linter.env currently has the following additions that are to be addressed in the future: VALIDATE_GITHUB_ACTIONS=false VALIDATE_SHELL_SHFMT=false VALIDATE_YAML=false Most of the linting for the above has been addressed with just a single issue remaining that blocks the linter from being enabled. 
* Update GH workflow so linting always runs befor any other jobs * Update GH workflow so linting always runs befor any other jobs * Fix linting issues on the merge of origin/main * Fix linting issues on the merge of origin/main * Use the head ref for workflow concurrency * Output the path filter result of the workflow * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Tweak github action used to detect changed paths on push/pull request * Troubleshooting: ansible.builtin.user * Troubleshooting: debugging temporarily added * Shift pylint invalid-name linting behond python bang line * Temporarily disable the ansible galaxy requirements validation * Reverting changes made to ansible.builtin.user and ansible.builtin.group where the name parameter was added. Reverting to ansible.builtin.group: becasue args aren't an expected label: groupadd: '{'name': 'grafana', 'gid': 979}' is not a valid group name * Arguments are dicts not labels * Preserve file permissions on .ssh directory contents * Wherever we use become_user set become: true, keeps the linter happy and maintains functionality * Fix linting on merge of origin/main * Fix linting on merge of origin/main * Update cluster image - using fatimage built from ci/linting branch * Add comments to workflow files detailing the CI workflow and enable these workflows * Fix workflow execution: 1. change trivvy to trivy 2. extra, stackhpc, and trivyscan workflows should trigger on workflow_call and workflow_dispatch * Fix linting issues from merge of origin/main * Exclude 'ansible/roles/compute_init/files/compute-init.yml' from ansible lint. The parser can't load the 'tasks/tuned.yml' ansible so fails with: load-failure[filenotfounderror]: [Errno 2] No such file or directory: 'ansible-slurm-appliance/tasks/main.yml' tasks/main.yml:1 This failure can't be skipped beause it's the output of the parser that's fed to the linter where such exceptions are made. 
* Temporarily disable Rocky 8 to speed up testing and reduce CI resources Temporarily disable ansible-lint: Run ansible/ansible-lint@v25.4.0 Run if [[ -n "" ]]; then Run action_ref="${GH_ACTION_REF_INPUT:-${GITHUB_ACTION_REF:-main}}" Using ansible-lint ref: main Run reqs_file=$(git rev-parse --show-toplevel)/.git/ansible-lint-requirements.txt --2025-09-09 14:51:58-- https://raw.githubusercontent.com/ansible/ansible-lint/main/.config/requirements-lock.txt Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected. HTTP request sent, awaiting response... 404 Not Found 2025-09-09 14:51:58 ERROR 404: Not Found. * Fix some bad ansible-lint line-length markup * Fix ansible-lint markup for line-length * Bump CI image - FOR RL9 ONLY TO CONSERVE CI RESOURCES * Revert ansible.builtin.command to ansible.builtin.shell due to missed comment "need login shell for module command" and mask ansible-lint error * Disable extra-build.yml workflow which has previously passed so we can focus on the stackhpc.yml workflow * Disable concurrency to see if this is killing stackhpc.yml * Remove concurrency from extr.yml, stackhpc.yml, and trivyscan.yml as they're all being triggered from main.yml which has its own concurrency check - the trivscan concurrency was also killing stackhpc * Enable ansible-lint * Enable triggering of all workflows from the main CI workflow * Bump CI image - FOR RL9 ONLY TO CONSERVE CI RESOURCES * Fix bad ansible-lint markup affecting the bang line * Reduce workflow CI resources whilst fixing test deploy and reimage workflow * Bump CI image - FOR RL9 ONLY TO CONSERVE CI RESOURCES * Enable Rocky Linux 8 - disabled to speed up testing * Enable all CI workflows * Bump CI image - FOR RL9 ONLY TO CONSERVE CI RESOURCES * Remove empty line between ansible "when" and "block" added by ansible-lint --fix, it's not required by the linter. * Enable check for ansible galaxy requirements * Revert the ansible collections path to ansible/collections so we don't inadvertently break any existing checkouts. 
Direct ansible-lint to use .ansible/collections so downloads are excluded from linting by our .ansible-lint.yml * Bump CI image --- .ansible-lint.yml | 24 + .checkov.yaml | 4 + .editorconfig | 8 + .github/bin/create-merge-branch.sh | 10 +- .github/bin/get-s3-image.sh | 14 +- .github/linters/.checkov.yaml | 1 + .github/linters/.python-lint | 1 + .github/linters/.shellcheckrc | 1 + .github/linters/.yamllint.yml | 1 + .github/linters/actionlint.yml | 1 + .github/workflows/extra.yml | 47 +- .github/workflows/fatimage.yml | 19 +- .github/workflows/lint.yml | 49 ++ .github/workflows/main.yml | 149 +++++ .github/workflows/nightly-cleanup.yml | 16 +- .github/workflows/nightlybuild.yml | 21 +- .github/workflows/release-image.yml | 9 +- .github/workflows/s3-image-sync.yml | 18 +- .github/workflows/stackhpc.yml | 70 +-- .github/workflows/trivyscan.yml | 29 +- .github/workflows/upgrade-check.yml.sample | 7 + .../workflows/upload-release-image.yml.sample | 6 + .gitignore | 1 + .python-lint | 6 + .shellcheckrc | 7 + .yamllint.yml | 24 + README.md | 130 +++-- actionlint.yml | 1 + ansible/adhoc/backup-keytabs.yml | 7 +- ansible/adhoc/cudatests.yml | 7 +- ansible/adhoc/deploy-pulp.yml | 26 +- ansible/adhoc/generate-passwords.yml | 5 +- ansible/adhoc/hpctests.yml | 3 +- ansible/adhoc/rebuild-via-slurm.yml | 5 +- ansible/adhoc/rebuild.yml | 17 +- ansible/adhoc/restart-slurm.yml | 19 +- ansible/adhoc/sync-pulp.yml | 1 + ansible/adhoc/update-packages.yml | 10 +- ansible/bootstrap.yml | 210 +++---- ansible/ci/check_eessi.yml | 22 +- ansible/ci/check_grafana.yml | 10 +- ansible/ci/check_sacct_hpctests.yml | 5 +- ansible/ci/check_slurm.yml | 5 +- ansible/ci/delete_images.yml | 14 +- ansible/ci/get_image_ids.yml | 5 +- .../ci/library/grafana_elasticsearch_query.py | 97 ++-- ansible/ci/output_vars.yml | 5 +- ansible/ci/retrieve_inventory.yml | 13 +- ansible/ci/update_timestamps.yml | 3 +- ansible/cleanup.yml | 36 +- ansible/extras.yml | 51 +- ansible/fatimage.yml | 152 ++--- ansible/filesystems.yml | 11 +- ansible/filter_plugins/utils.py | 77 +-- ansible/final.yml | 12 +- ansible/iam.yml | 27 +- ansible/library/latest_timestamps.py | 87 +-- ansible/library/user_namespace_facts.py | 68 ++- ansible/monitoring.yml | 30 +- ansible/noop.yml | 1 - ansible/portal.yml | 35 +- ansible/roles/alertmanager/README.md | 37 +- ansible/roles/alertmanager/defaults/main.yml | 18 +- ansible/roles/alertmanager/handlers/main.yml | 3 +- .../roles/alertmanager/tasks/configure.yml | 8 +- ansible/roles/alertmanager/tasks/install.yml | 5 +- ansible/roles/basic_users/README.md | 65 ++- ansible/roles/basic_users/defaults/main.yml | 5 +- .../basic_users/filter_plugins/filter_keys.py | 29 +- .../library/terminate_user_sessions.py | 68 ++- ansible/roles/basic_users/tasks/main.yml | 25 +- ansible/roles/block_devices/README.md | 26 +- ansible/roles/block_devices/defaults/main.yml | 6 +- .../block_devices/library/block_devices.py | 31 +- ansible/roles/block_devices/tasks/main.yml | 27 +- ansible/roles/cacerts/defaults/main.yml | 3 +- ansible/roles/cacerts/tasks/configure.yml | 9 +- ansible/roles/cacerts/tasks/export.yml | 5 +- ansible/roles/cacerts/tasks/main.yml | 3 +- ansible/roles/cluster_infra/defaults/main.yml | 1 + ansible/roles/cluster_infra/tasks/main.yml | 35 +- ansible/roles/compute_init/README.md | 230 ++++---- .../roles/compute_init/files/compute-init.yml | 115 ++-- ansible/roles/compute_init/tasks/export.yml | 31 +- ansible/roles/compute_init/tasks/install.yml | 25 +- ansible/roles/cuda/defaults/main.yml | 2 + 
ansible/roles/cuda/tasks/facts.yml | 2 +- ansible/roles/cuda/tasks/install.yml | 19 +- ansible/roles/cuda/tasks/runtime.yml | 3 +- ansible/roles/cuda/tasks/samples.yml | 13 +- ansible/roles/dnf_repos/README.md | 40 +- ansible/roles/dnf_repos/defaults/main.yml | 1 + .../roles/dnf_repos/tasks/disable_repos.yml | 2 +- ansible/roles/doca/defaults/main.yml | 4 +- .../roles/doca/tasks/install-kernel-devel.yml | 13 +- ansible/roles/doca/tasks/install.yml | 18 +- ansible/roles/doca/tasks/main.yml | 3 +- ansible/roles/eessi/README.md | 19 +- ansible/roles/eessi/defaults/main.yaml | 1 - ansible/roles/eessi/tasks/configure.yml | 5 +- ansible/roles/eessi/tasks/install.yml | 19 +- ansible/roles/eessi/tasks/main.yml | 4 +- ansible/roles/etc_hosts/README.md | 5 +- ansible/roles/etc_hosts/defaults/main.yml | 3 +- ansible/roles/etc_hosts/tasks/main.yml | 7 +- ansible/roles/fail2ban/README.md | 22 +- ansible/roles/fail2ban/handlers/main.yml | 3 +- ansible/roles/fail2ban/meta/main.yml | 10 +- ansible/roles/fail2ban/tasks/configure.yml | 9 +- ansible/roles/fail2ban/tasks/install.yml | 4 +- ansible/roles/fail2ban/tasks/main.yml | 4 +- ansible/roles/filebeat/defaults/main.yml | 2 +- ansible/roles/filebeat/handlers/main.yml | 7 +- ansible/roles/filebeat/tasks/install.yml | 10 +- ansible/roles/filebeat/tasks/main.yml | 5 +- ansible/roles/filebeat/tasks/runtime.yml | 24 +- ansible/roles/filebeat/tasks/validate.yml | 4 +- ansible/roles/firewalld/README.md | 42 +- ansible/roles/firewalld/defaults/main.yml | 3 +- ansible/roles/firewalld/handlers/main.yml | 2 +- ansible/roles/firewalld/meta/main.yml | 11 +- ansible/roles/firewalld/tasks/install.yml | 3 +- ansible/roles/firewalld/tasks/main.yml | 4 +- ansible/roles/firewalld/tasks/runtime.yml | 6 +- ansible/roles/freeipa/README.md | 33 +- ansible/roles/freeipa/defaults/main.yml | 9 +- ansible/roles/freeipa/tasks/addhost.yml | 7 +- .../roles/freeipa/tasks/backup-keytabs.yml | 6 +- .../roles/freeipa/tasks/client-install.yml | 4 +- ansible/roles/freeipa/tasks/enrol.yml | 19 +- ansible/roles/freeipa/tasks/server.yml | 40 +- ansible/roles/freeipa/tasks/users.yml | 10 +- ansible/roles/freeipa/tasks/validate.yml | 17 +- ansible/roles/gateway/README.md | 2 + ansible/roles/gateway/files/gateway-init.yml | 29 +- ansible/roles/gateway/tasks/main.yml | 5 +- .../files/openhpc-slurm.json | 2 +- .../roles/grafana-dashboards/tasks/main.yml | 25 +- ansible/roles/hpctests/README.md | 46 +- ansible/roles/hpctests/defaults/main.yml | 22 +- .../roles/hpctests/files/.clang-format-ignore | 1 + ansible/roles/hpctests/files/CPPLINT.cfg | 1 + .../roles/hpctests/files/plot_imb_pingpong.py | 111 ++-- ansible/roles/hpctests/library/hpl_pq.py | 40 +- .../roles/hpctests/library/plot_nxnlatbw.py | 185 +++++-- .../hpctests/library/read_imb_pingpong.py | 44 +- .../roles/hpctests/library/slurm_node_info.py | 47 +- ansible/roles/hpctests/meta/main.yml | 4 +- ansible/roles/hpctests/tasks/build-hpl.yml | 48 +- ansible/roles/hpctests/tasks/hpl-solo.yml | 60 +- ansible/roles/hpctests/tasks/main.yml | 25 +- ansible/roles/hpctests/tasks/pingmatrix.yml | 44 +- ansible/roles/hpctests/tasks/pingpong.yml | 37 +- ansible/roles/hpctests/tasks/setup.yml | 18 +- ansible/roles/hpctests/tasks/source-hpl.yml | 5 +- .../roles/hpctests/templates/hpl-build.sh.j2 | 0 .../roles/hpctests/templates/hpl-solo.sh.j2 | 0 .../roles/hpctests/templates/pingmatrix.sh.j2 | 0 .../roles/hpctests/templates/pingpong.sh.j2 | 0 ansible/roles/k3s/README.md | 10 +- ansible/roles/k3s/defaults/main.yml | 3 +- 
ansible/roles/k3s/tasks/agent-runtime.yml | 11 +- ansible/roles/k3s/tasks/install.yml | 93 ++-- ansible/roles/k3s/tasks/server-runtime.yml | 22 +- .../k3s/templates/k3s-agent.service.env.j2 | 6 +- .../roles/k3s/templates/k3s.service.env.j2 | 2 +- ansible/roles/k9s/tasks/main.yml | 19 +- ansible/roles/lustre/README.md | 15 +- ansible/roles/lustre/defaults/main.yml | 5 +- ansible/roles/lustre/tasks/configure.yml | 12 +- ansible/roles/lustre/tasks/install.yml | 18 +- ansible/roles/lustre/tasks/validate.yml | 9 +- ansible/roles/mysql/README.md | 28 +- ansible/roles/mysql/defaults/main.yml | 5 +- ansible/roles/mysql/tasks/configure.yml | 39 +- ansible/roles/mysql/tasks/install.yml | 9 +- ansible/roles/mysql/tasks/main.yml | 5 +- ansible/roles/nhc/README.md | 12 +- ansible/roles/nhc/tasks/export.yml | 1 + ansible/roles/nhc/tasks/main.yml | 2 +- ansible/roles/ofed/README.md | 7 +- ansible/roles/ofed/defaults/main.yml | 4 +- ansible/roles/ofed/tasks/install.yml | 36 +- ansible/roles/ofed/tasks/main.yml | 3 +- ansible/roles/openondemand/README.md | 42 +- ansible/roles/openondemand/defaults/main.yml | 34 +- .../files/missing_home_directory.html | 99 ++-- .../openondemand/tasks/codeserver_compute.yml | 9 +- .../openondemand/tasks/config_changes.yml | 3 +- ansible/roles/openondemand/tasks/exporter.yml | 5 +- .../openondemand/tasks/jupyter_compute.yml | 14 +- ansible/roles/openondemand/tasks/main.yml | 51 +- ansible/roles/openondemand/tasks/pam_auth.yml | 14 +- .../openondemand/tasks/rstudio_compute.yml | 7 +- ansible/roles/openondemand/tasks/validate.yml | 3 +- .../roles/openondemand/tasks/vnc_compute.yml | 30 +- ansible/roles/opensearch/defaults/main.yml | 4 +- ansible/roles/opensearch/handlers/main.yml | 3 +- .../roles/opensearch/tasks/archive_data.yml | 4 +- ansible/roles/opensearch/tasks/certs.yml | 3 +- ansible/roles/opensearch/tasks/install.yml | 15 +- .../opensearch/tasks/migrate-opendistro.yml | 3 +- ansible/roles/opensearch/tasks/runtime.yml | 44 +- ansible/roles/passwords/defaults/main.yml | 3 + ansible/roles/passwords/tasks/main.yml | 4 +- ansible/roles/passwords/tasks/validate.yml | 3 +- .../roles/persist_hostkeys/defaults/main.yml | 1 + ansible/roles/persist_hostkeys/tasks/main.yml | 66 +-- .../persist_openhpc_secrets/tasks/main.yml | 14 +- ansible/roles/podman/defaults/main.yml | 1 + ansible/roles/podman/tasks/configure.yml | 17 +- ansible/roles/podman/tasks/install.yml | 4 +- ansible/roles/podman/tasks/main.yml | 4 +- ansible/roles/proxy/defaults/main.yml | 1 + ansible/roles/proxy/tasks/main.yml | 23 +- ansible/roles/pulp_site/README.md | 33 +- .../filter_plugins/pulp-list-filters.py | 85 +-- ansible/roles/pulp_site/tasks/install.yml | 36 +- ansible/roles/pulp_site/tasks/sync.yml | 41 +- ansible/roles/rebuild/README.md | 11 +- ansible/roles/rebuild/defaults/main.yml | 6 +- ansible/roles/rebuild/tasks/configure.yml | 4 +- ansible/roles/rebuild/tasks/install.yml | 2 +- ansible/roles/rebuild/tasks/main.yml | 4 +- ansible/roles/rebuild/tasks/rebuild.yml | 6 +- .../roles/rebuild/tasks/rebuild_partition.yml | 7 +- ansible/roles/resolv_conf/README.md | 2 + ansible/roles/resolv_conf/defaults/main.yml | 1 + ansible/roles/resolv_conf/tasks/main.yml | 3 +- ansible/roles/slurm_exporter/README.md | 41 +- .../roles/slurm_exporter/defaults/main.yml | 4 +- .../roles/slurm_exporter/handlers/main.yml | 2 +- .../roles/slurm_exporter/tasks/configure.yml | 2 +- .../roles/slurm_exporter/tasks/install.yml | 12 +- ansible/roles/slurm_exporter/tasks/main.yml | 4 +- ansible/roles/slurm_recompile/README.md | 16 
+- ansible/roles/slurm_recompile/tasks/main.yml | 16 +- ansible/roles/slurm_stats/README.md | 24 +- ansible/roles/slurm_stats/tasks/configure.yml | 8 +- ansible/roles/slurm_stats/tasks/install.yml | 2 +- ansible/roles/slurm_stats/tasks/main.yml | 4 +- ansible/roles/slurm_tools/README.md | 8 +- ansible/roles/slurm_tools/tasks/main.yml | 32 +- ansible/roles/squid/README.md | 2 +- ansible/roles/squid/defaults/main.yml | 5 +- ansible/roles/squid/handlers/main.yml | 3 +- ansible/roles/squid/tasks/configure.yml | 10 +- ansible/roles/squid/tasks/install.yml | 3 +- ansible/roles/squid/tasks/main.yml | 5 +- ansible/roles/sshd/defaults/main.yml | 1 + ansible/roles/sshd/handlers/main.yml | 3 +- ansible/roles/sshd/tasks/configure.yml | 11 +- ansible/roles/sshd/tasks/export.yml | 3 +- ansible/roles/sshd/tasks/main.yml | 3 +- ansible/roles/sssd/README.md | 1 - ansible/roles/sssd/defaults/main.yml | 1 + ansible/roles/sssd/handlers/main.yml | 3 +- ansible/roles/sssd/tasks/configure.yml | 20 +- ansible/roles/sssd/tasks/export.yml | 5 +- ansible/roles/sssd/tasks/install.yml | 7 +- ansible/roles/sssd/tasks/main.yml | 5 +- ansible/roles/systemd/README.md | 19 +- ansible/roles/systemd/defaults/main.yml | 3 +- ansible/roles/systemd/tasks/main.yml | 13 +- ansible/roles/topology/README.md | 34 +- ansible/roles/topology/defaults/main.yml | 1 - ansible/roles/topology/library/map_hosts.py | 35 +- ansible/roles/topology/tasks/main.yml | 2 +- ansible/roles/tuned/README.md | 7 +- ansible/roles/tuned/defaults/main.yml | 2 +- ansible/roles/tuned/tasks/configure.yml | 2 +- ansible/roles/tuned/tasks/install.yml | 3 +- ansible/roles/tuned/tasks/main.yml | 4 +- ansible/roles/zenith_proxy/defaults/main.yml | 8 +- .../files/podman-pod-infra-attach.sh | 2 +- ansible/roles/zenith_proxy/tasks/main.yml | 42 +- ansible/site.yml | 27 +- ansible/slurm.yml | 37 +- ansible/validate.yml | 40 +- cookiecutter/cookiecutter.json | 4 +- .../{{cookiecutter.environment}}/README.md | 2 +- .../inventory/group_vars/all/basic_users.yml | 1 + .../inventory/group_vars/all/hpctests.yml | 1 + .../{{cookiecutter.environment}}/tofu/main.tf | 32 +- dev/ansible-ssh | 24 +- dev/delete-cluster.py | 47 +- dev/extract_logs.py | 74 ++- dev/image-share.sh | 12 +- dev/output_manifest.py | 25 +- dev/setup-env.sh | 42 +- docs/README.md | 8 +- docs/adding-functionality.md | 3 +- docs/alerting.md | 73 +-- docs/chrony.md | 3 +- docs/ci.md | 3 +- docs/environments.md | 15 +- docs/experimental/compute-init.md | 12 +- docs/experimental/isolated-clusters.md | 144 ++--- docs/experimental/pulp.md | 15 +- docs/experimental/slurm-controlled-rebuild.md | 272 ++++----- docs/filesystems.md | 84 +-- docs/image-build.md | 76 +-- docs/k3s.README.md | 8 +- docs/mig.md | 55 +- docs/monitoring-and-logging.md | 67 ++- docs/networks.md | 9 +- docs/openondemand.md | 32 +- docs/operations.md | 122 +++-- docs/persistent-state.md | 2 + docs/production.md | 269 ++++----- docs/sequence.md | 11 +- docs/site/README.md | 3 +- docs/upgrades.md | 97 ++-- environments/.caas/README.md | 11 +- environments/.caas/hooks/post.yml | 16 +- environments/.caas/hooks/pre.yml | 38 +- .../inventory/group_vars/all/basic_users.yml | 1 + .../inventory/group_vars/all/cluster.yml | 1 + .../inventory/group_vars/all/grafana.yml | 1 + .../inventory/group_vars/all/hpctests.yml | 3 +- .../.caas/inventory/group_vars/all/manila.yml | 5 +- .../.caas/inventory/group_vars/all/nfs.yml | 5 +- .../inventory/group_vars/all/openhpc.yml | 1 + .../inventory/group_vars/all/openondemand.yml | 1 - 
.../.caas/inventory/group_vars/all/zenith.yml | 1 + .../.caas/inventory/group_vars/openstack.yml | 1 + .../ui-meta/slurm-infra-fast-volume-type.yml | 13 +- .../.caas/ui-meta/slurm-infra-manila-home.yml | 12 +- environments/.caas/ui-meta/slurm-infra.yml | 12 +- .../.stackhpc/hooks/post-bootstrap.yml | 8 +- environments/.stackhpc/hooks/pre.yml | 9 +- .../inventory/group_vars/all/basic_users.yml | 3 + .../inventory/group_vars/all/bastion.yml | 1 + .../inventory/group_vars/all/freeipa.yml | 1 + .../inventory/group_vars/all/hpctests.yml | 1 + .../inventory/group_vars/all/manila.yml | 1 + .../inventory/group_vars/all/openhpc.yml | 1 + .../inventory/group_vars/all/openondemand.yml | 7 +- .../inventory/group_vars/all/podman.yml | 1 + .../inventory/group_vars/all/tuned.yml | 1 + .../inventory/group_vars/builder.yml | 3 +- .../tofu/cluster_image.auto.tfvars.json | 8 +- environments/.stackhpc/tofu/main.tf | 103 ++-- environments/README.md | 14 +- .../common/files/filebeat/filebeat.yml | 1 + .../inventory/group_vars/all/alertmanager.yml | 4 +- .../inventory/group_vars/all/ansible_init.yml | 1 + .../inventory/group_vars/all/basic_users.yml | 3 +- .../inventory/group_vars/all/defaults.yml | 107 ++-- .../inventory/group_vars/all/filebeat.yml | 2 +- .../inventory/group_vars/all/firewalld.yml | 5 +- .../group_vars/all/freeipa_server.yml | 1 + .../inventory/group_vars/all/grafana.yml | 12 +- .../common/inventory/group_vars/all/k3s.yml | 1 + .../inventory/group_vars/all/manila.yml | 1 + .../common/inventory/group_vars/all/mysql.yml | 2 +- .../common/inventory/group_vars/all/nfs.yml | 12 +- .../inventory/group_vars/all/openhpc.yml | 13 +- .../inventory/group_vars/all/openondemand.yml | 48 +- .../inventory/group_vars/all/podman.yml | 1 + .../inventory/group_vars/all/prometheus.yml | 67 +-- .../common/inventory/group_vars/all/proxy.yml | 1 + .../common/inventory/group_vars/all/pulp.yml | 1 + .../group_vars/all/slurm_exporter.yml | 5 +- .../common/inventory/group_vars/all/squid.yml | 1 + .../common/inventory/group_vars/all/sshd.yaml | 1 + .../inventory/group_vars/all/systemd.yml | 1 + .../inventory/group_vars/all/update.yml | 7 +- .../site/inventory/group_vars/all/grafana.yml | 3 +- .../group_vars/all/vault_alertmanager.yml | 2 +- environments/site/tofu/additional.tf | 46 +- environments/site/tofu/baremetal-node-list.py | 34 +- environments/site/tofu/compute.tf | 50 +- environments/site/tofu/control.tf | 56 +- environments/site/tofu/data.tf | 3 +- environments/site/tofu/inventory.tf | 27 +- environments/site/tofu/login.tf | 50 +- environments/site/tofu/main.tf | 2 +- environments/site/tofu/network.tf | 8 +- environments/site/tofu/node_group/main.tf | 2 +- environments/site/tofu/node_group/network.tf | 4 +- environments/site/tofu/node_group/nodes.tf | 116 ++-- .../site/tofu/node_group/variables.tf | 193 +++---- .../site/tofu/read-inventory-secrets.py | 49 +- environments/site/tofu/variables.tf | 517 +++++++++--------- environments/site/tofu/volumes.tf | 60 +- packer/openhpc_extravars.yml | 3 +- requirements.yml | 1 - super-linter.env | 27 + 389 files changed, 5025 insertions(+), 4013 deletions(-) create mode 100644 .ansible-lint.yml create mode 100644 .checkov.yaml create mode 100644 .editorconfig mode change 100644 => 100755 .github/bin/create-merge-branch.sh mode change 100644 => 100755 .github/bin/get-s3-image.sh create mode 120000 .github/linters/.checkov.yaml create mode 120000 .github/linters/.python-lint create mode 120000 .github/linters/.shellcheckrc create mode 120000 .github/linters/.yamllint.yml 
create mode 120000 .github/linters/actionlint.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/main.yml create mode 100644 .python-lint create mode 100644 .shellcheckrc create mode 100644 .yamllint.yml create mode 100644 actionlint.yml create mode 100644 ansible/roles/hpctests/files/.clang-format-ignore create mode 100644 ansible/roles/hpctests/files/CPPLINT.cfg mode change 100644 => 100755 ansible/roles/hpctests/templates/hpl-build.sh.j2 mode change 100644 => 100755 ansible/roles/hpctests/templates/hpl-solo.sh.j2 mode change 100644 => 100755 ansible/roles/hpctests/templates/pingmatrix.sh.j2 mode change 100644 => 100755 ansible/roles/hpctests/templates/pingpong.sh.j2 mode change 100644 => 100755 ansible/roles/zenith_proxy/files/podman-pod-infra-attach.sh create mode 100644 super-linter.env diff --git a/.ansible-lint.yml b/.ansible-lint.yml new file mode 100644 index 0000000..97d3b68 --- /dev/null +++ b/.ansible-lint.yml @@ -0,0 +1,24 @@ +--- +skip_list: + - role-name + # Unresolved issues with parsing jinja in multiline strings + # https://github.com/ansible/ansible-lint/issues/3935 + - jinja[spacing] + - galaxy[no-changelog] + - meta-runtime[unsupported-version] + +warn_list: + - name[missing] + - name[play] + - var-naming + +exclude_paths: + - actionlint.yml + - .ansible/ + - .github/ + # Rule 'syntax-check' is unskippable, you cannot use it in 'skip_list' or 'warn_list'. + # It breaks the parser which takes place before the linter, the only option is to exclude the file. + - ansible/roles/filebeat/tasks/runtime.yml + - environments/common/files/filebeat/filebeat.yml + # Rule 'load-failure[filenotfounderror]' is also unskippable + - ansible/roles/compute_init/files/compute-init.yml diff --git a/.checkov.yaml b/.checkov.yaml new file mode 100644 index 0000000..ef0fb8b --- /dev/null +++ b/.checkov.yaml @@ -0,0 +1,4 @@ +--- +skip-check: + # Requires all blocks to have rescue: - not considered appropriate + - CKV2_ANSIBLE_3 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..ab1e657 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +# The is primarily used to alter the behaviour of linters executed by super-linter. +# See https://editorconfig.org/ + +# shfmt will default to indenting shell scripts with tabs, +# define the indent as 2 spaces +[{.github/bin,dev}/*.sh] +indent_style = space +indent_size = 2 diff --git a/.github/bin/create-merge-branch.sh b/.github/bin/create-merge-branch.sh old mode 100644 new mode 100755 index d76fe45..af1684d --- a/.github/bin/create-merge-branch.sh +++ b/.github/bin/create-merge-branch.sh @@ -44,7 +44,7 @@ if git show-branch "remotes/origin/$BRANCH_NAME" >/dev/null 2>&1; then fi echo "[INFO] Merging release tag - $RELEASE_TAG" -git merge --strategy recursive -X theirs --no-commit $RELEASE_TAG +git merge --strategy recursive -X theirs --no-commit "$RELEASE_TAG" # Check if the merge resulted in any changes being staged if [ -n "$(git status --short)" ]; then @@ -54,7 +54,7 @@ if [ -n "$(git status --short)" ]; then # NOTE(scott): The GitHub create-pull-request action does # the commiting for us, so we only need to make branches # and commits if running outside of GitHub actions. - if [ ! $GITHUB_ACTIONS ]; then + if [ ! "$GITHUB_ACTIONS" ]; then echo "[INFO] Checking out temporary branch '$BRANCH_NAME'..." 
git checkout -b "$BRANCH_NAME" @@ -74,8 +74,8 @@ if [ -n "$(git status --short)" ]; then # Write a file containing the branch name and tag # for automatic PR or MR creation that follows - echo "BRANCH_NAME=\"$BRANCH_NAME\"" > .mergeenv - echo "RELEASE_TAG=\"$RELEASE_TAG\"" >> .mergeenv + echo "BRANCH_NAME=\"$BRANCH_NAME\"" >.mergeenv + echo "RELEASE_TAG=\"$RELEASE_TAG\"" >>.mergeenv else echo "[INFO] Merge resulted in no changes" -fi \ No newline at end of file +fi diff --git a/.github/bin/get-s3-image.sh b/.github/bin/get-s3-image.sh old mode 100644 new mode 100755 index 98b9131..dc0c816 --- a/.github/bin/get-s3-image.sh +++ b/.github/bin/get-s3-image.sh @@ -13,14 +13,14 @@ echo "Checking if image $image_name exists in OpenStack" image_exists=$(openstack image list --name "$image_name" -f value -c Name) if [ -n "$image_exists" ]; then - echo "Image $image_name already exists in OpenStack." + echo "Image $image_name already exists in OpenStack." else - echo "Image $image_name not found in OpenStack. Getting it from S3." + echo "Image $image_name not found in OpenStack. Getting it from S3." - wget https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/$bucket_name/$image_name --progress=dot:giga + wget "https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/$bucket_name/$image_name" --progress=dot:giga - echo "Uploading image $image_name to OpenStack..." - openstack image create --file $image_name --disk-format qcow2 $image_name --progress + echo "Uploading image $image_name to OpenStack..." + openstack image create --file "$image_name" --disk-format qcow2 "$image_name" --progress - echo "Image $image_name has been uploaded to OpenStack." -fi \ No newline at end of file + echo "Image $image_name has been uploaded to OpenStack." +fi diff --git a/.github/linters/.checkov.yaml b/.github/linters/.checkov.yaml new file mode 120000 index 0000000..2cc8ad8 --- /dev/null +++ b/.github/linters/.checkov.yaml @@ -0,0 +1 @@ +../../.checkov.yaml \ No newline at end of file diff --git a/.github/linters/.python-lint b/.github/linters/.python-lint new file mode 120000 index 0000000..d0b7471 --- /dev/null +++ b/.github/linters/.python-lint @@ -0,0 +1 @@ +../../.python-lint \ No newline at end of file diff --git a/.github/linters/.shellcheckrc b/.github/linters/.shellcheckrc new file mode 120000 index 0000000..3f34501 --- /dev/null +++ b/.github/linters/.shellcheckrc @@ -0,0 +1 @@ +../../.shellcheckrc \ No newline at end of file diff --git a/.github/linters/.yamllint.yml b/.github/linters/.yamllint.yml new file mode 120000 index 0000000..54a3654 --- /dev/null +++ b/.github/linters/.yamllint.yml @@ -0,0 +1 @@ +../../.yamllint.yml \ No newline at end of file diff --git a/.github/linters/actionlint.yml b/.github/linters/actionlint.yml new file mode 120000 index 0000000..766b4e9 --- /dev/null +++ b/.github/linters/actionlint.yml @@ -0,0 +1 @@ +../../actionlint.yml \ No newline at end of file diff --git a/.github/workflows/extra.yml b/.github/workflows/extra.yml index f18e380..1941064 100644 --- a/.github/workflows/extra.yml +++ b/.github/workflows/extra.yml @@ -1,30 +1,23 @@ +--- + +# Test building extra images on OpenStack. +# This workflow can run standalone or as part of the main CI workflow. +# See the workflow file 'main.yml' for how this is CI triggered. 
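+# For reference, a sketch of the call site defined in main.yml later in this
+# patch (the 'if:' gating on changed files is omitted here for brevity):
+#
+#   extra:
+#     needs: files_changed
+#     uses: ./.github/workflows/extra.yml
+#     secrets: inherit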
+ name: Test extra build on: + workflow_call: workflow_dispatch: - push: - branches: - - main - paths: - - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' - - 'ansible/roles/doca/**' - - 'ansible/roles/cuda/**' - - 'ansible/roles/slurm_recompile/**' # runs on cuda group - - 'ansible/roles/lustre/**' - - '.github/workflows/extra.yml' - pull_request: - paths: - - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' - - 'ansible/roles/doca/**' - - 'ansible/roles/cuda/**' - - 'ansible/roles/lustre/**' - - '.github/workflows/extra.yml' + +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write jobs: doca: name: extra-build - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS - cancel-in-progress: true runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails @@ -46,7 +39,7 @@ jobs: PACKER_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Load current fat images into GITHUB_ENV # see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-of-a-multiline-string @@ -60,7 +53,7 @@ jobs: - name: Record settings run: | echo CI_CLOUD: ${{ env.CI_CLOUD }} - echo FAT_IMAGES: ${FAT_IMAGES} + echo "FAT_IMAGES: ${FAT_IMAGES}" - name: Setup ssh run: | @@ -99,7 +92,7 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ - -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var-file="$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl" \ -var "source_image_name=${{ fromJSON(env.FAT_IMAGES)['cluster_image'][matrix.build.source_image_name_key] }}" \ -var "image_name=${{ matrix.build.image_name }}" \ -var "inventory_groups=${{ matrix.build.inventory_groups }}" \ @@ -111,14 +104,14 @@ jobs: run: | . venv/bin/activate IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do + while ! openstack image show -f value -c name "$IMAGE_ID"; do sleep 5 done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + IMAGE_NAME=$(openstack image show -f value -c name "$IMAGE_ID") echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - echo $IMAGE_ID > image-id.txt - echo $IMAGE_NAME > image-name.txt + echo "$IMAGE_ID" > image-id.txt + echo "$IMAGE_NAME" > image-name.txt - name: Make image usable for further builds run: | diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 51ea29a..407bd44 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,6 +1,7 @@ name: Build fat image on: workflow_dispatch: + # checkov:skip=CKV_GHA_7: "The build output cannot be affected by user parameters other than the build entry point and the top-level source location. GitHub Actions workflow_dispatch inputs MUST be empty. 
" inputs: ci_cloud: description: 'Select the CI_CLOUD' @@ -16,6 +17,12 @@ on: required: true default: true +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: openstack: name: openstack-imagebuild @@ -42,7 +49,7 @@ jobs: PACKER_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record settings for CI cloud run: | @@ -85,7 +92,7 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ github.event.inputs.cleanup_on_failure && 'cleanup' || 'abort' }} \ - -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var-file="$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl" \ -var "source_image_name=${{ matrix.build.source_image_name }}" \ -var "image_name=${{ matrix.build.image_name }}" \ -var "inventory_groups=${{ matrix.build.inventory_groups }}" \ @@ -96,14 +103,14 @@ jobs: run: | . venv/bin/activate IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do + while ! openstack image show -f value -c name "$IMAGE_ID"; do sleep 5 done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + IMAGE_NAME=$(openstack image show -f value -c name "$IMAGE_ID") echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - echo $IMAGE_ID > image-id.txt - echo $IMAGE_NAME > image-name.txt + echo "$IMAGE_ID" > image-id.txt + echo "$IMAGE_NAME" > image-name.txt - name: Make image usable for further builds run: | diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..d824577 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,49 @@ +--- +name: Lint + +on: # yamllint disable-line rule:truthy + workflow_call: + +permissions: + contents: read + packages: read + # To report GitHub Actions status checks + statuses: write + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + # To report GitHub Actions status checks + statuses: write + + steps: + - uses: actions/checkout@v4 + with: + # super-linter needs the full git history to get the + # list of files that changed across commits + fetch-depth: 0 + submodules: true + + - name: Run ansible-lint + uses: ansible/ansible-lint@v25.4.0 + env: + ANSIBLE_COLLECTIONS_PATH: .ansible/collections + + - name: Load super-linter configuration + # Use grep inverse matching to exclude eventual comments in the .env file + # because the GitHub Actions command to set environment variables doesn't + # support comments. + # yamllint disable-line rule:line-length + # Ref: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable + run: grep -v '^#' super-linter.env >> "$GITHUB_ENV" + if: always() + + - name: Run super-linter + uses: super-linter/super-linter@v7.3.0 + if: always() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..5e2ccc7 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,149 @@ +--- + +# This file governs the main CI workflow. +# It's the only workflow triggered on push and pull requests, +# it manages the CI workflow as follows: +# 1. Lint the code aborting the workflow if there are linting errors. +# 2. Determine which files have changed and set job outputs accordingly. 
+# 3. Conditionally run the other workflows based on the changed files: +# * stackhpc.yml +# * extra.yml +# * trivyscan.yml + +name: Test on push and pull request + +permissions: + actions: write + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + id-token: write + +on: + push: + branches: + - main + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + lint: + name: Lint + uses: ./.github/workflows/lint.yml + + files_changed: + name: Determine files changed + needs: lint + runs-on: ubuntu-latest + # Map a step output to a job output, this allows other jobs to be gated on the filter results + outputs: + # The 'stackhpc' output will be 'true' if either of the two stackhpc filters below matched + stackhpc: ${{ toJson(fromJson(steps.filter_on_every.outputs.stackhpc) || fromJson(steps.filter_on_some.outputs.stackhpc)) }} + extra_on_push: ${{ steps.filter_on_some.outputs.extra_on_push }} + extra_on_pull_request: ${{ steps.filter_on_some.outputs.extra_on_pull_request }} + trivyscan: ${{ steps.filter_on_some.outputs.trivyscan }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + # NOTE: We're detecting the changed files within a job so that we can gate execution of other jobs. + # We use dorny/paths-filter which doesn't work like the conventional 'paths' and 'paths_exclude', + # we can't do the following: + # paths: + # - '**' + # - '!dev/**' + # - 'dev/setup-env.sh' + # + # Which would include all files whilst removing all "dev/" files except "dev/setup-env.sh". + # We have to use two filters: + # * first filter includes all changed files and removes "dev/" files + # * second filter explicitly adds 'dev/setup-env.sh' + # We use the logical OR of the filters outputs to gate jobs. + + - name: Paths matching on every filter rule + # For safety use the commit of dorny/paths-filter@v3 + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 + id: filter_on_every + with: + # Filter changed files, 'every' means the file is matched only if it matches all filter rules. + # NOTE: currently seeing: Warning: Unexpected input(s) 'predicate-quantifier', valid inputs are.. + # this can be ignored, filtering works as expected. + predicate-quantifier: 'every' + list-files: 'json' + filters: | + stackhpc: + - '**' + - '!dev/**' + - '!**/*.md' + - '!.gitignore' + - '!.github/workflows/**' + + - name: Paths matching on any filter rule + # For safety use the commit of dorny/paths-filter@v3 + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 + id: filter_on_some + with: + # Filter changed files, 'some' means the file is matched if any one of the filter rules match. + # NOTE: currently seeing: Warning: Unexpected input(s) 'predicate-quantifier', valid inputs are.. + # this can be ignored, filtering works as expected. 
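+        # The outputs of this filter and 'filter_on_every' are OR-combined into
+        # the 'stackhpc' job output declared above, i.e.:
+        #   stackhpc: ${{ toJson(fromJson(steps.filter_on_every.outputs.stackhpc) || fromJson(steps.filter_on_some.outputs.stackhpc)) }}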
+ predicate-quantifier: 'some' + list-files: 'json' + filters: | + stackhpc: + - 'dev/setup-env.sh' + - '.github/workflows/stackhpc.yml' + extra_on_push: + - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' + - 'ansible/roles/doca/**' + - 'ansible/roles/cuda/**' + - 'ansible/roles/slurm_recompile/**' # runs on cuda group + - 'ansible/roles/lustre/**' + - '.github/workflows/extra.yml' + extra_on_pull_request: + - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' + - 'ansible/roles/doca/**' + - 'ansible/roles/cuda/**' + - 'ansible/roles/lustre/**' + - '.github/workflows/extra.yml' + trivyscan: + - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' + + - name: Paths matched output + # NOTE: This is a debug step, it shows what files were matched by the filters. + # It's useful because dorny/paths-filter doesn't work like the conventional 'paths' and 'paths_exclude' + run: > + echo '{ "stackhpc_every_files": ${{ steps.filter_on_every.outputs.stackhpc_files }} }' | jq -r '.'; + echo '{ "stackhpc_some_files": ${{ steps.filter_on_some.outputs.stackhpc_files }} }' | jq -r '.'; + echo '{ "extra_on_push_files": ${{ steps.filter_on_some.outputs.extra_on_push_files }} }' | jq -r '.'; + echo '{ "extra_on_pull_request_files": ${{ steps.filter_on_some.outputs.extra_on_pull_request_files }} }' | jq -r '.'; + echo '{ "trivyscan_files": ${{ steps.filter_on_some.outputs.trivyscan_files }} }' | jq -r '.' + + stackhpc: + name: Test deployment and reimage on OpenStack + needs: files_changed + if: | + needs.files_changed.outputs.stackhpc == 'true' + uses: ./.github/workflows/stackhpc.yml + secrets: inherit + + extra: + name: Test extra build + needs: files_changed + if: | + github.event_name != 'pull_request' && needs.files_changed.outputs.extra_on_push == 'true' || + github.event_name == 'pull_request' && needs.files_changed.outputs.extra_on_pull_request == 'true' + uses: ./.github/workflows/extra.yml + secrets: inherit + + trivyscan: + name: Trivy scan image for vulnerabilities + needs: files_changed + if: | + github.event_name == 'pull_request' && + needs.files_changed.outputs.trivyscan == 'true' + uses: ./.github/workflows/trivyscan.yml + secrets: inherit diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml index 897d357..5bec96d 100644 --- a/.github/workflows/nightly-cleanup.yml +++ b/.github/workflows/nightly-cleanup.yml @@ -4,6 +4,12 @@ on: schedule: - cron: '0 21 * * *' # Run at 9PM - image sync runs at midnight +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: ci_cleanup: name: ci-cleanup @@ -20,7 +26,7 @@ jobs: OS_CLOUD: openstack CI_CLOUD: ${{ matrix.cloud }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record which cloud CI is running on run: | @@ -31,7 +37,7 @@ jobs: python3 -m venv venv . 
venv/bin/activate pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + pip install "$(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)" shell: bash - name: Write clouds.yaml @@ -52,7 +58,7 @@ jobs: # Flatten multiline value so can be passed as env var CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//') echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED" - echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> $GITHUB_ENV + echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> "$GITHUB_ENV" fi shell: bash @@ -69,7 +75,7 @@ jobs: echo "Processing cluster: $cluster_prefix" # Get all servers with the matching name for control node - CONTROL_SERVERS=$(openstack server list --name ${cluster_prefix}-control --format json) + CONTROL_SERVERS=$(openstack server list --name "${cluster_prefix}-control" --format json) # Get unique server names to avoid duplicate cleanup UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq) @@ -86,7 +92,7 @@ jobs: fi echo "Deleting cluster $cluster_prefix (server $server)..." - ./dev/delete-cluster.py $cluster_prefix --force + ./dev/delete-cluster.py "$cluster_prefix" --force done done shell: bash diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index ea4b242..21e9d64 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -1,6 +1,7 @@ name: Build nightly image on: workflow_dispatch: + # checkov:skip=CKV_GHA_7: "The build output cannot be affected by user parameters other than the build entry point and the top-level source location. GitHub Actions workflow_dispatch inputs MUST be empty. " inputs: ci_cloud: description: 'Select the CI_CLOUD' @@ -13,6 +14,12 @@ on: # schedule: # - cron: '0 0 * * *' # Run at midnight on default branch +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: openstack: name: openstack-imagebuild @@ -39,7 +46,7 @@ jobs: PACKER_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record settings for CI cloud run: | @@ -81,8 +88,8 @@ jobs: packer init . PACKER_LOG=1 packer build \ - -on-error=${{ vars.PACKER_ON_ERROR }} \ - -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -on-error="${{ vars.PACKER_ON_ERROR }}" \ + -var-file="$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl" \ -var "source_image_name=${{ matrix.build.source_image_name }}" \ -var "image_name=${{ matrix.build.image_name }}" \ -var "image_name_version=" \ @@ -94,10 +101,10 @@ jobs: run: | . venv/bin/activate IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do + while ! openstack image show -f value -c name "$IMAGE_ID"; do sleep 5 done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + IMAGE_NAME=$(openstack image show -f value -c name "$IMAGE_ID") echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" @@ -142,7 +149,7 @@ jobs: SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} TARGET_CLOUD: ${{ matrix.target_cloud }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record settings for CI cloud run: | @@ -154,7 +161,7 @@ jobs: python3 -m venv venv . 
venv/bin/activate pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + pip install "$(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)" - name: Write clouds.yaml run: | diff --git a/.github/workflows/release-image.yml b/.github/workflows/release-image.yml index 8fcddf5..1ee545c 100644 --- a/.github/workflows/release-image.yml +++ b/.github/workflows/release-image.yml @@ -6,6 +6,13 @@ on: - published # should work for both pre-releases and releases env: IMAGE_PATH: environments/.stackhpc/tofu/cluster_image.auto.tfvars.json + +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: ci-image-release: name: ci-image-release @@ -18,7 +25,7 @@ jobs: - RL8 - RL9 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Write s3cmd configuration run: echo "${{ secrets.LEAFCLOUD_S3_CFG }}" > ~/.s3cfg diff --git a/.github/workflows/s3-image-sync.yml b/.github/workflows/s3-image-sync.yml index 990125f..43adf50 100644 --- a/.github/workflows/s3-image-sync.yml +++ b/.github/workflows/s3-image-sync.yml @@ -10,6 +10,12 @@ env: S3_BUCKET: openhpc-images-prerelease IMAGE_PATH: environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: s3_cleanup: runs-on: ubuntu-22.04 @@ -17,7 +23,7 @@ jobs: strategy: fail-fast: false steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Write s3cmd configuration run: | @@ -50,7 +56,7 @@ jobs: outputs: ci_cloud: ${{ steps.ci.outputs.CI_CLOUD }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record which cloud CI is running on id: ci @@ -62,7 +68,7 @@ jobs: python3 -m venv venv . venv/bin/activate pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + pip install "$(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)" shell: bash - name: Write clouds.yaml @@ -138,7 +144,7 @@ jobs: OS_CLOUD: openstack CI_CLOUD: ${{ matrix.cloud }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Record which cloud CI is running on run: | @@ -149,7 +155,7 @@ jobs: python3 -m venv venv . venv/bin/activate pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + pip install "$(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)" shell: bash - name: Write clouds.yaml @@ -175,7 +181,7 @@ jobs: image_hanging=$(openstack image list --name ${{ env.TARGET_IMAGE }} -f value -c ID -c Status | grep -v ' active$' | awk '{print $1}') if [ -n "$image_hanging" ]; then echo "Cleaning up OpenStack image with ID: $image_hanging" - openstack image delete $image_hanging + openstack image delete "$image_hanging" else echo "No image ID found, skipping cleanup." fi diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index da4933b..cb4e865 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -1,33 +1,23 @@ +--- + +# Test deployment and reimage on OpenStack. +# This workflow can run standalone or as part of the main CI workflow. +# See the workflow file 'main.yml' for how this is CI triggered. 
name: Test deployment and reimage on OpenStack on: + workflow_call: workflow_dispatch: - push: - branches: - - main - paths: - - '**' - - '!dev/**' - - 'dev/setup-env.sh' - - '!**.md' - - '!.gitignore' - - '!.github/workflows/' - - '.github/workflows/stackhpc' - pull_request: - paths: - - '**' - - '!dev/**' - - 'dev/setup-env.sh' - - '!**.md' - - '!.gitignore' - - '!.github/workflows/' - - '.github/workflows/stackhpc' + +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: openstack: name: openstack-ci - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }} # to branch/PR + OS - cancel-in-progress: true runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails @@ -46,7 +36,7 @@ jobs: - name: Find the latest release run: | - echo LATEST_RELEASE_TAG=$(curl -s https://api.github.com/repos/stackhpc/ansible-slurm-appliance/releases/latest | jq -r .tag_name) >> "$GITHUB_ENV" + echo "LATEST_RELEASE_TAG=$(curl -s https://api.github.com/repos/stackhpc/ansible-slurm-appliance/releases/latest | jq -r .tag_name)" >> "$GITHUB_ENV" - name: Checkout latest release uses: actions/checkout@v4 @@ -59,19 +49,19 @@ jobs: run: | # Iterate over the labels labels=$(echo '${{ toJSON(github.event.pull_request.labels) }}' | jq -r '.[].name') - echo $labels + echo "$labels" for label in $labels; do if [[ $label == CI_CLOUD=* ]]; then # Extract the value after 'CI_CLOUD=' CI_CLOUD_OVERRIDE=${label#CI_CLOUD=} - echo "CI_CLOUD=${CI_CLOUD_OVERRIDE}" >> $GITHUB_ENV + echo "CI_CLOUD=${CI_CLOUD_OVERRIDE}" >> "$GITHUB_ENV" fi done - name: Record debug info run: | - echo LATEST_RELEASE_TAG: $LATEST_RELEASE_TAG - echo CI_CLOUD: $CI_CLOUD + echo "LATEST_RELEASE_TAG: $LATEST_RELEASE_TAG" + echo "CI_CLOUD: $CI_CLOUD" - name: Setup ssh run: | @@ -107,7 +97,7 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate - echo vault_demo_user_password: "$DEMO_USER_PASSWORD" > $APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/all/test_user.yml + echo "vault_demo_user_password: $DEMO_USER_PASSWORD" > "$APPLIANCES_ENVIRONMENT_ROOT/inventory/group_vars/all/test_user.yml" env: DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }} @@ -116,14 +106,14 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate - cd $STACKHPC_TF_DIR + cd "$STACKHPC_TF_DIR" tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" - name: Delete infrastructure if provisioning failed run: | . venv/bin/activate . environments/.stackhpc/activate - cd $STACKHPC_TF_DIR + cd "$STACKHPC_TF_DIR" tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" if: failure() && steps.provision_servers.outcome == 'failure' @@ -159,7 +149,7 @@ jobs: run: | . venv/bin/activate . 
environments/.stackhpc/activate - cd $STACKHPC_TF_DIR + cd "$STACKHPC_TF_DIR" tofu init tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" @@ -205,14 +195,14 @@ jobs: # load ansible variables into shell: ansible-playbook ansible/ci/output_vars.yml \ -e output_vars_hosts=openondemand \ - -e output_vars_path=$APPLIANCES_ENVIRONMENT_ROOT/vars.txt \ + -e output_vars_path="$APPLIANCES_ENVIRONMENT_ROOT/vars.txt" \ -e output_vars_items=bastion_ip,bastion_user,openondemand_servername - source $APPLIANCES_ENVIRONMENT_ROOT/vars.txt + source "$APPLIANCES_ENVIRONMENT_ROOT/vars.txt" # setup ssh proxying: sudo apt-get --yes install proxychains echo proxychains installed - ssh -v -fN -D 9050 ${bastion_user}@${bastion_ip} + ssh -v -fN -D 9050 "${bastion_user}@${bastion_ip}" echo port 9050 forwarded # check OOD server returns 200: @@ -222,9 +212,9 @@ jobs: --server-response \ --no-check-certificate \ --http-user=demo_user \ - --http-password=${DEMO_USER_PASSWORD} https://${openondemand_servername} \ + --http-password="${DEMO_USER_PASSWORD}" "https://${openondemand_servername}" \ 2>&1) - (echo $statuscode | grep "200 OK") || (echo $statuscode && exit 1) + (echo "$statuscode" | grep "200 OK") || (echo "$statuscode" && exit 1) env: DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }} @@ -234,14 +224,14 @@ jobs: . environments/.stackhpc/activate if [ -n "$SNAPSHOT" ] then - echo Deleting $SNAPSHOT - openstack volume snapshot delete $SNAPSHOT + echo "Deleting $SNAPSHOT" + openstack volume snapshot delete "$SNAPSHOT" fi - name: Delete infrastructure run: | . venv/bin/activate . environments/.stackhpc/activate - cd $STACKHPC_TF_DIR + cd "$STACKHPC_TF_DIR" tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" || echo "tofu failed in $STACKHPC_TF_DIR" if: ${{ success() || cancelled() }} diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index fe049e6..8cfc8e4 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -1,17 +1,22 @@ +--- + +# Scan the built image for vulnerabilities using Trivy. +# This workflow can run standalone or as part of the main CI workflow. +# See the workflow file 'main.yml' for how this is CI triggered. + name: Trivy scan image for vulnerabilities on: + workflow_call: workflow_dispatch: - pull_request: - branches: - - main - paths: - - 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json' + +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write jobs: scan: - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build }} # to branch/PR + build - cancel-in-progress: true runs-on: ubuntu-latest strategy: fail-fast: false @@ -23,19 +28,19 @@ jobs: CI_CLOUD: ${{ vars.CI_CLOUD }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Override CI_CLOUD if PR label is present if: ${{ github.event_name == 'pull_request' }} run: | # Iterate over the labels labels=$(echo '${{ toJSON(github.event.pull_request.labels) }}' | jq -r '.[].name') - echo $labels + echo "$labels" for label in $labels; do if [[ $label == CI_CLOUD=* ]]; then # Extract the value after 'CI_CLOUD=' CI_CLOUD_OVERRIDE=${label#CI_CLOUD=} - echo "CI_CLOUD=${CI_CLOUD_OVERRIDE}" >> $GITHUB_ENV + echo "CI_CLOUD=${CI_CLOUD_OVERRIDE}" >> "$GITHUB_ENV" fi done @@ -60,7 +65,7 @@ jobs: python3 -m venv venv . 
venv/bin/activate pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + pip install "$(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)" shell: bash - name: Write clouds.yaml diff --git a/.github/workflows/upgrade-check.yml.sample b/.github/workflows/upgrade-check.yml.sample index 39efcd8..eabe973 100644 --- a/.github/workflows/upgrade-check.yml.sample +++ b/.github/workflows/upgrade-check.yml.sample @@ -28,6 +28,13 @@ on: schedule: - cron: "0 9 * * *" workflow_dispatch: + +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: check_for_update: runs-on: ubuntu-22.04 diff --git a/.github/workflows/upload-release-image.yml.sample b/.github/workflows/upload-release-image.yml.sample index d1f9305..fd7635a 100644 --- a/.github/workflows/upload-release-image.yml.sample +++ b/.github/workflows/upload-release-image.yml.sample @@ -29,6 +29,12 @@ on: - openhpc-images # - openhpc-images-prerelease +permissions: + contents: read + packages: write + # To report GitHub Actions status checks + statuses: write + jobs: image_upload: runs-on: ubuntu-22.04 diff --git a/.gitignore b/.gitignore index d5b752d..6dfeb97 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ venv packer/openhpc2 .vscode requirements.yml.last +.ansible diff --git a/.python-lint b/.python-lint new file mode 100644 index 0000000..7fe8d51 --- /dev/null +++ b/.python-lint @@ -0,0 +1,6 @@ +[MESSAGES CONTROL] + +# There seems to be an issue with the check +# https://github.com/pylint-dev/pylint/issues/214 +disable= + duplicate-code, diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 0000000..454b8ef --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,7 @@ +# Configuration file for shellcheck +# https://github.com/koalaman/shellcheck/blob/master/shellcheck.1.md#rc-files + +# Unable to exclude *.sh.j2 files and the ansible parentheses upset shellcheck a lot. +# Lines can be address individually with # shellcheck disable=SCxxxx but this gets quite prolific. +# Disabling globally as we have more sh.j2 files than .sh +disable=SC1009,SC1054,SC1064,SC1065,SC1072,SC1073,SC1083 diff --git a/.yamllint.yml b/.yamllint.yml new file mode 100644 index 0000000..3220260 --- /dev/null +++ b/.yamllint.yml @@ -0,0 +1,24 @@ +--- +extends: default + +rules: + brackets: + forbid: non-empty + comments: + # https://github.com/prettier/prettier/issues/6780 + min-spaces-from-content: 1 + # https://github.com/adrienverge/yamllint/issues/384 + comments-indentation: false + document-start: disable + # 160 chars was the default used by old E204 rule, but + # you can easily change it or disable in your .yamllint file. + line-length: + max: 160 + # We are adding an extra space inside braces as that's how prettier does it + # and we are trying not to fight other linters. 
+ braces: + min-spaces-inside: 0 # yamllint defaults to 0 + max-spaces-inside: 1 # yamllint defaults to 0 + octal-values: + forbid-implicit-octal: true # yamllint defaults to false + forbid-explicit-octal: true # yamllint defaults to false diff --git a/README.md b/README.md index f8503a4..8acd424 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -[![Test deployment and image build on OpenStack](https://github.com/stackhpc/ansible-slurm-appliance/actions/workflows/stackhpc.yml/badge.svg)](https://github.com/stackhpc/ansible-slurm-appliance/actions/workflows/stackhpc.yml) - # StackHPC Slurm Appliance +[![Test deployment and image build on OpenStack](https://github.com/stackhpc/ansible-slurm-appliance/actions/workflows/stackhpc.yml/badge.svg)](https://github.com/stackhpc/ansible-slurm-appliance/actions/workflows/stackhpc.yml) + This repository contains playbooks and configuration to define a Slurm-based HPC environment. This includes: + - [Rocky Linux](https://rockylinux.org/)-based hosts. - [OpenTofu](https://opentofu.org/) configurations to define the cluster's infrastructure-as-code. - Packages for Slurm and MPI software stacks from [OpenHPC](https://openhpc.community/). @@ -22,18 +23,20 @@ While it is tested on OpenStack it should work on any cloud with appropriate Ope ## Demonstration Deployment The default configuration in this repository may be used to create a cluster to explore use of the appliance. It provides: + - Persistent state backed by an OpenStack volume. - NFS-based shared file system backed by another OpenStack volume. It requires an OpenStack cloud, and an Ansible "deploy host" with access to that cloud. Before starting ensure that: + - You have root access on the deploy host. - You can create instances from the [latest Slurm appliance image](https://github.com/stackhpc/ansible-slurm-appliance/releases), which already contains the required packages. This is built and tested in StackHPC's CI. - You have an SSH keypair defined in OpenStack, with the private key available on the deploy host. - Created instances have access to internet (note proxies can be setup through the appliance if necessary). - Created instances have accurate/synchronised time (for VM instances this is usually provided by the hypervisor; if not or for bare metal instances it may be necessary to configure a time service via the appliance). -- Three security groups are present: ``default`` allowing intra-cluster communication, ``SSH`` allowing external access via SSH and ``HTTPS`` allowing access for Open OnDemand. +- Three security groups are present: `default` allowing intra-cluster communication, `SSH` allowing external access via SSH and `HTTPS` allowing access for Open OnDemand. ### Setup deploy host @@ -44,11 +47,13 @@ The following operating systems are supported for the deploy host: These instructions assume the deployment host is running Rocky Linux 8: - sudo yum install -y git python38 - git clone https://github.com/stackhpc/ansible-slurm-appliance - cd ansible-slurm-appliance - git checkout ${latest-release-tag} - ./dev/setup-env.sh +```shell +sudo yum install -y git python38 +git clone https://github.com/stackhpc/ansible-slurm-appliance +cd ansible-slurm-appliance +git checkout ${latest-release-tag} +./dev/setup-env.sh +``` You will also need to install [OpenTofu](https://opentofu.org/docs/intro/install/rpm/). @@ -56,12 +61,16 @@ You will also need to install [OpenTofu](https://opentofu.org/docs/intro/install Run the following from the repository root to activate the venv: - . 
venv/bin/activate +```shell +. venv/bin/activate +``` Use the `cookiecutter` template to create a new environment to hold your configuration: - cd environments - cookiecutter ../cookiecutter +```shell +cd environments +cookiecutter ../cookiecutter +``` and follow the prompts to complete the environment name and description. @@ -69,52 +78,59 @@ and follow the prompts to complete the environment name and description. Go back to the root folder and activate the new environment: - cd .. - . environments/$ENV/activate +```shell +cd .. +. environments/$ENV/activate +``` And generate secrets for it: - ansible-playbook ansible/adhoc/generate-passwords.yml +```shell +ansible-playbook ansible/adhoc/generate-passwords.yml +``` ### Define and deploy infrastructure Create an OpenTofu variables file to define the required infrastructure, e.g.: - # environments/$ENV/tofu/terraform.tfvars: - - cluster_name = "mycluster" - cluster_networks = [ - { - network = "some_network" # * - subnet = "some_subnet" # * - } - ] - key_pair = "my_key" # * - control_node_flavor = "some_flavor_name" - login = { - # Arbitrary group name for these login nodes - interactive = { - nodes: ["login-0"] - flavor: "login_flavor_name" # * - } +```text +# environments/$ENV/tofu/terraform.tfvars: +cluster_name = "mycluster" +cluster_networks = [ + { + network = "some_network" # * + subnet = "some_subnet" # * + } +] +key_pair = "my_key" # * +control_node_flavor = "some_flavor_name" +login = { + # Arbitrary group name for these login nodes + interactive = { + nodes: ["login-0"] + flavor: "login_flavor_name" # * } - cluster_image_id = "rocky_linux_9_image_uuid" - compute = { - # Group name used for compute node partition definition - general = { - nodes: ["compute-0", "compute-1"] - flavor: "compute_flavor_name" # * - } +} +cluster_image_id = "rocky_linux_9_image_uuid" +compute = { + # Group name used for compute node partition definition + general = { + nodes: ["compute-0", "compute-1"] + flavor: "compute_flavor_name" # * } +} +``` Variables marked `*` refer to OpenStack resources which must already exist. The above is a minimal configuration - for all variables and descriptions see `environments/$ENV/tofu/variables.tf`. To deploy this infrastructure, ensure the venv and the environment are [activated](#create-a-new-environment) and run: - export OS_CLOUD=openstack - cd environments/$ENV/tofu/ - tofu init - tofu apply +```shell +export OS_CLOUD=openstack +cd environments/$ENV/tofu/ +tofu init +tofu apply +``` and follow the prompts. Note the OS_CLOUD environment variable assumes that OpenStack credentials are defined using a [clouds.yaml](https://docs.openstack.org/python-openstackclient/latest/configuration/index.html#clouds-yaml) file in a default location with the default cloud name of `openstack`. @@ -122,11 +138,15 @@ and follow the prompts. Note the OS_CLOUD environment variable assumes that Open To configure the appliance, ensure the venv and the environment are [activated](#create-a-new-environment) and run: - ansible-playbook ansible/site.yml +```shell +ansible-playbook ansible/site.yml +``` Once it completes you can log in to the cluster using: - ssh rocky@$login_ip +```shell +ssh rocky@$login_ip +``` where the IP of the login node is given in `environments/$ENV/inventory/hosts.yml` @@ -134,7 +154,27 @@ where the IP of the login node is given in `environments/$ENV/inventory/hosts.ym - `environments/`: See [docs/environments.md](docs/environments.md). - `ansible/`: Contains the ansible playbooks to configure the infrastructure. 
-- `packer/`: Contains automation to use Packer to build machine images for an environment - see the README in this directory for further information. +- `packer/`: Contains automation to use Packer to build machine images for an environment - see the readme in this directory for further information. - `dev/`: Contains development tools. For further information see the [docs](docs/) directory. + +## Developing locally + +To run the GitHub Actions linters locally, use: + +```shell +docker run --rm \ + -e RUN_LOCAL=true \ + --env-file "super-linter.env" \ + -v "$(pwd)":/tmp/lint \ + ghcr.io/super-linter/super-linter:v7.3.0 +``` + +```shell +ANSIBLE_COLLECTIONS_PATH=.ansible/collections \ + ansible-lint -c .ansible-lint.yml +``` + +Specifying `ANSIBLE_COLLECTIONS_PATH` ensures `ansible-lint` downloads collections and roles under the `.ansible` directory, separating them from our own roles under the `ansible` directory. +We exclude these downloaded files from linting by listing `.ansible` under `exclude_paths` in `.ansible-lint.yml`. diff --git a/actionlint.yml b/actionlint.yml new file mode 100644 index 0000000..ed97d53 --- /dev/null +++ b/actionlint.yml @@ -0,0 +1 @@ +--- diff --git a/ansible/adhoc/backup-keytabs.yml b/ansible/adhoc/backup-keytabs.yml index 5566e48..a88daf7 100644 --- a/ansible/adhoc/backup-keytabs.yml +++ b/ansible/adhoc/backup-keytabs.yml @@ -1,11 +1,12 @@ +--- # Use ONE of the following tags on this playbook: # - retrieve: copies keytabs out of the state volume to the environment # - deploy: copies keytabs from the environment to the state volume - hosts: freeipa_client - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - import_role: + - ansible.builtin.import_role: name: freeipa tasks_from: backup-keytabs.yml diff --git a/ansible/adhoc/cudatests.yml b/ansible/adhoc/cudatests.yml index 59af856..f571f8a 100644 --- a/ansible/adhoc/cudatests.yml +++ b/ansible/adhoc/cudatests.yml @@ -1,8 +1,9 @@ +--- - hosts: cuda - become: yes - gather_facts: yes + become: true + gather_facts: true tags: cuda_samples tasks: - - import_role: + - ansible.builtin.import_role: name: cuda tasks_from: samples.yml diff --git a/ansible/adhoc/deploy-pulp.yml b/ansible/adhoc/deploy-pulp.yml index f7bafc3..11158cb 100644 --- a/ansible/adhoc/deploy-pulp.yml +++ b/ansible/adhoc/deploy-pulp.yml @@ -1,17 +1,19 @@ +--- + - name: Install pulp on server - become: yes + become: true hosts: pulp_server tasks: - - name: Install pulp - ansible.builtin.include_role: - name: pulp_site - tasks_from: install.yml - public: true + - name: Install pulp + ansible.builtin.include_role: + name: pulp_site + tasks_from: install.yml + public: true - - name: Print Pulp endpoint - become: no - debug: - msg: | - Server configured, override 'appliances_pulp_url' with + - name: Print Pulp endpoint + become: false + ansible.builtin.debug: + msg: | + Server configured, override 'appliances_pulp_url' with appliances_pulp_url: "http://{{ hostvars[groups['pulp_server'] | first].ansible_host }}:{{ pulp_site_port }}" - (or the correct IP if multi-homed) in your environments + (or the correct IP if multi-homed) in your environments diff --git a/ansible/adhoc/generate-passwords.yml b/ansible/adhoc/generate-passwords.yml index 89c08f0..f9354f2 100644 --- a/ansible/adhoc/generate-passwords.yml +++ b/ansible/adhoc/generate-passwords.yml @@ -1,9 +1,8 @@ --- - - name: Generate passwords.yml hosts: localhost gather_facts: false tasks: - name: Include password generation role - include_role: - name: passwords \ No 
newline at end of file + ansible.builtin.include_role: + name: passwords diff --git a/ansible/adhoc/hpctests.yml b/ansible/adhoc/hpctests.yml index 6e733d3..5747e7c 100644 --- a/ansible/adhoc/hpctests.yml +++ b/ansible/adhoc/hpctests.yml @@ -3,10 +3,9 @@ # Relies on installed packages in appliance defaults - see openhpc variables. --- - - hosts: hpctests[0] # TODO: might want to make which node is used selectable? become: false gather_facts: false tasks: - - import_role: + - ansible.builtin.import_role: name: hpctests diff --git a/ansible/adhoc/rebuild-via-slurm.yml b/ansible/adhoc/rebuild-via-slurm.yml index 4f7b5a5..33cbe5c 100644 --- a/ansible/adhoc/rebuild-via-slurm.yml +++ b/ansible/adhoc/rebuild-via-slurm.yml @@ -1,3 +1,4 @@ +--- # Rebuild compute nodes via slurm. # Nodes will be rebuilt if `image_id` in inventory is different to the # currently-provisioned image. Otherwise they are rebooted. @@ -9,9 +10,9 @@ - hosts: login run_once: true - gather_facts: no + gather_facts: false tasks: - name: Run slurm-controlled rebuild - import_role: + ansible.builtin.import_role: name: rebuild tasks_from: rebuild.yml diff --git a/ansible/adhoc/rebuild.yml b/ansible/adhoc/rebuild.yml index 9e7a3a7..b6033e4 100644 --- a/ansible/adhoc/rebuild.yml +++ b/ansible/adhoc/rebuild.yml @@ -1,21 +1,24 @@ +--- # Rebuild hosts with a specified image from OpenStack. -# +# # Use ansible's -v output to see output. # Use --limit to control which hosts to rebuild (either specific hosts or the _ groups defining partitions). # Optionally, supply `-e rebuild_image=` to define a specific image, otherwise the current image is reused. # -# NOTE: If a hostvar `instance_id` is defined this is used to select hosts. Otherwise the hostname is used and this must be unique, which may not be the case e.g. if using identically-named staging and production hosts. +# NOTE: If a hostvar `instance_id` is defined this is used to select hosts. +# Otherwise the hostname is used and this must be unique, which may not be the case e.g. if using identically-named staging and production hosts. # # Example: # ansible-playbook -v --limit ohpc_compute ansible/adhoc/rebuild.yml -e rebuild_image=openhpc_v2.3 - hosts: cluster - become: no - gather_facts: no + become: false + gather_facts: false tasks: - - command: "openstack server rebuild {{ instance_id | default(inventory_hostname) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" + # yamllint disable-line rule:line-length + - ansible.builtin.command: "openstack server rebuild {{ instance_id | default(inventory_hostname) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" delegate_to: localhost - - wait_for_connection: + changed_when: false + - ansible.builtin.wait_for_connection: delay: 60 timeout: 600 - diff --git a/ansible/adhoc/restart-slurm.yml b/ansible/adhoc/restart-slurm.yml index 41b9dcb..de837f5 100644 --- a/ansible/adhoc/restart-slurm.yml +++ b/ansible/adhoc/restart-slurm.yml @@ -1,3 +1,4 @@ +--- # Restart all slurm daemons e.g. after changing configuration. Note that: # - `scontrol reconfigure` will handle most reconfiguration - see https://slurm.schedmd.com/scontrol.html#OPT_reconfigure # for which options need a restart @@ -5,25 +6,25 @@ # restart daemons as required. 
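# Example usage (a sketch; assumes the environment is already activated):
#   ansible-playbook ansible/adhoc/restart-slurm.yml
# Note `scontrol reconfigure` is sufficient for most configuration changes and
# avoids a full daemon restart - see the link above.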
- hosts: compute,login - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - service: + - ansible.builtin.service: name: slurmd state: stopped - hosts: control - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - service: + - ansible.builtin.service: name: slurmctld state: restarted - hosts: compute,login - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - service: + - ansible.builtin.service: name: slurmd state: started diff --git a/ansible/adhoc/sync-pulp.yml b/ansible/adhoc/sync-pulp.yml index 373f3ab..a3b07ae 100644 --- a/ansible/adhoc/sync-pulp.yml +++ b/ansible/adhoc/sync-pulp.yml @@ -1,3 +1,4 @@ +--- - hosts: localhost tasks: - ansible.builtin.include_role: diff --git a/ansible/adhoc/update-packages.yml b/ansible/adhoc/update-packages.yml index ae970ba..929b0da 100644 --- a/ansible/adhoc/update-packages.yml +++ b/ansible/adhoc/update-packages.yml @@ -1,18 +1,20 @@ +--- - hosts: update - become: yes + become: true gather_facts: false tasks: - name: Update selected packages - yum: + ansible.builtin.dnf: name: "{{ update_name }}" state: "{{ update_state }}" exclude: "{{ update_exclude }}" disablerepo: "{{ update_disablerepo }}" register: updates - name: Log updated packages - copy: + ansible.builtin.copy: content: "{{ updates.results | join('\n') }}" dest: "{{ update_log_path }}" + mode: "0644" delegate_to: localhost - - debug: + - ansible.builtin.debug: msg: "{{ updates.results | length }} changes to packages - see {{ update_log_path }} for details" diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 50d0246..21f9303 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -1,16 +1,15 @@ --- - - hosts: cluster gather_facts: false - become: yes + become: true tasks: - name: Check if ansible-init is installed - stat: + ansible.builtin.stat: path: /etc/systemd/system/ansible-init.service register: _stat_ansible_init_unitfile - + - name: Wait for ansible-init to finish - wait_for: + ansible.builtin.wait_for: path: /var/lib/ansible-init.done timeout: "{{ ansible_init_wait }}" # seconds when: _stat_ansible_init_unitfile.stat.exists @@ -21,7 +20,7 @@ tags: - deprecated tasks: - - fail: + - ansible.builtin.fail: msg: | Variables prefixed secrets_openhpc_* are deprecated - run: $ ansible-playbook ansible/adhoc/generate-passwords.yml @@ -29,34 +28,34 @@ when: "'secrets_openhpc_' in (hostvars[inventory_hostname] | join)" - hosts: resolv_conf - become: yes + become: true gather_facts: false tags: resolv_conf tasks: - - import_role: + - ansible.builtin.import_role: name: resolv_conf - hosts: etc_hosts gather_facts: false tags: etc_hosts - become: yes + become: true tasks: - - import_role: + - ansible.builtin.import_role: name: etc_hosts - hosts: proxy gather_facts: false tags: proxy - become: yes + become: true tasks: - - import_role: + - ansible.builtin.import_role: name: proxy - hosts: chrony tags: chrony - become: yes + become: true tasks: - - import_role: + - ansible.builtin.import_role: name: mrlesmithjr.chrony # skip install tasks as might not have network yet tasks_from: config_chrony.yml @@ -67,53 +66,53 @@ - hosts: cluster gather_facts: false - become: yes + become: true tasks: - name: Fix incorrect permissions on /etc in Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2 # breaks munge - file: + ansible.builtin.file: path: /etc state: directory owner: root group: root mode: u=rwx,go=rx # has g=rwx - name: Prevent ssh hanging if shared home is unavailable - lineinfile: + 
ansible.builtin.lineinfile: path: /etc/profile search_string: HOSTNAME=$(/usr/bin/hostnamectl --transient 2>/dev/null) || \ state: absent - name: Add system user groups - ansible.builtin.group: "{{ item.group }}" + ansible.builtin.group: "{{ item.group }}" # noqa: args[module] loop: "{{ appliances_local_users }}" when: - item.enable | default(true) | bool - "'group' in item" - become_method: "sudo" + become_method: ansible.builtin.sudo # Need to change working directory otherwise we try to switch back to non-existent directory. - become_flags: '-i' + become_flags: "-i" - name: Add system users - ansible.builtin.user: "{{ item.user }}" + ansible.builtin.user: "{{ item.user }}" # noqa: args[module] loop: "{{ appliances_local_users }}" when: item.enable | default(true) | bool - become_method: "sudo" + become_method: ansible.builtin.sudo # Need to change working directory otherwise we try to switch back to non-existent directory. - become_flags: '-i' + become_flags: "-i" - name: Reset ssh connection to allow user changes to affect ansible_user - meta: reset_connection - become: no + ansible.builtin.meta: reset_connection + become: false - hosts: systemd - become: yes + become: true gather_facts: false tags: systemd tasks: - name: Make systemd unit modifications - import_role: + ansible.builtin.import_role: name: systemd - hosts: selinux gather_facts: false - become: yes + become: true tags: - selinux tasks: @@ -125,37 +124,37 @@ - hosts: sshd tags: sshd - gather_facts: no - become: yes + gather_facts: false + become: true tasks: - name: Configure sshd - import_role: + ansible.builtin.import_role: name: sshd - hosts: dnf_repos - become: yes + become: true tags: dnf_repos tasks: - - name: Check that creds won't be leaked to users - ansible.builtin.assert: - that: dnf_repos_password is undefined - fail_msg: Passwords should not be templated into repofiles during configure, unset 'dnf_repos_password' - when: - - appliances_mode == 'configure' - - not (dnf_repos_allow_insecure_creds | default(false)) # useful for development + - name: Check that creds won't be leaked to users + ansible.builtin.assert: + that: dnf_repos_password is undefined + fail_msg: Passwords should not be templated into repofiles during configure, unset 'dnf_repos_password' + when: + - appliances_mode == 'configure' + - not (dnf_repos_allow_insecure_creds | default(false)) # useful for development - hosts: cacerts tags: cacerts gather_facts: false tasks: - name: Install custom cacerts - import_role: + ansible.builtin.import_role: name: cacerts - hosts: squid tags: squid - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: # - Installing squid requires working dnf repos # - Configuring dnf_repos itself requires working dnf repos to install epel @@ -166,27 +165,27 @@ tasks_from: set_repos.yml when: "'dnf_repos' in group_names" - name: Configure squid proxy - import_role: + ansible.builtin.import_role: name: squid - hosts: dnf_repos tags: dnf_repos - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: - - name: Replace system repos with pulp repos - ansible.builtin.include_role: - name: dnf_repos - tasks_from: set_repos.yml + - name: Replace system repos with pulp repos + ansible.builtin.include_role: + name: dnf_repos + tasks_from: set_repos.yml # --- tasks after here require general access to package repos --- - hosts: tuned tags: tuned - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: - name: Install and configure tuneD - include_role: + 
ansible.builtin.include_role: name: tuned tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -195,39 +194,39 @@ tags: - freeipa - freeipa_server - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: - name: Install FreeIPA server - import_role: + ansible.builtin.import_role: name: freeipa tasks_from: server.yml - hosts: cluster gather_facts: false - become: yes + become: true tags: cockpit tasks: - - name: Remove RHEL cockpit - command: dnf -y remove cockpit-ws # N.B. using ansible dnf module is very slow + - name: Remove RHEL cockpit # noqa: no-changed-when + ansible.builtin.command: dnf -y remove cockpit-ws register: dnf_remove_output - ignore_errors: true # Avoid failing if a lock or other error happens + ignore_errors: true # Avoid failing if a lock or other error happens - hosts: firewalld gather_facts: false - become: yes + become: true tags: firewalld tasks: - - include_role: + - ansible.builtin.include_role: name: firewalld tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}" - hosts: fail2ban gather_facts: false - become: yes + become: true tags: fail2ban tasks: - - include_role: + - ansible.builtin.include_role: name: fail2ban tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -236,91 +235,92 @@ hosts: podman tags: podman tasks: - - include_role: + - ansible.builtin.include_role: name: podman tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" - hosts: update gather_facts: false - become: yes + become: true tags: - update tasks: - - block: - - name: Update selected packages - yum: - name: "{{ update_name }}" - state: "{{ update_state }}" - exclude: "{{ update_exclude }}" - disablerepo: "{{ update_disablerepo }}" - async: "{{ 30 * 60 }}" # wait for up to 30 minutes - poll: 15 # check every 15 seconds - register: updates - - name: Ensure update log directory on localhost exists - file: - path: "{{ update_log_path | dirname }}" - state: directory - become: false - delegate_to: localhost - run_once: true - - name: Log updated packages - copy: - content: "{{ updates.results | join('\n') }}" - dest: "{{ update_log_path }}" - delegate_to: localhost - become: no - - debug: - msg: "{{ updates.results | length }} changes to packages - see {{ update_log_path }} for details" - when: "update_enable | default('false') | bool" - + - when: "update_enable | default('false') | bool" + block: + - name: Update selected packages + ansible.builtin.dnf: + name: "{{ update_name }}" + state: "{{ update_state }}" + exclude: "{{ update_exclude }}" + disablerepo: "{{ update_disablerepo }}" + async: "{{ 30 * 60 }}" # wait for up to 30 minutes + poll: 15 # check every 15 seconds + register: updates + - name: Ensure update log directory on localhost exists + ansible.builtin.file: + path: "{{ update_log_path | dirname }}" + state: directory + mode: "0755" + become: false + delegate_to: localhost + run_once: true # noqa: run-once[task] + - name: Log updated packages + ansible.builtin.copy: + content: "{{ updates.results | join('\n') }}" + dest: "{{ update_log_path }}" + mode: "0644" + delegate_to: localhost + become: false + - ansible.builtin.debug: + msg: "{{ updates.results | length }} changes to packages - see {{ update_log_path }} for details" - hosts: - selinux - update gather_facts: false - become: yes + become: true tags: - reboot - selinux - update tasks: - name: Check for pending reboot from package updates - command: + 
ansible.builtin.command: cmd: dnf needs-restarting -r register: update_reboot_required failed_when: "update_reboot_required.rc not in [0, 1]" changed_when: false - name: Reboot to cover SELinux state change or package upgrades - reboot: + ansible.builtin.reboot: post_reboot_delay: 30 when: (sestatus['reboot_required'] | default(false)) or (update_reboot_required.rc == 1) - name: Wait for hosts to be reachable - wait_for_connection: + ansible.builtin.wait_for_connection: sleep: 15 - name: Clear facts - meta: clear_facts + ansible.builtin.meta: clear_facts - name: Update facts - setup: + ansible.builtin.setup: - hosts: ofed - gather_facts: yes - become: yes + gather_facts: true + become: true tags: ofed tasks: - - include_role: + - ansible.builtin.include_role: name: ofed - hosts: ansible_init - gather_facts: yes - become: yes + gather_facts: true + become: true tags: linux_ansible_init tasks: - name: Install ansible-init - include_role: + ansible.builtin.include_role: name: azimuth_cloud.image_utils.linux_ansible_init when: "appliances_mode == 'build'" - hosts: k3s:&builder - become: yes + become: true tags: k3s tasks: - name: Install k3s diff --git a/ansible/ci/check_eessi.yml b/ansible/ci/check_eessi.yml index 280f865..a72bd91 100644 --- a/ansible/ci/check_eessi.yml +++ b/ansible/ci/check_eessi.yml @@ -5,20 +5,21 @@ eessi_test_rootdir: /home/eessi_test tasks: - name: Create test root directory - file: + ansible.builtin.file: path: "{{ eessi_test_rootdir }}" state: directory owner: "{{ ansible_user }}" group: "{{ ansible_user }}" + mode: "0755" become: true - - - name: Clone eessi-demo repo + + - name: Clone eessi-demo repo # noqa: latest[git] ansible.builtin.git: repo: "https://github.com/eessi/eessi-demo.git" dest: "{{ eessi_test_rootdir }}/eessi-demo" - name: Create batch script - copy: + ansible.builtin.copy: dest: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh" content: | #!/usr/bin/env bash @@ -26,25 +27,26 @@ #SBATCH --error=%x.out source /cvmfs/pilot.eessi-hpc.org/latest/init/bash srun ./run.sh + mode: "0644" - - name: Run test job - ansible.builtin.shell: + - name: Run test job # noqa: no-changed-when + ansible.builtin.command: cmd: sbatch --wait tensorflow.sh chdir: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow" register: job_output - name: Retrieve job output - slurp: + ansible.builtin.slurp: src: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh.out" register: _tensorflow_out no_log: true # as its base64 encoded so useless - name: Show job output - debug: + ansible.builtin.debug: msg: "{{ _tensorflow_out.content | b64decode }}" - name: Fail if job output contains error - fail: + ansible.builtin.fail: # Note: Job prints live progress bar to terminal, so use regex filter to remove this from stdout - msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}" + msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}" when: '"Epoch 5/5" not in _tensorflow_out.content | b64decode' diff --git a/ansible/ci/check_grafana.yml b/ansible/ci/check_grafana.yml index 36fb78b..0764b65 100644 --- a/ansible/ci/check_grafana.yml +++ b/ansible/ci/check_grafana.yml @@ -1,15 +1,16 @@ +--- # Checks Slurm jobs from hpctests are shown in Grafana. # Can't actually check the dashboard programatically so this queries the datasource used by the dashboard instead. 
- hosts: control # so proxying etc is irrelevant - gather_facts: no - become: no + gather_facts: false + become: false tasks: - name: Wait for slurm-stats file to exist (run by cron) ansible.builtin.wait_for: path: /var/log/slurm-stats/finished_jobs.json timeout: 315 # slurm stats cron job runs every 5 mins - + - name: Query grafana for expected hpctests jobs grafana_elasticsearch_query: grafana_url: http://{{ grafana_api_address }}:{{ grafana_port }} @@ -23,4 +24,5 @@ delay: 5 vars: _found_jobs: "{{ _slurm_stats_jobs.docs | map(attribute='JobName', default='(json error in slurmstats data)') }}" - _expected_jobs: ['pingpong.sh'] + _expected_jobs: + - "pingpong.sh" diff --git a/ansible/ci/check_sacct_hpctests.yml b/ansible/ci/check_sacct_hpctests.yml index 1ebbf21..3628609 100644 --- a/ansible/ci/check_sacct_hpctests.yml +++ b/ansible/ci/check_sacct_hpctests.yml @@ -1,3 +1,4 @@ +--- - hosts: control gather_facts: false become: true @@ -7,13 +8,13 @@ 1,pingpong.sh,COMPLETED tasks: - name: Get info for ended jobs - shell: + ansible.builtin.command: cmd: sacct --format=jobid,jobname,state --allocations --parsable2 --delimiter=, --starttime=now-1days --endtime=now # by default start/end time is midnight/now which is not robust changed_when: false register: sacct - name: Check info for ended jobs - assert: + ansible.builtin.assert: that: sacct_stdout_expected in sacct.stdout fail_msg: | Expected: diff --git a/ansible/ci/check_slurm.yml b/ansible/ci/check_slurm.yml index ff527da..45cda6c 100644 --- a/ansible/ci/check_slurm.yml +++ b/ansible/ci/check_slurm.yml @@ -1,9 +1,10 @@ +--- - hosts: login:!builder # won't have a slurm control daemon when in build - become: no + become: false gather_facts: false tasks: - name: Run sinfo - shell: 'sinfo --noheader --format="%N %P %a %l %D %t" | sort' # using --format ensures we control whitespace: Partition,partition_state,max_jobtime,num_nodes,node_state,node_name + ansible.builtin.shell: 'sinfo --noheader --format="%N %P %a %l %D %t" | sort' # noqa: risky-shell-pipe register: sinfo changed_when: false until: sinfo.stdout_lines == expected_sinfo diff --git a/ansible/ci/delete_images.yml b/ansible/ci/delete_images.yml index 78b5742..992fb8e 100644 --- a/ansible/ci/delete_images.yml +++ b/ansible/ci/delete_images.yml @@ -1,12 +1,12 @@ +--- - hosts: login:!builder - become: no - gather_facts: no + become: false + gather_facts: false tasks: - - import_tasks: get_image_ids.yml - - - name: Delete images - shell: + - ansible.builtin.import_tasks: get_image_ids.yml + - name: Delete images # noqa: no-changed-when + ansible.builtin.shell: cmd: | openstack image delete {{ item.artifact_id }} delegate_to: localhost - loop: "{{ manifest['builds'] }}" + loop: "{{ manifest['builds'] }}" # noqa: no-changed-when diff --git a/ansible/ci/get_image_ids.yml b/ansible/ci/get_image_ids.yml index 4a53b15..ede3a72 100644 --- a/ansible/ci/get_image_ids.yml +++ b/ansible/ci/get_image_ids.yml @@ -1,12 +1,13 @@ +--- - name: Read packer build manifest - set_fact: + ansible.builtin.set_fact: manifest: "{{ lookup('file', manifest_path) | from_json }}" vars: manifest_path: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}/packer/packer-manifest.json" delegate_to: localhost - name: Get latest image builds - set_fact: + ansible.builtin.set_fact: login_build: "{{ manifest['builds'] | selectattr('custom_data', 'eq', {'source': 'login'}) | last }}" compute_build: "{{ manifest['builds'] | selectattr('custom_data', 'eq', {'source': 'compute'}) | last }}" control_build: "{{ manifest['builds'] | 
selectattr('custom_data', 'eq', {'source': 'control'}) | last }}" diff --git a/ansible/ci/library/grafana_elasticsearch_query.py b/ansible/ci/library/grafana_elasticsearch_query.py index 3809565..7a1d603 100644 --- a/ansible/ci/library/grafana_elasticsearch_query.py +++ b/ansible/ci/library/grafana_elasticsearch_query.py @@ -1,10 +1,17 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2022 Steve Brasier steve@stackhpc.com -from __future__ import (absolute_import, division, print_function) -__metaclass__ = type +from __future__ import absolute_import, division, print_function -DOCUMENTATION = r''' +import json + +import requests # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error + +__metaclass__ = type # pylint: disable=invalid-name + +DOCUMENTATION = r""" --- module: grafana_elasticsearch_query @@ -16,9 +23,9 @@ author: - Steve Brasier -''' +""" -EXAMPLES = r''' +EXAMPLES = r""" - name: Get elasticsearch hits grafana_elasticsearch_query: grafana_url: http://{{ grafana_api_address }}:{{ grafana_port }} @@ -26,63 +33,83 @@ grafana_password: "{{ vault_grafana_admin_password }}" datasource: slurmstats index_pattern: 'filebeat-*' -''' +""" -RETURN = r''' +RETURN = r""" # These are examples of possible return values, and in general should use other names for return values. docs: description: List of dicts with the original json in each document. returned: always type: list -''' - -from ansible.module_utils.basic import AnsibleModule -import requests -import json - -def run_module(): - module_args = dict( - grafana_url=dict(type="str", required=True), - grafana_username=dict(type="str", required=True), - grafana_password=dict(type="str", required=True), - datasource=dict(type="str", required=True), - index_pattern=dict(type="str", required=True), - ) +""" + + +def run_module(): # pylint: disable=missing-function-docstring + module_args = { + "grafana_url": { + "type": "str", + "required": True, + }, + "grafana_username": { + "type": "str", + "required": True, + }, + "grafana_password": { + "type": "str", + "required": True, + }, + "datasource": { + "type": "str", + "required": True, + }, + "index_pattern": { + "type": "str", + "required": True, + }, + } - result = dict( - changed=False, - jobs=[] - ) + result = { + "changed": False, + "jobs": [], + } module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) - auth=(module.params['grafana_username'], module.params['grafana_password']) - + auth = (module.params["grafana_username"], module.params["grafana_password"]) + # list datasources: - datasources_api_url = module.params["grafana_url"] + '/api/datasources' + datasources_api_url = module.params["grafana_url"] + "/api/datasources" r = requests.get(datasources_api_url, auth=auth) datasources = json.loads(r.text) # select required datasource: - ds = [s for s in datasources if s['name'] == module.params["datasource"]][0] + ds = [s for s in datasources if s["name"] == module.params["datasource"]][0] # get documents: - datasource_proxy_url = module.params["grafana_url"] + '/api/datasources/proxy/' + str(ds['id']) + '/' + module.params['index_pattern'] + '/_search' + datasource_proxy_url = ( + module.params["grafana_url"] + + "/api/datasources/proxy/" + + str(ds["id"]) + + "/" + + module.params["index_pattern"] + + "/_search" + ) r = requests.get(datasource_proxy_url, auth=auth) search = json.loads(r.text) - # see 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-api-response-body: - docs = [h['_source']['json'] for h in search['hits']['hits']] + # see + # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-api-response-body: + docs = [h["_source"]["json"] for h in search["hits"]["hits"]] result = { - 'docs': docs, + "docs": docs, } module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/ansible/ci/output_vars.yml b/ansible/ci/output_vars.yml index 0e2bc4c..2963a58 100644 --- a/ansible/ci/output_vars.yml +++ b/ansible/ci/output_vars.yml @@ -1,7 +1,8 @@ +--- # Output specific hostvars to a file in a form which can be sourced by bash # NB: obviously the keys and values for the hostvars need to be suitable bash variables -- hosts: "{{ output_vars_hosts }}" - gather_facts: no +- hosts: "{{ output_vars_hosts }}" # noqa: syntax-check[specific] + gather_facts: false tasks: - copy: dest: "{{ output_vars_path }}" diff --git a/ansible/ci/retrieve_inventory.yml b/ansible/ci/retrieve_inventory.yml index d5f61bb..6e395ef 100644 --- a/ansible/ci/retrieve_inventory.yml +++ b/ansible/ci/retrieve_inventory.yml @@ -1,27 +1,28 @@ +--- # Retrieve inventory from a deployed CI arcus environment by reversing arcus/inventory/hooks/pre.yml # Usage example: # ansible-playbook ansible/ci/retrieve_inventory.yml -e cluster_prefix=ci4005969475 # - hosts: localhost - become: no - gather_facts: no + become: false + gather_facts: false vars: cluster_prefix: "{{ undef(hint='cluster_prefix must be defined') }}" # e.g. ci4005969475 ci_vars_file: "{{ appliances_environment_root + '/tofu/' + lookup('env', 'CI_CLOUD') }}.tfvars" cluster_network: "{{ lookup('ansible.builtin.ini', 'cluster_net', file=ci_vars_file, type='properties') | trim('\"') }}" tasks: - name: Get control host IP - set_fact: + ansible.builtin.set_fact: control_ip: "{{ (lookup('pipe', 'openstack server show -f json ' + cluster_prefix + '-control') | from_json)['addresses'][cluster_network][0] }}" - name: Add host into in-memory inventory - add_host: + ansible.builtin.add_host: name: cluster_control groups: control ansible_host: "{{ control_ip }}" - hosts: control - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - ansible.builtin.fetch: src: "/etc/ci-config/{{ item | basename }}" diff --git a/ansible/ci/update_timestamps.yml b/ansible/ci/update_timestamps.yml index 8db4757..c6eb6f0 100644 --- a/ansible/ci/update_timestamps.yml +++ b/ansible/ci/update_timestamps.yml @@ -1,6 +1,7 @@ +--- - hosts: localhost tasks: - - name: Get latest timestamps from sources + - name: Get latest timestamps from sources # noqa: syntax-check[unknown-module] # ansible/library/latest_timestamps.py latest_timestamps: repos_dict: "{{ dnf_repos_default }}" content_url: "https://ark.stackhpc.com/pulp/content" diff --git a/ansible/cleanup.yml b/ansible/cleanup.yml index 3db6eb1..6b495d7 100644 --- a/ansible/cleanup.yml +++ b/ansible/cleanup.yml @@ -1,16 +1,17 @@ +--- # Clean up a Packer build VM -- meta: flush_handlers +- ansible.builtin.meta: flush_handlers -- name: Remove dnf caches - command: dnf clean all +- name: Remove dnf caches # noqa: no-changed-when + ansible.builtin.command: dnf clean all # If image build happens on a Neutron subnet with property dns_namservers defined, then cloud-init # disables NetworkManager's control of /etc/resolv.conf and appends 
nameservers itself. # We don't want network configuration during instance boot to depend on the configuration # of the network the builder was on, so we reset these aspects. - name: Delete /etc/resolv.conf - file: + ansible.builtin.file: path: /etc/resolv.conf state: absent when: "'resolv_conf' not in group_names" # if its been overriden, deleting it is the wrong thing to do @@ -19,25 +20,25 @@ # NB: This *doesn't* delete the 90-dns-none.conf file created by the resolv_conf role # as if nameservers are explicitly being set by that role we don't want to allow NM # to override it again. - file: + ansible.builtin.file: path: /etc/NetworkManager/conf.d/99-cloud-init.conf state: absent - name: Get remote environment for ansible_user - setup: + ansible.builtin.setup: gather_subset: env - become: no + become: false - name: Delete any injected ssh config for ansible_user - file: + ansible.builtin.file: path: "{{ ansible_env.HOME }}/.ssh/" state: absent -- name: Run cloud-init cleanup - command: cloud-init clean --logs --seed +- name: Run cloud-init cleanup # noqa: no-changed-when + ansible.builtin.command: cloud-init clean --logs --seed -- name: Cleanup /tmp - command : rm -rf /tmp/* +- name: Cleanup /tmp # noqa: no-changed-when + ansible.builtin.command: rm -rf /tmp/* - name: Delete files triggering vulnerability scans ansible.builtin.file: @@ -54,10 +55,10 @@ - /etc/ansible-init/playbooks/roles/mrlesmithjr.chrony/requirements.txt - name: Get package facts - package_facts: + ansible.builtin.package_facts: - name: Ensure image summary directory exists - file: + ansible.builtin.file: path: /var/lib/image/ state: directory owner: root @@ -65,9 +66,10 @@ mode: u=rwX,go=rX - name: Write image summary - copy: + ansible.builtin.copy: content: "{{ image_info | to_nice_json }}" dest: /var/lib/image/image.json + mode: "0644" vars: image_info: branch: "{{ lookup('pipe', 'git rev-parse --abbrev-ref HEAD') }}" @@ -79,5 +81,5 @@ cuda: "{{ ansible_facts.packages['cuda-toolkit'].0.version | default('-') }}" slurm-ohpc: "{{ ansible_facts.packages['slurm-ohpc'].0.version | default('-') }}" -- name: Show image summary - command: cat /var/lib/image/image.json +- name: Show image summary # noqa: no-changed-when + ansible.builtin.command: cat /var/lib/image/image.json diff --git a/ansible/extras.yml b/ansible/extras.yml index 08892e4..02b0d40 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -1,5 +1,6 @@ +--- - hosts: k3s_server:!builder - become: yes + become: true tags: k3s tasks: - name: Start k3s server @@ -10,7 +11,7 @@ # technically should be part of bootstrap.yml but hangs waiting on failed mounts # if runs before filesystems.yml after the control node has been reimaged - hosts: k3s_agent:!builder - become: yes + become: true tags: k3s tasks: - name: Start k3s agents @@ -19,13 +20,13 @@ tasks_from: agent-runtime.yml - hosts: basic_users:!builder - become: yes + become: true tags: - basic_users - users - gather_facts: yes + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: basic_users - name: Setup EESSI @@ -35,57 +36,57 @@ gather_facts: false tasks: - name: Install / configure EESSI - include_role: + ansible.builtin.include_role: name: eessi tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" - name: Setup CUDA hosts: cuda - become: yes - gather_facts: yes + become: true + gather_facts: true tags: cuda tasks: - - include_role: + - ansible.builtin.include_role: name: cuda tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' 
else 'install.yml' }}" - name: Setup vGPU hosts: vgpu - become: yes - gather_facts: yes + become: true + gather_facts: true tags: vgpu tasks: - - include_role: + - ansible.builtin.include_role: name: stackhpc.linux.vgpu tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'install.yml' }}" handlers: - - name: reboot - fail: + - name: reboot # noqa: name[casing] + ansible.builtin.fail: msg: Reboot handler for stackhpc.linux.vgpu role fired unexpectedly. This was supposed to be unreachable. - name: Persist hostkeys across rebuilds # Must be after filesystems.yml (for storage) # and before portal.yml (where OOD login node hostkeys are scanned) hosts: persist_hostkeys:!builder - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - import_role: + - ansible.builtin.import_role: name: persist_hostkeys - name: Install k9s - become: yes + become: true hosts: k9s tags: k9s tasks: - - import_role: - name: k9s + - ansible.builtin.import_role: + name: k9s - hosts: extra_packages - become: yes + become: true tags: - - extra_packages + - extra_packages tasks: - - name: Install additional packages - dnf: - name: "{{ appliances_extra_packages }}" + - name: Install additional packages + ansible.builtin.dnf: + name: "{{ appliances_extra_packages }}" diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 46a99bc..8e8e58a 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -1,13 +1,14 @@ +--- # Builder version of site.yml just installing binaries - hosts: builder - become: no - gather_facts: no + become: false + gather_facts: false tasks: - name: Report hostname (= final image name) - command: hostname + ansible.builtin.command: hostname # noqa: no-changed-when - name: Report inventory groups - debug: + ansible.builtin.debug: var: group_names - name: Run pre.yml hook @@ -20,21 +21,21 @@ - name: Sync pulp repos with upstream hosts: pulp_site tasks: - - ansible.builtin.include_role: - name: pulp_site - tasks_from: sync.yml - apply: - delegate_to: localhost - when: appliances_mode != 'configure' + - ansible.builtin.include_role: + name: pulp_site + tasks_from: sync.yml + apply: + delegate_to: localhost + when: appliances_mode != 'configure' - import_playbook: bootstrap.yml - hosts: doca - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - name: Install NVIDIA DOCA - import_role: + ansible.builtin.import_role: name: doca - name: Run post-bootstrap.yml hook @@ -45,33 +46,33 @@ when: hook_path | exists - hosts: builder - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: # - import_playbook: iam.yml - name: Install FreeIPA client - import_role: + ansible.builtin.import_role: name: freeipa tasks_from: client-install.yml when: "'freeipa_client' in group_names" - name: Install sssd - import_role: + ansible.builtin.import_role: name: sssd tasks_from: install.yml when: "'sssd' in group_names" # - import_playbook: filesystems.yml: - name: Install nfs packages - dnf: + ansible.builtin.dnf: name: nfs-utils when: "'nfs' in group_names" - name: Install Manila client packages - include_role: + ansible.builtin.include_role: name: stackhpc.os-manila-mount tasks_from: install.yml when: "'manila' in group_names" - name: Install Lustre packages - include_role: + ansible.builtin.include_role: name: lustre tasks_from: install.yml when: "'lustre' in group_names" @@ -82,46 +83,46 @@ - name: Install compute_init playbook hosts: compute_init tags: compute_init # tagged to allow running on cluster instances for dev - become: 
yes + become: true tasks: - - include_role: + - ansible.builtin.include_role: name: compute_init tasks_from: install.yml - name: Install gateway playbook hosts: gateway tags: gateway - become: yes - gather_facts: no + become: true + gather_facts: false tasks: - - include_role: + - ansible.builtin.include_role: name: gateway - hosts: builder - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: # - import_playbook: slurm.yml: - name: Setup DB - include_role: + ansible.builtin.include_role: name: mysql tasks_from: install.yml when: "'mysql' in group_names" - name: Install rebuild - include_role: + ansible.builtin.include_role: name: rebuild tasks_from: install.yml - name: Install OpenHPC - import_role: + ansible.builtin.import_role: name: stackhpc.openhpc tasks_from: install.yml when: "'openhpc' in group_names" # - import_playbook: portal.yml - name: Open Ondemand server (packages) - include_role: + ansible.builtin.include_role: name: osc.ood tasks_from: install-package.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" @@ -129,90 +130,90 @@ # # FUTURE: install-apps.yml - this is git clones - name: Open Ondemand server (apps) - include_role: + ansible.builtin.include_role: name: osc.ood tasks_from: install-apps.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" when: "'openondemand' in group_names" - name: Open Ondemand remote desktop - import_role: + ansible.builtin.import_role: name: openondemand tasks_from: vnc_compute.yml when: "'openondemand_desktop' in group_names" - name: Open Ondemand jupyter node - import_role: + ansible.builtin.import_role: name: openondemand tasks_from: jupyter_compute.yml when: "'openondemand_jupyter' in group_names" - name: Install Apache PAM module # Extracted from start of roles/openondemand/tasks/pam_auth.yml to ensure only installed during build - yum: + ansible.builtin.dnf: name: mod_authnz_pam # - import_playbook: monitoring.yml: - - import_role: + - ansible.builtin.import_role: name: opensearch tasks_from: install.yml when: "'opensearch' in group_names" - - import_role: + - ansible.builtin.import_role: name: slurm_stats tasks_from: install.yml when: "'slurm_stats' in group_names" - - import_role: + - ansible.builtin.import_role: name: filebeat tasks_from: install.yml when: "'filebeat' in group_names" - - import_role: - # can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start - # however starting node exporter is ok + - ansible.builtin.import_role: + # can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start + # however starting node exporter is ok name: cloudalchemy.node_exporter when: "'node_exporter' in group_names" - - name: openondemand exporter - dnf: + - name: Openondemand exporter + ansible.builtin.dnf: name: ondemand_exporter when: "'openondemand' in group_names" - - name: slurm exporter - include_role: + - name: Slurm exporter + ansible.builtin.include_role: name: slurm_exporter tasks_from: install.yml when: "'slurm_exporter' in group_names" - name: Install alertmanager - include_role: + ansible.builtin.include_role: name: alertmanager tasks_from: install.yml when: "'alertmanager' in group_names" - name: Download HPL source - include_role: + ansible.builtin.include_role: name: hpctests tasks_from: source-hpl.yml - hosts: prometheus - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: 
cloudalchemy.prometheus tasks_from: preflight.yml # can't run cloudalchemy.prometheus/tasks/install.yml as it triggers a unit start # so below is a partial extraction of this: - - name: create prometheus system group - group: + - name: Create prometheus system group + ansible.builtin.group: name: prometheus system: true state: present - - name: create prometheus system user - user: + - name: Create prometheus system user + ansible.builtin.user: name: prometheus system: true shell: "/usr/sbin/nologin" @@ -220,31 +221,33 @@ createhome: false home: "{{ prometheus_db_dir }}" - - name: download prometheus binary to local folder + - name: Download prometheus binary to local folder become: false - get_url: + ansible.builtin.get_url: + # yamllint disable-line rule:line-length url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz" dest: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz" checksum: "sha256:{{ __prometheus_checksum }}" + mode: "0644" register: _download_archive until: _download_archive is succeeded retries: 5 delay: 2 - - name: unpack prometheus binaries + - name: Unpack prometheus binaries become: false - unarchive: - remote_src: yes + ansible.builtin.unarchive: + remote_src: true src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz" dest: "/tmp" creates: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/prometheus" - - name: propagate official prometheus and promtool binaries - copy: - remote_src: yes + - name: Propagate official prometheus and promtool binaries + ansible.builtin.copy: + remote_src: true src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/{{ item }}" dest: "{{ _prometheus_binary_install_dir }}/{{ item }}" - mode: 0755 + mode: "0755" owner: root group: root with_items: @@ -252,26 +255,26 @@ - promtool - hosts: grafana - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - name: Include distribution variables for cloudalchemy.grafana - include_vars: "{{ appliances_repository_root }}/ansible/roles/cloudalchemy.grafana/vars/redhat.yml" - - import_role: + ansible.builtin.include_vars: "{{ appliances_repository_root }}/ansible/roles/cloudalchemy.grafana/vars/redhat.yml" + - ansible.builtin.import_role: name: cloudalchemy.grafana tasks_from: install.yml - - import_role: + - ansible.builtin.import_role: name: cloudalchemy.grafana tasks_from: plugins.yml - - include_role: # done in same play so it can use handlers from cloudalchemy.grafana + - ansible.builtin.include_role: # done in same play so it can use handlers from cloudalchemy.grafana name: grafana-dashboards - name: Add support for NVIDIA GPU auto detection to Slurm hosts: slurm_recompile - become: yes + become: true tasks: - name: Recompile slurm - import_role: + ansible.builtin.import_role: name: slurm_recompile vars: slurm_recompile_with_nvml: "{{ groups.cuda | length > 0 }}" @@ -286,12 +289,11 @@ - import_playbook: final.yml - hosts: builder - become: yes - gather_facts: yes + become: true + gather_facts: true tags: finalise tasks: - name: Cleanup image - import_tasks: cleanup.yml - + ansible.builtin.import_tasks: cleanup.yml - name: Shutdown Packer VM community.general.shutdown: diff --git a/ansible/filesystems.yml b/ansible/filesystems.yml index 41a685d..804f7d2 100644 --- a/ansible/filesystems.yml +++ b/ansible/filesystems.yml @@ -1,11 +1,10 @@ --- - - name: Setup block devices hosts: block_devices - become: yes + become: 
true tags: block_devices tasks: - - include_role: + - ansible.builtin.include_role: name: block_devices - name: Setup NFS @@ -14,7 +13,7 @@ tags: - nfs tasks: - - include_role: + - ansible.builtin.include_role: name: stackhpc.nfs - name: Setup Manila share mounts @@ -22,7 +21,7 @@ become: true tags: manila tasks: - - include_role: + - ansible.builtin.include_role: name: stackhpc.os-manila-mount tasks_from: "{{ item }}" loop: "{{ ['lookup.yml', 'mount.yml'] if appliances_mode == 'configure' else ['main.yml'] }}" @@ -32,7 +31,7 @@ become: true tags: lustre tasks: - - include_role: + - ansible.builtin.include_role: name: lustre # NB install is ONLY run in builder tasks_from: configure.yml diff --git a/ansible/filter_plugins/utils.py b/ansible/filter_plugins/utils.py index 42b7107..33ad391 100644 --- a/ansible/filter_plugins/utils.py +++ b/ansible/filter_plugins/utils.py @@ -1,79 +1,84 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2020, StackHPC # Apache 2 License -from ansible.errors import AnsibleError, AnsibleFilterError -from ansible.utils.display import Display -from collections import defaultdict -import jinja2 -from ansible.module_utils.six import string_types import os.path import re +from collections import defaultdict + +from ansible.utils.display import Display # pylint: disable=import-error + def prometheus_node_exporter_targets(hosts, hostvars, env_key, group): - """ Return a mapping in cloudalchemy.nodeexporter prometheus_targets - format. + """Return a mapping in cloudalchemy.nodeexporter prometheus_targets + format. - hosts: list of inventory_hostnames - hostvars: Ansible hostvars variable - env_key: key to lookup in each host's hostvars to add as label 'env' (default: 'ungrouped') - group: string to add as label 'group' + hosts: list of inventory_hostnames + hostvars: Ansible hostvars variable + env_key: key to lookup in each host's hostvars to add as label 'env' (default: 'ungrouped') + group: string to add as label 'group' """ result = [] per_env = defaultdict(list) for host in hosts: - host_env = hostvars[host].get(env_key, 'ungrouped') + host_env = hostvars[host].get(env_key, "ungrouped") per_env[host_env].append(host) - for env, hosts in per_env.items(): + for env, hosts in per_env.items(): # pylint: disable=redefined-argument-from-local target = { "targets": [f"{target}:9100" for target in hosts], - "labels": { - 'env': env, - 'group': group - } + "labels": {"env": env, "group": group}, } result.append(target) return result -def readfile(fpath): + +def readfile(fpath): # pylint: disable=missing-function-docstring if not os.path.isfile(fpath): return "" - with open(fpath) as f: + with open(fpath) as f: # pylint: disable=unspecified-encoding return f.read() -def exists(fpath): + +def exists(fpath): # pylint: disable=missing-function-docstring return os.path.isfile(fpath) + def to_ood_regex(items): - """ Convert a list of strings possibly containing digits into a regex containing \d+ - - eg {{ [compute-001, compute-002, control] | to_regex }} -> '(compute-\d+)|(control)' + """Convert a list of strings possibly containing digits into a regex containing \\d+ + + eg {{ [compute-001, compute-002, control] | to_regex }} -> '(compute-\\d+)|(control)' """ - + # NB: for python3.12+ the \d in this function & docstring - # need to be raw strings. See https://docs.python.org/3/reference/lexical_analysis.html + # need to be raw strings. 
See + # https://docs.python.org/3/reference/lexical_analysis.html # There's a python bug which means re.sub() can't use '\d' in the replacement so # have to do replacement in two stages: - r = [re.sub(r"\d+", 'XBACKSLASHX', v) for v in items] - r = [v.replace('XBACKSLASHX', '\d+') for v in set(r)] - r = ['(%s)' % v for v in r] - return '|'.join(r) + r = [re.sub(r"\d+", "XBACKSLASHX", v) for v in items] + r = [v.replace("XBACKSLASHX", r"\d+") for v in set(r)] + r = [f"({v})" for v in r] + return "|".join(r) + +# pylint: disable=useless-object-inheritance class FilterModule(object): - ''' Ansible core jinja2 filters ''' + """Ansible core jinja2 filters""" - def warn(self, message, **kwargs): + # pylint: disable=missing-function-docstring + def warn(self, message, **kwargs): # pylint: disable=unused-argument Display().warning(message) return message + # pylint: disable=missing-function-docstring def filters(self): return { # jinja2 overrides - 'readfile': readfile, - 'prometheus_node_exporter_targets': prometheus_node_exporter_targets, - 'exists': exists, - 'warn': self.warn, - 'to_ood_regex': to_ood_regex, + "readfile": readfile, + "prometheus_node_exporter_targets": prometheus_node_exporter_targets, + "exists": exists, + "warn": self.warn, + "to_ood_regex": to_ood_regex, } diff --git a/ansible/final.yml b/ansible/final.yml index 3e715df..d984204 100644 --- a/ansible/final.yml +++ b/ansible/final.yml @@ -1,5 +1,5 @@ - hosts: dnf_repos - become: yes + become: true tags: dnf_repos tasks: - name: Disable pulp repos @@ -12,19 +12,19 @@ hosts: compute_init:!builder # NB: done last so other roles can prepare configuration etc tags: compute_init - become: yes + become: true tasks: - - include_role: + - ansible.builtin.include_role: name: compute_init tasks_from: export.yml - hosts: proxy gather_facts: false tags: proxy - become: yes + become: true tasks: - - include_role: + - ansible.builtin.include_role: name: proxy vars: proxy_state: absent - when: proxy_remove | default(false) | bool == true + when: proxy_remove | default(false) | bool diff --git a/ansible/iam.yml b/ansible/iam.yml index 8b3bf6b..d570a7a 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -1,13 +1,14 @@ +--- - hosts: freeipa_client tags: - freeipa - freeipa_server # as this is only relevant if using freeipa_server - freeipa_host - gather_facts: no - become: yes + gather_facts: false + become: true tasks: - name: Ensure FreeIPA client hosts are added to the FreeIPA server - import_role: + ansible.builtin.import_role: name: freeipa tasks_from: addhost.yml when: groups['freeipa_server'] | length > 0 @@ -16,16 +17,16 @@ tags: - freeipa - freeipa_client - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: - name: Install FreeIPA client - include_role: + ansible.builtin.include_role: name: freeipa tasks_from: client-install.yml when: "appliances_mode != 'configure'" - name: Enrol FreeIPA client - import_role: + ansible.builtin.import_role: name: freeipa tasks_from: enrol.yml @@ -34,19 +35,19 @@ - freeipa - freeipa_server - users - gather_facts: yes - become: yes + gather_facts: true + become: true tasks: - name: Add FreeIPA users - import_role: + ansible.builtin.import_role: name: freeipa tasks_from: users.yml - hosts: sssd - become: yes - gather_facts: no + become: true + gather_facts: false tags: sssd tasks: - name: Configure sssd - import_role: + ansible.builtin.import_role: name: sssd diff --git a/ansible/library/latest_timestamps.py b/ansible/library/latest_timestamps.py index 0de3883..6ac4549 100644 --- 
a/ansible/library/latest_timestamps.py +++ b/ansible/library/latest_timestamps.py @@ -1,25 +1,32 @@ -__metaclass__ = type +# pylint: disable=missing-module-docstring +import requests # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error +from bs4 import BeautifulSoup # pylint: disable=import-error, wrong-import-order -DOCUMENTATION = r''' +__metaclass__ = type # pylint: disable=invalid-name + +DOCUMENTATION = r""" --- module: latest_timestamps short_description: Gets the latest set of snapshots from Pulp version_added: "1.0.0" -description: Gets the latest set of snapshots from given source URLs and returns dictionary to replace 'appliances_repo_timestamps' with +description: > + Gets the latest set of snapshots from given source URLs + and returns dictionary to replace 'appliances_repo_timestamps' with author: - William Tripp - Steve Brasier -''' +""" -EXAMPLES = r''' +EXAMPLES = r""" - name: Get latest timestamps latest_timestamps: repos_dict: "{{ appliances_repo_timestamp_sources }}" content_url: "https://ark.stackhpc.com/pulp/content" register: result -''' +""" -RETURN = r''' +RETURN = r""" latest_dict: description: Dictionary with updated timestamps type: dict @@ -28,48 +35,58 @@ description: List of repos that have updated timestamps type: str[] returned: always -''' +""" -from ansible.module_utils.basic import AnsibleModule -import requests -from bs4 import BeautifulSoup -def run_module(): - module_args = dict( - repos_dict=dict(type='dict', required=True), - content_url=dict(type='str', required=True) - ) +def run_module(): # pylint: disable=missing-function-docstring + module_args = { + "repos_dict": { + "type": "dict", + "required": True, + }, + "content_url": { + "type": "str", + "required": True, + }, + } - result = dict( - changed=False, - original_message='', - message='' - ) + result = { + "changed": False, + "original_message": "", + "message": "", + } - module = AnsibleModule( - argument_spec=module_args, - supports_check_mode=True - ) + module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) - timestamps = dict(module.params['repos_dict']) + timestamps = dict(module.params["repos_dict"]) for repo in timestamps: for version in timestamps[repo]: html_txt = requests.get( - url= module.params['content_url'] + '/' + timestamps[repo][version]['pulp_path'] - ).text - timestamp_link_list = BeautifulSoup(html_txt,features="html.parser").body.find('pre').find_all() # getting raw list of timestamps from html - timestamp_link_list = map(lambda x: x.string,timestamp_link_list) # stripping xml tags - latest_timestamp = list(timestamp_link_list)[-1][:-1] # last timestamp in list with trailing / removed - timestamps[repo][version]['pulp_timestamp'] = latest_timestamp - result['timestamps'] = dict(sorted(timestamps.items())) + url=module.params["content_url"] + + "/" + + timestamps[repo][version]["pulp_path"] + ).text + timestamp_link_list = ( + BeautifulSoup(html_txt, features="html.parser") + .body.find("pre") + .find_all() + ) # getting raw list of timestamps from html + timestamp_link_list = map( + lambda x: x.string, timestamp_link_list + ) # stripping xml tags + latest_timestamp = list(timestamp_link_list)[-1][ + :-1 + ] # last timestamp in list with trailing / removed + timestamps[repo][version]["pulp_timestamp"] = latest_timestamp + result["timestamps"] = dict(sorted(timestamps.items())) module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() -if 
__name__ == '__main__': +if __name__ == "__main__": main() diff --git a/ansible/library/user_namespace_facts.py b/ansible/library/user_namespace_facts.py index 022f63f..a68834b 100644 --- a/ansible/library/user_namespace_facts.py +++ b/ansible/library/user_namespace_facts.py @@ -1,11 +1,19 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2020, Will Szumski -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) -from __future__ import (absolute_import, division, print_function) -__metaclass__ = type +# GNU General Public License v3.0+ (see COPYING or +# https://www.gnu.org/licenses/gpl-3.0.txt) +from __future__ import absolute_import, division, print_function -DOCUMENTATION = r''' +import csv +import os + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error + +__metaclass__ = type # pylint: disable=invalid-name + +DOCUMENTATION = r""" --- module: user_namepace_facts @@ -17,14 +25,14 @@ author: - Will Szumski (@jovial) -''' +""" -EXAMPLES = r''' +EXAMPLES = r""" - name: Return ansible_facts user_namepace_facts: -''' +""" -RETURN = r''' +RETURN = r""" # These are examples of possible return values, and in general should use other names for return values. ansible_facts: description: Facts to add to ansible_facts. @@ -41,20 +49,17 @@ type: str returned: always, empty dict if /etc/subgid doesn't exist sample: { "foo": {"size": 123, "start": 100000 }} -''' +""" -from ansible.module_utils.basic import AnsibleModule -import csv -import os -def parse(path): +def parse(path): # pylint: disable=missing-function-docstring result = {} if not os.path.exists(path): return result - with open(path) as f: - reader = csv.reader(f, delimiter=':') + with open(path) as f: # pylint: disable=unspecified-encoding + reader = csv.reader(f, delimiter=":") for row in reader: user = row[0] entry = { @@ -65,50 +70,43 @@ def parse(path): return result -def run_module(): + +def run_module(): # pylint: disable=missing-function-docstring # define available arguments/parameters a user can pass to the module - module_args = dict() + module_args = {} # seed the result dict in the object # we primarily care about changed and state # changed is if this module effectively modified the target # state will include any data that you want your module to pass back # for consumption, for example, in a subsequent task - result = dict( - changed=False, - ansible_facts=dict(), - ) + result = { + "changed": False, + "ansible_facts": {}, + } # the AnsibleModule object will be our abstraction working with Ansible # this includes instantiation, a couple of common attr would be the # args/params passed to the execution, as well as if the module # supports check mode - module = AnsibleModule( - argument_spec=module_args, - supports_check_mode=True - ) + module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) # manipulate or modify the state as needed (this is going to be the # part where your module will do what it needs to do) - result = { - 'ansible_facts': { - 'subuid': {}, - 'subgid': {} - } - } + result = {"ansible_facts": {"subuid": {}, "subgid": {}}} - result['ansible_facts']['subuid'] = parse('/etc/subuid') - result['ansible_facts']['subgid'] = parse('/etc/subgid') + result["ansible_facts"]["subuid"] = parse("/etc/subuid") + result["ansible_facts"]["subgid"] = parse("/etc/subgid") # in the event of a successful module execution, you will want to # simple AnsibleModule.exit_json(), passing the key/value results 
module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/ansible/monitoring.yml b/ansible/monitoring.yml index d34a65f..c8225a0 100644 --- a/ansible/monitoring.yml +++ b/ansible/monitoring.yml @@ -5,11 +5,11 @@ hosts: opensearch tags: opensearch tasks: - - import_role: + - ansible.builtin.import_role: name: opensearch tasks_from: install.yml become: true - - import_role: + - ansible.builtin.import_role: name: opensearch tasks_from: runtime.yml become: true @@ -18,7 +18,7 @@ hosts: slurm_stats tags: slurm_stats tasks: - - include_role: + - ansible.builtin.include_role: name: slurm_stats tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -26,7 +26,7 @@ hosts: filebeat tags: filebeat tasks: - - include_role: + - ansible.builtin.include_role: name: filebeat tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -34,7 +34,7 @@ hosts: node_exporter tags: node_exporter tasks: - - import_role: + - ansible.builtin.import_role: name: cloudalchemy.node_exporter - name: Deploy OpenOndemand exporter @@ -44,7 +44,7 @@ - openondemand - openondemand_server tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: exporter.yml @@ -53,7 +53,7 @@ become: true tags: slurm_exporter tasks: - - include_role: + - ansible.builtin.include_role: name: slurm_exporter tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -62,7 +62,7 @@ tags: prometheus tasks: - name: Check for existing prometheus binaries - stat: + ansible.builtin.stat: path: /usr/local/bin/{{ item }} register: prometheus_binaries loop: @@ -70,10 +70,10 @@ - promtool - name: Skip prometheus install if prometheus binaries exist and prometheus_version not defined # i.e. if prometheus_version isn't defined we don't care, so use what's already there - set_fact: - prometheus_skip_install: "{{ false if prometheus_version is defined else true }}" + ansible.builtin.set_fact: + prometheus_skip_install: "{{ false if prometheus_version is defined else true }}" when: "(prometheus_binaries.results | map(attribute='stat') | map(attribute='exists')) + [prometheus_skip_install is not defined]" - - import_role: + - ansible.builtin.import_role: name: cloudalchemy.prometheus - name: Deploy grafana @@ -94,22 +94,22 @@ group: root mode: '0755' become: true - - include_role: + - ansible.builtin.include_role: name: cloudalchemy.grafana vars: # Internal role used to install dashboards as cloudalchemy role does not support all required options: grafana_dashboards: [] - - include_role: # done in same play so it can use handlers from cloudalchemy.grafana + - ansible.builtin.include_role: # done in same play so it can use handlers from cloudalchemy.grafana name: grafana-dashboards when: "appliances_mode != 'configure'" - name: Deploy alertmanager hosts: alertmanager tags: alertmanager - become: yes + become: true gather_facts: false tasks: - name: Configure alertmanager - include_role: + ansible.builtin.include_role: name: alertmanager tasks_from: configure.yml diff --git a/ansible/noop.yml b/ansible/noop.yml index adad248..4c1c5ea 100644 --- a/ansible/noop.yml +++ b/ansible/noop.yml @@ -1,5 +1,4 @@ --- - # This file exists so that we can conditionally import a playbook. 
The path # must exist, but we can use a when conditional so that it is not actually # run diff --git a/ansible/portal.yml b/ansible/portal.yml index 58ca69f..361a603 100644 --- a/ansible/portal.yml +++ b/ansible/portal.yml @@ -1,15 +1,16 @@ +--- - hosts: openondemand tags: - openondemand - openondemand_server - become: yes - gather_facts: yes # TODO + become: true + gather_facts: true # TODO tasks: - name: Skip openondemand apps installation in configure mode - set_fact: + ansible.builtin.set_fact: ood_install_apps: {} when: appliances_mode == 'configure' - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: main.yml @@ -18,10 +19,10 @@ - openondemand - openondemand_desktop - openondemand_matlab - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: vnc_compute.yml when: appliances_mode != 'configure' # is run during build @@ -30,10 +31,10 @@ tags: - openondemand - openondemand_jupyter - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: jupyter_compute.yml when: appliances_mode != 'configure' # is run during build @@ -42,10 +43,10 @@ tags: - openondemand - openondemand_rstudio - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: rstudio_compute.yml when: appliances_mode != 'configure' # is run during build @@ -54,10 +55,10 @@ tags: - openondemand - openondemand_codeserver - become: yes - gather_facts: yes + become: true + gather_facts: true tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: codeserver_compute.yml - when: appliances_mode != 'configure' # is run during build \ No newline at end of file + when: appliances_mode != 'configure' # is run during build diff --git a/ansible/roles/alertmanager/README.md b/ansible/roles/alertmanager/README.md index f5bc23b..900e0e1 100644 --- a/ansible/roles/alertmanager/README.md +++ b/ansible/roles/alertmanager/README.md @@ -5,6 +5,7 @@ to route Prometheus alerts to a receiver. Currently Slack is the only supported receiver. Note that: + - HA configuration is not supported - Alertmanager state is not preserved when the node it runs on (by default, control node) is reimaged, so any alerts silenced via the GUI will reoccur. @@ -14,6 +15,7 @@ Alertmanager is enabled by default on the `control` node in the `site` environment's `inventory/groups` file. In general usage may only require: + - Enabling the Slack integration (see section below). - Possibly setting `alertmanager_web_external_url`. @@ -25,6 +27,7 @@ All variables are optional. See [defaults/main.yml](defaults/main.yml) for all default values. General variables: + - `alertmanager_version`: String, version (no leading 'v') - `alertmanager_download_checksum`: String, checksum for relevant version from [prometheus.io download page](https://prometheus.io/download/), in format @@ -43,14 +46,14 @@ The following variables are equivalent to similarly-named arguments to the `alertmanager` binary. See `man alertmanager` for more info: - `alertmanager_config_file`: String, path the main alertmanager config file - will be written to. Parent directory will be created if necessary. + will be written to. Parent directory will be created if necessary. 
- `alertmanager_web_config_file`: String, path alertmanager web config file - will be written to. Parent directory will be created if necessary. + will be written to. Parent directory will be created if necessary. - `alertmanager_storage_path`: String, base path for data storage. - `alertmanager_web_listen_addresses`: List of strings, defining addresses to listeen on. - `alertmanager_web_external_url`: String, the URL under which Alertmanager is - externally reachable - defaults to host IP address and `alertmanager_port`. - See man page for more details if proxying alertmanager. + externally reachable - defaults to host IP address and `alertmanager_port`. + See man page for more details if proxying alertmanager. - `alertmanager_data_retention`: String, how long to keep data for - `alertmanager_data_maintenance_interval`: String, interval between garbage collection and snapshotting to disk of the silences and the notification logs. @@ -59,6 +62,7 @@ The following variables are equivalent to similarly-named arguments to the - `alertmanager_default_receivers`: The following variables are templated into the alertmanager [main configuration](https://prometheus.io/docs/alerting/latest/configuration/): + - `alertmanager_config_template`: String, path to configuration template. The default is to template in `alertmanager_config_default` and `alertmanager_config_extra`. - `alertmanager_config_default`: Mapping with default configuration for the @@ -70,24 +74,27 @@ The following variables are templated into the alertmanager [main configuration] - `alertmanager_extra_receivers`: A list of additional [receiver](https://prometheus.io/docs/alerting/), mappings to add, by default empty. - `alertmanager_slack_receiver`: Mapping defining the [Slack receiver](https://prometheus.io/docs/alerting/latest/configuration/#slack_config). Note the default configuration for this is in -`environments/common/inventory/group_vars/all/alertmanager.yml`. + `environments/common/inventory/group_vars/all/alertmanager.yml`. - `alertmanager_slack_receiver_name`: String, name for the above Slack reciever. - `alertmanager_slack_receiver_send_resolved`: Bool, whether to send resolved alerts via the above Slack reciever. -- `alertmanager_null_receiver`: Mapping defining a `null` [receiver](https://prometheus.io/docs/alerting/latest/configuration/#receiver) so a receiver is always defined. +- `alertmanager_null_receiver`: Mapping defining a `null` [receiver](https://prometheus.io/docs/alerting/latest/configuration/#receiver) so a receiver is always defined. - `alertmanager_config_extra`: Mapping with additional configuration. Keys in this become top-level keys in the configuration. E.g this might be: - ```yaml - alertmanager_config_extra: - global: - smtp_from: smtp.example.org:587 - time_intervals: - - name: monday-to-friday - time_intervals: - - weekdays: ['monday:friday'] - ``` + + ```yaml + alertmanager_config_extra: + global: + smtp_from: smtp.example.org:587 + time_intervals: + - name: monday-to-friday + time_intervals: + - weekdays: ['monday:friday'] + ``` + Note that `route` and `receivers` keys should not be added here. The following variables are templated into the alertmanager [web configuration](https://prometheus.io/docs/alerting/latest/https/): + - `alertmanager_web_config_default`: Mapping with default configuration for `basic_auth_users` providing the default web user. 
- `alertmanager_alertmanager_web_config_extra`: Mapping with additional web diff --git a/ansible/roles/alertmanager/defaults/main.yml b/ansible/roles/alertmanager/defaults/main.yml index b303017..4b90994 100644 --- a/ansible/roles/alertmanager/defaults/main.yml +++ b/ansible/roles/alertmanager/defaults/main.yml @@ -1,5 +1,6 @@ -alertmanager_version: '0.28.1' -alertmanager_download_checksum: 'sha256:5ac7ab5e4b8ee5ce4d8fb0988f9cb275efcc3f181b4b408179fafee121693311' +--- +alertmanager_version: "0.28.1" +alertmanager_download_checksum: "sha256:5ac7ab5e4b8ee5ce4d8fb0988f9cb275efcc3f181b4b408179fafee121693311" alertmanager_download_dest: /tmp/alertmanager.tar.gz alertmanager_binary_dir: /usr/local/bin alertmanager_started: true @@ -11,13 +12,13 @@ alertmanager_config_file: /etc/alertmanager/alertmanager.yml alertmanager_web_config_file: /etc/alertmanager/alertmanager-web.yml alertmanager_storage_path: /var/lib/alertmanager -alertmanager_port: '9093' +alertmanager_port: "9093" alertmanager_web_listen_addresses: - ":{{ alertmanager_port }}" -alertmanager_web_external_url: '' # defined in environments/common/inventory/group_vars/all/alertmanager.yml for visibility +alertmanager_web_external_url: "" # defined in environments/common/inventory/group_vars/all/alertmanager.yml for visibility -alertmanager_data_retention: '120h' -alertmanager_data_maintenance_interval: '15m' +alertmanager_data_retention: "120h" +alertmanager_data_maintenance_interval: "15m" alertmanager_config_flags: {} # other command-line parameters as shown by `man alertmanager` alertmanager_config_template: alertmanager.yml.j2 alertmanager_web_config_template: alertmanager-web.yml.j2 @@ -35,7 +36,7 @@ alertmanager_alertmanager_web_config_extra: {} # top-level only # app_creds: alertmanager_null_receiver: - name: 'null' + name: "null" alertmanager_slack_receiver: {} # defined in environments/common/inventory/group_vars/all/alertmanager.yml as it needs prometheus_address alertmanager_extra_receivers: [] alertmanager_default_receivers: "{{ [alertmanager_null_receiver] + ([alertmanager_slack_receiver] if alertmanager_slack_integration is defined else []) }}" @@ -43,7 +44,8 @@ alertmanager_receivers: "{{ alertmanager_default_receivers + alertmanager_extra alertmanager_config_default: route: - group_by: ['...'] + group_by: + - "..." 
receiver: "{{ alertmanager_slack_receiver_name if alertmanager_slack_integration is defined else 'null' }}" receivers: "{{ alertmanager_receivers }}" diff --git a/ansible/roles/alertmanager/handlers/main.yml b/ansible/roles/alertmanager/handlers/main.yml index ee87e1e..6e427a6 100644 --- a/ansible/roles/alertmanager/handlers/main.yml +++ b/ansible/roles/alertmanager/handlers/main.yml @@ -1,5 +1,6 @@ +--- - name: Restart alertmanager - systemd: + ansible.builtin.systemd: name: alertmanager state: restarted daemon_reload: "{{ _alertmanager_service.changed | default(false) }}" diff --git a/ansible/roles/alertmanager/tasks/configure.yml b/ansible/roles/alertmanager/tasks/configure.yml index a43ec20..15f252f 100644 --- a/ansible/roles/alertmanager/tasks/configure.yml +++ b/ansible/roles/alertmanager/tasks/configure.yml @@ -1,3 +1,4 @@ +--- - name: Create alertmanager directories ansible.builtin.file: path: "{{ item }}" @@ -11,7 +12,7 @@ - "{{ alertmanager_storage_path }}" - name: Create alertmanager service file with immutable options - template: + ansible.builtin.template: src: alertmanager.service.j2 dest: /usr/lib/systemd/system/alertmanager.service owner: root @@ -38,10 +39,9 @@ mode: u=rw,go= notify: Restart alertmanager -- meta: flush_handlers - +- ansible.builtin.meta: flush_handlers - name: Ensure alertmanager service state - systemd: + ansible.builtin.systemd: name: alertmanager state: "{{ 'started' if alertmanager_started | bool else 'stopped' }}" enabled: "{{ alertmanager_enabled | bool }}" diff --git a/ansible/roles/alertmanager/tasks/install.yml b/ansible/roles/alertmanager/tasks/install.yml index 0f655da..f1cb9cd 100644 --- a/ansible/roles/alertmanager/tasks/install.yml +++ b/ansible/roles/alertmanager/tasks/install.yml @@ -1,3 +1,4 @@ +--- - name: Create alertmanager system user ansible.builtin.user: name: "{{ alertmanager_system_user }}" @@ -22,4 +23,6 @@ group: root mode: u=rwx,go=rx remote_src: true - extra_opts: ['--strip-components=1', '--show-stored-names'] + extra_opts: + - "--strip-components=1" + - "--show-stored-names" diff --git a/ansible/roles/basic_users/README.md b/ansible/roles/basic_users/README.md index 70ab154..23bea4c 100644 --- a/ansible/roles/basic_users/README.md +++ b/ansible/roles/basic_users/README.md @@ -1,9 +1,8 @@ - -basic_users -=========== +# basic_users Setup users on cluster nodes using `/etc/passwd` and manipulating `$HOME`, i.e. without requiring LDAP etc. Features: + - UID/GID is consistent across cluster (and explicitly defined). - SSH key generated and propagated to all nodes to allow login between cluster nodes. @@ -12,59 +11,56 @@ without requiring LDAP etc. Features: - When deleting users, systemd user sessions are terminated first. > [!IMPORTANT] The defaults for this role assumes that `$HOME` for users -managed by this role (e.g. not `rocky` and other system users) is on a shared -filesystem. The export of this shared filesystem may be root squashed if its -server is in the `basic_user` group - see configuration examples below. +> managed by this role (e.g. not `rocky` and other system users) is on a shared +> filesystem. The export of this shared filesystem may be root squashed if its +> server is in the `basic_user` group - see configuration examples below. -Role Variables --------------- +## Role Variables - `basic_users_homedir_server`: Optional inventory hostname in the `basic_users` group defining the host to use to create home directories. 
If the home - directory export is root squashed, this host *must* be the home directory + directory export is root squashed, this host _must_ be the home directory server. Default is the `control` node which is appropriate for the default appliance configuration. Not relevant if `create_home` is false for all users. - `basic_users_homedir_server_path`: Optional path prefix for home directories on - the `basic_users_homedir_server`, i.e. on the "server side". Default is - `/exports/home` which is appropriate for the default appliance configuration. + the `basic_users_homedir_server`, i.e. on the "server-side". Default is + `/exports/home` which is appropriate for the default appliance configuration. - `basic_users_homedir_client`: Optional inventory hostname in the `basic_users` - group defining the host to use to create ssh keys etc in home directories. + group defining the host to use to create SSH keys etc in home directories. This should be a host mounting the home directories. Default is the first node in the `login` group which is appropriate for the default appliance configuration. - `basic_users_users`: Optional, default empty list. A list of mappings defining - information for each user. In general, mapping keys/values are passed through - as parameters to [ansible.builtin.user](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/user_module.html) - and default values are as given there, with the following differences: + information for each user. In general, mapping keys/values are passed through + as parameters to [ansible.builtin.user](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/user_module.html) + and default values are as given there, with the following differences: - `generate_ssh_key`: Default is `true`, and the generated key is added to - the user's authorized keys. - - `ssh_key_comment`: Default is user name. - - `home`: Set automatically based on the user name and + the user's authorized keys. + - `ssh_key_comment`: Default is username. + - `home`: Set automatically based on the username and `basic_users_homedir_server_path`. Can be overriden for users with - non-standard home directory paths. + non-standard home directory paths. - `uid`: Should be set, so that the UID/GID is consistent across the cluster (which Slurm requires). - - `shell`: If *not* set will be `/sbin/nologin` on the `control` node to - prevent users logging in to this node, and the default shell on other - nodes. Explicitly setting this defines the shell for all nodes and if the - shared home directories are mounted on the control node will allow the - user to log in to the control node. + - `shell`: If _not_ set will be `/sbin/nologin` on the `control` node to + prevent users logging in to this node, and the default shell on other + nodes. Explicitly setting this defines the shell for all nodes and if the + shared home directories are mounted on the control node will allow the + user to log in to the control node. - `public_key`: Optional, define a key to log into the cluster with. - `sudo`: Optional, a (possibly multiline) string defining sudo rules for the - user. + user. - `ssh_key_type` defaults to `ed25519` instead of the `ansible.builtin.user` - default of `rsa`. + default of `rsa`. - Any other keys may present for other purposes (i.e. not used by this role). - `basic_users_groups`: Optional, default empty list. A list of mappings defining information for each group. 
Mapping keys/values are passed through as parameters to [ansible.builtin.group](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/group_module.html) and default values are as given there. - `basic_users_override_sssd`: Optional bool, default false. Whether to disable `sssd` when ensuring users/groups exist with this role. Permits creating local users/groups even if they clash with users provided via sssd (e.g. from LDAP). Ignored if host is not in group `sssd` as well. Note with this option active `sssd` will be stopped and restarted each time this role is run. -Dependencies ------------- +## Dependencies None. -Example Configurations ----------------------- +## Example Configurations With default appliance NFS configuration, create user `alice` with access to all nodes except the control node, and delete user `bob`: @@ -83,9 +79,10 @@ basic_users_users: ``` Using an external share which: - - does not root squash (so this role can create directories on it) - - is mounted to all nodes including the control node (so this role can set - authorized keys there) + +- does not root squash (so this role can create directories on it) +- is mounted to all nodes including the control node (so this role can set + authorized keys there) Create user `Carol`: @@ -99,7 +96,7 @@ basic_users_user: public_key: ssh-ed25519 ... ``` -Using an external share which *does* root squash, so home directories cannot be +Using an external share which _does_ root squash, so home directories cannot be created by this role and must already exist, create user `Dan`: ```yaml diff --git a/ansible/roles/basic_users/defaults/main.yml b/ansible/roles/basic_users/defaults/main.yml index 7b24ef7..8b4b66b 100644 --- a/ansible/roles/basic_users/defaults/main.yml +++ b/ansible/roles/basic_users/defaults/main.yml @@ -1,9 +1,10 @@ +--- basic_users_homedir_server: "{{ groups['control'] | first }}" # no way, generally, to find the nfs_server basic_users_homedir_server_path: /exports/home -basic_users_homedir_client: "{{ groups['login'] | first }}" +basic_users_homedir_client: "{{ groups['login'] | first }}" basic_users_userdefaults: state: present # need this here so don't have to add default() everywhere - generate_ssh_key: true + generate_ssh_key: true ssh_key_comment: "{{ item.name }}" ssh_key_type: ed25519 shell: "{{'/sbin/nologin' if 'control' in group_names else omit }}" diff --git a/ansible/roles/basic_users/filter_plugins/filter_keys.py b/ansible/roles/basic_users/filter_plugins/filter_keys.py index 119a430..12aa079 100644 --- a/ansible/roles/basic_users/filter_plugins/filter_keys.py +++ b/ansible/roles/basic_users/filter_plugins/filter_keys.py @@ -1,22 +1,27 @@ -""" Filter a dict to remove specified keys """ +"""Filter a dict to remove specified keys""" import copy -USER_MODULE_PARAMS = ('append authorization comment create_home createhome expires force generate_ssh_key group ' - 'groups hidden home local login_class move_home name user non_unique password password_expire_min ' - 'password_expire_max password_lock profile remove role seuser shell skeleton ssh_key_bits ' - 'ssh_key_comment ssh_key_file ssh_key_passphrase ssh_key_type state system uid update_password').split() +USER_MODULE_PARAMS = ( + "append authorization comment create_home createhome expires force generate_ssh_key group " + "groups hidden home local login_class move_home name user non_unique password " + "password_expire_min password_expire_max password_lock profile remove role seuser shell " + "skeleton ssh_key_bits ssh_key_comment 
ssh_key_file ssh_key_passphrase ssh_key_type state " + "system uid update_password" +).split() -class FilterModule(object): - def filters(self): - return { - 'filter_user_params': self.filter_user_params - } +class FilterModule( + object +): # pylint: disable=missing-class-docstring, useless-object-inheritance + + def filters(self): # pylint: disable=missing-function-docstring + return {"filter_user_params": self.filter_user_params} def filter_user_params(self, d): - ''' Return a copy of dict `d` containing only keys which are parameters for the user module''' - + # pylint: disable-next=line-too-long + """Return a copy of dict `d` containing only keys which are parameters for the user module""" + user_dict = copy.deepcopy(d) remove_keys = set(user_dict).difference(USER_MODULE_PARAMS) for key in remove_keys: diff --git a/ansible/roles/basic_users/library/terminate_user_sessions.py b/ansible/roles/basic_users/library/terminate_user_sessions.py index 711b373..542c338 100644 --- a/ansible/roles/basic_users/library/terminate_user_sessions.py +++ b/ansible/roles/basic_users/library/terminate_user_sessions.py @@ -1,11 +1,15 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2021, Steve Brasier # Apache V2 licence -from __future__ import (absolute_import, division, print_function) -__metaclass__ = type +from __future__ import absolute_import, division, print_function -DOCUMENTATION = r''' +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error + +__metaclass__ = type # pylint: disable=invalid-name + +DOCUMENTATION = r""" --- module: terminate_user_sessions @@ -22,54 +26,60 @@ description: Name of user required: true type: str - + author: - Steve Brasier (stackhpc.com) -''' +""" -EXAMPLES = r''' +EXAMPLES = r""" - terminate_user_sessions: name: fred -''' - -RETURN = r''' -''' +""" -from ansible.module_utils.basic import AnsibleModule +RETURN = r""" +""" -def run_module(): - # define available arguments/parameters a user can pass to the module - module_args = dict( - user=dict(type='str', required=True), - ) +def run_module(): # pylint: disable=missing-function-docstring + # define available arguments/parameters a user can pass to the module] + module_args = { + "user": { + "type": "str", + "required": True, + } + } - result = dict(changed=False) + result = { + "changed": False, + } - module = AnsibleModule( - argument_spec=module_args, - supports_check_mode=True - ) + module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) if module.check_mode: module.exit_json(**result) - _, sessions_stdout, _ = module.run_command("loginctl --no-legend list-sessions", check_rc=True) + _, sessions_stdout, _ = module.run_command( + "loginctl --no-legend list-sessions", check_rc=True + ) for line in sessions_stdout.splitlines(): session_info = line.split() user = session_info[1] session_id = session_info[0] - if user == module.params['user']: - _, sessions_stdout, _ = module.run_command("loginctl terminate-session %s" % session_id, check_rc=True) - result['changed'] = True - + if user == module.params["user"]: + _, sessions_stdout, _ = module.run_command( + # pylint: disable-next=consider-using-f-string + "loginctl terminate-session %s" % session_id, + check_rc=True, + ) + result["changed"] = True + # successful module exit: module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff 
--git a/ansible/roles/basic_users/tasks/main.yml b/ansible/roles/basic_users/tasks/main.yml index 6abba9c..cd01430 100644 --- a/ansible/roles/basic_users/tasks/main.yml +++ b/ansible/roles/basic_users/tasks/main.yml @@ -9,7 +9,7 @@ - "item.state | default('present') == 'absent'" - name: Stop sssd if required - systemd: + ansible.builtin.systemd: name: sssd state: stopped register: _stop_sssd @@ -18,11 +18,11 @@ - basic_users_override_sssd | bool - name: Create groups - ansible.builtin.group: "{{ item }}" - loop: "{{ basic_users_groups }}" + ansible.builtin.group: "{{ item }}" # noqa: args[module] + loop: "{{ basic_users_groups }}" - name: Create users - user: "{{ basic_users_userdefaults | combine(item) | filter_user_params() | combine(_disable_homedir) }}" + ansible.builtin.user: "{{ basic_users_userdefaults | combine(item) | filter_user_params() | combine(_disable_homedir) }}" # noqa: args[module] loop: "{{ basic_users_users }}" loop_control: label: "{{ item.name }}" @@ -32,10 +32,11 @@ generate_ssh_key: false - name: Write sudo rules - blockinfile: + ansible.builtin.blockinfile: path: /etc/sudoers.d/80-{{ item.name }}-user block: "{{ item.sudo }}" create: true + mode: "0440" loop: "{{ basic_users_users }}" loop_control: label: "{{ item.name }}" @@ -44,10 +45,10 @@ - "'sudo' in item" - name: Restart sssd if required - systemd: + ansible.builtin.systemd: name: sssd state: started - when: _stop_sssd is changed + when: _stop_sssd is changed # noqa: no-handler # This task runs only on the home directory server so it can handle # root-squashed exports @@ -56,7 +57,7 @@ ansible.builtin.copy: remote_src: true src: "{{ item.skeleton | default('/etc/skel/') }}" - dest: "{{ item.home | default( basic_users_homedir_server_path + '/' + item.name ) }}" + dest: "{{ item.home | default(basic_users_homedir_server_path + '/' + item.name) }}" owner: "{{ item.name }}" group: "{{ item.name }}" mode: u=rwX,go= @@ -72,12 +73,13 @@ # paths are easily constructed, becoming each user so that root-squash # doesn't matter - name: Create ~/.ssh directories - file: + ansible.builtin.file: state: directory path: ~/.ssh/ owner: "{{ item.name }}" group: "{{ item.name }}" mode: u=rwX,go= + become: true become_user: "{{ item.name }}" loop: "{{ basic_users_users }}" loop_control: @@ -89,11 +91,12 @@ - name: Generate cluster ssh key community.crypto.openssh_keypair: - path: "{{ item.ssh_key_file | default('~/.ssh/id_' + _ssh_key_type )}}" # NB: ssh_key_file is from ansible.builtin.user + path: "{{ item.ssh_key_file | default('~/.ssh/id_' + _ssh_key_type) }}" # NB: ssh_key_file is from ansible.builtin.user type: "{{ _ssh_key_type }}" comment: "{{ item.ssh_key_comment | default(item.name) }}" vars: _ssh_key_type: "{{ item.ssh_key_type | default('ed25519') }}" + become: true become_user: "{{ item.name }}" loop: "{{ basic_users_users }}" loop_control: @@ -111,6 +114,7 @@ manage_dir: false key: "{{ item.public_key }}" path: ~/.ssh/authorized_keys + become: true become_user: "{{ item.item.name }}" loop: "{{ _cluster_ssh_keypair.results }}" loop_control: @@ -128,6 +132,7 @@ manage_dir: false key: "{{ item.public_key }}" path: ~/.ssh/authorized_keys + become: true become_user: "{{ item.name }}" loop: "{{ basic_users_users }}" loop_control: diff --git a/ansible/roles/block_devices/README.md b/ansible/roles/block_devices/README.md index 0d326d3..ac642ce 100644 --- a/ansible/roles/block_devices/README.md +++ b/ansible/roles/block_devices/README.md @@ -1,9 +1,9 @@ -block_devices -============= +# block_devices Manage filesystems on 
block devices (such as OpenStack volumes), including creating partitions, creating filesystems and mounting filesystems. This is a convenience wrapper around the ansible modules: + - community.general.parted - community.general.filesystem - ansible.buildin.file @@ -15,13 +15,11 @@ To avoid issues with device names changing after e.g. reboots, devices are ident [^1]: See `environments/common/inventory/group_vars/builder/defaults.yml` -Requirements ------------- +## Requirements N/A. -Role Variables --------------- +## Role Variables - `block_devices_partition_state`: Optional. Partition state, 'present' or 'absent' (as for parted) or 'skip'. Defaults to 'present'. - `block_devices_serial`: Required. Serial number of block device. For an OpenStack volume this is the volume ID. @@ -36,20 +34,18 @@ Role Variables Multiple NFS client/server configurations may be provided by defining `block_devices_configurations`. This should be a list of mappings with keys/values are as per the variables above without the `block_devices_` prefix. Omitted keys/values are filled from the corresponding variable. -Dependencies ------------- +## Dependencies See top of page. -Example Playbook ----------------- +## Example Playbook ```yaml - hosts: servers become: true tasks: - - include_role: - name: block_devices + - include_role: + name: block_devices ``` The example variables below create an `ext4` partition on `/dev/sdb1` and mount it as `/mnt/files` with the default owner/group: @@ -71,12 +67,10 @@ block_devices_configurations: path: /mnt/files ``` -License -------- +## License Apache V2 -Author Information ------------------- +## Author Information stackhpc.com diff --git a/ansible/roles/block_devices/defaults/main.yml b/ansible/roles/block_devices/defaults/main.yml index 0f997bf..1a9da7b 100644 --- a/ansible/roles/block_devices/defaults/main.yml +++ b/ansible/roles/block_devices/defaults/main.yml @@ -1,9 +1,11 @@ -block_devices_configurations: [{}] +--- +block_devices_configurations: + - {} block_devices_partition_state: present # 'present', 'absent' (as for parted) or 'skip' block_devices_device: # Path to block device, e.g. '/dev/sda'. See community.general.parted:device and community.general.filesystem:dev block_devices_number: # Partition number, e.g 1 for /dev/sda1. See community.general.parted:number block_devices_fstype: # Filesystem type, e.g. e.g. 'ext4'. See community.general.filesystem:fstype -block_devices_resizefs: no # Grow filesystem into block device space (yes or no). See community.general.filesystem:resizefs +block_devices_resizefs: false # Grow filesystem into block device space (yes or no). See community.general.filesystem:resizefs block_devices_filesystem_state: present # 'present', 'absent' (as for community.general.filesystem:state) or 'skip' block_devices_path: # Path to mount point, e.g. 
'/mnt/files' block_devices_mount_state: mounted # Mount state, see ansible.posix.mount:state diff --git a/ansible/roles/block_devices/library/block_devices.py b/ansible/roles/block_devices/library/block_devices.py index ac34f2b..4a598fc 100644 --- a/ansible/roles/block_devices/library/block_devices.py +++ b/ansible/roles/block_devices/library/block_devices.py @@ -1,9 +1,14 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2021, StackHPC # Apache 2 License -DOCUMENTATION = r''' +import json + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error + +DOCUMENTATION = r""" --- module: block_devices @@ -13,32 +18,30 @@ author: - Steve Brasier (@sjpb) -''' +""" -RETURN = r''' +RETURN = r""" devices: description: dict with device serial numbers as keys and full paths (e.g. /dev/sdb) as values type: dict return: always -''' +""" -import json -from ansible.module_utils.basic import AnsibleModule - -def run_module(): - module_args = dict() +def run_module(): # pylint: disable=missing-function-docstring + module_args = {} module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) result = {"changed": False} _, stdout, _ = module.run_command("lsblk --paths --json -O", check_rc=True) - - device_info = json.loads(stdout)['blockdevices'] - result['devices'] = dict((item['serial'], item['name']) for item in device_info) + + device_info = json.loads(stdout)["blockdevices"] + result["devices"] = dict((item["serial"], item["name"]) for item in device_info) module.exit_json(**result) -def main(): + +def main(): # pylint: disable=missing-function-docstring run_module() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/ansible/roles/block_devices/tasks/main.yml b/ansible/roles/block_devices/tasks/main.yml index efaec3c..4ce7925 100644 --- a/ansible/roles/block_devices/tasks/main.yml +++ b/ansible/roles/block_devices/tasks/main.yml @@ -1,5 +1,6 @@ +--- - name: Warn role is deprecated - debug: + ansible.builtin.debug: msg: "{{ 'Role block_devices is deprecated, see ansible/roles/block_devices/README.md' | warn }}" when: block_devices_configurations | length > 0 @@ -7,18 +8,18 @@ block_devices: register: _block_devices -- name: Create partitions - parted: +- name: Create partitions + community.general.parted: device: "{{ _device }}" number: "{{ item.get('number', block_devices_number) }}" state: "{{ item.get('partition_state', block_devices_partition_state) }}" when: "item.get('partition_state', block_devices_partition_state) != 'skip'" loop: "{{ block_devices_configurations }}" vars: - _device: "{{ _block_devices.devices[ item.get('serial', block_devices_serial) ] }}" + _device: "{{ _block_devices.devices[item.get('serial', block_devices_serial)] }}" - name: Create filesystems - filesystem: + community.general.filesystem: fstype: "{{ item.get('fstype', block_devices_fstype) }}" dev: "{{ _device }}{{ item.get('number', block_devices_number) }}" resizefs: "{{ item.get('resizefs', block_devices_resizefs) }}" @@ -26,26 +27,27 @@ when: "item.get('filesystem_state', block_devices_filesystem_state) != 'skip'" loop: "{{ block_devices_configurations }}" vars: - _device: "{{ _block_devices.devices[ item.get('serial', block_devices_serial) ] }}" + _device: "{{ _block_devices.devices[item.get('serial', block_devices_serial)] }}" - name: Get filesystem UUIDs - command: + ansible.builtin.command: cmd: "lsblk {{ _device }}{{ item.get('number', block_devices_number) }} --noheadings --output UUID" loop: "{{ 
block_devices_configurations }}" vars: - _device: "{{ _block_devices.devices[ item.get('serial', block_devices_serial) ] }}" + _device: "{{ _block_devices.devices[item.get('serial', block_devices_serial)] }}" register: block_devices_uuids changed_when: false - check_mode: no + check_mode: false - name: Ensure mount point exists - file: + ansible.builtin.file: path: "{{ item.get('path', block_devices_path) }}" state: directory + mode: "0755" loop: "{{ block_devices_configurations }}" - name: Mount filesystems by UUID - mount: + ansible.posix.mount: path: "{{ item.get('path', block_devices_path) }}" src: "UUID={{ _uuid }}" fstype: "{{ item.get('fstype', block_devices_fstype) }}" @@ -57,10 +59,11 @@ index_var: block_devices_idx - name: Set owner/group for mounted directory - file: + ansible.builtin.file: path: "{{ item.get('path', block_devices_path) }}" state: directory owner: "{{ item.get('owner', block_devices_owner) | default(omit) }}" group: "{{ item.get('group', block_devices_group) | default(omit) }}" + mode: "0755" when: "item.get('owner', block_devices_owner) or item.get('group', block_devices_group)" loop: "{{ block_devices_configurations }}" diff --git a/ansible/roles/cacerts/defaults/main.yml b/ansible/roles/cacerts/defaults/main.yml index c1f940f..d53992a 100644 --- a/ansible/roles/cacerts/defaults/main.yml +++ b/ansible/roles/cacerts/defaults/main.yml @@ -1,3 +1,4 @@ -#cacerts_dest_dir: /etc/pki/ca-trust/source/anchors/ +--- +# cacerts_dest_dir: /etc/pki/ca-trust/source/anchors/ cacerts_cert_dir: "{{ appliances_environment_root }}/cacerts" cacerts_update: true diff --git a/ansible/roles/cacerts/tasks/configure.yml b/ansible/roles/cacerts/tasks/configure.yml index 5001f44..a23f275 100644 --- a/ansible/roles/cacerts/tasks/configure.yml +++ b/ansible/roles/cacerts/tasks/configure.yml @@ -1,16 +1,15 @@ --- - - name: Copy all certificates - copy: + ansible.builtin.copy: src: "{{ item }}" dest: /etc/pki/ca-trust/source/anchors/ owner: root group: root - mode: 0644 + mode: "0644" with_fileglob: - "{{ cacerts_cert_dir }}/*" become: true -- name: Update trust store - command: update-ca-trust extract +- name: Update trust store # noqa: no-changed-when + ansible.builtin.command: update-ca-trust extract become: true diff --git a/ansible/roles/cacerts/tasks/export.yml b/ansible/roles/cacerts/tasks/export.yml index c9c6471..8e036a1 100644 --- a/ansible/roles/cacerts/tasks/export.yml +++ b/ansible/roles/cacerts/tasks/export.yml @@ -1,10 +1,11 @@ +--- - name: Copy cacerts from deploy host to /exports/cluster/cacerts/ - copy: + ansible.builtin.copy: src: "{{ item }}" dest: /exports/cluster/cacerts/ owner: slurm group: root - mode: 0644 + mode: "0644" with_fileglob: - "{{ cacerts_cert_dir }}/*" delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/cacerts/tasks/main.yml b/ansible/roles/cacerts/tasks/main.yml index 84f4934..ec83d2b 100644 --- a/ansible/roles/cacerts/tasks/main.yml +++ b/ansible/roles/cacerts/tasks/main.yml @@ -1 +1,2 @@ -- import_tasks: configure.yml +--- +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/cluster_infra/defaults/main.yml b/ansible/roles/cluster_infra/defaults/main.yml index f2f9637..3b1f6c7 100644 --- a/ansible/roles/cluster_infra/defaults/main.yml +++ b/ansible/roles/cluster_infra/defaults/main.yml @@ -1,2 +1,3 @@ +--- ansible_init_collections: [] ansible_init_playbooks: [] diff --git a/ansible/roles/cluster_infra/tasks/main.yml b/ansible/roles/cluster_infra/tasks/main.yml index f62c257..91c2ab3 100644 --- 
a/ansible/roles/cluster_infra/tasks/main.yml +++ b/ansible/roles/cluster_infra/tasks/main.yml @@ -1,4 +1,5 @@ -- debug: +--- +- ansible.builtin.debug: msg: | terraform_backend_type: {{ terraform_backend_type }} terraform_state: {{ terraform_state }} @@ -8,55 +9,57 @@ # if we we have cluster_floating_ip, otherwise assume that we're # assigning the FIP in Terraform and that it will be available in # outputs.cluster_gateway_ip. -- block: +- when: + - cluster_floating_ip is defined + - cluster_floating_ip + block: - name: Look up floating IP azimuth_cloud.terraform.os_floating_ip_info: - floating_ip: "{{ cluster_floating_ip }}" + floating_ip: "{{ cluster_floating_ip }}" register: cluster_floating_ip_info - name: Set floating IP address fact - set_fact: + ansible.builtin.set_fact: cluster_floating_ip_address: "{{ cluster_floating_ip_info.floating_ip.floating_ip_address }}" - when: - - cluster_floating_ip is defined - - cluster_floating_ip - - name: Install Terraform binary - include_role: + ansible.builtin.include_role: name: azimuth_cloud.terraform.install - name: Make Terraform project directory - file: + ansible.builtin.file: path: "{{ terraform_project_path }}" state: directory + mode: "0755" - name: Write backend configuration - copy: + ansible.builtin.copy: content: | terraform { backend "{{ terraform_backend_type }}" { } } dest: "{{ terraform_project_path }}/backend.tf" + mode: "0644" - name: Template Terraform files into project directory - template: + ansible.builtin.template: src: >- - {{ + {{ "{}{}.j2".format( ( - cluster_terraform_template_dir ~ "/" - if cluster_terraform_template_dir is defined + cluster_terraform_template_dir ~ "/" + if cluster_terraform_template_dir is defined else "" ), item ) }} dest: "{{ terraform_project_path }}/{{ item }}" + mode: "0644" loop: - outputs.tf - providers.tf - resources.tf - name: Provision infrastructure - include_role: + ansible.builtin.include_role: name: azimuth_cloud.terraform.infra diff --git a/ansible/roles/compute_init/README.md b/ansible/roles/compute_init/README.md index 7a95d2b..cc8b2de 100644 --- a/ansible/roles/compute_init/README.md +++ b/ansible/roles/compute_init/README.md @@ -8,6 +8,7 @@ Allow compute nodes to rejoin the cluster after a reboot without running the > required configuration may change with further development. To enable this: + 1. Add the `compute` group (or a subset) into the `compute_init` group. 2. Build an image which includes the `compute_init` group. This is the case for StackHPC-built release images. @@ -35,67 +36,67 @@ property described above. If a role is marked as requiring a custom image then it also requires an image build with the role name added to the [Packer inventory_groups variable](../../../docs/image-build.md). -| Playbook | Role (or functionality) | Support | Custom image reqd.? | -| -------------------------|-------------------------|---------------------------------|---------------------| -| hooks/pre.yml | ? 
| None at present | n/a | -| validate.yml | n/a | Not relevant during boot | n/a | -| bootstrap.yml | (wait for ansible-init) | Not relevant during boot | n/a | -| bootstrap.yml | resolv_conf | Fully supported | No | -| bootstrap.yml | etc_hosts | Fully supported | No | -| bootstrap.yml | chrony | Fully supported | No | -| bootstrap.yml | proxy | None at present | No | -| bootstrap.yml | (/etc permissions) | None required - use image build | No | -| bootstrap.yml | (ssh /home fix) | None required - use image build | No | -| bootstrap.yml | (system users) | None required - use image build | No | -| bootstrap.yml | systemd | None required - use image build | No | -| bootstrap.yml | selinux | None required - use image build | Maybe [1] | -| bootstrap.yml | sshd | Fully supported | No | -| bootstrap.yml | dnf_repos | None at present [2] | - | -| bootstrap.yml | cacerts | Supported [3] | - | -| bootstrap.yml | squid | Not relevant for compute nodes | n/a | -| bootstrap.yml | tuned | Fully supported | No | -| bootstrap.yml | freeipa_server | Not relevant for compute nodes | n/a | -| bootstrap.yml | cockpit | None required - use image build | No | -| bootstrap.yml | firewalld | Not relevant for compute nodes | n/a | -| bootstrap.yml | fail2ban | Not relevant for compute nodes | n/a | -| bootstrap.yml | podman | Not relevant for compute nodes | n/a | -| bootstrap.yml | update | Not relevant during boot | n/a | -| bootstrap.yml | reboot | Not relevant for compute nodes | n/a | -| bootstrap.yml | ofed | Not relevant during boot | Yes | -| bootstrap.yml | ansible_init (install) | Not relevant during boot | n/a | -| bootstrap.yml | k3s (install) | Not relevant during boot | n/a | -| hooks/post-bootstrap.yml | ? | None at present | n/a | -| iam.yml | freeipa_client | None at present [4] | Yes | -| iam.yml | freeipa_server | Not relevant for compute nodes | n/a | -| iam.yml | sssd | Fully supported | No | -| filesystems.yml | block_devices | None required - role deprecated | n/a | -| filesystems.yml | nfs | All client functionality | No | -| filesystems.yml | manila | All functionality | No [5] | -| filesystems.yml | lustre | All functionality | Yes | -| extras.yml | basic_users | All functionality [6] | No | -| extras.yml | eessi | All functionality [7] | No | -| extras.yml | cuda | None required - use image build | Yes [8] | -| extras.yml | vgpu | All functionality | Yes | -| extras.yml | persist_hostkeys | Not relevant for compute nodes | n/a | -| extras.yml | compute_init (export) | Not relevant for compute nodes | n/a | -| extras.yml | k9s (install) | Not relevant during boot | n/a | -| extras.yml | extra_packages | None at present [9] | - | -| slurm.yml | mysql | Not relevant for compute nodes | n/a | -| slurm.yml | rebuild | Not relevant for compute nodes | n/a | -| slurm.yml | openhpc [10] | All slurmd functionality | No | -| slurm.yml | (set memory limits) | Fully supported | No | -| slurm.yml | (block ssh) | Fully supported | No | -| slurm.yml | nhc | Fully supported | No | -| portal.yml | (openondemand server) | Not relevant for compute nodes | n/a | -| portal.yml | (openondemand vnc desktop) | None required - use image build | No | -| portal.yml | (openondemand jupyter server) | None required - use image build | No | -| monitoring.yml | node_exporter | None required - use image build | No | -| monitoring.yml | (other monitoring) | Not relevant for compute nodes | - | -| disable-repos.yml | dnf_repos | None at present [2] | - | -| hooks/post.yml | ? 
| None at present | - | - +| Playbook | Role (or functionality) | Support | Custom image reqd.? | +| ------------------------ | ----------------------------- | ------------------------------- | ------------------- | +| hooks/pre.yml | ? | None at present | n/a | +| validate.yml | n/a | Not relevant during boot | n/a | +| bootstrap.yml | (wait for ansible-init) | Not relevant during boot | n/a | +| bootstrap.yml | resolv_conf | Fully supported | No | +| bootstrap.yml | etc_hosts | Fully supported | No | +| bootstrap.yml | chrony | Fully supported | No | +| bootstrap.yml | proxy | None at present | No | +| bootstrap.yml | (/etc permissions) | None required - use image build | No | +| bootstrap.yml | (SSH /home fix) | None required - use image build | No | +| bootstrap.yml | (system users) | None required - use image build | No | +| bootstrap.yml | systemd | None required - use image build | No | +| bootstrap.yml | selinux | None required - use image build | Maybe [1] | +| bootstrap.yml | sshd | Fully supported | No | +| bootstrap.yml | dnf_repos | None at present [2] | - | +| bootstrap.yml | cacerts | Supported [3] | - | +| bootstrap.yml | squid | Not relevant for compute nodes | n/a | +| bootstrap.yml | tuned | Fully supported | No | +| bootstrap.yml | freeipa_server | Not relevant for compute nodes | n/a | +| bootstrap.yml | cockpit | None required - use image build | No | +| bootstrap.yml | firewalld | Not relevant for compute nodes | n/a | +| bootstrap.yml | fail2ban | Not relevant for compute nodes | n/a | +| bootstrap.yml | podman | Not relevant for compute nodes | n/a | +| bootstrap.yml | update | Not relevant during boot | n/a | +| bootstrap.yml | reboot | Not relevant for compute nodes | n/a | +| bootstrap.yml | ofed | Not relevant during boot | Yes | +| bootstrap.yml | ansible_init (install) | Not relevant during boot | n/a | +| bootstrap.yml | k3s (install) | Not relevant during boot | n/a | +| hooks/post-bootstrap.yml | ? 
| None at present | n/a | +| iam.yml | freeipa_client | None at present [4] | Yes | +| iam.yml | freeipa_server | Not relevant for compute nodes | n/a | +| iam.yml | sssd | Fully supported | No | +| filesystems.yml | block_devices | None required - role deprecated | n/a | +| filesystems.yml | nfs | All client functionality | No | +| filesystems.yml | manila | All functionality | No [5] | +| filesystems.yml | lustre | All functionality | Yes | +| extras.yml | basic_users | All functionality [6] | No | +| extras.yml | eessi | All functionality [7] | No | +| extras.yml | cuda | None required - use image build | Yes [8] | +| extras.yml | vgpu | All functionality | Yes | +| extras.yml | persist_hostkeys | Not relevant for compute nodes | n/a | +| extras.yml | compute_init (export) | Not relevant for compute nodes | n/a | +| extras.yml | k9s (install) | Not relevant during boot | n/a | +| extras.yml | extra_packages | None at present [9] | - | +| slurm.yml | MySQL | Not relevant for compute nodes | n/a | +| slurm.yml | rebuild | Not relevant for compute nodes | n/a | +| slurm.yml | openhpc [10] | All slurmd functionality | No | +| slurm.yml | (set memory limits) | Fully supported | No | +| slurm.yml | (block SSH) | Fully supported | No | +| slurm.yml | nhc | Fully supported | No | +| portal.yml | (openondemand server) | Not relevant for compute nodes | n/a | +| portal.yml | (openondemand vnc desktop) | None required - use image build | No | +| portal.yml | (openondemand jupyter server) | None required - use image build | No | +| monitoring.yml | node_exporter | None required - use image build | No | +| monitoring.yml | (other monitoring) | Not relevant for compute nodes | - | +| disable-repos.yml | dnf_repos | None at present [2] | - | +| hooks/post.yml | ? | None at present | - | Notes: + 1. `selinux` is set to disabled in StackHPC images. 2. Requirement for this functionality is TBD. 3. `cacerts_cert_dir` must be the same on all nodes. @@ -105,32 +106,32 @@ Notes: 6. Assumes home directory already exists on shared storage. 7. Assumes `cvmfs_config` is the same on control node and all compute nodes. 8. If `cuda` role was run during build, the nvidia-persistenced is enabled - and will start during boot. + and will start during boot. 9. Would require `dnf_repos`. 10. `openhpc` does not need to be added to `compute_init_enable`, this is automatically enabled by adding `compute`. ## Approach + This works as follows: + 1. During image build, an ansible-init playbook and supporting files -(e.g. templates, filters, etc) are installed. + (e.g. templates, filters, etc) are installed. 2. Cluster instances are created as usual; the above compute-init playbook does -not run. + not run. 3. The `site.yml` playbook is run as usual to configure all the instances into -a cluster. In addition, with `compute-init` enabled, a `/exports/cluster` NFS -share is created on the control node containing: - - an /etc/hosts file for the cluster - - Hostvars for each compute node + a cluster. In addition, with `compute-init` enabled, a `/exports/cluster` NFS + share is created on the control node containing: - an /etc/hosts file for the cluster - Hostvars for each compute node 4. On reboot of a compute node, ansible-init runs the compute-init playbook -which: - a. Checks whether the `enable_compute` metadata flag is set, and exits if - not. - b. Tries to mount the above `/exports/cluster` NFS share from the control - node, and exits if it cannot. - c. 
Configures itself using the exported hostvars, depending on the - `enable_*` flags set in metadata. - d. Issues an `scontrol` command to resume the node (because Slurm will - consider it as "unexpectedly rebooted"). + which: + a. Checks whether the `enable_compute` metadata flag is set, and exits if + not. + b. Tries to mount the above `/exports/cluster` NFS share from the control + node, and exits if it cannot. + c. Configures itself using the exported hostvars, depending on the + `enable_*` flags set in metadata. + d. Issues an `scontrol` command to resume the node (because Slurm will + consider it as "unexpectedly rebooted"). The check in 4b. above is what prevents the compute-init script from trying to configure the node before the services on the control node are available @@ -147,35 +148,43 @@ a new image: 2. Reimage the compute nodes: - ansible-playbook --limit compute ansible/adhoc/rebuild.yml +```shell +ansible-playbook --limit compute ansible/adhoc/rebuild.yml +``` 3. Add metadata to a compute node e.g. via Horizon to turn on compute-init playbook functionality. 4. Stop ansible-init from running - ansible all -ba "systemctl stop ansible-init" +```shell +ansible all -ba "systemctl stop ansible-init" +``` 5. Fake an image build to deploy the compute-init playbook: - ansible-playbook ansible/fatimage.yml --tags compute_init +```shell +ansible-playbook ansible/fatimage.yml --tags compute_init +``` - NB: This will also re-export the compute hostvars, as the nodes are not - in the builder group, which conveniently means any changes made to that - play also get picked up. +NB: This will also reexport the compute hostvars, as the nodes are not +in the builder group, which conveniently means any changes made to that +play also get picked up. 6. Fake a reimage of compute to run ansible-init and the updated compute-init playbook: - ansible all -ba "rm -f /var/lib/ansible-init.done && systemctl restart ansible-init" +```shell +ansible all -ba "rm -f /var/lib/ansible-init.done && systemctl restart ansible-init" +``` - Use `systemctl status ansible-init` to view stdout/stderr from Ansible. +Use `systemctl status ansible-init` to view stdout/stderr from Ansible. Steps 4/5/6 can be repeated with changes to the compute script. If required, reimage the compute node(s) first as in step 2 and/or add additional metadata as in step 3. - ## Design notes + - Duplicating code in roles into the `compute-init` script is unfortunate, but does allow developing this functionality without wider changes to the appliance. @@ -188,7 +197,6 @@ as in step 3. 1. Control node copies files resulting from role into cluster exports, compute-init copies to local disk. Only works if files are not host-specific Examples: etc_hosts, eessi config? - 2. Re-implement the role. Works if the role vars are not too complicated, (else they all need to be duplicated in compute-init). Could also only support certain subsets of role functionality or variables @@ -197,29 +205,29 @@ as in step 3. 
- Some variables are defined using hostvars from other nodes, which aren't available v the current approach: - ``` - [root@rl9-compute-0 rocky]# grep hostvars /mnt/cluster/hostvars/rl9-compute-0/hostvars.yml - "grafana_address": "{{ hostvars[groups['grafana'].0].api_address }}", - "grafana_api_address": "{{ hostvars[groups['grafana'].0].internal_address }}", - "mysql_host": "{{ hostvars[groups['mysql'] | first].api_address }}", - "nfs_server_default": "{{ hostvars[groups['control'] | first ].internal_address }}", - "openhpc_slurm_control_host": "{{ hostvars[groups['control'].0].api_address }}", - "openondemand_address": "{{ hostvars[groups['openondemand'].0].api_address if groups['openondemand'] | count > 0 else '' }}", - "openondemand_node_proxy_directives": "{{ _opeonondemand_unset_auth if (openondemand_auth == 'basic_pam' and 'openondemand_host_regex' and groups['grafana'] | length > 0 and hostvars[ groups['grafana'] | first]._grafana_auth_is_anonymous) else '' }}", - "openondemand_servername": "{{ hostvars[ groups['openondemand'] | first].ansible_host }}", - "prometheus_address": "{{ hostvars[groups['prometheus'].0].api_address }}", - "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" - ``` - - More generally, there is nothing to stop any group var depending on a - "{{ hostvars[] }}" interpolation ... - - Only `nfs_server_default` and `openhpc_slurm_control_host` are of concern - for compute nodes - both of these indirect via `api_address` to - `inventory_hostname`. This has been worked around by replacing this with - "{{ groups['control'] | first }}" which does result in the control node - inventory hostname when templating. - - Note that although `groups` is defined in the templated hostvars, when - the hostvars are loaded using `include_vars:` is is ignored as it is a - "magic variable" determined by ansible itself and cannot be set. + ```text + [root@rl9-compute-0 rocky]# grep hostvars /mnt/cluster/hostvars/rl9-compute-0/hostvars.yml + "grafana_address": "{{ hostvars[groups['grafana'].0].api_address }}", + "grafana_api_address": "{{ hostvars[groups['grafana'].0].internal_address }}", + "mysql_host": "{{ hostvars[groups['mysql'] | first].api_address }}", + "nfs_server_default": "{{ hostvars[groups['control'] | first ].internal_address }}", + "openhpc_slurm_control_host": "{{ hostvars[groups['control'].0].api_address }}", + "openondemand_address": "{{ hostvars[groups['openondemand'].0].api_address if groups['openondemand'] | count > 0 else '' }}", + "openondemand_node_proxy_directives": "{{ _opeonondemand_unset_auth if (openondemand_auth == 'basic_pam' and 'openondemand_host_regex' and groups['grafana'] | length > 0 and hostvars[ groups['grafana'] | first]._grafana_auth_is_anonymous) else '' }}", + "openondemand_servername": "{{ hostvars[ groups['openondemand'] | first].ansible_host }}", + "prometheus_address": "{{ hostvars[groups['prometheus'].0].api_address }}", + "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" + ``` + + More generally, there is nothing to stop any group var depending on a + "{{ hostvars[] }}" interpolation ... + + Only `nfs_server_default` and `openhpc_slurm_control_host` are of concern + for compute nodes - both of these indirect via `api_address` to + `inventory_hostname`. This has been worked around by replacing this with + "{{ groups['control'] | first }}" which does result in the control node + inventory hostname when templating. 
+ + Note that although `groups` is defined in the templated hostvars, when + the hostvars are loaded using `include_vars:` is is ignored as it is a + "magic variable" determined by ansible itself and cannot be set. diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 397da01..0ff647a 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -1,8 +1,7 @@ --- - - name: Compute node initialisation hosts: localhost - become: yes + become: true vars: os_metadata: "{{ lookup('url', 'http://169.254.169.254/openstack/latest/meta_data.json') | from_json }}" server_node_ip: "{{ os_metadata.meta.control_address }}" @@ -12,7 +11,7 @@ enable_cacerts: "{{ os_metadata.meta.cacerts | default(false) | bool }}" enable_sssd: "{{ os_metadata.meta.sssd | default(false) | bool }}" enable_sshd: "{{ os_metadata.meta.sshd | default(false) | bool }}" - enable_tuned: "{{ os_metadata.meta.tuned | default(false) | bool }}" + enable_tuned: "{{ os_metadata.meta.tuned | default(false) | bool }}" enable_nfs: "{{ os_metadata.meta.nfs | default(false) | bool }}" enable_manila: "{{ os_metadata.meta.manila | default(false) | bool }}" enable_lustre: "{{ os_metadata.meta.lustre | default(false) | bool }}" @@ -24,7 +23,6 @@ # TODO: "= role defaults" - could be moved to a vars_file: on play with similar precedence effects resolv_conf_nameservers: [] - tuned_profile_baremetal: hpc-compute tuned_profile_vm: virtual-guest tuned_profile: "{{ tuned_profile_baremetal if ansible_virtualization_role != 'guest' else tuned_profile_vm }}" @@ -47,17 +45,16 @@ - nosuid tasks: - - block: + - when: not enable_compute + block: - name: Report skipping initialization if not compute node # meta: end_play produces no output - debug: + ansible.builtin.debug: msg: "Skipping compute initialization: Metadata enable_compute is not true" - - - meta: end_play - when: not enable_compute + - ansible.builtin.meta: end_play - name: Ensure the mount directory exists - file: + ansible.builtin.file: path: /mnt/cluster state: directory owner: slurm @@ -76,46 +73,46 @@ # exits from playbook if this failed below, allowing ansible-init to # finish, which allows site.yml to continue on initial deploy - - block: + - when: _mount_mnt_cluster.failed + block: - name: Report skipping initialization if cannot mount nfs # meta: end_play produces no output - debug: + ansible.builtin.debug: msg: "Skipping compute initialization: Failed to mount /exports/cluster from control node {{ server_node_ip }}" - - - meta: end_play - when: _mount_mnt_cluster.failed + - ansible.builtin.meta: end_play - name: Check if hostvars exist + become: true become_user: slurm - stat: + ansible.builtin.stat: path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" register: hostvars_stat - - block: + - when: not hostvars_stat.stat.exists + block: - name: Report skipping initialization if host vars does not exist # meta: end_play produces no output - debug: + ansible.builtin.debug: msg: "Skipping compute initialization: hostvars does not exist" - - meta: end_play - when: not hostvars_stat.stat.exists - + - ansible.builtin.meta: end_play - name: Sync /mnt/cluster to /var/tmp + become: true become_user: slurm - synchronize: + ansible.posix.synchronize: src: "/mnt/cluster/" dest: "/var/tmp/cluster/" - archive: yes - recursive: yes + archive: true + recursive: true - name: Unmount /mnt/cluster after sync - mount: + ansible.posix.mount: path: /mnt/cluster state: 
unmounted - name: Load hostvars # this is higher priority than vars block = normal ansible's hostvars - include_vars: + ansible.builtin.include_vars: file: "/var/tmp/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" - name: Run chrony role @@ -129,6 +126,7 @@ when: enable_chrony - name: Configure resolve.conf + when: enable_resolv_conf block: - name: Set nameservers in /etc/resolv.conf ansible.builtin.template: @@ -151,16 +149,14 @@ ansible.builtin.systemd: name: NetworkManager state: reloaded - when: _copy_nm_config.changed | default(false) - when: enable_resolv_conf - + when: _copy_nm_config.changed | default(false) # noqa: no-handler - name: Copy cluster /etc/hosts - copy: + ansible.builtin.copy: src: /var/tmp/cluster/hosts dest: /etc/hosts owner: root group: root - mode: 0644 + mode: "0644" when: enable_etc_hosts - name: Configure cacerts @@ -178,7 +174,7 @@ when: enable_sshd - name: Configure tuned - include_tasks: tasks/tuned.yml + ansible.builtin.include_tasks: tasks/tuned.yml when: enable_tuned - name: Configure sssd @@ -200,12 +196,15 @@ loop: "{{ nfs_configurations }}" - name: Manila mounts + when: + - enable_manila + - os_manila_mount_shares | length > 0 block: - name: Read manila share info from nfs file - include_vars: + ansible.builtin.include_vars: file: /var/tmp/cluster/manila_share_info.yml no_log: true # contains secrets - + - name: Ensure Ceph configuration directory exists ansible.builtin.file: path: "{{ os_manila_mount_ceph_conf_path }}" @@ -267,10 +266,6 @@ loop_control: label: "{{ item.share_name }}" when: item.mount_state | default(os_manila_mount_state) in ['mounted' or 'ephemeral'] - when: - - enable_manila - - os_manila_mount_shares | length > 0 - - name: Configure lustre ansible.builtin.include_role: name: lustre @@ -278,27 +273,29 @@ when: enable_lustre - name: Basic users - ansible.builtin.include_role: + ansible.builtin.include_role: name: basic_users when: enable_basic_users - name: EESSI + when: enable_eessi + # NB: don't need conditional block on enable_compute as have already exited + # if not the case block: - name: Copy cvmfs config - copy: + ansible.builtin.copy: src: /var/tmp/cluster/cvmfs/default.local dest: /etc/cvmfs/default.local owner: root group: root - mode: 0644 + mode: "0644" - - name: Ensure CVMFS config is setup - command: + - name: Ensure CVMFS config is setup # noqa: no-changed-when + ansible.builtin.command: cmd: "cvmfs_config setup" - when: enable_eessi - name: Configure VGPUs - include_role: + ansible.builtin.include_role: name: stackhpc.linux.vgpu tasks_from: 'configure.yml' when: enable_vgpu @@ -306,54 +303,54 @@ # NB: don't need conditional block on enable_compute as have already exited # if not the case - name: Write Munge key - copy: + ansible.builtin.copy: # NB: openhpc_munge_key is *binary* and may not survive json encoding # so do same as environments/common/inventory/group_vars/all/openhpc.yml content: "{{ vault_openhpc_mungekey | b64decode }}" dest: "/etc/munge/munge.key" owner: munge group: munge - mode: 0400 + mode: "0400" - name: Set slurmctld location for configless operation - lineinfile: + ansible.builtin.lineinfile: path: /etc/sysconfig/slurmd line: "SLURMD_OPTIONS='--conf-server {{ openhpc_slurm_control_host_address | default(openhpc_slurm_control_host) }}'" regexp: "^SLURMD_OPTIONS=" - create: yes + create: true owner: root group: root - mode: 0644 + mode: "0644" - name: Ensure Munge service state - service: + ansible.builtin.service: name: munge enabled: true state: started - name: Set locked memory limits on 
user-facing nodes - lineinfile: + ansible.builtin.lineinfile: path: /etc/security/limits.conf - regexp: '\* soft memlock unlimited' + regexp: "\\* soft memlock unlimited" line: "* soft memlock unlimited" - name: Configure sshd pam module - blockinfile: + ansible.builtin.blockinfile: path: /etc/pam.d/sshd - insertafter: 'account\s+required\s+pam_nologin.so' + insertafter: "account\\s+required\\s+pam_nologin.so" block: | account sufficient pam_access.so account required pam_slurm.so - name: Configure login access control - blockinfile: + ansible.builtin.blockinfile: path: /etc/security/access.conf block: | +:adm:ALL -:ALL:ALL - name: Ensure slurmd service state - service: + ansible.builtin.service: name: slurmd enabled: true state: started @@ -364,9 +361,9 @@ tasks_from: boot.yml when: enable_nhc - - name: Ensure node is resumed + - name: Ensure node is resumed # noqa: no-changed-when # TODO: consider if this is always safe for all job states? - command: scontrol update state=resume nodename={{ ansible_hostname }} + ansible.builtin.command: scontrol update state=resume nodename={{ ansible_hostname }} register: _scontrol_update failed_when: - _scontrol_update.rc > 0 diff --git a/ansible/roles/compute_init/tasks/export.yml b/ansible/roles/compute_init/tasks/export.yml index f5c594c..5b31bd6 100644 --- a/ansible/roles/compute_init/tasks/export.yml +++ b/ansible/roles/compute_init/tasks/export.yml @@ -1,5 +1,6 @@ +--- - name: Ensure the /exports/cluster directory exists - file: + ansible.builtin.file: path: /exports/cluster state: directory owner: slurm @@ -9,7 +10,7 @@ delegate_to: "{{ groups['control'] | first }}" - name: Copy /etc/hosts to /exports/cluster - copy: + ansible.builtin.copy: src: /etc/hosts dest: /exports/cluster/hosts owner: slurm @@ -20,7 +21,7 @@ delegate_to: "{{ groups['control'] | first }}" - name: Create hostvars directory - file: + ansible.builtin.file: path: /exports/cluster/hostvars/{{ inventory_hostname }}/ state: directory owner: slurm @@ -29,7 +30,7 @@ delegate_to: "{{ groups['control'] | first }}" - name: Template out hostvars - template: + ansible.builtin.template: src: hostvars.yml.j2 dest: /exports/cluster/hostvars/{{ inventory_hostname }}/hostvars.yml owner: slurm @@ -38,7 +39,7 @@ delegate_to: "{{ groups['control'] | first }}" - name: Copy manila share info to /exports/cluster - copy: + ansible.builtin.copy: content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}" dest: /exports/cluster/manila_share_info.yml owner: slurm @@ -52,22 +53,22 @@ os_manila_mount_share_info: "{{ os_manila_mount_share_info }}" - name: Ensure /exports/cluster/cvmfs directory exists - file: + ansible.builtin.file: path: /exports/cluster/cvmfs state: directory owner: slurm group: root - mode: 0755 + mode: "0755" run_once: true delegate_to: "{{ groups['control'] | first }}" - name: Copy EESSI CVMFS config to /exports/cluster - copy: + ansible.builtin.copy: src: /etc/cvmfs/default.local dest: /exports/cluster/cvmfs/default.local owner: slurm group: root - mode: 0644 + mode: "0644" remote_src: true run_once: true delegate_to: "{{ groups['control'] | first }}" @@ -79,7 +80,7 @@ when: "'cacerts' in group_names" - name: Create hostconfig directory - file: + ansible.builtin.file: path: "/exports/cluster/hostconfig/{{ inventory_hostname }}/" state: directory owner: slurm @@ -87,20 +88,20 @@ mode: u=rX,g=rwX,o= delegate_to: "{{ groups['control'] | first }}" -- name: Template sssd config - import_role: +- name: Template sssd config + ansible.builtin.import_role: name: sssd tasks_from: 
export.yml when: "'sssd' in group_names" -- name: Template sshd config - import_role: +- name: Template sshd config + ansible.builtin.import_role: name: sshd tasks_from: export.yml when: "'sshd' in group_names" - name: Export generated NHC config - import_role: + ansible.builtin.import_role: name: nhc tasks_from: export.yml when: "'nhc' in group_names" diff --git a/ansible/roles/compute_init/tasks/install.yml b/ansible/roles/compute_init/tasks/install.yml index 67f339c..f7ee876 100644 --- a/ansible/roles/compute_init/tasks/install.yml +++ b/ansible/roles/compute_init/tasks/install.yml @@ -1,12 +1,11 @@ --- - - name: Ensure directories exist - file: + ansible.builtin.file: path: "/etc/ansible-init/playbooks/{{ item }}" state: directory owner: root group: root - mode: 0755 + mode: "0755" loop: - templates - files @@ -16,11 +15,15 @@ - roles - name: Inject files from roles - synchronize: - src: '{{ item.src }}' - dest: '/etc/ansible-init/playbooks/{{ item.dest }}' + ansible.posix.synchronize: + src: "{{ item.src }}" + dest: "/etc/ansible-init/playbooks/{{ item.dest }}" archive: false - rsync_opts: ["-p", "--chmod=D770,F644", "--owner=root", "--group=root"] + rsync_opts: + - "-p" + - "--chmod=D770,F644" + - "--owner=root" + - "--group=root" recursive: true use_ssh_args: true become: true @@ -53,18 +56,18 @@ dest: roles/ - name: Add filter_plugins to ansible.cfg - lineinfile: + ansible.builtin.lineinfile: path: /etc/ansible-init/ansible.cfg line: "filter_plugins = /etc/ansible-init/filter_plugins" state: present owner: root group: root - mode: 0644 + mode: "0644" - name: Add compute initialisation playbook - copy: + ansible.builtin.copy: src: compute-init.yml dest: /etc/ansible-init/playbooks/10-compute-init.yml owner: root group: root - mode: 0644 + mode: "0644" diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml index e4e785b..692301d 100644 --- a/ansible/roles/cuda/defaults/main.yml +++ b/ansible/roles/cuda/defaults/main.yml @@ -1,3 +1,5 @@ +--- +# yamllint disable-line rule:line-length cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo" cuda_nvidia_driver_stream: '580-open' cuda_nvidia_driver_pkg: "nvidia-open-3:580.82.07-1.el{{ ansible_distribution_major_version }}" diff --git a/ansible/roles/cuda/tasks/facts.yml b/ansible/roles/cuda/tasks/facts.yml index 0d60457..787f026 100644 --- a/ansible/roles/cuda/tasks/facts.yml +++ b/ansible/roles/cuda/tasks/facts.yml @@ -1,4 +1,4 @@ --- - name: Set cuda_facts_version_short - set_fact: + ansible.builtin.set_fact: cuda_facts_version_short: "{{ cuda_version_short }}" diff --git a/ansible/roles/cuda/tasks/install.yml b/ansible/roles/cuda/tasks/install.yml index 39bd20d..91af515 100644 --- a/ansible/roles/cuda/tasks/install.yml +++ b/ansible/roles/cuda/tasks/install.yml @@ -1,10 +1,11 @@ - +--- # Based on https://docs.nvidia.com/datacenter/tesla/driver-installation-guide/ - name: Install cuda repo - get_url: + ansible.builtin.get_url: dest: "/etc/yum.repos.d/cuda-rhel{{ ansible_distribution_major_version }}.repo" url: "{{ cuda_repo_url }}" + mode: "0644" - name: Check if nvidia driver module is enabled ansible.builtin.command: dnf module list --enabled nvidia-driver @@ -24,7 +25,7 @@ register: _cuda_driver_install - name: Check kernel has not been modified - assert: + ansible.builtin.assert: that: "'kernel ' not in _cuda_driver_install.stdout | default('')" # space 
ensures we don't flag e.g. kernel-devel-matched fail_msg: "{{ _cuda_driver_install.stdout_lines | default([]) | select('search', 'kernel ') }}" @@ -37,13 +38,13 @@ register: cuda_package_install - name: Add cuda binaries to path - lineinfile: + ansible.builtin.lineinfile: path: /etc/profile.d/sh.local - line: 'export PATH=$PATH:$(ls -1d /usr/local/cuda-* | sort -V | tail -1)/bin' + line: "export PATH=$PATH:$(ls -1d /usr/local/cuda-* | sort -V | tail -1)/bin" when: cuda_package_version != 'none' - name: Enable NVIDIA Persistence Daemon - systemd: + ansible.builtin.systemd: name: nvidia-persistenced enabled: true state: "{{ cuda_persistenced_state }}" @@ -51,9 +52,9 @@ - name: Reboot ansible.builtin.reboot: post_reboot_delay: 30 - when: cuda_package_install.changed + when: cuda_package_install.changed # noqa: no-handler - name: Wait for hosts to be reachable - wait_for_connection: + ansible.builtin.wait_for_connection: sleep: 15 - when: cuda_package_install.changed + when: cuda_package_install.changed # noqa: no-handler diff --git a/ansible/roles/cuda/tasks/runtime.yml b/ansible/roles/cuda/tasks/runtime.yml index c16a48c..e2dfab3 100644 --- a/ansible/roles/cuda/tasks/runtime.yml +++ b/ansible/roles/cuda/tasks/runtime.yml @@ -1,5 +1,6 @@ +--- - name: Ensure NVIDIA Persistence Daemon state - systemd: + ansible.builtin.systemd: name: nvidia-persistenced enabled: true state: "{{ cuda_persistenced_state }}" diff --git a/ansible/roles/cuda/tasks/samples.yml b/ansible/roles/cuda/tasks/samples.yml index b2bccd7..392a295 100644 --- a/ansible/roles/cuda/tasks/samples.yml +++ b/ansible/roles/cuda/tasks/samples.yml @@ -1,13 +1,15 @@ +--- - name: Ensure cuda_samples_path exists - file: + ansible.builtin.file: state: directory path: "{{ cuda_samples_path }}" owner: "{{ ansible_user }}" group: "{{ ansible_user }}" + mode: "0755" - name: Download CUDA samples release - unarchive: - remote_src: yes + ansible.builtin.unarchive: + remote_src: true src: "{{ cuda_samples_release_url }}" dest: "{{ cuda_samples_path }}" owner: "{{ ansible_user }}" @@ -15,12 +17,13 @@ creates: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}" - name: Create CUDA samples build directory - file: + ansible.builtin.file: state: directory path: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build" + mode: "0755" - name: Build CUDA samples - shell: + ansible.builtin.shell: # We need to source /etc/profile.d/sh.local to add CUDA to the PATH cmd: . /etc/profile.d/sh.local && cmake .. && make -j {{ ansible_processor_vcpus }} chdir: "{{ cuda_samples_path }}/cuda-samples-{{ cuda_version_short }}/build" diff --git a/ansible/roles/dnf_repos/README.md b/ansible/roles/dnf_repos/README.md index ff22c79..a7c6bc2 100644 --- a/ansible/roles/dnf_repos/README.md +++ b/ansible/roles/dnf_repos/README.md @@ -1,38 +1,34 @@ -dnf_repos -========= +# dnf_repos -Modifies repo definitions for repofiles in `/etc/yum.repos.d` to point to snapshots in StackHPC's Ark Pulp server or mirrors of them +Modifies repository definitions for repofiles in `/etc/yum.repos.d` to point to snapshots in StackHPC's Ark Pulp server or mirrors of them on a local Pulp server. -Requirements ------------- +## Requirements Requires Ark credentials if using StackHPC's upstream Ark server. -Role Variables --------------- +## Role Variables -Variables in this role are also required by `pulp_site` so set in +Variables in this role are also required by `pulp_site` so set in `environments/common/inventory/groups_vars/all/dnf_repos.yml`. 
See that file for detailed default values. - `dnf_repos_repos`: Dict of dicts containing information to construct URLs for Ark snapshots from the target Pulp server for each Rocky version. For example: - ``` - dnf_repos_repos: - appstream: # ansible.builtin.yum_repository:name - '8.10': # ansible_distribution_version or ansible_distribution_major_version - repo_file: Rocky-AppStream # yum_repository: file - # repo_name: # optional, override yum_repository:name - pulp_path: rocky/8.10/AppStream/x86_64/os # The subpath of the the upstream Ark server's content endpoint URL for the repo's snapshots, see https://ark.stackhpc.com/pulp/content/ - pulp_timestamp: 20250614T013846 - # pulp_content_url: # optional, dnf_repos_pulp_content_url - '9.6': - ... - ``` + ```yaml + dnf_repos_repos: + appstream: # ansible.builtin.yum_repository:name + "8.10": # ansible_distribution_version or ansible_distribution_major_version + repo_file: Rocky-AppStream # yum_repository: file + # repo_name: # optional, override yum_repository:name + pulp_path: rocky/8.10/AppStream/x86_64/os # The subpath of the the upstream Ark server's content endpoint URL for the repo's snapshots, see https://ark.stackhpc.com/pulp/content/ + pulp_timestamp: 20250614T013846 + # pulp_content_url: # optional, dnf_repos_pulp_content_url + "9.6": ... + ``` - `dnf_repos_default`: Appliance default repos to use Ark snapshots for. Following same format as `dnf_repos_repos`. - See for appliance default repo list `environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml`. + See for appliance default repository list `environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml`. - `dnf_repos_extra`: Additional repos to use Ark snapshots for. Follows same format as `dnf_repos_repos`. Defaults to `{}` -- `dnf_repos_pulp_content_url`: Optional str. Content URL of Pulp server to use Ark snapshots from. +- `dnf_repos_pulp_content_url`: Optional str. Content URL of Pulp server to use Ark snapshots from. Defaults to `{{ appliances_pulp_url }}/pulp/content` - `dnf_repos_username`: Optional str. Username for Ark. 
Should be set if using upstream StackHPC Ark Pulp server, but omitted if using local Pulp server (see `ansible/roles/pulp_site`) diff --git a/ansible/roles/dnf_repos/defaults/main.yml b/ansible/roles/dnf_repos/defaults/main.yml index fe3c44e..112c5c7 100644 --- a/ansible/roles/dnf_repos/defaults/main.yml +++ b/ansible/roles/dnf_repos/defaults/main.yml @@ -1,3 +1,4 @@ +--- dnf_repos_repos: {} # see environments/common/inventory/group_vars/all/{dnf_repos,timestamps}.yml dnf_repos_pulp_content_url: "{{ appliances_pulp_url }}/pulp/content" dnf_repos_username: "{{ omit }}" diff --git a/ansible/roles/dnf_repos/tasks/disable_repos.yml b/ansible/roles/dnf_repos/tasks/disable_repos.yml index 4db073b..0339f5b 100644 --- a/ansible/roles/dnf_repos/tasks/disable_repos.yml +++ b/ansible/roles/dnf_repos/tasks/disable_repos.yml @@ -27,5 +27,5 @@ path: "{{ item.path }}" regexp: '^enabled\ ?=\ ?1' replace: 'enabled=0' - backup: yes + backup: true loop: "{{ _dnf_repo_files.files }}" diff --git a/ansible/roles/doca/defaults/main.yml b/ansible/roles/doca/defaults/main.yml index 8fb5e92..7f28ef8 100644 --- a/ansible/roles/doca/defaults/main.yml +++ b/ansible/roles/doca/defaults/main.yml @@ -1,3 +1,5 @@ -doca_version: '2.9.3' # 2.9 is LTS, last to support ConnectX-4, 3 years for bug fixes and CVE updates +--- + +doca_version: "2.9.3" # 2.9 is LTS, last to support ConnectX-4, 3 years for bug fixes and CVE updates doca_profile: doca-ofed doca_repo_url: "https://linux.mellanox.com/public/repo/doca/{{ doca_version }}/rhel{{ ansible_distribution_version }}/{{ ansible_architecture }}/" diff --git a/ansible/roles/doca/tasks/install-kernel-devel.yml b/ansible/roles/doca/tasks/install-kernel-devel.yml index 6a1943a..9968058 100644 --- a/ansible/roles/doca/tasks/install-kernel-devel.yml +++ b/ansible/roles/doca/tasks/install-kernel-devel.yml @@ -1,24 +1,27 @@ +--- - name: Get installed kernels - command: dnf list --installed kernel + ansible.builtin.command: dnf list --installed kernel register: _ofed_dnf_kernels changed_when: false - name: Determine running kernel - command: uname -r # e.g. 4.18.0-513.18.1.el8_9.x86_64 + ansible.builtin.command: uname -r register: _ofed_loaded_kernel changed_when: false - name: Check current kernel is newest installed - assert: + ansible.builtin.assert: that: _ofed_kernel_current == _ofed_dnf_kernels_newest fail_msg: "Kernel {{ _ofed_loaded_kernel.stdout }} is loaded but newer {{ _ofed_dnf_kernels_newest }} is installed: consider rebooting?" vars: + # yamllint disable rule:line-length _ofed_kernel_current: >- {{ _ofed_loaded_kernel.stdout | regex_replace('\.(?:.(?!\.))+$', '') | regex_replace('\.(?:.(?!\.))+$', '') }} _ofed_dnf_kernels_newest: >- {{ _ofed_dnf_kernels.stdout_lines[1:] | map('split') | map(attribute=1) | map('regex_replace', '\.(?:.(?!\.))+$', '') | community.general.version_sort | last }} - # dnf line format e.g. "kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos " + # yamllint enable rule:line-length + # dnf line format e.g. 
"kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos " - name: Install matching kernel-devel package - dnf: + ansible.builtin.dnf: name: "kernel-devel-{{ _ofed_loaded_kernel.stdout | trim }}" diff --git a/ansible/roles/doca/tasks/install.yml b/ansible/roles/doca/tasks/install.yml index d26fda7..e21218e 100644 --- a/ansible/roles/doca/tasks/install.yml +++ b/ansible/roles/doca/tasks/install.yml @@ -1,5 +1,5 @@ -- import_tasks: install-kernel-devel.yml - +--- +- ansible.builtin.import_tasks: install-kernel-devel.yml - name: Install DOCA repo ansible.builtin.yum_repository: name: doca @@ -13,21 +13,21 @@ ansible.builtin.dnf: name: doca-extra -- name: Build DOCA kernel modules - ansible.builtin.shell: +- name: Build DOCA kernel modules # noqa: no-changed-when + ansible.builtin.command: cmd: /opt/mellanox/doca/tools/doca-kernel-support register: _doca_kernel_build - - name: Find generated doca-kernel-repo - ansible.builtin.shell: 'find /tmp/DOCA.* -name doca-kernel-repo-*' + ansible.builtin.shell: "find /tmp/DOCA.* -name doca-kernel-repo-*" register: _doca_kernel_repo # e.g. /tmp/DOCA.WVMchs2QWo/doca-kernel-repo-24.10.1.1.4.0-1.kver.5.14.0.427.31.1.el9.4.x86.64.x86_64.rpm changed_when: false -- name: Create dnf cache +- name: Create dnf cache # noqa: no-changed-when ansible.builtin.command: dnf makecache - name: Install DOCA repository package + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" ansible.builtin.dnf: name: "{{ _doca_kernel_repo.stdout }}" disable_gpg_check: true @@ -41,11 +41,11 @@ state: absent path: "{{ (_doca_kernel_repo.stdout | split('/'))[:3] | join('/') }}" # leading / means 1st element of split list is '' -- name: Update initramfs +- name: Update initramfs # noqa: no-changed-when ansible.builtin.command: cmd: dracut -f register: _doca_dracut failed_when: _doca_dracut.stderr != '' # appears rc is always 0 -- name: Load the new driver +- name: Load the new driver # noqa: no-changed-when ansible.builtin.command: /etc/init.d/openibd restart diff --git a/ansible/roles/doca/tasks/main.yml b/ansible/roles/doca/tasks/main.yml index e7a272f..df97825 100644 --- a/ansible/roles/doca/tasks/main.yml +++ b/ansible/roles/doca/tasks/main.yml @@ -1 +1,2 @@ -- include_tasks: install.yml +--- +- ansible.builtin.include_tasks: install.yml diff --git a/ansible/roles/eessi/README.md b/ansible/roles/eessi/README.md index d48e009..df9e835 100644 --- a/ansible/roles/eessi/README.md +++ b/ansible/roles/eessi/README.md @@ -1,26 +1,23 @@ -EESSI -===== +# EESSI Configure the EESSI pilot respository for use on given hosts. -Requirements ------------- +## Requirements None. -Role Variables --------------- +## Role Variables - `cvmfs_quota_limit_mb`: Optional int. Maximum size of local package cache on each node in MB. -- `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings see [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for list of options. Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`). These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`. +- `cvmfs_config_overrides`: Optional dict. Set of key-value pairs for additional CernVM-FS settings see [official docs](https://cvmfs.readthedocs.io/en/stable/cpt-configure.html) for list of options. 
+ Each dict key should correspond to a valid config variable (e.g. `CVMFS_HTTP_PROXY`) and the corresponding dict value will be set as the variable value (e.g. `https://my-proxy.com`). + These configuration parameters will be written to the `/etc/cvmfs/default.local` config file on each host in the form `KEY=VALUE`. -Dependencies ------------- +## Dependencies None. -Example Playbook ----------------- +## Example Playbook ```yaml - name: Setup EESSI diff --git a/ansible/roles/eessi/defaults/main.yaml b/ansible/roles/eessi/defaults/main.yaml index 60e61f1..581c24f 100644 --- a/ansible/roles/eessi/defaults/main.yaml +++ b/ansible/roles/eessi/defaults/main.yaml @@ -7,7 +7,6 @@ cvmfs_config_default: CVMFS_QUOTA_LIMIT: "{{ cvmfs_quota_limit_mb }}" cvmfs_config_overrides: {} - cvmfs_config: "{{ cvmfs_config_default | combine(cvmfs_config_overrides) }}" cvmfs_gpg_checksum: "sha256:4ac81adff957565277cfa6a4a330cdc2ce5a8fdd73b8760d1a5a32bef71c4bd6" diff --git a/ansible/roles/eessi/tasks/configure.yml b/ansible/roles/eessi/tasks/configure.yml index b308376..2c765d2 100644 --- a/ansible/roles/eessi/tasks/configure.yml +++ b/ansible/roles/eessi/tasks/configure.yml @@ -7,10 +7,11 @@ option: "{{ item.key }}" value: "{{ item.value }}" no_extra_spaces: true + mode: "0644" loop: "{{ cvmfs_config | dict2items }}" # NOTE: Not clear how to make this idempotent -- name: Ensure CVMFS config is setup - command: +- name: Ensure CVMFS config is setup # noqa: no-changed-when + ansible.builtin.command: cmd: "cvmfs_config setup" diff --git a/ansible/roles/eessi/tasks/install.yml b/ansible/roles/eessi/tasks/install.yml index a4adb0b..50b939c 100644 --- a/ansible/roles/eessi/tasks/install.yml +++ b/ansible/roles/eessi/tasks/install.yml @@ -1,34 +1,37 @@ --- - name: Download Cern GPG key + # checkov:skip=CKV2_ANSIBLE_2: "Ensure that HTTPS url is used with get_url" ansible.builtin.get_url: url: http://cvmrepo.web.cern.ch/cvmrepo/yum/RPM-GPG-KEY-CernVM dest: ./cvmfs-key.gpg checksum: "{{ cvmfs_gpg_checksum }}" + mode: "0644" -- name: Import downloaded GPG key - command: rpm --import cvmfs-key.gpg - +- name: Import downloaded GPG key # noqa: no-changed-when + ansible.builtin.command: rpm --import cvmfs-key.gpg # noqa: command-instead-of-module - name: Add CVMFS repo - dnf: + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" + ansible.builtin.dnf: name: https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm disable_gpg_check: true - name: Install CVMFS - dnf: + ansible.builtin.dnf: name: cvmfs - name: Install EESSI CVMFS config - dnf: + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" + ansible.builtin.dnf: name: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi-latest.noarch.rpm # NOTE: Can't find any docs on obtaining gpg key - maybe downloading directly from github is ok? 
disable_gpg_check: true # Alternative version using official repo - still no GPG key :( # - name: Add EESSI repo -# dnf: +# ansible.builtin.dnf: # name: http://repo.eessi-infra.org/eessi/rhel/8/noarch/eessi-release-0-1.noarch.rpm # - name: Install EESSI CVMFS config -# dnf: +# ansible.builtin.dnf: # name: cvmfs-config-eessi diff --git a/ansible/roles/eessi/tasks/main.yml b/ansible/roles/eessi/tasks/main.yml index 79d326c..e5e0787 100644 --- a/ansible/roles/eessi/tasks/main.yml +++ b/ansible/roles/eessi/tasks/main.yml @@ -1,4 +1,4 @@ --- -- include_tasks: install.yml -- include_tasks: configure.yml +- ansible.builtin.include_tasks: install.yml +- ansible.builtin.include_tasks: configure.yml diff --git a/ansible/roles/etc_hosts/README.md b/ansible/roles/etc_hosts/README.md index 0ad9568..8c1c422 100644 --- a/ansible/roles/etc_hosts/README.md +++ b/ansible/roles/etc_hosts/README.md @@ -3,11 +3,12 @@ Hosts in the `etc_hosts` groups have `/etc/hosts` created with entries of the format `IP_address canonical_hostname [alias]`. By default, an entry is created for each host in this group as follows: + - The value of `ansible_host` is used as the IP_address. - If `node_fqdn` is defined then that is used as the canonical hostname and `inventory_hostname` as an alias. Otherwise `inventory_hostname` is used as the canonical hostname. -This may need overriding for multi-homed hosts or hosts with multiple aliases. + This may need overriding for multi-homed hosts or hosts with multiple aliases. -# Variables +## Variables - `etc_hosts_template`: Template file to use. Default is the in-role template. - `etc_hosts_hostvars`: A list of variable names, used (in the order supplied) to create the entry for each host. Default is described above. diff --git a/ansible/roles/etc_hosts/defaults/main.yml b/ansible/roles/etc_hosts/defaults/main.yml index c2ecbca..bf7dbe5 100644 --- a/ansible/roles/etc_hosts/defaults/main.yml +++ b/ansible/roles/etc_hosts/defaults/main.yml @@ -1,3 +1,4 @@ +--- etc_hosts_template: hosts.j2 etc_hosts_hostvars: "{{ ['ansible_host'] + (['node_fqdn'] if node_fqdn is defined else []) + ['inventory_hostname'] }}" -etc_hosts_extra_hosts: '' +etc_hosts_extra_hosts: "" diff --git a/ansible/roles/etc_hosts/tasks/main.yml b/ansible/roles/etc_hosts/tasks/main.yml index 6fdabf5..452b58f 100644 --- a/ansible/roles/etc_hosts/tasks/main.yml +++ b/ansible/roles/etc_hosts/tasks/main.yml @@ -1,8 +1,9 @@ +--- - name: Template out /etc/hosts - template: + ansible.builtin.template: src: "{{ etc_hosts_template }}" dest: /etc/hosts owner: root group: root - mode: 0644 - become: yes + mode: "0644" + become: true diff --git a/ansible/roles/fail2ban/README.md b/ansible/roles/fail2ban/README.md index 0e744fd..dec727e 100644 --- a/ansible/roles/fail2ban/README.md +++ b/ansible/roles/fail2ban/README.md @@ -1,27 +1,23 @@ -fail2ban -========= +# fail2ban Setup fail2ban to protect SSH on a host. Note that no email alerts are set up so logs (at `/var/log/fail2ban.log`) will have to be manually reviewed if required. -Requirements ------------- +## Requirements - An EL8 system. - `firewalld` running. -Role Variables --------------- +## Role Variables + None. -Dependencies ------------- +## Dependencies None. 
-Example Playbook ----------------- +## Example Playbook ```yaml - hosts: fail2ban @@ -34,12 +30,10 @@ Example Playbook name: fail2ban ``` -License -------- +## License Apache v2 -Author Information ------------------- +## Author Information stackhpc.com diff --git a/ansible/roles/fail2ban/handlers/main.yml b/ansible/roles/fail2ban/handlers/main.yml index d578c29..9db9b01 100644 --- a/ansible/roles/fail2ban/handlers/main.yml +++ b/ansible/roles/fail2ban/handlers/main.yml @@ -1,7 +1,6 @@ --- - - name: Restart fail2ban - service: + ansible.builtin.service: name: fail2ban state: restarted enabled: true diff --git a/ansible/roles/fail2ban/meta/main.yml b/ansible/roles/fail2ban/meta/main.yml index 02d6a2f..1005726 100644 --- a/ansible/roles/fail2ban/meta/main.yml +++ b/ansible/roles/fail2ban/meta/main.yml @@ -1,6 +1,8 @@ +--- galaxy_info: author: Steve Brasier company: stackhpc + description: Setup fail2ban to protect SSH on a host # If the issue tracker for your role is not on github, uncomment the # next line and provide a value @@ -15,7 +17,7 @@ galaxy_info: # - CC-BY-4.0 license: Apache-2.0 - min_ansible_version: 2.1 + min_ansible_version: "2.1" # If this a Container Enabled role, provide the minimum Ansible Container version. # min_ansible_container_version: @@ -27,9 +29,9 @@ galaxy_info: # https://galaxy.ansible.com/api/v1/platforms/ # platforms: - - name: EL - versions: - - 8 + - name: EL + versions: + - "8" galaxy_tags: [] # List tags for your role here, one per line. A tag is a keyword that describes diff --git a/ansible/roles/fail2ban/tasks/configure.yml b/ansible/roles/fail2ban/tasks/configure.yml index e4951f7..6bde88a 100644 --- a/ansible/roles/fail2ban/tasks/configure.yml +++ b/ansible/roles/fail2ban/tasks/configure.yml @@ -1,15 +1,16 @@ --- - name: Create config - template: + ansible.builtin.template: dest: /etc/fail2ban/jail.local src: jail.local.j2 + mode: "0644" notify: Restart fail2ban -- name: flush handlers - meta: flush_handlers +- name: Flush handlers + ansible.builtin.meta: flush_handlers - name: Ensure fail2ban running even if no config change - service: + ansible.builtin.service: name: fail2ban state: started enabled: true diff --git a/ansible/roles/fail2ban/tasks/install.yml b/ansible/roles/fail2ban/tasks/install.yml index 65f3bfe..e745a4f 100644 --- a/ansible/roles/fail2ban/tasks/install.yml +++ b/ansible/roles/fail2ban/tasks/install.yml @@ -1,10 +1,10 @@ --- - name: Install EPEL repo - package: + ansible.builtin.package: name: epel-release - name: Install fail2ban packages - package: + ansible.builtin.package: name: - fail2ban-server - fail2ban-firewalld diff --git a/ansible/roles/fail2ban/tasks/main.yml b/ansible/roles/fail2ban/tasks/main.yml index 410e943..8a0a795 100644 --- a/ansible/roles/fail2ban/tasks/main.yml +++ b/ansible/roles/fail2ban/tasks/main.yml @@ -1,4 +1,4 @@ --- -- import_tasks: install.yml -- import_tasks: configure.yml +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/filebeat/defaults/main.yml b/ansible/roles/filebeat/defaults/main.yml index bdd02a2..1701427 100644 --- a/ansible/roles/filebeat/defaults/main.yml +++ b/ansible/roles/filebeat/defaults/main.yml @@ -1,6 +1,6 @@ --- -#filebeat_config_path: undefined # REQUIRED. Path to filebeat.yml configuration file template +# filebeat_config_path: undefined # REQUIRED. 
Path to filebeat.yml configuration file template filebeat_debug: false # Note all the below can only be set/changed using the install.yml task file: diff --git a/ansible/roles/filebeat/handlers/main.yml b/ansible/roles/filebeat/handlers/main.yml index 77b9363..8fa3862 100644 --- a/ansible/roles/filebeat/handlers/main.yml +++ b/ansible/roles/filebeat/handlers/main.yml @@ -1,9 +1,8 @@ --- - - name: Restart filebeat container - systemd: + ansible.builtin.systemd: name: filebeat.service state: restarted - enabled: yes - daemon_reload: yes + enabled: true + daemon_reload: true become: true diff --git a/ansible/roles/filebeat/tasks/install.yml b/ansible/roles/filebeat/tasks/install.yml index 6514e30..74c3b09 100644 --- a/ansible/roles/filebeat/tasks/install.yml +++ b/ansible/roles/filebeat/tasks/install.yml @@ -1,8 +1,9 @@ --- - name: Create systemd unit file - template: + ansible.builtin.template: dest: /etc/systemd/system/filebeat.service src: filebeat.service.j2 + mode: "0644" become: true register: _filebeat_unit @@ -10,9 +11,10 @@ containers.podman.podman_image: name: "docker.elastic.co/beats/filebeat-oss" tag: "{{ filebeat_version }}" + become: true become_user: "{{ filebeat_podman_user }}" -- name: Reload filebeat unit file - command: systemctl daemon-reload - when: _filebeat_unit.changed +- name: Reload filebeat unit file # noqa: no-changed-when + ansible.builtin.command: systemctl daemon-reload # noqa: command-instead-of-module + when: _filebeat_unit.changed # noqa: no-handler become: true diff --git a/ansible/roles/filebeat/tasks/main.yml b/ansible/roles/filebeat/tasks/main.yml index 849683c..7a1e329 100644 --- a/ansible/roles/filebeat/tasks/main.yml +++ b/ansible/roles/filebeat/tasks/main.yml @@ -1,2 +1,3 @@ -- import_tasks: install.yml -- import_tasks: runtime.yml +--- +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: runtime.yml diff --git a/ansible/roles/filebeat/tasks/runtime.yml b/ansible/roles/filebeat/tasks/runtime.yml index 1197450..cc2bd91 100644 --- a/ansible/roles/filebeat/tasks/runtime.yml +++ b/ansible/roles/filebeat/tasks/runtime.yml @@ -1,38 +1,36 @@ --- - - name: Collect usernamespace facts user_namespace_facts: - name: Set facts containing sub-ids - set_fact: + ansible.builtin.set_fact: # filebeat user is 1000 filebeat_host_user_id: "{{ ansible_facts.subuid[filebeat_podman_user]['start'] + 1000 - 1 }}" filebeat_host_group_id: "{{ ansible_facts.subgid[filebeat_podman_user]['start'] + 1000 - 1 }}" - name: Ensure parent directory exists - file: + ansible.builtin.file: state: directory path: "/etc/filebeat" owner: "{{ filebeat_host_user_id }}" group: "{{ filebeat_host_group_id }}" - mode: 0770 + mode: "0770" become: true - name: Template configuration files - template: - src: "{{ filebeat_config_path }}" - dest: /etc/filebeat/filebeat.yml - owner: "{{ filebeat_host_user_id }}" - group: "{{ filebeat_host_group_id }}" - mode: 0600 + ansible.builtin.template: + src: "{{ filebeat_config_path }}" + dest: /etc/filebeat/filebeat.yml + owner: "{{ filebeat_host_user_id }}" + group: "{{ filebeat_host_group_id }}" + mode: "0600" notify: Restart filebeat container become: true - name: Flush handlers - meta: flush_handlers - + ansible.builtin.meta: flush_handlers - name: Ensure filebeat service state - systemd: + ansible.builtin.systemd: name: filebeat.service state: started enabled: true diff --git a/ansible/roles/filebeat/tasks/validate.yml b/ansible/roles/filebeat/tasks/validate.yml index b493620..0787938 100644 --- 
a/ansible/roles/filebeat/tasks/validate.yml +++ b/ansible/roles/filebeat/tasks/validate.yml @@ -1,5 +1,5 @@ --- - name: Assert that filebeat_config_path is defined - assert: - that: filebeat_config_path is defined \ No newline at end of file + ansible.builtin.assert: + that: filebeat_config_path is defined diff --git a/ansible/roles/firewalld/README.md b/ansible/roles/firewalld/README.md index 2d75b6b..280e828 100644 --- a/ansible/roles/firewalld/README.md +++ b/ansible/roles/firewalld/README.md @@ -1,48 +1,44 @@ -Role Name -========= +# Role Name Install and configure the `firewalld` firewall. -Requirements ------------- +## Requirements EL8 host -Role Variables --------------- +## Role Variables - `firewalld_enabled`: Optional. Whether `firewalld` service is enabled (starts at boot). Default `yes`. - `firewalld_state`: Optional. State of `firewalld` service. Default `started`. Other values: `stopped`. - `firewalld_configs`: Optional. List of dicts giving parameters for [ansible.posix.firewalld module](https://docs.ansible.com/ansible/latest/collections/ansible/posix/firewalld_module.html). Default is an empty list. Note that the default configuration for firewalld on Rocky Linux 8.5 is as follows: + ```shell # firewall-offline-cmd --list-all public target: default icmp-block-inversion: no - interfaces: - sources: + interfaces: + sources: services: cockpit dhcpv6-client ssh - ports: - protocols: + ports: + protocols: forward: no masquerade: no - forward-ports: - source-ports: - icmp-blocks: - rich rules: + forward-ports: + source-ports: + icmp-blocks: + rich rules: ``` -Dependencies ------------- +## Dependencies None. -Example Playbook ----------------- +## Example Playbook -``` +```yaml - hosts: firewalld gather_facts: false become: yes @@ -52,12 +48,10 @@ Example Playbook name: firewalld ``` -License -------- +## License BSD -Author Information ------------------- +## Author Information -An optional section for the role authors to include contact information, or a website (HTML is not allowed). +An optional section for the role authors to include contact information, or a site (HTML is not allowed). 
diff --git a/ansible/roles/firewalld/defaults/main.yml b/ansible/roles/firewalld/defaults/main.yml index d2bdac7..2720037 100644 --- a/ansible/roles/firewalld/defaults/main.yml +++ b/ansible/roles/firewalld/defaults/main.yml @@ -1,3 +1,4 @@ -firewalld_enabled: yes +--- +firewalld_enabled: true firewalld_state: started firewalld_configs: [] diff --git a/ansible/roles/firewalld/handlers/main.yml b/ansible/roles/firewalld/handlers/main.yml index c7a008a..0e8c3df 100644 --- a/ansible/roles/firewalld/handlers/main.yml +++ b/ansible/roles/firewalld/handlers/main.yml @@ -1,6 +1,6 @@ --- - name: Restart filewalld - service: + ansible.builtin.service: name: firewalld state: restarted when: firewalld_state != 'stopped' diff --git a/ansible/roles/firewalld/meta/main.yml b/ansible/roles/firewalld/meta/main.yml index c572acc..7e1dddb 100644 --- a/ansible/roles/firewalld/meta/main.yml +++ b/ansible/roles/firewalld/meta/main.yml @@ -1,7 +1,8 @@ +--- galaxy_info: - author: your name - description: your role description - company: your company (optional) + author: StackHPC Ltd + description: Install and configure the `firewalld` firewall + company: StackHPC Ltd # If the issue tracker for your role is not on github, uncomment the # next line and provide a value @@ -14,9 +15,9 @@ galaxy_info: # - GPL-3.0-only # - Apache-2.0 # - CC-BY-4.0 - license: license (GPL-2.0-or-later, MIT, etc) + license: (GPL-2.0-or-later, MIT, etc) - min_ansible_version: 2.1 + min_ansible_version: "2.1" # If this a Container Enabled role, provide the minimum Ansible Container version. # min_ansible_container_version: diff --git a/ansible/roles/firewalld/tasks/install.yml b/ansible/roles/firewalld/tasks/install.yml index 1709cfb..c30c064 100644 --- a/ansible/roles/firewalld/tasks/install.yml +++ b/ansible/roles/firewalld/tasks/install.yml @@ -1,3 +1,4 @@ +--- - name: Install firewalld package - dnf: + ansible.builtin.dnf: name: firewalld diff --git a/ansible/roles/firewalld/tasks/main.yml b/ansible/roles/firewalld/tasks/main.yml index 98a7aa7..7a1e329 100644 --- a/ansible/roles/firewalld/tasks/main.yml +++ b/ansible/roles/firewalld/tasks/main.yml @@ -1,3 +1,3 @@ --- -- import_tasks: install.yml -- import_tasks: runtime.yml +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: runtime.yml diff --git a/ansible/roles/firewalld/tasks/runtime.yml b/ansible/roles/firewalld/tasks/runtime.yml index 2c9ab59..03a5356 100644 --- a/ansible/roles/firewalld/tasks/runtime.yml +++ b/ansible/roles/firewalld/tasks/runtime.yml @@ -1,10 +1,10 @@ -- name: Apply filewalld configs +--- +- name: Apply filewalld configs # noqa: args[module] ansible.posix.firewalld: "{{ item }}" notify: Restart filewalld loop: "{{ firewalld_configs }}" -- meta: flush_handlers - +- ansible.builtin.meta: flush_handlers - name: Ensure filewalld state ansible.builtin.systemd: name: firewalld diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 0fd9c36..4bcf2f6 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -1,15 +1,15 @@ - # freeipa Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy an "in-appliance" FreeIPA server, add hosts to it and manage users in FreeIPA. 
-# FreeIPA Client +## FreeIPA Client + +### FreeIPA Client Usage -## Usage - Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. -- Host names must match the domain name. By default (using the site OpenTofu) hostnames are of the form `nodename.cluster_name.cluster_domain_suffix` where `cluster_name` and `cluster_domain_suffix` are OpenTofu variables. +- Hostnames must match the domain name. By default (using the site OpenTofu) hostnames are of the form `nodename.cluster_name.cluster_domain_suffix` where `cluster_name` and `cluster_domain_suffix` are OpenTofu variables. - Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If DNS servers are not set this is not set from DHCP, then use the `resolv_conf` role to configure this. For example when using the in-appliance FreeIPA development server: - + ```ini # environments//groups ... @@ -21,19 +21,20 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s ```yaml # environments//inventory/group_vars/all/resolv_conf.yml resolv_conf_nameservers: - - "{{ hostvars[groups['freeipa_server'] | first].ansible_host }}" + - "{{ hostvars[groups['freeipa_server'] | first].ansible_host }}" ``` - -- For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrol the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. +- For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). + This password should be set as a hostvar `freeipa_host_password`. + Initial host enrolment will use this OTP to enrol the host. After this it becomes irrelevant so it does not need to be committed to Git. + This approach means the appliance does not require the FreeIPA administrator password. - For development use with the in-appliance FreeIPA server, `freeipa_host_password` will be automatically generated in memory. - The `control` host must define `appliances_state_dir` (on persistent storage). This is used to back-up keytabs to allow FreeIPA clients to automatically re-enrol after e.g. reimaging. Note that: - This is implemented when using the site OpenTofu; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. - Nodes are not re-enroled by a [Slurm-driven reimage](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md) (as that does not run this role). - If both a backed-up keytab and `freeipa_host_password` exist, the former is used. - -## Role Variables for Clients +### Role Variables for Clients - `freeipa_host_password`. Required for initial enrolment only, FreeIPA host password as described above. - `freeipa_setup_dns`: Optional, whether to use the FreeIPA server as the client's nameserver. Defaults to `true` when `freeipa_server` contains a host, otherwise `false`. @@ -41,10 +42,12 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s See also use of `appliances_state_dir` on the control node as described above. 
-# FreeIPA Server +## FreeIPA Server + As noted above this is only intended for development and testing. Note it cannot be run on the `openondemand` node as no other virtual servers must be defined in the Apache configuration. -## Usage +### FreeIPA Server Usage + - Add a single host to the `freeipa_server` group and run (at a minimum) the `ansible/bootstrap.yml` and `ansible/iam.yml` playbooks. - As well as configuring the FreeIPA server, the role will also: - Add ansible hosts in the group `freeipa_client` as FreeIPA hosts. @@ -52,7 +55,7 @@ As noted above this is only intended for development and testing. Note it cannot The FreeIPA GUI will be available on `https:///ipa/ui`. -## Role Variables for Server +### Role Variables for Server These role variables are only required when using `freeipa_server`: @@ -60,10 +63,10 @@ These role variables are only required when using `freeipa_server`: - `freeipa_domain`: Optional, name of domain. Default is lowercased `freeipa_realm`. - `freeipa_ds_password`: Optional, password to be used by the Directory Server for the Directory Manager user (`ipa-server-install --ds-password`). Default is generated in `environments//inventory/group_vars/all/secrets.yml` - `freeipa_admin_password`: Optional, password for the IPA `admin` user. Default is generated as for `freeipa_ds_password`. -- `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. Default `false`. +- `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. Default `false`. - `freeipa_setup_dns`: Optional bool, whether to configure the FreeIPA server as an integrated DNS server and define a zone and records. NB: This also controls whether `freeipa_client` hosts use the `freeipa_server` host for name resolution. Default `true` when `freeipa_server` contains a host. - `freeipa_client_ip`: Optional, IP address of FreeIPA client. Default is `ansible_host`. - `freeipa_users`: A list of dicts defining users to add, with keys/values as for [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html): Note that: - `name`, `givenname` (firstname) and `sn` (surname) are required. - `ipa_host`, `ipa_port`, `ipa_prot`, `ipa_user`, `validate_certs` are automatically provided and cannot be overridden. - - If `password` is set, the value should *not* be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login. `krbpasswordexpiration` does not appear to be able to override this. + - If `password` is set, the value should _not_ be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login. `krbpasswordexpiration` does not appear to be able to override this. diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index f3482a4..364c0dc 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -1,8 +1,9 @@ -#freeipa_realm: +--- +# freeipa_realm: freeipa_domain: "{{ freeipa_realm | lower }}" -#freeipa_ds_password: -#freeipa_admin_password: -#freeipa_server_ip: +# freeipa_ds_password: +# freeipa_admin_password: +# freeipa_server_ip: freeipa_setup_dns: "{{ groups['freeipa_server'] | length > 0 }}" freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! 
# freeipa_host_password: diff --git a/ansible/roles/freeipa/tasks/addhost.yml b/ansible/roles/freeipa/tasks/addhost.yml index 8020f80..f01cba0 100644 --- a/ansible/roles/freeipa/tasks/addhost.yml +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -1,3 +1,4 @@ +--- - name: Get ipa host information # This uses DNS to find the ipa server, which works as this is running on the enrolled ipa server # It doesn't fail even if the host doesn't exist @@ -10,7 +11,7 @@ validate_certs: false delegate_to: "{{ groups['freeipa_server'].0 }}" register: _ipa_host_check - check_mode: yes + check_mode: true changed_when: false - name: Add host to IPA @@ -29,6 +30,6 @@ register: _ipa_host_add - name: Set fact for ipa host password - set_fact: + ansible.builtin.set_fact: freeipa_host_password: "{{ _ipa_host_add.host.randompassword }}" - when: _ipa_host_add.changed + when: _ipa_host_add.changed # noqa: no-handler diff --git a/ansible/roles/freeipa/tasks/backup-keytabs.yml b/ansible/roles/freeipa/tasks/backup-keytabs.yml index 7fc77f9..1de3f7f 100644 --- a/ansible/roles/freeipa/tasks/backup-keytabs.yml +++ b/ansible/roles/freeipa/tasks/backup-keytabs.yml @@ -1,5 +1,6 @@ +--- - name: Retrieve keytabs to localhost - fetch: + ansible.builtin.fetch: src: "{{ _freeipa_keytab_backup_path }}" dest: "{{ appliances_environment_root }}/keytabs/{{ inventory_hostname }}/" flat: true @@ -7,8 +8,9 @@ tags: retrieve - name: Copy keytabs back to control node - copy: + ansible.builtin.copy: src: "{{ appliances_environment_root }}/keytabs/{{ inventory_hostname }}/" dest: "{{ _freeipa_keytab_backup_path | dirname }}" + mode: "0644" delegate_to: "{{ groups['control'].0 }}" tags: deploy diff --git a/ansible/roles/freeipa/tasks/client-install.yml b/ansible/roles/freeipa/tasks/client-install.yml index a164cd2..82f7901 100644 --- a/ansible/roles/freeipa/tasks/client-install.yml +++ b/ansible/roles/freeipa/tasks/client-install.yml @@ -1,4 +1,4 @@ - +--- - name: Install FreeIPA client package - dnf: + ansible.builtin.dnf: name: ipa-client diff --git a/ansible/roles/freeipa/tasks/enrol.yml b/ansible/roles/freeipa/tasks/enrol.yml index 9848f04..19e0ee2 100644 --- a/ansible/roles/freeipa/tasks/enrol.yml +++ b/ansible/roles/freeipa/tasks/enrol.yml @@ -1,14 +1,16 @@ +--- +# yamllint disable-line rule:line-length # based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/assembly_installing-an-idm-client_installing-identity-management - name: Retrieve persisted keytab from previous enrolement - slurp: + ansible.builtin.slurp: src: "{{ _freeipa_keytab_backup_path }}" delegate_to: "{{ groups['control'] | first }}" register: _slurp_persisted_keytab failed_when: false - name: Write persisted keytab from previous enrolment - copy: + ansible.builtin.copy: content: "{{ _slurp_persisted_keytab.content | b64decode }}" dest: /tmp/krb5.keytab owner: root @@ -33,7 +35,7 @@ # 3. New SSH keys are generated # 4. ipaUniqueID is preserved # and ALSO that the keytab is changed! - command: + ansible.builtin.command: cmd: > ipa-client-install --unattended @@ -52,7 +54,7 @@ - name: Enrol with FreeIPA using random password # Note --password is overloaded - it's bulkpassword unless --principal or --force-join is used in which case it's admin password - command: + ansible.builtin.command: cmd: > ipa-client-install --unattended @@ -75,19 +77,19 @@ # This service is installed by nfs-utils, which attempts to start it. # It has ConditionPathExists=/etc/krb5.keytab which fails if host is not enroled. 
# This task avoids a reboot. - systemd: + ansible.builtin.systemd: name: rpc-gssd.service state: started enabled: true - name: Retrieve current keytab - slurp: + ansible.builtin.slurp: src: /etc/krb5.keytab register: _slurp_current_keytab failed_when: false - name: Ensure keytab backup directory exists - file: + ansible.builtin.file: path: "{{ _freeipa_keytab_backup_path | dirname }}" state: directory owner: root @@ -96,7 +98,8 @@ delegate_to: "{{ groups['control'] | first }}" - name: Persist keytab - copy: + ansible.builtin.copy: content: "{{ _slurp_current_keytab.content | b64decode }}" dest: "{{ _freeipa_keytab_backup_path }}" + mode: "0644" delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index e555ebe..b711998 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -1,20 +1,22 @@ +--- +# yamllint disable-line rule:line-length # Based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/preparing-the-system-for-ipa-server-installation_installing-identity-management#host-name-and-dns-requirements-for-ipa_preparing-the-system-for-ipa-server-installation - name: Install freeipa server packages - dnf: - name: '@idm:DL1/dns' + ansible.builtin.dnf: + name: "@idm:DL1/dns" state: present - name: Install ipa server -# TODO: make no-ui-redirect and dns configurable?? -# TODO: set file mask as per docs? Would be hard to cope with failures. Doesn't appear to be necessary actually. - command: + # TODO: make no-ui-redirect and dns configurable?? + # TODO: set file mask as per docs? Would be hard to cope with failures. Doesn't appear to be necessary actually. + ansible.builtin.command: cmd: > ipa-server-install --realm {{ freeipa_realm | quote }} --domain {{ freeipa_domain | lower | quote }} --ds-password {{ freeipa_ds_password | quote }} - --admin-password {{ freeipa_admin_password | quote }} + --admin-password {{ freeipa_admin_password | quote }} --ip-address={{ freeipa_server_ip }} {% if freeipa_setup_dns | bool %}--setup-dns{% endif %} --auto-reverse @@ -32,26 +34,26 @@ - name: Disable redirects to hard-coded domain # see https://pagure.io/freeipa/issue/7479 - replace: + ansible.builtin.replace: path: /etc/httpd/conf.d/ipa-rewrite.conf - regexp: '{{ item.regexp }}' - replace: '{{ item.replace }}' + regexp: "{{ item.regexp }}" + replace: "{{ item.replace }}" loop: # RewriteRule ^/$ https://${FQDN}/ipa/ui [L,NC,R=301] - irrelevant if using --no-ui-redirect - - regexp: '^(RewriteRule \^/\$) (https://.*)(/ipa/ui.*)$' - replace: '\1 \3' + - regexp: "^(RewriteRule \\^/\\$) (https://.*)(/ipa/ui.*)$" + replace: "\\1 \\3" # RewriteRule ^/ipa/(.*) - occurs twice - - regexp: '^(RewriteRule \^\/ipa\/\(.*)$' - replace: '#\1' - - regexp: '^(RewriteCond .*)$' - replace: '#\1' + - regexp: "^(RewriteRule \\^\\/ipa\\/\\(.*)$" + replace: "#\\1" + - regexp: "^(RewriteCond .*)$" + replace: "#\\1" # RewriteRule ^/(.*) https://${FQDN}/$1 [L,R=301] - - regexp: '^(RewriteRule \^/\(\.\*\).*)$' - replace: '#\1' + - regexp: "^(RewriteRule \\^/\\(\\.\\*\\).*)$" + replace: "#\\1" register: _replace_freeipa_rewrites - name: Get freeipa server facts - setup: + ansible.builtin.setup: - name: Fix HTTP_REFERER ansible.builtin.lineinfile: @@ -60,7 +62,7 @@ register: _http_referer - name: Reload apache configuration - service: + ansible.builtin.service: name: httpd state: reloaded when: _replace_freeipa_rewrites.changed or 
_http_referer.changed diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml index bd1caca..97068fa 100644 --- a/ansible/roles/freeipa/tasks/users.yml +++ b/ansible/roles/freeipa/tasks/users.yml @@ -4,12 +4,12 @@ displayname: "{{ item.displayname | default(omit) }}" gidnumber: "{{ item.gidnumber | default(omit) }}" givenname: "{{ item.givenname }}" - #ipa_host + # ipa_host ipa_pass: "{{ freeipa_admin_password | quote }}" - #ipa_port - #ipa_prot + # ipa_port + # ipa_prot ipa_timeout: "{{ item.ipa_timeout | default(omit) }}" - #ipa_user + # ipa_user krbpasswordexpiration: "{{ item.krbpasswordexpiration | default(omit) }}" loginshell: "{{ item.loginshell | default(omit) }}" mail: "{{ item.mail | default(omit) }}" @@ -23,5 +23,5 @@ uidnumber: "{{ item.uidnumber | default(omit) }}" update_password: "{{ item.update_password | default(omit) }}" userauthtype: "{{ item.userauthtype | default(omit) }}" - #validate_certs + # validate_certs loop: "{{ freeipa_users }}" diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index 238f89e..39faba3 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -1,12 +1,13 @@ +--- - name: Get hostname as reported by command - command: hostname + ansible.builtin.command: hostname register: _freeipa_validate_hostname changed_when: false when: "'freeipa_server' in group_names" - name: Ensure hostname is fully-qualified # see section 2.7 of redhat guide to installing identity management - assert: + ansible.builtin.assert: that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 fail_msg: "freeipa_server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" when: "'freeipa_server' in group_names" @@ -14,23 +15,23 @@ - name: Check for virtual servers in httpd configuration of freeipa_server # e.g. 
fatimage with OOD config; community.general.ipa_host fails with "401 Unauthorized: No session cookie found" # https://lists.fedoraproject.org/archives/list/freeipa-users@lists.fedorahosted.org/message/7RH7XDFR35KDPYJ7AQCQI2H2EOWIZCWA/ - find: + ansible.builtin.find: path: /etc/httpd/conf.d/ - contains: '- {{ @@ -25,24 +26,24 @@ # batch takes default '' because last devices doesn't have trailing blank line - name: Examine whether device address contains gateway_ip - set_fact: + ansible.builtin.set_fact: device_is_gateway_device: "{{ nmcli_devices | map(attribute='ip4_address') | map('ansible.utils.network_in_network', gateway_ip) }}" # list of bools - false if gateway_ip == '' - name: Get name of connection containing gateway_ip # might be empty string - set_fact: + ansible.builtin.set_fact: gateway_ip_connection: >- {{ nmcli_devices | map(attribute='connection') | - zip(device_is_gateway_device) | selectattr('1') | + zip(device_is_gateway_device) | selectattr('1') | map(attribute=0) | list | first | default ('') }} - name: Show debug info - debug: + ansible.builtin.debug: msg: "gateway_ip={{ gateway_ip }} access_ip={{ access_ip }} gateway_ip_connection={{ gateway_ip_connection }}" - name: Error if device has a gateway which is not the desired one - assert: + ansible.builtin.assert: that: item.gateway == gateway_ip fail_msg: "Device {{ item | to_nice_json }} has gateway: cannot apply gateway {{ gateway_ip }}" when: @@ -51,8 +52,8 @@ - item.ip4_gateway != gateway_ip loop: "{{ nmcli_devices }}" - - name: Remove undesired gateways - shell: | + - name: Remove undesired gateways # noqa: no-changed-when + ansible.builtin.shell: | nmcli connection modify '{{ item.connection }}' \ ipv4.never-default yes \ ipv6.never-default yes @@ -62,9 +63,9 @@ - item.ip4_gateway != '' - item.connection != gateway_ip_connection loop: "{{ nmcli_devices }}" - - - name: Add desired gateways - shell: | + + - name: Add desired gateways # noqa: no-changed-when + ansible.builtin.shell: | nmcli connection modify '{{ item.connection }}' \ ipv4.address {{ item.ip4_address }} \ ipv4.gateway {{ gateway_ip }} diff --git a/ansible/roles/gateway/tasks/main.yml b/ansible/roles/gateway/tasks/main.yml index c13ba5c..82b481a 100644 --- a/ansible/roles/gateway/tasks/main.yml +++ b/ansible/roles/gateway/tasks/main.yml @@ -1,7 +1,8 @@ +--- - name: Add gateway playbook - copy: + ansible.builtin.copy: src: gateway-init.yml dest: /etc/ansible-init/playbooks/05-gateway-init.yml owner: root group: root - mode: 0644 + mode: "0644" diff --git a/ansible/roles/grafana-dashboards/files/openhpc-slurm.json b/ansible/roles/grafana-dashboards/files/openhpc-slurm.json index fb4078c..4cc5a46 100644 --- a/ansible/roles/grafana-dashboards/files/openhpc-slurm.json +++ b/ansible/roles/grafana-dashboards/files/openhpc-slurm.json @@ -2072,4 +2072,4 @@ "title": "OpenHPC Slurm", "uid": "openhpc-slurm", "version": 2 -} \ No newline at end of file +} diff --git a/ansible/roles/grafana-dashboards/tasks/main.yml b/ansible/roles/grafana-dashboards/tasks/main.yml index 235088f..2292dac 100644 --- a/ansible/roles/grafana-dashboards/tasks/main.yml +++ b/ansible/roles/grafana-dashboards/tasks/main.yml @@ -25,7 +25,7 @@ - become: false block: - name: Create local grafana dashboard directory - tempfile: + ansible.builtin.tempfile: state: directory register: _tmp_dashboards changed_when: false @@ -52,10 +52,11 @@ tags: - skip_ansible_lint - - name: copy in-role grafana dashboards + - name: Copy in-role grafana dashboards ansible.builtin.copy: src: "{{ item.dashboard_file 
}}" dest: "{{ _tmp_dashboards.path }}" + mode: "0644" loop: "{{ grafana_dashboards }}" when: - grafana_dashboards | length > 0 @@ -109,7 +110,7 @@ - name: Create/Update dashboards file (provisioning) become: true - copy: + ansible.builtin.copy: dest: "/etc/grafana/provisioning/dashboards/ansible.yml" content: | apiVersion: 1 @@ -123,12 +124,12 @@ backup: false owner: root group: grafana - mode: 0640 + mode: "0640" notify: restart grafana - name: Register preexisting dashboards become: true - find: + ansible.builtin.find: paths: "{{ grafana_data_dir }}/dashboards" hidden: true patterns: @@ -137,15 +138,17 @@ - name: Import grafana dashboards become: true - copy: - remote_src: yes + ansible.builtin.copy: + remote_src: true src: "{{ _tmp_dashboards.path }}/" # Note trailing / to only copy contents, not directory itself dest: "{{ grafana_data_dir }}/dashboards/" + directory_mode: "0755" + mode: "0644" notify: "provisioned dashboards changed" - name: Register all installed dashboards become: true - find: + ansible.builtin.find: paths: "{{ grafana_data_dir }}/dashboards" hidden: true patterns: @@ -153,13 +156,13 @@ register: _dashboards_post - name: Get dashboard lists - set_fact: - _dashboards_pre_list: "{{ _dashboards_pre | json_query('files[*].path') | default([]) }}" + ansible.builtin.set_fact: + _dashboards_pre_list: "{{ _dashboards_pre | json_query('files[*].path') | default([]) }}" _dashboards_post_list: "{{ _dashboards_post | json_query('files[*].path') | default([]) }}" - name: Remove installed dashboards not defined through this role become: true - file: + ansible.builtin.file: path: "{{ item }}" state: absent with_items: "{{ _dashboards_pre_list | difference( _dashboards_post_list ) }}" diff --git a/ansible/roles/hpctests/README.md b/ansible/roles/hpctests/README.md index 2cb9b76..ed3d64e 100644 --- a/ansible/roles/hpctests/README.md +++ b/ansible/roles/hpctests/README.md @@ -1,53 +1,55 @@ -hpctests -========= +# hpctests An MPI-based test suite for Slurm appliance clusters. -This is intended as a replacement for [this test role](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools/tree/main/roles/test/) but will be safe to run on clusters in production use as it does not use NFS exports for package installs. Instead it assumes the required packages are pre-installed, which is the case by default with this appliance. +This is intended as a replacement for [this test role](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools/tree/main/roles/test/) but will be safe to run on clusters in production use as it does not use NFS exports for package installs. Instead it assumes the required packages are pre-installed, which is the case by default with this appliance. Tests (with corresponding tags) are: + - `pingpong`: Runs Intel MPI Benchmark's IMB-MPI1 pingpong between a pair of (scheduler-selected) nodes. Reports zero-size message latency and maximum bandwidth. - `pingmatrix`: Runs a similar pingpong test but between all pairs of nodes. Reports zero-size message latency & maximum bandwidth. - `hpl-solo`: Runs the HPL benchmark individually on all nodes. Reports Gflops. All tests use GCC 9 and OpenMPI 4 with UCX. The HPL-based tests use OpenBLAS. -Requirements ------------- +## Requirements - An OpenHPC v2.x cluster. 
- The following OpenHPC packages installed (note this is the default in the appliance, see `environments/common/inventory/group_vars/all/openhpc.yml:openhpc_default_packages`): - `ohpc-gnu9-openmpi4-perf-tools` - `openblas-gnu9-ohpc` -Role Variables --------------- +## Role Variables + - `hpctests_user`: Optional. User to run jobs as. Default is `ansible_user`. - `hpctests_rootdir`: Optional. Path to root of test directory tree. This must be a r/w filesystem shared to all cluster nodes under test. Default is `/home/{{ hpctests_user }}/hpctests`. **NB:** Do not use `~` in this path. - `hpctests_partition`: Optional. Name of partition to use, otherwise default partition is used. - `hpctests_nodes`: Optional. A Slurm node expression, e.g. `'compute-[0-15,19]'` defining the nodes to use. If not set all nodes in the selected partition are used. -- `hpctests_ucx_net_devices`: Optional. Control which network device/interface to use, e.g. `mlx5_1:0`. The default of `all` (as per UCX) may not be appropriate for multi-rail nodes with different bandwidths on each device. See [here](https://openucx.readthedocs.io/en/master/faq.html#what-is-the-default-behavior-in-a-multi-rail-environment) and [here](https://github.com/openucx/ucx/wiki/UCX-environment-parameters#setting-the-devices-to-use). Alternatively a mapping of partition name (as `hpctests_partition`) to device/interface can be used. For partitions not defined in the mapping the default of `all` is used. +- `hpctests_ucx_net_devices`: Optional. Control which network device/interface to use, e.g. `mlx5_1:0`. + The default of `all` (as per UCX) may not be appropriate for multi-rail nodes with different bandwidths on each device. See [here](https://openucx.readthedocs.io/en/master/faq.html#what-is-the-default-behavior-in-a-multi-rail-environment) and [here](https://github.com/openucx/ucx/wiki/UCX-environment-parameters#setting-the-devices-to-use). + Alternatively a mapping of partition name (as `hpctests_partition`) to device/interface can be used. For partitions not defined in the mapping the default of `all` is used. - `hpctests_outdir`: Optional. Directory to use for test output on local host. Defaults to `$HOME/hpctests` (for local user). - `hpctests_hpl_NB`: Optional, default 192. The HPL block size "NB" - for Intel CPUs see [here](https://software.intel.com/content/www/us/en/develop/documentation/onemkl-linux-developer-guide/top/intel-oneapi-math-kernel-library-benchmarks/intel-distribution-for-linpack-benchmark/configuring-parameters.html). - `hpctests_hpl_mem_frac`: Optional, default 0.3. The HPL problem size "N" will - be selected to target using this fraction of each node's memory - - **CAUTION: see note below**. + be selected to target using this fraction of each node's memory - + **CAUTION: see note below**. - `hpctests_hpl_arch`: Optional, default 'linux64'. Arbitrary architecture name for HPL build. HPL is compiled on the first compute node of those selected (see `hpctests_nodes`), so this can be used to create different builds for different types of compute node. - --- + **CAUTION** > The default of `hpctests_hpl_mem_frac=0.3` will not significantly load nodes. -Values up to ~0.8 may be appropriate for a stress test but ensure cloud -operators are aware in case this overloads e.g. power supplies or cooling. -Values > 0.8 require longer runtimes and increase the risk of out-of-memory -errors without normally significantly increasing the stress on the node. 
---- +> Values up to ~0.8 may be appropriate for a stress test but ensure cloud +> operators are aware in case this overloads e.g. power supplies or cooling. +> Values > 0.8 require longer runtimes and increase the risk of out-of-memory + +## errors without normally significantly increasing the stress on the node The following variables should not generally be changed: + - `hpctests_pre_cmd`: Optional. Command(s) to include in sbatch templates before module load commands. - `hpctests_pingmatrix_modules`: Optional. List of modules to load for pingmatrix test. Defaults are suitable for OpenHPC 2.x cluster using the required packages. - `hpctests_pingpong_modules`: As above but for pingpong test. @@ -55,13 +57,11 @@ The following variables should not generally be changed: - `hpctests_hpl_modules`: As above but for hpl tests. - `hpctests_hpl_version`: Version of HPL -Dependencies ------------- +## Dependencies None. -Example Playbook ----------------- +## Example Playbook The role should be run on a login node; @@ -76,12 +76,10 @@ The role should be run on a login node; name: hpctests ``` -License -------- +## License Apache v2 -Author Information ------------------- +## Author Information stackhpc.com diff --git a/ansible/roles/hpctests/defaults/main.yml b/ansible/roles/hpctests/defaults/main.yml index e514de5..fa1c3c2 100644 --- a/ansible/roles/hpctests/defaults/main.yml +++ b/ansible/roles/hpctests/defaults/main.yml @@ -2,16 +2,24 @@ hpctests_user: "{{ ansible_user }}" hpctests_group: "{{ hpctests_user }}" hpctests_rootdir: "/home/{{ hpctests_user }}/hpctests" -hpctests_pre_cmd: '' -hpctests_pingmatrix_modules: [gnu12 openmpi4] -hpctests_pingpong_modules: [gnu12 openmpi4 imb] -hpctests_pingpong_plot: yes -hpctests_hpl_modules: [gnu12 openmpi4 openblas] +hpctests_pre_cmd: "" +hpctests_pingmatrix_modules: + - gnu12 + - openmpi4 +hpctests_pingpong_modules: + - gnu12 + - openmpi4 + - imb +hpctests_pingpong_plot: true +hpctests_hpl_modules: + - gnu12 + - openmpi4 + - openblas hpctests_outdir: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/hpctests" hpctests_ucx_net_devices: all hpctests_hpl_version: "2.3" hpctests_hpl_NB: 192 hpctests_hpl_mem_frac: 0.3 hpctests_hpl_arch: linux64 -#hpctests_nodes: -#hpctests_partition: +# hpctests_nodes: +# hpctests_partition: diff --git a/ansible/roles/hpctests/files/.clang-format-ignore b/ansible/roles/hpctests/files/.clang-format-ignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/ansible/roles/hpctests/files/.clang-format-ignore @@ -0,0 +1 @@ +* diff --git a/ansible/roles/hpctests/files/CPPLINT.cfg b/ansible/roles/hpctests/files/CPPLINT.cfg new file mode 100644 index 0000000..88e41cd --- /dev/null +++ b/ansible/roles/hpctests/files/CPPLINT.cfg @@ -0,0 +1 @@ +exclude_files=.*.c diff --git a/ansible/roles/hpctests/files/plot_imb_pingpong.py b/ansible/roles/hpctests/files/plot_imb_pingpong.py index dbf6398..eb15c4f 100644 --- a/ansible/roles/hpctests/files/plot_imb_pingpong.py +++ b/ansible/roles/hpctests/files/plot_imb_pingpong.py @@ -1,55 +1,76 @@ -import matplotlib as mpl -import matplotlib.pyplot as plt -from matplotlib import ticker -import numpy as np +# pylint: disable=missing-module-docstring import os -def sizeof_fmt(num, suffix='B'): - """ TODO: """ +import matplotlib.pyplot as plt # pylint: disable=import-error +from matplotlib import ticker # pylint: disable=import-error + + +def sizeof_fmt(num, suffix="B"): + """TODO:""" # from https://stackoverflow.com/a/1094933/916373 - for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: + 
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: if abs(num) < 1024.0: - return "%3.1f%s%s" % (num, unit, suffix) + # pylint: disable-next=consider-using-f-string + return "%3.1f%s%s" % ( + num, + unit, + suffix, + ) num /= 1024.0 - return "%.1f%s%s" % (num, 'Yi', suffix) + return "%.1f%s%s" % (num, "Yi", suffix) # pylint: disable=consider-using-f-string + def read_imb_out(path): - """ Read stdout from an IMB-MPI1 run. - - Returns a dict with: - key:= int, total number of processes involved - value:= pandas dataframe, i.e. one per results table. Columns as per table. - - If multiple results tables are present it is assumed that they are all the same benchmark, - and only differ in the number of processes. + """Read stdout from an IMB-MPI1 run. + + Returns a dict with: + key:= int, total number of processes involved + value:= pandas dataframe, i.e. one per results table. Columns as per table. + + If multiple results tables are present it is assumed that they are all the same benchmark, + and only differ in the number of processes. """ data = {} - COLTYPES = { # all benchmark names here should be lowercase - 'uniband': (int, int, float, int), # #bytes #repetitions Mbytes/sec Msg/sec - 'biband': (int, int, float, int), - 'pingpong':(int, int, float, float), # #bytes #repetitions t[usec] Mbytes/sec - 'alltoall':(int, int, float, float, float) # #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + COLTYPES = { # all benchmark names here should be lowercase # pylint: disable=invalid-name + # #bytes #repetitions Mbytes/sec Msg/sec + "uniband": (int, int, float, int), + "biband": (int, int, float, int), + # #bytes #repetitions t[usec] Mbytes/sec + "pingpong": (int, int, float, float), + "alltoall": ( + int, + int, + float, + float, + float, + ), # #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] } - with open(path) as f: + with open(path) as f: # pylint: disable=unspecified-encoding for line in f: - if line.startswith('# Benchmarking '): + if line.startswith("# Benchmarking "): benchmark = line.split()[-1].lower() if benchmark not in COLTYPES: - raise ValueError('Do not know how to read %r benchmark in %s' % (benchmark, path)) + raise ValueError( + "Do not know how to read %r benchmark in %s" # pylint: disable=consider-using-f-string + % (benchmark, path) + ) converters = COLTYPES[benchmark] line = next(f) - if not line.startswith('# #processes = '): - raise ValueError('expected %s, got %s' % (expect, nprocs_line)) - n_procs = int(line.split('=')[-1].strip()) - while line.startswith('#'): - line = next(f) # may or may not include line "# .. additional processes waiting in MPI_Barrier", plus other # lines + expected = "# #processes = " + if not line.startswith(expected): + raise ValueError(f"expected {expected}, got {line}") + n_procs = int(line.split("=")[-1].strip()) + while line.startswith("#"): + # may or may not include line "# .. 
additional processes + # waiting in MPI_Barrier", plus other # lines + line = next(f) rows = [] while True: line = next(f).strip() - if line == '': + if line == "": break rows.append([f(v) for (f, v) in zip(converters, line.split())]) # turn data around: @@ -60,26 +81,30 @@ def read_imb_out(path): data[n_procs] = cols return data -if __name__ == '__main__': + +if __name__ == "__main__": import sys + d = read_imb_out(sys.argv[1]) if len(d) > 1: - raise ValueError('Found > 1 benchmark in', sys.argv[1]) + raise ValueError("Found > 1 benchmark in", sys.argv[1]) outdir = os.path.dirname(sys.argv[1]) for n, df in d.items(): fig, ax1 = plt.subplots() ax2 = ax1.twinx() - ax1.plot(df[0], df[2], label='latency', color='b') - ax2.plot(df[0], df[3], label='bandwidth', color='r') - ax1.set_xscale('log', base=2) - ax1.set_yscale('log', base=10) - ax1.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: sizeof_fmt(x))) + ax1.plot(df[0], df[2], label="latency", color="b") + ax2.plot(df[0], df[3], label="bandwidth", color="r") + ax1.set_xscale("log", base=2) + ax1.set_yscale("log", base=10) + ax1.xaxis.set_major_formatter( + ticker.FuncFormatter(lambda x, pos: sizeof_fmt(x)) + ) ax1.grid(True, which="both") - ax1.set_xlabel('#bytes') - ax1.set_ylabel('latency ($\mu$s)', color='b') - ax2.set_ylabel('bandwidth (Mbytes/sec)', color='r') - fig.legend(loc='upper left') + ax1.set_xlabel("#bytes") + ax1.set_ylabel("latency ($\\mu$s)", color="b") + ax2.set_ylabel("bandwidth (Mbytes/sec)", color="r") + fig.legend(loc="upper left") plt.tight_layout() - figpath = os.path.join(outdir, 'pingpong.png') + figpath = os.path.join(outdir, "pingpong.png") plt.savefig(figpath) print(figpath) diff --git a/ansible/roles/hpctests/library/hpl_pq.py b/ansible/roles/hpctests/library/hpl_pq.py index 96eff80..0e017a6 100644 --- a/ansible/roles/hpctests/library/hpl_pq.py +++ b/ansible/roles/hpctests/library/hpl_pq.py @@ -1,11 +1,12 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # -*- coding: utf-8 -*- # Copyright: (c) 2020, StackHPC # Apache 2 License -from ansible.module_utils.basic import AnsibleModule -import json + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error ANSIBLE_METADATA = { "metadata_version": "0.1", @@ -18,8 +19,9 @@ module: hpl_pq short_description: Calculate P and Q values for HPL. version_added: "0.0" -description: - - "Takes number of processes and returns a dict with keys 'P' and 'Q' giving appropriate values, i.e. with Q equal or slightly larger than P and P * Q == num_processes." +description: > + Takes number of processes and returns a dict with keys 'P' and 'Q' giving appropriate values, + i.e. with Q equal or slightly larger than P and P * Q == num_processes. options: num_processes: description: @@ -36,33 +38,39 @@ TODO """ + def factors(n): - """ Return a sequence of (a, b) tuples where a < b giving factors of n. - - Based on https://stackoverflow.com/a/6909532/916373 + """Return a sequence of (a, b) tuples where a < b giving factors of n. 
+ + Based on https://stackoverflow.com/a/6909532/916373 """ - return [(i, n//i) for i in range(1, int(n**0.5) + 1) if n % i == 0] + return [(i, n // i) for i in range(1, int(n**0.5) + 1) if n % i == 0] -def run_module(): - module_args = dict( - num_processes=dict(type="int", required=True), - ) + +def run_module(): # pylint: disable=missing-function-docstring + module_args = { + "num_processes": { + "type": "int", + "required": True, + }, + } module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) result = {"changed": False} if module.check_mode: module.exit_json(**result) - + num_processes = module.params["num_processes"] f = factors(num_processes) - p, q = f[-1] # nearest to square + p, q = f[-1] # nearest to square - result['grid'] = {'P':p, 'Q': q} + result["grid"] = {"P": p, "Q": q} module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() + if __name__ == "__main__": main() diff --git a/ansible/roles/hpctests/library/plot_nxnlatbw.py b/ansible/roles/hpctests/library/plot_nxnlatbw.py index 0193b69..b1a9810 100644 --- a/ansible/roles/hpctests/library/plot_nxnlatbw.py +++ b/ansible/roles/hpctests/library/plot_nxnlatbw.py @@ -1,11 +1,13 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # -*- coding: utf-8 -*- # Copyright: (c) 2020, StackHPC # Apache 2 License -from ansible.module_utils.basic import AnsibleModule -import json, os +import os + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error ANSIBLE_METADATA = { "metadata_version": "0.1", @@ -18,8 +20,10 @@ module: plot_nxnlatbw short_description: Read nxnlatbw output, report statistics and tabulate latencies version_added: "0.0" -description: - - "Reads output from running the nxnlatbw ping matrix. Return value includes a 'stats' key with min/max latency and bandwidth values. Generates an html table of pairwise latencies, coloured by value." +description: > + Reads output from running the nxnlatbw ping matrix. + Return value includes a 'stats' key with min/max latency and bandwidth values. + Generates an html table of pairwise latencies, coloured by value. options: src: description: @@ -32,8 +36,9 @@ required: true type: str nodes: - description: - - Comma-separated list of nodenames to label RANKS with - NB this should be provided in the same order as ranks + description: > + Comma-separated list of nodenames to label RANKS with - + NB this should be provided in the same order as ranks requirements: - "python >= 3.6" author: @@ -64,119 +69,179 @@ """ -def html_rows(rankAs, rankBs, nodes, data): - """ Create an HTML-format fragment defining table rows. - Args: - rankAs, rankBs: lists of ranks - nodes: list of nodenames in rank order - data: dict with keys (rankA, rankB) +def html_rows( + rankAs, rankBs, nodes, data +): # pylint: disable=invalid-name # pylint: disable=invalid-name + """Create an HTML-format fragment defining table rows. - Returns a string. + Args: + rankAs, rankBs: lists of ranks + nodes: list of nodenames in rank order + data: dict with keys (rankA, rankB) + + Returns a string. 
""" - + minv = min(data.values()) maxv = max(data.values()) rows = [] - for rankA in rankAs: # row + for rankA in rankAs: # row # pylint: disable=invalid-name if nodes: - outrow = ['%s [%s]' % (nodes[rankA], rankA)] + outrow = [ + # pylint: disable-next=consider-using-f-string + "%s [%s]" + % (nodes[rankA], rankA) + ] else: - outrow = ['%s' % rankA] - for rankB in rankBs: + outrow = [ + # pylint: disable-next=consider-using-f-string + "%s" + % rankA + ] + for rankB in rankBs: # pylint: disable=invalid-name val = data.get((rankA, rankB)) if val is not None: try: - lightness = 50 + (50 - 50 * ((val - minv) / (maxv - minv))) # want value in range LOW = 100 (white) -> HIGH 50(red) - except ZeroDivisionError: # no min-max spread + lightness = 50 + ( + 50 - 50 * ((val - minv) / (maxv - minv)) + ) # want value in range LOW = 100 (white) -> HIGH 50(red) + except ZeroDivisionError: # no min-max spread lightness = 100 - outrow += ['%.1f' % (lightness, val)] + outrow += [ + # pylint: disable-next=consider-using-f-string + '%.1f' + % (lightness, val) + ] else: - outrow += ['-'] - outrow += [''] - rows.append(' '.join(outrow)) - return '\n'.join(rows) - - -def run_module(): - module_args = dict( - src=dict(type="str", required=True), - dest=dict(type="str", required=True), - nodes=dict(type="str", required=False, default=None) - ) + outrow += ["-"] + outrow += [""] + rows.append(" ".join(outrow)) + return "\n".join(rows) + + +def run_module(): # pylint: disable=missing-function-docstring, too-many-locals + module_args = { + "src": { + "type": "str", + "required": True, + }, + "dest": { + "type": "str", + "required": True, + }, + "nodes": { + "type": "str", + "required": False, + "default": None, + }, + } module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) result = {"changed": False} - + src = os.path.expanduser(module.params["src"]) dest = os.path.expanduser(module.params["dest"]) nodes = module.params["nodes"] if nodes is not None: - nodes = nodes.split(',') - + nodes = nodes.split(",") + if module.check_mode: module.exit_json(**result) - # read latencies/bandwidths: + # read latencies/bandwidths: latencies = {} bandwidths = {} - with open(src) as nxn_f: + with open(src) as nxn_f: # pylint: disable=unspecified-encoding for ln, line in enumerate(nxn_f): - vals = line.split(',') - if vals[0] == 'src': + vals = line.split(",") + if vals[0] == "src": continue if len(vals) != 4: - print('warning: skipping line %i (%i values)' % (ln, len(vals))) + print( + # pylint: disable-next=consider-using-f-string + "warning: skipping line %i (%i values)" + % (ln, len(vals)) + ) continue + # pylint: disable=invalid-name try: - rankA, rankB, lat, bw = int(vals[0]), int(vals[1]), float(vals[2]), float(vals[3]) + ( + rankA, + rankB, + lat, + bw, + ) = ( + int(vals[0]), + int(vals[1]), + float(vals[2]), + float(vals[3]), + ) except ValueError: - print('warning: skipping line %i (%s) - parse failure' % (ln, line)) + print(f"warning: skipping line {ln} ({line}) - parse failure") continue latencies[rankA, rankB] = lat bandwidths[rankA, rankB] = bw - + # pylint: enable=invalid-name + # get list of node IDs: - rankAs = sorted(set(k[0] for k in latencies)) - rankBs = sorted(set(k[1] for k in latencies)) + rankAs = sorted(set(k[0] for k in latencies)) # pylint: disable=invalid-name + rankBs = sorted(set(k[1] for k in latencies)) # pylint: disable=invalid-name if rankAs != rankBs: module.fail_json("Ranks extracted from result columns differed", **result) if nodes and len(nodes) != len(rankAs): - 
module.fail_json("Results contained %i ranks but %i node names provided" % (len(rankAs), len(nodes)), **result) + module.fail_json( + "Results contained %i ranks but %i node names provided" # pylint: disable=consider-using-f-string + % (len(rankAs), len(nodes)), + **result, + ) # find min values: min_lat = min(latencies.values()) max_lat = max(latencies.values()) min_bw = min(bandwidths.values()) max_bw = max(bandwidths.values()) - + # create HTML fragments: - ranks = ' '.join('%s' % rankB for rankB in rankBs) + ranks = " ".join( + # pylint: disable-next=consider-using-f-string + "%s" % rankB + for rankB in rankBs + ) lat_rows = html_rows(rankAs, rankBs, nodes, latencies) bw_rows = html_rows(rankAs, rankBs, nodes, bandwidths) - page = HTML_TEMPLATE.format(min_lat=min_lat, max_lat=max_lat, min_bw=min_bw, max_bw=max_bw, ranks=ranks, lat_rows=lat_rows, bw_rows=bw_rows) + page = HTML_TEMPLATE.format( + min_lat=min_lat, + max_lat=max_lat, + min_bw=min_bw, + max_bw=max_bw, + ranks=ranks, + lat_rows=lat_rows, + bw_rows=bw_rows, + ) - with open(dest, 'w') as outf: + with open(dest, "w") as outf: # pylint: disable=unspecified-encoding outf.write(page) - result['changed'] = True - result['stats'] = { - 'min_latency (us)': min_lat, - 'max_latency (us)': max_lat, - 'min_bandwidth (MB/s)': min_bw, - 'max_bandwidth (MB/s)': max_bw, - 'min_bandwidth (Gbit/s)': min_bw / 125.0, - 'max_bandwidth (Gbit/s)': max_bw / 125.0, + result["changed"] = True + result["stats"] = { + "min_latency (us)": min_lat, + "max_latency (us)": max_lat, + "min_bandwidth (MB/s)": min_bw, + "max_bandwidth (MB/s)": max_bw, + "min_bandwidth (Gbit/s)": min_bw / 125.0, + "max_bandwidth (Gbit/s)": max_bw / 125.0, } module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() + if __name__ == "__main__": main() diff --git a/ansible/roles/hpctests/library/read_imb_pingpong.py b/ansible/roles/hpctests/library/read_imb_pingpong.py index fb52ef4..808b6bb 100644 --- a/ansible/roles/hpctests/library/read_imb_pingpong.py +++ b/ansible/roles/hpctests/library/read_imb_pingpong.py @@ -1,11 +1,12 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # -*- coding: utf-8 -*- # Copyright: (c) 2020, StackHPC # Apache 2 License -from ansible.module_utils.basic import AnsibleModule -import json + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error ANSIBLE_METADATA = { "metadata_version": "0.1", @@ -39,42 +40,47 @@ """ CONVERTERS = (int, int, float, float) -COLUMNS = ('bytes', 'repetitions', 'latency', 'bandwidth') +COLUMNS = ("bytes", "repetitions", "latency", "bandwidth") + -def run_module(): - module_args = dict( - path=dict(type="str", required=True), - ) +def run_module(): # pylint: disable=missing-function-docstring + module_args = { + "path": { + "type": "str", + "required": True, + }, + } module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) result = {"changed": False} - + path = module.params["path"] if module.check_mode: module.exit_json(**result) columns = ([], [], [], []) - with open(path) as f: + with open(path) as f: # pylint: disable=unspecified-encoding for line in f: - if line == ' #bytes #repetitions t[usec] Mbytes/sec\n': + if line == " #bytes #repetitions t[usec] Mbytes/sec\n": while True: line = next(f).strip() - if line == '': + if line == "": break for ix, v in enumerate(line.split()): columns[ix].append(CONVERTERS[ix](v)) - - result['columns'] = { - 'bytes': columns[0], - 'repetitions': columns[1], - 'latency': 
columns[2], - 'bandwidth': columns[3], + + result["columns"] = { + "bytes": columns[0], + "repetitions": columns[1], + "latency": columns[2], + "bandwidth": columns[3], } module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/ansible/roles/hpctests/library/slurm_node_info.py b/ansible/roles/hpctests/library/slurm_node_info.py index 52e6800..dd3e0b3 100644 --- a/ansible/roles/hpctests/library/slurm_node_info.py +++ b/ansible/roles/hpctests/library/slurm_node_info.py @@ -1,11 +1,12 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # -*- coding: utf-8 -*- # Copyright: (c) 2020, StackHPC # Apache 2 License -from ansible.module_utils.basic import AnsibleModule -import json + +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error ANSIBLE_METADATA = { "metadata_version": "0.1", @@ -18,8 +19,10 @@ module: slurm_node_info short_description: Get information about Slurm nodes version_added: "0.0" -description: - - "Gets all the available information from Slurm's `sinfo` command about specified nodes. The returned `info` property is a dict with keys from sinfo --All parameters and values a list of strings in specified node order." +description: > + Gets all the available information from Slurm's `sinfo` command about specified nodes. + The returned `info` property is a dict with keys from sinfo -- + All parameters and values a list of strings in specified node order. options nodes: description: @@ -37,32 +40,42 @@ """ -def run_module(): - module_args = dict( - nodes=dict(type="list", required=True), - ) +def run_module(): # pylint: disable=missing-function-docstring + module_args = { + "nodes": { + "type": "list", + "required": True, + } + } module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) result = {"changed": False} if module.check_mode: module.exit_json(**result) - - _, stdout,_ = module.run_command("sinfo --Format All --Node", check_rc=True) # `--nodes` doesn't filter enough, other partitions are still shown + + _, stdout, _ = module.run_command( + "sinfo --Format All --Node", check_rc=True + ) # `--nodes` doesn't filter enough, other partitions are still shown lines = stdout.splitlines() info = {} - params = [v.strip() for v in lines[0].split('|')] - values = [line.split('|') for line in lines[1:]] - nodelist_ix = params.index('NODELIST') + params = [v.strip() for v in lines[0].split("|")] + values = [line.split("|") for line in lines[1:]] + nodelist_ix = params.index("NODELIST") print(values) for ix, param in enumerate(params): - info[param] = [nodeinfo[ix].strip() for nodeinfo in values if nodeinfo[nodelist_ix].strip() in module.params['nodes']] - result['info'] = info - + info[param] = [ + nodeinfo[ix].strip() + for nodeinfo in values + if nodeinfo[nodelist_ix].strip() in module.params["nodes"] + ] + result["info"] = info + module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() + if __name__ == "__main__": main() diff --git a/ansible/roles/hpctests/meta/main.yml b/ansible/roles/hpctests/meta/main.yml index 8d471f0..af60695 100644 --- a/ansible/roles/hpctests/meta/main.yml +++ b/ansible/roles/hpctests/meta/main.yml @@ -1,6 +1,8 @@ +--- galaxy_info: author: Steve Brasier company: StackHPC + description: HPC Tests - Meta # If the issue tracker for your role is not on github, uncomment the # next line and provide a value @@ -15,7 
+17,7 @@ galaxy_info: # - CC-BY-4.0 license: Apache-2.0 - min_ansible_version: 2.1 + min_ansible_version: "2.1" # If this a Container Enabled role, provide the minimum Ansible Container version. # min_ansible_container_version: diff --git a/ansible/roles/hpctests/tasks/build-hpl.yml b/ansible/roles/hpctests/tasks/build-hpl.yml index 7f6d48b..7339d9b 100644 --- a/ansible/roles/hpctests/tasks/build-hpl.yml +++ b/ansible/roles/hpctests/tasks/build-hpl.yml @@ -1,60 +1,62 @@ --- - - name: Make directory - file: + ansible.builtin.file: path: "{{ hpctests_rootdir }}/hpl" state: directory + mode: "0755" - name: Unarchive HPL sources from /opt/hpl - unarchive: + ansible.builtin.unarchive: src: "/opt/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz" dest: "{{ hpctests_rootdir }}/hpl" - remote_src: yes + remote_src: true owner: "{{ hpctests_user }}" group: "{{ hpctests_group }}" - mode: '0755' - keep_newer: yes + mode: "0755" + keep_newer: true - name: Copy BLAS makefile - copy: + ansible.builtin.copy: src: "{{ hpctests_hpl_srcdir }}/setup/Make.Linux_PII_CBLAS" dest: "{{ hpctests_hpl_srcdir }}/Make.{{ hpctests_hpl_arch }}" - remote_src: yes + remote_src: true + mode: "0644" - name: Modify make file - replace: + ansible.builtin.replace: path: "{{ hpctests_hpl_srcdir }}/Make.{{ hpctests_hpl_arch }}" regexp: "{{ item.regexp }}" replace: "{{ item.replace }}" loop: - - regexp: '^TOPdir.*$' + - regexp: "^TOPdir.*$" replace: "TOPdir = {{ hpctests_hpl_srcdir }}" - - regexp: '^ARCH\s+=.*$' + - regexp: "^ARCH\\s+=.*$" replace: "ARCH = {{ hpctests_hpl_arch }}" - - regexp: '^MPdir.*$' + - regexp: "^MPdir.*$" replace: "MPdir = $(MPI_DIR)" - - regexp: '^MPinc.*$' + - regexp: "^MPinc.*$" replace: "MPinc = -I$(MPI_DIR)/include" - - regexp: '^MPlib.*$' + - regexp: "^MPlib.*$" replace: "MPlib = $(MPI_DIR)/lib/libmpi.so" - - regexp: '^LAdir.*$' + - regexp: "^LAdir.*$" replace: "LAdir = $(OPENBLAS_DIR)" - - regexp: '^LAinc.*$' - replace: "LAinc =" # not sure if this one is needed? - - regexp: '^LAlib.*$' + - regexp: "^LAinc.*$" + replace: "LAinc =" # not sure if this one is needed? 
+ - regexp: "^LAlib.*$" replace: "LAlib = $(OPENBLAS_LIB)/libopenblas.so" - - regexp: '^CC\s+=.*$' + - regexp: "^CC\\s+=.*$" replace: "CC = mpicc" - - regexp: '^LINKER\s+=.*$' + - regexp: "^LINKER\\s+=.*$" replace: "LINKER = mpicc" - name: Create build job script - template: + ansible.builtin.template: src: "hpl-build.sh.j2" dest: "{{ hpctests_hpl_srcdir }}/hpl-build-{{ hpctests_hpl_arch }}.sh" - + mode: "0644" + - name: Build HPL executable - shell: + ansible.builtin.command: cmd: "bash -l -c 'sbatch --wait hpl-build-{{ hpctests_hpl_arch }}.sh'" # need login shell for module command chdir: "{{ hpctests_hpl_srcdir }}" creates: "bin/{{ hpctests_hpl_arch }}/xhpl" diff --git a/ansible/roles/hpctests/tasks/hpl-solo.yml b/ansible/roles/hpctests/tasks/hpl-solo.yml index 4c49531..f131733 100644 --- a/ansible/roles/hpctests/tasks/hpl-solo.yml +++ b/ansible/roles/hpctests/tasks/hpl-solo.yml @@ -1,12 +1,14 @@ +--- # For further information on tuning HPL see e.g.: # - https://ulhpc-tutorials.readthedocs.io/en/latest/parallel/mpi/HPL/ # - https://community.arm.com/developer/tools-software/hpc/b/hpc-blog/posts/profiling-and-tuning-linpack-step-step-guide # - http://www.crc.nd.edu/~rich/CRC_Summer_Scholars_2014/HPL-HowTo.pdf - name: Make directory - file: + ansible.builtin.file: path: "{{ hpctests_rootdir }}/hpl-solo" state: directory + mode: "0755" - name: Get Slurm node info slurm_node_info: @@ -14,7 +16,7 @@ register: hpctests_nodeinfo - name: Check nodes are homogenous - assert: + ansible.builtin.assert: that: "{{ hpctests_nodeinfo.info[item] | unique | length == 1 }}" fail_msg: "Selected nodes are not homogenous: {{ item }} ({{ hpctests_nodeinfo.info['NODELIST'] }}) = {{ hpctests_nodeinfo.info[item] }}" loop: @@ -26,7 +28,7 @@ - name: Calculate number of processes (per node) # Will run array job, which is SAME on each node, so only need to deal with a single node's processors here # Also ignore any hyperthreading TODO: document - set_fact: + ansible.builtin.set_fact: hpctests_hplsolo_ntasks: "{{ (hpctests_nodeinfo.info['SOCKETS'][0]) | int * (hpctests_nodeinfo.info['CORES'][0] | int) }}" - name: Calculate problem shape @@ -37,50 +39,58 @@ - name: Calculate problem size # Based on example shown in http://www.crc.nd.edu/~rich/CRC_Summer_Scholars_2014/HPL-HowTo.pdf but we have MB not GB - set_fact: - hpctests_hplsolo_N: "{{ ((((( (hpctests_nodeinfo.info['MEMORY'][0] | int) * (hpctests_hpl_mem_frac | float) * 1024 * 1024 * 1) / 8) | root) / hpctests_hpl_NB) | int ) * hpctests_hpl_NB }}" -- debug: - msg: "Using {{ hpctests_hplsolo_ntasks }} process per node with P={{ hpctests_hplsolo_pq.grid.P }}, Q={{ hpctests_hplsolo_pq.grid.Q }} targeting {{ (hpctests_hpl_mem_frac | float) * 100 }}% of {{ hpctests_nodeinfo.info['MEMORY'][0] }} MB memory per node, block size (NB) = {{ hpctests_hpl_NB }}, problem size (N) = {{ hpctests_hplsolo_N }}" + ansible.builtin.set_fact: + # yamllint disable-line rule:line-length + hpctests_hplsolo_N: "{{ ((((((hpctests_nodeinfo.info['MEMORY'][0] | int) * (hpctests_hpl_mem_frac | float) * 1024 * 1024 * 1) / 8) | root) / hpctests_hpl_NB) + | int) * hpctests_hpl_NB }}" +- ansible.builtin.debug: + # yamllint disable rule:line-length + msg: "Using {{ hpctests_hplsolo_ntasks }} process per node with P={{ hpctests_hplsolo_pq.grid.P }}, Q={{ hpctests_hplsolo_pq.grid.Q }} targeting {{ (hpctests_hpl_mem_frac + | float) * 100 }}% of {{ hpctests_nodeinfo.info['MEMORY'][0] }} MB memory per node, block size (NB) = {{ hpctests_hpl_NB }}, problem size (N) = {{ hpctests_hplsolo_N + }}" + # 
yamllint enable rule:line-length - name: Get all nodes in partition - shell: "sinfo --Node --noheader --format %N --partition={{ hpctests_partition }}" + ansible.builtin.command: "sinfo --Node --noheader --format %N --partition={{ hpctests_partition }}" register: all_nodes changed_when: false - name: Calculate excluded nodes - set_fact: + ansible.builtin.set_fact: hpctests_hplsolo_excluded_nodes: "{{ all_nodes.stdout_lines | difference(hpctests_computes.stdout_lines) }}" - name: Copy HPL binary - copy: + ansible.builtin.copy: src: "{{ hpctests_hpl_srcdir }}/bin/{{ hpctests_hpl_arch }}/xhpl" dest: "{{ hpctests_rootdir }}/hpl-solo/xhpl-{{ hpctests_hpl_arch }}" mode: "u+x" - remote_src: yes + remote_src: true - name: Template out HPL.dat - template: + ansible.builtin.template: src: "HPL.dat.j2" dest: "{{ hpctests_rootdir }}/hpl-solo/HPL.dat" + mode: "0644" vars: - hpctests_hpl_N: "{{ hpctests_hplsolo_N }}" - hpctests_hpl_P: "{{ hpctests_hplsolo_pq.grid.P }}" - hpctests_hpl_Q: "{{ hpctests_hplsolo_pq.grid.Q }}" + hpctests_hpl_N: "{{ hpctests_hplsolo_N }}" + hpctests_hpl_P: "{{ hpctests_hplsolo_pq.grid.P }}" + hpctests_hpl_Q: "{{ hpctests_hplsolo_pq.grid.Q }}" - name: Create sbatch script - template: + ansible.builtin.template: src: hpl-solo.sh.j2 dest: "{{ hpctests_rootdir }}/hpl-solo/hpl-solo.sh" + mode: "0755" vars: hpctests_hplsolo_ntasks: 2 # TODO: FIXME -- name: Remove previous outputs +- name: Remove previous outputs # noqa: no-changed-when # As depending on the number of nodes there will be different numbers of output files for different partitions so won't all get overwritten - shell: + ansible.builtin.shell: cmd: "rm -f {{ hpctests_rootdir }}/hpl-solo/hpl-solo.sh.*.out" -- name: Run hpl-solo - shell: bash -l -c 'sbatch --wait hpl-solo.sh' # need login shell for module command +- name: Run hpl-solo # noqa: no-changed-when + ansible.builtin.command: bash -l -c 'sbatch --wait hpl-solo.sh' args: chdir: "{{ hpctests_rootdir }}/hpl-solo" async: "{{ 20 * 60 }}" # wait for up to 20 minutes @@ -89,7 +99,7 @@ - name: Check HPL completed OK tags: postpro - shell: "grep '1 tests completed and passed residual checks' *.out" + ansible.builtin.shell: "grep '1 tests completed and passed residual checks' *.out" args: chdir: "{{ hpctests_rootdir }}/hpl-solo" changed_when: false @@ -105,7 +115,7 @@ # HPL_pdgesv() start time Thu Feb 25 19:58:25 2021 # tags: postpro - shell: "grep '^W[R|C]' *.out | tr -s ' ' | cut -d ' ' -f 7" # tr -s squeezes multiple spaces to single, then take gflops column + ansible.builtin.shell: "set -o pipefail && grep '^W[R|C]' *.out | tr -s ' ' | cut -d ' ' -f 7" args: chdir: "{{ hpctests_rootdir }}/hpl-solo" changed_when: false @@ -113,7 +123,8 @@ - name: Summarise results tags: postpro - debug: + ansible.builtin.debug: + # yamllint disable rule:line-length msg: | Summary for hpl-solo on {{ hpctests_computes.stdout_lines | length }} nodes in '{{ hpctests_partition }}' partition, job ID {{ hpctests_hplsolo_sbatch.stdout.split()[-1] }}, device '{{ hpctests_ucx_net_devices }}': @@ -122,4 +133,5 @@ Mean: {{ (perf.stdout_lines | map('float') | sum) / (hpctests_computes.stdout_lines | length) }} gflops Individual node results (gflops): - {{ dict(hpctests_computes.stdout_lines | zip(perf.stdout_lines | map('float') )) | to_nice_yaml }} + {{ dict(hpctests_computes.stdout_lines | zip(perf.stdout_lines | map('float'))) | to_nice_yaml }} + # yamllint enable rule:line-length diff --git a/ansible/roles/hpctests/tasks/main.yml b/ansible/roles/hpctests/tasks/main.yml index f0f0817..bee1b76 
100644 --- a/ansible/roles/hpctests/tasks/main.yml +++ b/ansible/roles/hpctests/tasks/main.yml @@ -1,38 +1,39 @@ -- name: setup - block: - - include_tasks: setup.yml +--- +- name: Setup become: true become_user: "{{ hpctests_user }}" tags: always -- name: pingpong block: - - include_tasks: pingpong.yml - when: hpctests_computes.stdout_lines | length > 1 + - ansible.builtin.include_tasks: setup.yml +- name: Pingpong become: true become_user: "{{ hpctests_user }}" tags: pingpong -- name: pingmatrix block: - - include_tasks: pingmatrix.yml + - ansible.builtin.include_tasks: pingpong.yml when: hpctests_computes.stdout_lines | length > 1 +- name: Pingmatrix become: true become_user: "{{ hpctests_user }}" tags: pingmatrix -- name: build HPL block: - - include_tasks: build-hpl.yml + - ansible.builtin.include_tasks: pingmatrix.yml + when: hpctests_computes.stdout_lines | length > 1 +- name: Build HPL become: true become_user: "{{ hpctests_user }}" tags: - hpl-solo -- name: run HPL on individual nodes block: - - include_tasks: hpl-solo.yml + - ansible.builtin.include_tasks: build-hpl.yml +- name: Run HPL on individual nodes become: true become_user: "{{ hpctests_user }}" tags: - hpl-solo + block: + - ansible.builtin.include_tasks: hpl-solo.yml diff --git a/ansible/roles/hpctests/tasks/pingmatrix.yml b/ansible/roles/hpctests/tasks/pingmatrix.yml index 3d20b78..5d5d41f 100644 --- a/ansible/roles/hpctests/tasks/pingmatrix.yml +++ b/ansible/roles/hpctests/tasks/pingmatrix.yml @@ -1,40 +1,44 @@ --- - - name: Make directory - file: + ansible.builtin.file: path: "{{ hpctests_rootdir }}/pingmatrix" state: directory + mode: "0755" - name: Copy source - copy: + ansible.builtin.copy: src: mpi_nxnlatbw.c dest: "{{ hpctests_rootdir }}/pingmatrix/mpi_nxnlatbw.c" + mode: "0644" - name: Create sbatch script - template: + ansible.builtin.template: src: pingmatrix.sh.j2 dest: "{{ hpctests_rootdir }}/pingmatrix/pingmatrix.sh" + mode: "0755" -- name: Run ping matrix - shell: bash -l -c 'sbatch --wait pingmatrix.sh' # need login shell for module command +- name: Run ping matrix # noqa: no-changed-when + ansible.builtin.command: bash -l -c 'sbatch --wait pingmatrix.sh' args: chdir: "{{ hpctests_rootdir }}/pingmatrix" register: hpctests_pingmatrix_sbatch -# nxnlatbw outputs ranks, not nodenames which would be more useful for finding issues. The sbatch manpage says nodes provided via --nodelist are sorted, but doesn't specify how. -# Some testing using a "helloworld" program showed it is NOT sorted the same as python's sorted(), it's lexicographical. So we use scontrol to guarantee the same sort order. +# nxnlatbw outputs ranks, not nodenames which would be more useful for finding issues. +# The sbatch manpage says nodes provided via --nodelist are sorted, but doesn't specify how. +# Some testing using a "helloworld" program showed it is NOT sorted the same as python's sorted(), +# it's lexicographical. So we use scontrol to guarantee the same sort order. # Note this still doesn't fix any non-unique names but we should get a length mis-match at least with that. 
# although this looks a bit crazy: -- name: Expand node list - shell: "scontrol show hostnames {{ hpctests_nodes if hpctests_nodes is defined else (hpctests_computes.stdout_lines | join(',')) }}" +- name: Expand node list # noqa: no-changed-when + ansible.builtin.command: "scontrol show hostnames {{ hpctests_nodes if hpctests_nodes is defined else (hpctests_computes.stdout_lines | join(',')) }}" register: scontrol_hostnames -- name: Create sorted node expression - shell: "scontrol show hostlistsorted {{ scontrol_hostnames.stdout_lines | join(',') }}" +- name: Create sorted node expression # noqa: no-changed-when + ansible.builtin.command: "scontrol show hostlistsorted {{ scontrol_hostnames.stdout_lines | join(',') }}" register: scontrol_hostlistsorted -- name: Expand node list again - shell: "scontrol show hostnames {{ scontrol_hostlistsorted.stdout_lines | join(',') }}" +- name: Expand node list again # noqa: no-changed-when + ansible.builtin.command: "scontrol show hostnames {{ scontrol_hostlistsorted.stdout_lines | join(',') }}" register: slurm_names - name: Process output @@ -45,16 +49,18 @@ register: nxnlatbw - name: Fetch html results table to ansible control host - fetch: + ansible.builtin.fetch: src: "{{ hpctests_rootdir }}/pingmatrix/pingmatrix.html" dest: "{{ hpctests_outdir }}/pingmatrix.html" - flat: yes + flat: true - name: Summarise results - debug: + ansible.builtin.debug: + # yamllint disable rule:line-length msg: | Summary for pingmatrix pairwise over {{ slurm_names.stdout_lines | length }} nodes in '{{ hpctests_partition }}' partition, job ID {{ hpctests_pingmatrix_sbatch.stdout.split()[-1] }}, device '{{ hpctests_ucx_net_devices }}': - + {{ nxnlatbw['stats'] | to_nice_yaml }} - + Tabular output on ansible control host at {{ hpctests_outdir }}/pingmatrix.html + # yamllint enable rule:line-length diff --git a/ansible/roles/hpctests/tasks/pingpong.yml b/ansible/roles/hpctests/tasks/pingpong.yml index 3cde8c2..6c80065 100644 --- a/ansible/roles/hpctests/tasks/pingpong.yml +++ b/ansible/roles/hpctests/tasks/pingpong.yml @@ -1,45 +1,46 @@ --- - - name: Make directory - file: + ansible.builtin.file: path: "{{ hpctests_rootdir }}/pingpong" state: directory + mode: "0755" - name: Create sbatch script - template: + ansible.builtin.template: src: pingpong.sh.j2 dest: "{{ hpctests_rootdir }}/pingpong/pingpong.sh" + mode: "0755" - name: Run pingpong block: - - name: Submit jobscript - shell: bash -l -c 'sbatch --wait pingpong.sh' # need login shell for module command + - name: Submit jobscript # noqa: command-instead-of-shell no-changed-when + ansible.builtin.shell: bash -l -c 'sbatch --wait pingpong.sh' # need login shell for module command args: chdir: "{{ hpctests_rootdir }}/pingpong" register: hpctests_pingpong_sbatch rescue: - name: Get slurm job output - slurp: + ansible.builtin.slurp: src: "{{ hpctests_rootdir }}/pingpong/pingpong.sh.out" register: _pingpong_out - name: Show job output - debug: + ansible.builtin.debug: msg: | PingPong output was: - + {{ _pingpong_out.content | b64decode }} failed_when: true -- set_fact: +- ansible.builtin.set_fact: _pingpong_jobid: "{{ hpctests_pingpong_sbatch.stdout.split()[-1] }}" -- set_fact: - _pingpong_local_output: "{{ hpctests_outdir }}/pingpong/{{_pingpong_jobid}}/pingpong.sh.out" +- ansible.builtin.set_fact: + _pingpong_local_output: "{{ hpctests_outdir }}/pingpong/{{ _pingpong_jobid }}/pingpong.sh.out" - name: Retrieve results file ansible.builtin.fetch: src: "{{ hpctests_rootdir }}/pingpong/pingpong.sh.out" dest: "{{ 
_pingpong_local_output }}" - flat: yes + flat: true - name: Read pingpong results read_imb_pingpong: @@ -48,22 +49,23 @@ delegate_to: localhost become: false -- name: Read nodes used - shell: "grep 'SLURM_JOB_NODELIST:' {{ _pingpong_local_output }}" +- name: Read nodes used # noqa: no-changed-when + ansible.builtin.command: "grep 'SLURM_JOB_NODELIST:' {{ _pingpong_local_output }}" register: hpctests_pingpong_run_nodes delegate_to: localhost become: false - name: Plot image - shell: + ansible.builtin.command: cmd: "python {{ role_path }}/files/plot_imb_pingpong.py {{ _pingpong_local_output }}" creates: "{{ _pingpong_local_output | dirname }}/latency.png" register: _pingpong_plot delegate_to: localhost become: false when: hpctests_pingpong_plot | bool - -- debug: + +- ansible.builtin.debug: + # yamllint disable rule:line-length msg: | Summary for pingpong using 2x scheduler-selected nodes in '{{ hpctests_partition }}' partition, job ID {{ _pingpong_jobid }}, device '{{ hpctests_ucx_net_devices }}': @@ -75,3 +77,4 @@ See plot on localhost: {{ _pingpong_plot.stdout }} {% endif %} + # yamllint enable rule:line-length diff --git a/ansible/roles/hpctests/tasks/setup.yml b/ansible/roles/hpctests/tasks/setup.yml index 316b328..cc9832a 100644 --- a/ansible/roles/hpctests/tasks/setup.yml +++ b/ansible/roles/hpctests/tasks/setup.yml @@ -1,34 +1,36 @@ --- - - name: Get partition information - shell: "sinfo --format %P --noheader" + ansible.builtin.command: "sinfo --format %P --noheader" register: _sinfo_partitions changed_when: false - name: Select default partition if hpctests_partition not given - set_fact: - hpctests_partition: "{{ (_sinfo_partitions.stdout_lines | select('contains', '*') | first)[:-1] }}" + ansible.builtin.set_fact: + hpctests_partition: "{{ (_sinfo_partitions.stdout_lines | select('contains', '*') | first)[:-1] }}" when: hpctests_partition is not defined - name: Get info about compute nodes - shell: "sinfo --Node --noheader{%if hpctests_nodes is defined %} --nodes {{hpctests_nodes}}{% endif %} --partition {{hpctests_partition}} --format %N" + # yamllint disable-line rule:line-length + ansible.builtin.command: "sinfo --Node --noheader{%if hpctests_nodes is defined %} --nodes {{hpctests_nodes}}{% endif %} --partition {{hpctests_partition}} --format + %N" register: hpctests_computes changed_when: false failed_when: hpctests_computes.rc != 0 - name: Check compute node selection valid - assert: + ansible.builtin.assert: that: hpctests_computes.stdout_lines | length > 0 fail_msg: "No nodes selected - was variable `hpctests_nodes` set (correctly)?" 
- name: Create test root directory - file: + ansible.builtin.file: path: "{{ hpctests_rootdir }}" state: directory owner: "{{ hpctests_user }}" group: "{{ hpctests_group }}" + mode: "0755" - name: Set fact for UCX_NET_DEVICES - set_fact: + ansible.builtin.set_fact: hpctests_ucx_net_devices: "{{ hpctests_ucx_net_devices.get(hpctests_partition, 'all') }}" when: hpctests_ucx_net_devices is mapping diff --git a/ansible/roles/hpctests/tasks/source-hpl.yml b/ansible/roles/hpctests/tasks/source-hpl.yml index 43585d3..6083240 100644 --- a/ansible/roles/hpctests/tasks/source-hpl.yml +++ b/ansible/roles/hpctests/tasks/source-hpl.yml @@ -1,7 +1,7 @@ --- - name: Make directory - file: + ansible.builtin.file: path: "/opt/hpl" state: directory owner: root @@ -9,7 +9,8 @@ mode: '0755' - name: Download HPL tarball - get_url: + # checkov:skip=CKV2_ANSIBLE_2: "Ensure that HTTPS url is used with get_url" + ansible.builtin.get_url: url: "http://www.netlib.org/benchmark/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz" dest: "/opt/hpl/hpl-{{ hpctests_hpl_version }}.tar.gz" owner: root diff --git a/ansible/roles/hpctests/templates/hpl-build.sh.j2 b/ansible/roles/hpctests/templates/hpl-build.sh.j2 old mode 100644 new mode 100755 diff --git a/ansible/roles/hpctests/templates/hpl-solo.sh.j2 b/ansible/roles/hpctests/templates/hpl-solo.sh.j2 old mode 100644 new mode 100755 diff --git a/ansible/roles/hpctests/templates/pingmatrix.sh.j2 b/ansible/roles/hpctests/templates/pingmatrix.sh.j2 old mode 100644 new mode 100755 diff --git a/ansible/roles/hpctests/templates/pingpong.sh.j2 b/ansible/roles/hpctests/templates/pingpong.sh.j2 old mode 100644 new mode 100755 diff --git a/ansible/roles/k3s/README.md b/ansible/roles/k3s/README.md index 68e8e24..4031a00 100644 --- a/ansible/roles/k3s/README.md +++ b/ansible/roles/k3s/README.md @@ -1,16 +1,12 @@ -k3s -===== +# k3s Installs k3s agent and server services on nodes and an ansible-init playbook to activate them. The service that each node will activate on init is determined by OpenStack metadata. Also includes Helm install. Currently only supports a single k3s-server (i.e one control node). Install based on the [official k3s ansible role](https://github.com/k3s-io/k3s-ansible). - -Requirements ------------- +## Requirements `azimuth_cloud.image_utils.linux_ansible_init` must have been run previously on targeted nodes during image build. -Role Variables --------------- +## Role Variables - `k3s_version`: Optional str. K3s version to install, see [official releases](https://github.com/k3s-io/k3s/releases/). 
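For context on the k3s role variables documented above, a minimal sketch of how a site might pin them is shown below; the group_vars path is an assumption for illustration only, and the values simply mirror the role defaults touched later in this patch.

```yaml
# Hypothetical override file, e.g. environments/site/inventory/group_vars/all/k3s.yml
# (path is illustrative; values mirror the role defaults)
k3s_version: "v1.31.0+k3s1"   # must be a tag from the official k3s releases page
k3s_helm_version: v3.11.0     # Helm version installed alongside k3s
```

Note that, as the role defaults warn, changing these after k3s is already installed in the image or cluster will not take effect without a rebuild.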
diff --git a/ansible/roles/k3s/defaults/main.yml b/ansible/roles/k3s/defaults/main.yml index 984c63d..38a5f73 100644 --- a/ansible/roles/k3s/defaults/main.yml +++ b/ansible/roles/k3s/defaults/main.yml @@ -1,8 +1,9 @@ +--- # Warning: changes to these variables won't be reflected in the cluster/image if k3s is already installed k3s_version: "v1.31.0+k3s1" k3s_selinux_release: v1.6.latest.1 k3s_selinux_rpm_version: 1.6-1 k3s_helm_version: v3.11.0 -k3s_bootstrap_token: '' # matches common environment default +k3s_bootstrap_token: "" # matches common environment default k3s_bootstrap_token_expiry: 10m k3s_server_name: "{{ None }}" # ansible managed diff --git a/ansible/roles/k3s/tasks/agent-runtime.yml b/ansible/roles/k3s/tasks/agent-runtime.yml index 8377817..732fcee 100644 --- a/ansible/roles/k3s/tasks/agent-runtime.yml +++ b/ansible/roles/k3s/tasks/agent-runtime.yml @@ -1,5 +1,4 @@ --- - - name: Template k3s agent env file when: k3s_bootstrap_token != '' ansible.builtin.template: @@ -7,16 +6,16 @@ src: k3s-agent.service.env.j2 owner: root group: root - mode: 0640 + mode: "0640" register: _k3s_agent_token_result - name: Ensure password directory exists - ansible.builtin.file: + ansible.builtin.file: path: "/etc/rancher/node" state: directory owner: root group: root - mode: 0640 + mode: "0640" - name: Write node password ansible.builtin.copy: @@ -24,10 +23,10 @@ content: "{{ vault_k3s_node_password }}" owner: root group: root - mode: 0640 # normal k3s install is 644 but that doesn't feel right + mode: "0640" # normal k3s install is 644 but that doesn't feel right - name: Start/restart k3s agent - when: _k3s_agent_token_result.changed + when: _k3s_agent_token_result.changed # noqa: no-handler ansible.builtin.systemd: name: k3s-agent daemon_reload: true diff --git a/ansible/roles/k3s/tasks/install.yml b/ansible/roles/k3s/tasks/install.yml index c250f87..79efb65 100644 --- a/ansible/roles/k3s/tasks/install.yml +++ b/ansible/roles/k3s/tasks/install.yml @@ -1,7 +1,6 @@ --- - - name: Check for existing k3s installation - stat: + ansible.builtin.stat: path: /var/lib/rancher/k3s register: stat_result @@ -9,62 +8,64 @@ # Using air-gapped install so containers are pre-installed to avoid rate-limiting from registries on cluster startup when: not stat_result.stat.exists block: + - name: Download k3s binary + ansible.builtin.get_url: + url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s" + dest: /usr/bin/k3s + owner: root + group: root + mode: "0755" - - name: Download k3s binary - ansible.builtin.get_url: - url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s" - dest: /usr/bin/k3s - owner: root - group: root - mode: "0755" - - - name: Install k3s SELinux policy package - yum: - name: "https://github.com/k3s-io/k3s-selinux/releases/download/{{ k3s_selinux_release }}/k3s-selinux-{{ k3s_selinux_rpm_version }}.el{{ ansible_distribution_major_version }}.noarch.rpm" - disable_gpg_check: true + - name: Install k3s SELinux policy package + ansible.builtin.dnf: + # yamllint disable-line rule:line-length + name: "https://github.com/k3s-io/k3s-selinux/releases/download/{{ k3s_selinux_release }}/k3s-selinux-{{ k3s_selinux_rpm_version }}.el{{ ansible_distribution_major_version }}.noarch.rpm" + disable_gpg_check: true - - name: Create image directory - ansible.builtin.file: - path: "/var/lib/rancher/k3s/agent/images" - state: directory + - name: Create image directory + ansible.builtin.file: + path: "/var/lib/rancher/k3s/agent/images" + state: 
directory + mode: "0755" - - name: Install k3s' internal images - ansible.builtin.get_url: - url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s-airgap-images-amd64.tar.zst" - dest: /var/lib/rancher/k3s/agent/images/k3s-airgap-images-amd64.tar.zst + - name: Install k3s' internal images + ansible.builtin.get_url: + url: "https://github.com/k3s-io/k3s/releases/download/{{ k3s_version | urlencode }}/k3s-airgap-images-amd64.tar.zst" + dest: /var/lib/rancher/k3s/agent/images/k3s-airgap-images-amd64.tar.zst + mode: "0644" - - name: Download k3s install script - ansible.builtin.get_url: - url: https://get.k3s.io/ - timeout: 120 - dest: /usr/bin/k3s-install.sh - owner: root - group: root - mode: "0755" + - name: Download k3s install script + ansible.builtin.get_url: + url: https://get.k3s.io/ + timeout: 120 + dest: /usr/bin/k3s-install.sh + owner: root + group: root + mode: "0755" - - name: Install k3s - ansible.builtin.shell: - cmd: /usr/bin/k3s-install.sh - environment: - INSTALL_K3S_VERSION: "{{ k3s_version }}" - INSTALL_K3S_EXEC: "{{ item }} --node-ip=${K3S_NODE_IP}" - INSTALL_K3S_SKIP_START: "true" - INSTALL_K3S_SKIP_ENABLE: "true" - INSTALL_K3S_BIN_DIR: "/usr/bin" - INSTALL_K3S_SKIP_DOWNLOAD: "true" - changed_when: true - loop: - - server --disable=traefik - - agent + - name: Install k3s + ansible.builtin.command: + cmd: /usr/bin/k3s-install.sh + environment: + INSTALL_K3S_VERSION: "{{ k3s_version }}" + INSTALL_K3S_EXEC: "{{ item }} --node-ip=${K3S_NODE_IP}" + INSTALL_K3S_SKIP_START: "true" + INSTALL_K3S_SKIP_ENABLE: "true" + INSTALL_K3S_BIN_DIR: "/usr/bin" + INSTALL_K3S_SKIP_DOWNLOAD: "true" + changed_when: true + loop: + - server --disable=traefik + - agent - name: Install helm - unarchive: + ansible.builtin.unarchive: src: "https://get.helm.sh/helm-{{ k3s_helm_version }}-linux-amd64.tar.gz" dest: /usr/bin extra_opts: "--strip-components=1" owner: root group: root - mode: 0755 + mode: "0755" remote_src: true - name: Add k3s kubeconfig as environment variable diff --git a/ansible/roles/k3s/tasks/server-runtime.yml b/ansible/roles/k3s/tasks/server-runtime.yml index 6c0878e..1221cda 100644 --- a/ansible/roles/k3s/tasks/server-runtime.yml +++ b/ansible/roles/k3s/tasks/server-runtime.yml @@ -1,9 +1,9 @@ --- - - name: Template k3s env file ansible.builtin.template: dest: /etc/systemd/system/k3s.service.env src: k3s.service.env.j2 + mode: "0644" register: _k3s_env_file_status - name: Start k3s server @@ -14,9 +14,9 @@ enabled: true # Possible race here as there is a delay between agents disconnecting and being registered as down, probably won't be hit in general use though -- name: Check which k3s agents are connected +- name: Check which k3s agents are connected # noqa: no-changed-when ansible.builtin.shell: - cmd: kubectl get nodes --no-headers | grep -w Ready + cmd: set -o pipefail && kubectl get nodes --no-headers | grep -w Ready register: _k3s_connected_nodes retries: 6 # task may fail if server is not ready yet delay: 10 @@ -24,12 +24,12 @@ - when: _k3s_connected_nodes.stdout_lines | length != groups['k3s'] | length block: - - name: Generate new bootstrap token if not all agents are connected - no_log: true - shell: - cmd: "k3s token create --ttl {{ k3s_bootstrap_token_expiry }}" - register: _k3s_token_output + - name: Generate new bootstrap token if not all agents are connected # noqa: no-changed-when + no_log: true + ansible.builtin.command: + cmd: "k3s token create --ttl {{ k3s_bootstrap_token_expiry }}" + register: _k3s_token_output - - name: Set 
bootstrap token as fact - set_fact: - k3s_bootstrap_token: "{{ _k3s_token_output.stdout }}" + - name: Set bootstrap token as fact + ansible.builtin.set_fact: + k3s_bootstrap_token: "{{ _k3s_token_output.stdout }}" diff --git a/ansible/roles/k3s/templates/k3s-agent.service.env.j2 b/ansible/roles/k3s/templates/k3s-agent.service.env.j2 index b994b06..9444765 100644 --- a/ansible/roles/k3s/templates/k3s-agent.service.env.j2 +++ b/ansible/roles/k3s/templates/k3s-agent.service.env.j2 @@ -1,3 +1,3 @@ -K3S_NODE_IP={{ ansible_host }} -K3S_TOKEN={{ k3s_bootstrap_token }} -K3S_URL=https://{{ k3s_server_name }}:6443 +K3S_NODE_IP="{{ ansible_host }}" +K3S_TOKEN="{{ k3s_bootstrap_token }}" +K3S_URL="https://{{ k3s_server_name }}:6443" diff --git a/ansible/roles/k3s/templates/k3s.service.env.j2 b/ansible/roles/k3s/templates/k3s.service.env.j2 index 746e6d8..38fb911 100644 --- a/ansible/roles/k3s/templates/k3s.service.env.j2 +++ b/ansible/roles/k3s/templates/k3s.service.env.j2 @@ -1 +1 @@ -K3S_NODE_IP={{ ansible_host }} +K3S_NODE_IP="{{ ansible_host }}" diff --git a/ansible/roles/k9s/tasks/main.yml b/ansible/roles/k9s/tasks/main.yml index 674b4df..bebe7b8 100644 --- a/ansible/roles/k9s/tasks/main.yml +++ b/ansible/roles/k9s/tasks/main.yml @@ -1,12 +1,12 @@ --- - - - name: Check if k9s is installed - ansible.builtin.stat: - path: "/usr/bin/k9s" - register: _k9s_stat_result +- name: Check if k9s is installed + ansible.builtin.stat: + path: "/usr/bin/k9s" + register: _k9s_stat_result - - name: Install k9s and clean up temporary files - block: +- name: Install k9s and clean up temporary files + when: not _k9s_stat_result.stat.exists + block: - name: Create install directory ansible.builtin.file: path: /tmp/k9s @@ -28,17 +28,16 @@ ansible.builtin.unarchive: src: /tmp/k9s/k9s_Linux_amd64.tar.gz dest: /tmp/k9s - remote_src: yes + remote_src: true - name: Add k9s to root path ansible.builtin.copy: src: /tmp/k9s/k9s dest: /usr/bin/k9s mode: u+rwx - remote_src: yes + remote_src: true - name: Cleanup k9s install directory ansible.builtin.file: path: /tmp/k9s state: absent - when: not _k9s_stat_result.stat.exists diff --git a/ansible/roles/lustre/README.md b/ansible/roles/lustre/README.md index 0269ad6..56e6b3a 100644 --- a/ansible/roles/lustre/README.md +++ b/ansible/roles/lustre/README.md @@ -7,22 +7,25 @@ Install and configure a Lustre client. This builds RPM packages from source. **NB:** Currently this only supports RockyLinux 9. ## Role Variables + The following variables control configuration of Lustre clients. + - `lustre_lnet_label`: Optional str. The "lnet label" part of the host's NID, e.g. `tcp0`. Only the `tcp` protocol type is currently supported. Default `tcp`. - `lustre_mgs_nid`: Required str. The NID(s) for the MGS, e.g. `192.168.227.11@tcp1` (separate mutiple MGS NIDs using `:`). - `lustre_mounts`: Required list. Define Lustre filesystems and mountpoints as a list of dicts with keys: - - `fs_name`: Required str. The name of the filesystem to mount - - `mount_point`: Required str. Path to mount filesystem at. - - `mount_state`: Optional mount state, as for [ansible.posix.mount](https://docs.ansible.com/ansible/latest/collections/ansible/posix/mount_module.html#parameter-state). Default is `lustre_mount_state`. - - `mount_options`: Optional mount options. Default is `lustre_mount_options`. + - `fs_name`: Required str. The name of the filesystem to mount + - `mount_point`: Required str. Path to mount filesystem at. 
+ - `mount_state`: Optional mount state, as for [ansible.posix.mount](https://docs.ansible.com/ansible/latest/collections/ansible/posix/mount_module.html#parameter-state). Default is `lustre_mount_state`. + - `mount_options`: Optional mount options. Default is `lustre_mount_options`. - `lustre_mount_state`. Optional default mount state for all mounts, as for [ansible.posix.mount](https://docs.ansible.com/ansible/latest/collections/ansible/posix/mount_module.html#parameter-state). Default is `mounted`. - `lustre_mount_options`. Optional default mount options. Default values are systemd defaults from [Lustre client docs](http://wiki.lustre.org/Mounting_a_Lustre_File_System_on_Client_Nodes). The following variables control the package build and and install: + - `lustre_version`: Optional str. Version of lustre to build, default `2.15.7` -- `lustre_repo`: Optional str. URL for Lustre repo. Default is `git://git.whamcloud.com/fs/lustre-release`.git. +- `lustre_repo`: Optional str. URL for Lustre repository. Default is `git://git.whamcloud.com/fs/lustre-release`.git. - `lustre_build_packages`: Optional list. Prerequisite packages required to build Lustre. See `defaults/main.yml`. - `lustre_build_dir`: Optional str. Path to build lustre at, default `/tmp/lustre-release`. - `lustre_configure_opts`: Optional list. Options to `./configure` command. Default builds client rpms supporting Mellanox OFED, without support for GSS keys. -- `lustre_rpm_globs`: Optional list. Shell glob patterns for rpms to install. Note order is important as the built RPMs are not in a yum repo. Default is just the `kmod-lustre-client` and `lustre-client` packages. +- `lustre_rpm_globs`: Optional list. Shell glob patterns for rpms to install. Note order is important as the built RPMs are not in a yum repository. Default is just the `kmod-lustre-client` and `lustre-client` packages. - `lustre_build_cleanup`: Optional bool. Whether to uninstall prerequisite packages and delete the build directories etc. Default `true`. 
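As a concrete illustration of the mount-related variables listed above, a minimal sketch of inventory group_vars for hosts in the Lustre client group might look like the following; the MGS NID, filesystem name and mount point are placeholder values for illustration, not defaults from this role:

```yaml
# hypothetical environments/site/inventory/group_vars/lustre.yml
lustre_mgs_nid: "192.168.227.11@tcp1"   # MGS NID(s); separate multiple NIDs with ':'
lustre_mounts:
  - fs_name: scratch                    # Lustre filesystem name to mount
    mount_point: /mnt/lustre/scratch    # client mountpoint
    # mount_state and mount_options are optional and fall back to
    # lustre_mount_state / lustre_mount_options respectively
```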
diff --git a/ansible/roles/lustre/defaults/main.yml b/ansible/roles/lustre/defaults/main.yml index 14ddc05..9958eec 100644 --- a/ansible/roles/lustre/defaults/main.yml +++ b/ansible/roles/lustre/defaults/main.yml @@ -1,9 +1,10 @@ +--- lustre_version: '2.15.7' lustre_lnet_label: tcp -#lustre_mgs_nid: +# lustre_mgs_nid: lustre_mounts: [] lustre_mount_state: mounted -lustre_mount_options: 'defaults,_netdev,noauto,x-systemd.automount,x-systemd.requires=lnet.service,nosuid,nodev' +lustre_mount_options: "defaults,_netdev,noauto,x-systemd.automount,x-systemd.requires=lnet.service,nosuid,nodev" # below variables are for build and should not generally require changes lustre_repo: "https://github.com/lustre/lustre-release.git" diff --git a/ansible/roles/lustre/tasks/configure.yml b/ansible/roles/lustre/tasks/configure.yml index be5ba35..fab9e60 100644 --- a/ansible/roles/lustre/tasks/configure.yml +++ b/ansible/roles/lustre/tasks/configure.yml @@ -1,5 +1,6 @@ +--- - name: Gather Lustre interface info - shell: + ansible.builtin.shell: cmd: | ip --json r get {{ _lustre_mgs_ip }} changed_when: false @@ -8,23 +9,23 @@ _lustre_mgs_ip: "{{ lustre_mgs_nid | split('@') | first }}" - name: Set facts for Lustre interface - set_fact: + ansible.builtin.set_fact: _lustre_interface: "{{ _lustre_ip_r_mgs_info.dev }}" _lustre_ip: "{{ _lustre_ip_r_mgs_info.prefsrc }}" vars: _lustre_ip_r_mgs_info: "{{ _lustre_ip_r_mgs.stdout | from_json | first }}" - name: Write LNet configuration file - template: + ansible.builtin.template: src: lnet.conf.j2 - dest: /etc/lnet.conf # exists from package install, expected by lnet service + dest: /etc/lnet.conf # exists from package install, expected by lnet service owner: root group: root mode: u=rw,go=r # from package install register: _lnet_conf - name: Ensure lnet service state - systemd: + ansible.builtin.systemd: name: lnet state: "{{ 'restarted' if _lnet_conf.changed else 'started' }}" @@ -32,6 +33,7 @@ ansible.builtin.file: path: "{{ item.mount_point }}" state: directory + mode: "0755" loop: "{{ lustre_mounts }}" when: "(item.mount_state | default(lustre_mount_state)) != 'absent'" diff --git a/ansible/roles/lustre/tasks/install.yml b/ansible/roles/lustre/tasks/install.yml index aedc3a5..7a91a38 100644 --- a/ansible/roles/lustre/tasks/install.yml +++ b/ansible/roles/lustre/tasks/install.yml @@ -1,25 +1,26 @@ +--- - name: Install lustre build prerequisites ansible.builtin.dnf: name: "{{ lustre_build_packages }}" register: _lustre_dnf_build_packages - + - name: Clone lustre git repo ansible.builtin.git: repo: "{{ lustre_repo }}" dest: "{{ lustre_build_dir }}" version: "{{ lustre_version }}" -- name: Prepare for lustre configuration +- name: Prepare for lustre configuration # noqa: no-changed-when ansible.builtin.command: cmd: sh ./autogen.sh chdir: "{{ lustre_build_dir }}" -- name: Configure lustre build +- name: Configure lustre build # noqa: no-changed-when ansible.builtin.command: cmd: "./configure {{ lustre_configure_opts | join(' ') }}" chdir: "{{ lustre_build_dir }}" -- name: Build lustre +- name: Build lustre # noqa: no-changed-when ansible.builtin.command: cmd: make rpms chdir: "{{ lustre_build_dir }}" @@ -32,17 +33,18 @@ register: _lustre_find_rpms - name: Check rpms found - assert: + ansible.builtin.assert: that: _lustre_find_rpms.files | length fail_msg: "No lustre repos found with lustre_rpm_globs = {{ lustre_rpm_globs }}" - name: Install lustre rpms + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" 
ansible.builtin.dnf: - name: "{{ _lustre_find_rpms.files | map(attribute='path')}}" - disable_gpg_check: yes + name: "{{ _lustre_find_rpms.files | map(attribute='path') }}" + disable_gpg_check: true - name: Delete lustre build dir - file: + ansible.builtin.file: path: "{{ lustre_build_dir }}" state: absent when: lustre_build_cleanup | bool diff --git a/ansible/roles/lustre/tasks/validate.yml b/ansible/roles/lustre/tasks/validate.yml index 609a77f..6469ac1 100644 --- a/ansible/roles/lustre/tasks/validate.yml +++ b/ansible/roles/lustre/tasks/validate.yml @@ -1,20 +1,21 @@ +--- - name: Check kernel-devel package is installed - command: "dnf list --installed kernel-devel-{{ ansible_kernel }}" + ansible.builtin.command: "dnf list --installed kernel-devel-{{ ansible_kernel }}" changed_when: false # NB: we don't check here the kernel will remain the same after reboot etc, see ofed/install.yml - name: Ensure SELinux in permissive mode - assert: + ansible.builtin.assert: that: selinux_state in ['permissive', 'disabled'] fail_msg: "SELinux must be permissive for Lustre not '{{ selinux_state }}'; see variable selinux_state" - name: Ensure lustre_mgs_nid is defined - assert: + ansible.builtin.assert: that: lustre_mgs_nid is defined fail_msg: Variable lustre_mgs_nid must be defined - name: Ensure lustre_mounts entries define filesystem name and mount point - assert: + ansible.builtin.assert: that: - item.fs_name is defined - item.mount_point is defined diff --git a/ansible/roles/mysql/README.md b/ansible/roles/mysql/README.md index 2c735db..e85c173 100644 --- a/ansible/roles/mysql/README.md +++ b/ansible/roles/mysql/README.md @@ -1,18 +1,14 @@ -mysql -===== +# MySQL Deploy containerised `mysql` server using Podman. - -Requirements ------------- +## Requirements None. -Role Variables --------------- +## Role Variables -- `mysql_root_password`: Required str. Password to set for `root` mysql user. **NB** This cannot be changed by this role once mysql server has initialised. +- `mysql_root_password`: Required str. Password to set for `root` MySQL user. **NB** This cannot be changed by this role once MySQL server has initialised. - `mysql_tag`: Optional str. Tag for version of `mysql` container image to use. Default `8.0.30`. - `mysql_systemd_service_enabled`: Optional bool. Whether `mysql` service starts on boot. Default `yes`. - `mysql_state`: Optional str. As per `ansible.builtin.systemd:state`. Default is `started` or `restarted` as required. @@ -22,13 +18,11 @@ Role Variables - `mysql_users`: Optional list of dicts defining users as per `community.mysql.mysql_user`. Default `[]`. - `mysql_databases`: Optional list of dicts defining databases as per `community.mysql.mysql_db`. Default `[]`. -Dependencies ------------- +## Dependencies None. 
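As a hedged illustration of the `mysql_users` and `mysql_databases` variables described above: each entry is passed directly to `community.mysql.mysql_user` / `community.mysql.mysql_db`, so definitions might look like the sketch below. The database name, user and privileges are placeholders, and `vault_mysql_slurm_password` is assumed to be a vault-protected variable defined elsewhere in the inventory.

```yaml
mysql_databases:
  - name: slurm_acct_db                           # passed to community.mysql.mysql_db
mysql_users:
  - name: slurm                                   # passed to community.mysql.mysql_user
    host: "%"                                     # allow connections from any host
    password: "{{ vault_mysql_slurm_password }}"  # assumed vault-protected variable
    priv: "slurm_acct_db.*:ALL"
```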
-Example Playbook ----------------- +## Example Playbook ```yaml - name: Setup DB @@ -38,15 +32,13 @@ Example Playbook - mysql tasks: - include_role: - name: mysql + name: mysql ``` -License -------- +## License Apache v2 -Author Information ------------------- +## Author Information -Steve Brasier steveb@stackhpc.com +Steve Brasier diff --git a/ansible/roles/mysql/defaults/main.yml b/ansible/roles/mysql/defaults/main.yml index b15c800..9d549b3 100644 --- a/ansible/roles/mysql/defaults/main.yml +++ b/ansible/roles/mysql/defaults/main.yml @@ -1,9 +1,10 @@ +--- # required: # mysql_root_password: # TODO: make it possible to CHANGE root password mysql_tag: 8.0.30 -mysql_systemd_service_enabled: yes -#mysql_state: # default is started or restarted as required +mysql_systemd_service_enabled: true +# mysql_state: # default is started or restarted as required mysql_podman_user: "{{ ansible_user }}" mysql_datadir: /var/lib/mysql mysql_mysqld_options: [] # list of str options to mysqld, see `run -it --rm mysql:tag --verbose --help` diff --git a/ansible/roles/mysql/tasks/configure.yml b/ansible/roles/mysql/tasks/configure.yml index d4dd4cd..7bf9cb3 100644 --- a/ansible/roles/mysql/tasks/configure.yml +++ b/ansible/roles/mysql/tasks/configure.yml @@ -1,6 +1,7 @@ +--- - name: Create environment file for mysql server root password # NB: This doesn't trigger a restart on changes as it will be ignored once mysql is initialised - copy: + ansible.builtin.copy: dest: /etc/sysconfig/mysqld content: | MYSQL_INITIAL_ROOT_PASSWORD='{{ mysql_root_password }}' @@ -9,29 +10,29 @@ mode: u=rw,go= - name: Ensure mysql service state - systemd: + ansible.builtin.systemd: name: mysql state: "{{ mysql_state | default('restarted' if _mysql_unitfile.changed else 'started') }}" enabled: "{{ mysql_systemd_service_enabled }}" daemon_reload: "{{ _mysql_unitfile.changed }}" -- block: - - name: Wait for mysql to initialise +- when: "mysql_state | default('unspecified') != 'stopped'" + block: + - name: Wait for mysql to initialise # NB: It is not sufficent to wait_for the port - community.mysql.mysql_info: - login_user: root - login_password: "{{ mysql_root_password }}" - no_log: "{{ no_log | default(true) }}" - register: _mysql_info - until: "'version' in _mysql_info" - retries: 90 - delay: 2 + community.mysql.mysql_info: + login_user: root + login_password: "{{ mysql_root_password }}" + no_log: "{{ no_log | default(true) }}" + register: _mysql_info + until: "'version' in _mysql_info" + retries: 90 + delay: 2 - - name: Ensure mysql databases created - community.mysql.mysql_db: "{{ item }}" - loop: "{{ mysql_databases}}" + - name: Ensure mysql databases created + community.mysql.mysql_db: "{{ item }}" # noqa: args[module] + loop: "{{ mysql_databases}}" - - name: Ensure mysql users present - community.mysql.mysql_user: "{{ item }}" - loop: "{{ mysql_users }}" - when: "mysql_state | default('unspecified') != 'stopped'" + - name: Ensure mysql users present + community.mysql.mysql_user: "{{ item }}" # noqa: args[module] + loop: "{{ mysql_users }}" diff --git a/ansible/roles/mysql/tasks/install.yml b/ansible/roles/mysql/tasks/install.yml index 4ed5d30..0a108d2 100644 --- a/ansible/roles/mysql/tasks/install.yml +++ b/ansible/roles/mysql/tasks/install.yml @@ -1,22 +1,25 @@ +--- - name: Install pip - dnf: + ansible.builtin.dnf: name: python3-pip - name: Install python mysql client - pip: + ansible.builtin.pip: name: - pymysql - cryptography state: present - name: Create systemd mysql container unit file - template: + 
ansible.builtin.template: dest: /etc/systemd/system/mysql.service src: mysql.service.j2 + mode: "0644" register: _mysql_unitfile - name: Pull container image containers.podman.podman_image: name: docker.io/library/mysql tag: "{{ mysql_tag }}" + become: true become_user: "{{ mysql_podman_user }}" diff --git a/ansible/roles/mysql/tasks/main.yml b/ansible/roles/mysql/tasks/main.yml index 2b65e84..cc29fba 100644 --- a/ansible/roles/mysql/tasks/main.yml +++ b/ansible/roles/mysql/tasks/main.yml @@ -1,2 +1,3 @@ -- import_tasks: install.yml -- import_tasks: configure.yml +--- +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/nhc/README.md b/ansible/roles/nhc/README.md index 8831e0e..a826932 100644 --- a/ansible/roles/nhc/README.md +++ b/ansible/roles/nhc/README.md @@ -22,6 +22,7 @@ compute ``` When the `anisble/site.yml` playbook is run this will automatically: + 1. Add NHC-related configuration to the `slurm.conf` Slurm configuration file. The default configuration is defined in `openhpc_config_nhc` (see [environments/common/inventory/group_vars/all/openhpc.yml](../../../environments/common/inventory/group_vars/all/openhpc.yml)). @@ -33,10 +34,11 @@ When the `anisble/site.yml` playbook is run this will automatically: 2. Template out node health check rules using Ansible facts for each compute node. Currently these check: - - Filesystem mounts - - Ethernet interfaces - See `/etc/nhc/nhc.conf` on a compute node for the full configuration. + - Filesystem mounts + - Ethernet interfaces + + See `/etc/nhc/nhc.conf` on a compute node for the full configuration. If a node healthcheck run fails, Slurm will mark the node `DOWN`. With the default [alerting configuration](../../../docs/alerting.md) this will trigger @@ -52,15 +54,17 @@ an alert. ## Structure This role contains 3x task files, which run at different times: + - `main.yml`: Runs from `site.yml` -> `slurm.yml`. Templates health check configuration to nodes. - `export.yml`: Runs from `site.yml` -> `final.yml` via role `compute_init` tasks `export.yml`. Templates health check configuration to the cluster NFS - share for compute-init. + share for compute-init. - `boot.yml`: Runs on boot via `compute_init/files/compute-init.yml`. Copies the node's generated health check configuration from the cluster share to local disk. Note that the `stackhpc.openhpc` role: + - Installs the required package - Configures slurm.conf parameterss diff --git a/ansible/roles/nhc/tasks/export.yml b/ansible/roles/nhc/tasks/export.yml index afa440f..d6b1120 100644 --- a/ansible/roles/nhc/tasks/export.yml +++ b/ansible/roles/nhc/tasks/export.yml @@ -3,4 +3,5 @@ ansible.builtin.template: src: "{{ nhc_config_template }}" dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/nhc.conf" + mode: "0644" delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/nhc/tasks/main.yml b/ansible/roles/nhc/tasks/main.yml index 5f6034f..a507113 100644 --- a/ansible/roles/nhc/tasks/main.yml +++ b/ansible/roles/nhc/tasks/main.yml @@ -1,4 +1,4 @@ - +--- - name: Ensure NHC configuration directory exists # When running site.yml after login/control upgrade, nhc group might be # enabled in repo, but as the compute nodes have not yet been upgraded they diff --git a/ansible/roles/ofed/README.md b/ansible/roles/ofed/README.md index 7d4bb60..9eab86f 100644 --- a/ansible/roles/ofed/README.md +++ b/ansible/roles/ofed/README.md @@ -6,20 +6,21 @@ > instead. 
This role installs Mellanox OFED: + - It checks that the running kernel is the latest installed one, and errors if not. - Installation uses the `mlnxofedinstall` command, with support for the running kernel -and (by default) without firmware updates. + and (by default) without firmware updates. As OFED installation takes a long time generally this should only be used during image build, for example by setting: -``` +```yaml environments/groups//groups: [ofed:children] builder ``` -# Role variables +## Role variables See `defaults/main.yml` diff --git a/ansible/roles/ofed/defaults/main.yml b/ansible/roles/ofed/defaults/main.yml index 63caf24..422ccc1 100644 --- a/ansible/roles/ofed/defaults/main.yml +++ b/ansible/roles/ofed/defaults/main.yml @@ -1,4 +1,6 @@ -ofed_version: '24.10-3.2.5.0' # LTS +--- +ofed_version: "24.10-3.2.5.0" # LTS +# yamllint disable-line rule:line-length ofed_download_url: https://content.mellanox.com/ofed/MLNX_OFED-{{ ofed_version }}/MLNX_OFED_LINUX-{{ ofed_version }}-{{ ofed_distro }}{{ ofed_distro_version }}-{{ ofed_arch }}.tgz ofed_distro: rhel # NB: not expected to work on other distros due to installation differences ofed_distro_version: "{{ ansible_distribution_version }}" # e.g. '8.9' diff --git a/ansible/roles/ofed/tasks/install.yml b/ansible/roles/ofed/tasks/install.yml index 45f341b..1532fa4 100644 --- a/ansible/roles/ofed/tasks/install.yml +++ b/ansible/roles/ofed/tasks/install.yml @@ -1,30 +1,34 @@ +--- - name: Get installed kernels - command: dnf list --installed kernel + ansible.builtin.command: dnf list --installed kernel register: _ofed_dnf_kernels changed_when: false - name: Determine running kernel - command: uname -r # e.g. 4.18.0-513.18.1.el8_9.x86_64 + ansible.builtin.command: uname -r register: _ofed_loaded_kernel changed_when: false - name: Check current kernel is newest installed - assert: + ansible.builtin.assert: that: _ofed_kernel_current == _ofed_dnf_kernels_newest fail_msg: "Kernel {{ _ofed_loaded_kernel.stdout }} is loaded but newer {{ _ofed_dnf_kernels_newest }} is installed: consider rebooting?" vars: + # yamllint disable rule:line-length _ofed_kernel_current: >- {{ _ofed_loaded_kernel.stdout | regex_replace('\.(?:.(?!\.))+$', '') | regex_replace('\.(?:.(?!\.))+$', '') }} _ofed_dnf_kernels_newest: >- - {{ _ofed_dnf_kernels.stdout_lines[1:] | map('split') | map(attribute=1) | map('regex_replace', '\.(?:.(?!\.))+$', '') | community.general.version_sort | last }} - # dnf line format e.g. "kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos " + {{ _ofed_dnf_kernels.stdout_lines[1:] | map('split') | map(attribute=1) | map('regex_replace', '\.(?:.(?!\.))+$', '') | community.general.version_sort | last + }} + # yamllint enable rule:line-length + # dnf line format e.g. 
"kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos " - name: Enable epel - dnf: + ansible.builtin.dnf: name: epel-release - name: Check for existing OFED installation - command: ofed_info + ansible.builtin.command: ofed_info changed_when: false failed_when: - _ofed_info.rc > 0 @@ -32,7 +36,7 @@ register: _ofed_info - name: Install build prerequisites - dnf: + ansible.builtin.dnf: name: "{{ ofed_build_packages + (ofed_build_rl8_packages if ofed_distro_major_version == '8' else []) }}" when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout" # don't want to install a load of prereqs unnecessarily @@ -41,13 +45,13 @@ ansible.builtin.unarchive: src: "{{ ofed_download_url }}" dest: "{{ ofed_tmp_dir }}" - remote_src: yes - become: no + remote_src: true + become: false when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout" # Below from https://docs.nvidia.com/networking/display/mlnxofedv24010331/user+manual -- name: Run OFED install script - command: +- name: Run OFED install script # noqa: no-changed-when + ansible.builtin.command: cmd: > ./mlnxofedinstall --add-kernel-support @@ -63,13 +67,13 @@ async: "{{ 45 * 60 }}" # wait for up to 45 minutes poll: 15 # check every 15 seconds -- name: Update initramfs - command: +- name: Update initramfs # noqa: no-changed-when + ansible.builtin.command: cmd: dracut -f when: '"update your initramfs" in _ofed_install.stdout | default("")' failed_when: false # always shows errors due to deleted modules for inbox RDMA drivers -- name: Load the new driver - command: +- name: Load the new driver # noqa: no-changed-when + ansible.builtin.command: cmd: /etc/init.d/openibd restart when: '"To load the new driver" in _ofed_install.stdout | default("")' diff --git a/ansible/roles/ofed/tasks/main.yml b/ansible/roles/ofed/tasks/main.yml index e7a272f..df97825 100644 --- a/ansible/roles/ofed/tasks/main.yml +++ b/ansible/roles/ofed/tasks/main.yml @@ -1 +1,2 @@ -- include_tasks: install.yml +--- +- ansible.builtin.include_tasks: install.yml diff --git a/ansible/roles/openondemand/README.md b/ansible/roles/openondemand/README.md index 099276c..b1fb673 100644 --- a/ansible/roles/openondemand/README.md +++ b/ansible/roles/openondemand/README.md @@ -17,9 +17,14 @@ This uses the [osc.ood](https://github.com/OSC/ood-ansible) Ansible role to prov ### General - `openondemand_clusters`: Required. Synonym for [osc.ood: clusters](https://github.com/OSC/ood-ansible#clusters) role variable. -- `openondemand_servername`: Required. Synonym for [osc.ood: servername](https://github.com/OSC/ood-ansible/blob/master/defaults/main/ood_portal.yml#L27) role variable. This defines what the Open Ondemand portal's Apache server uses for the [name-based virtual host](https://httpd.apache.org/docs/current/mod/core.html#servername). It should be the IP or hostname(+domain) part of the URL used to access Open Ondemand in the browser, e.g. `ondemand.mysite.org`. **NB:** If a domain or external IP is not available, specify the host's internal IP here and use ssh with a `DynamicForward` option and a SOCKS proxy to access this address. Using ssh's `LocalForward` option is not recommended as the server name will have to be `localhost` which causes some issues. Changing this value on an already deployed cluster requires a reboot of the login node for OOD app state to be correctly refreshed. +- `openondemand_servername`: Required. Synonym for [osc.ood: servername](https://github.com/OSC/ood-ansible/blob/master/defaults/main/ood_portal.yml#L27) role variable. 
+ This defines what the Open Ondemand portal's Apache server uses for the [name-based virtual host](https://httpd.apache.org/docs/current/mod/core.html#servername). + It should be the IP or hostname(+domain) part of the URL used to access Open Ondemand in the browser, e.g. `ondemand.mysite.org`. **NB:** If a domain or external IP is not available, specify the host's internal IP here and use SSH with a `DynamicForward` option and a SOCKS proxy to access this address. + Using ssh's `LocalForward` option is not recommended as the server name will have to be `localhost` which causes some issues. + Changing this value on an already deployed cluster requires a reboot of the login node for OOD app state to be correctly refreshed. ### Authentication + See the Open Ondemand [Authentication docs](https://osc.github.io/ood-documentation/latest/authentication/overview.html) for an overview of the authentication process. - `openondemand_auth`: Required. Authentication method, either `'oidc'` or `'basic_pam'`. See relevant subsection below. @@ -28,36 +33,41 @@ See the Open Ondemand [Authentication docs](https://osc.github.io/ood-documentat - `openondemand_username`: The remote authenticated username. See also `openondemand_oidc_remote_user_claim` if using OIDC authentication. #### OIDC authentication + The following variables are active when `openondemand_auth` is `oidc`. This role uses the variables below plus a few required defaults to set the `osc.ood: ood_auth_openidc` [variable](https://github.com/OSC/ood-ansible#open-id-connect) - if the below is insufficent to correctly configure OIDC then set `ood_auth_openidc` directly. + - `openondemand_oidc_client_id`: Required. Client ID, as specified by the OIDC provider - `openondemand_oidc_client_secret`: Required. Client secret, as specified the OIDC provider (should be vault-protected). - `openondemand_oidc_provider_url`: Required. URL including protocol for the OIDC provider. - `openondemand_oidc_crypto_passphrase`: Required. Random string (should be vault protected) - `openondemand_oidc_scope`: Optional. A space-separated string giving the [OIDC scopes](https://auth0.com/docs/configure/apis/scopes/openid-connect-scopes) to request from the OIDC provider. What is available depends on the provider. Default: `openid profile preferred_username`. -- `openondemand_oidc_remote_user_claim`: Optional. A string giving the [OIDC claim](https://auth0.com/docs/configure/apis/scopes/openid-connect-scopes#standard-claims) to use as the remote user name. What is available depends on the provider and the claims made. Default: `preferred_username`. +- `openondemand_oidc_remote_user_claim`: Optional. A string giving the [OIDC claim](https://auth0.com/docs/configure/apis/scopes/openid-connect-scopes#standard-claims) to use as the remote username. What is available depends on the provider and the claims made. Default: `preferred_username`. The OIDC provider should be configured to redirect to `https://{{ openondemand_servername }}/oidc` with scopes as appropriate for `openondemand_oidc_scope`. - #### Basic/PAM authentication + This option uses HTTP Basic Authentication (i.e. browser prompt) to get a username and password. This is then checked against an existing local user using PAM. Note that HTTPS is configured by default, so the password is protected in transit, although there are [other](https://security.stackexchange.com/a/990) security concerns with Basic Authentication. No other authentication options are required for this method. 
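To make the authentication variables above concrete, a minimal sketch of the relevant group_vars for each method is shown below; the client ID, provider URL and vault variable names are illustrative assumptions rather than values shipped with this role.

```yaml
# OIDC authentication -- placeholder values:
openondemand_auth: oidc
openondemand_oidc_client_id: ondemand
openondemand_oidc_client_secret: "{{ vault_openondemand_oidc_client_secret }}"    # assumed vault-protected
openondemand_oidc_provider_url: https://keycloak.mysite.org/realms/hpc
openondemand_oidc_crypto_passphrase: "{{ vault_openondemand_crypto_passphrase }}" # assumed vault-protected

# Or, Basic/PAM authentication against existing local users:
# openondemand_auth: basic_pam
```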
### SSL Certificates + This role enables SSL on the Open Ondemand server, using the following self-signed certificate & key which are autogenerated by the `mod_ssl` package installed as part of the `ondemand-apache` package. Replace with your own keys if required. + - `openondemand_ssl_cert`: Optional. Default `/etc/pki/tls/certs/localhost.crt`. - `openondemand_ssl_cert_key`: Optional. Default `/etc/pki/tls/private/localhost.key` ### Dashboard and application configuration + - `openondemand_dashboard_docs_url`: Optional. URL of docs to show under Help in dashboard. Default `(undefined)`. - `openondemand_dashboard_links`: Optional. List of mappings defining additional links to add as menu items in the dashboard. Keys are: - - `name`: Required. User-facing name for the link. - - `category`: Required. Menu to add link under, either a default one (e.g. `Files`, `Jobs`, `Clusters`, `Interactive Apps`) or a new category to add. - - `icon`: Optional. URL of icon, defaults to Open Ondemand clock icon as used in standard menus. - - `url`: Required. URL of link. - - `new_window`: Optional. Whether to open link in new window. Bool, default `false`. - - `app_name`: Optional. Unique name for app appended to `/var/www/ood/apps/sys/`. Default is `name`, useful if that is not unique or not suitable as a path component. + - `name`: Required. User-facing name for the link. + - `category`: Required. Menu to add link under, either a default one (e.g. `Files`, `Jobs`, `Clusters`, `Interactive Apps`) or a new category to add. + - `icon`: Optional. URL of icon, defaults to Open Ondemand clock icon as used in standard menus. + - `url`: Required. URL of link. + - `new_window`: Optional. Whether to open link in new window. Bool, default `false`. + - `app_name`: Optional. Unique name for app appended to `/var/www/ood/apps/sys/`. Default is `name`, useful if that is not unique or not suitable as a path component. - `openondemand_dashboard_support_url`: Optional. URL or email etc to show as support contact under Help in dashboard. Default `(undefined)`. - `openondemand_desktop_partition`: Optional. Name of Slurm partition to use for remote desktops. Requires a corresponding group named "openondemand_desktop" and entry in openhpc_partitions. - `openondemand_desktop_screensaver`: Optional. Whether to enable screen locking/screensaver. **NB:** Users must have passwords if this is enabled. Bool, default `false`. @@ -65,16 +75,19 @@ This role enables SSL on the Open Ondemand server, using the following self-sign - `openondemand_jupyter_partition`: Required. Name of Slurm partition to use for Jupyter Notebook servers. Requires a corresponding group named "openondemand_jupyter" and entry in openhpc_partitions. ### Monitoring + - `openondemand_exporter`: Optional. Install the Prometheus [ondemand_exporter](https://github.com/OSC/ondemand_exporter) on the `openondemand` node to export metrics about Open Ondemand itself. Default `true`. ### Proxying + The Open Ondemand portal can proxy other servers. Variables: -- `openondemand_host_regex`: Synomyn for the `osc.ood: host_regex` [variable](https://osc.github.io/ood-documentation/latest/app-development/interactive/setup/enable-reverse-proxy.html). A Python regex matching servernames which Open Ondemand should proxy. Enables proxying and restricts which addresses are proxied (for security). E.g. 
this might be: +- `openondemand_host_regex`: Synomyn for the `osc.ood: host_regex` [variable](https://osc.github.io/ood-documentation/latest/app-development/interactive/setup/enable-reverse-proxy.html). A Python regular expression matching servernames which Open Ondemand should proxy. Enables proxying and restricts which addresses are proxied (for security). E.g. this might be: `'({{ openhpc_cluster_name }}-compute-\d+)|({{ groups["grafana"] | first }})'` to proxy: + - All "compute" nodes, e.g. for Open Ondemand interactive apps such as remote desktop and Jupyter notebook server. - The Grafana server - note a link to Grafana is always added to the Open Ondemand dashboard. @@ -83,21 +96,22 @@ The Open Ondemand portal can proxy other servers. Variables: - `openondemand_node_proxy_directives`: Optional, default ''. Multiline string to insert into Apache directives definition for `node_uri` ([docs](https://osc.github.io/ood-documentation/master/reference/files/ood-portal-yml.html#configure-reverse-proxy)). Note that: + - If Open Ondemand and Grafana are deployed, Grafana is automatically configured so that proxying it through Open Ondemand works. - The `osc.ood` role variables `node_uri` and `rnode_uri` are set automatically if `openondemand_host_regex` is set. -# Dependencies +## Dependencies - `osc.ood` role as described above. -# Example Playbook +## Example Playbook See `ansible/portal.yml`. Note the `main` playbook should be run on the `openondemand` node (i.e. the node to configure as hosting the Open Ondemand server/portal), and the other playbooks should be run on some subset of the `compute` group. -# License +## License Apache v2 -# Author Information +## Author Information Stackhpc Ltd. diff --git a/ansible/roles/openondemand/defaults/main.yml b/ansible/roles/openondemand/defaults/main.yml index 851804e..86fb49f 100644 --- a/ansible/roles/openondemand/defaults/main.yml +++ b/ansible/roles/openondemand/defaults/main.yml @@ -3,7 +3,6 @@ # Authentication: openondemand_auth: # "oidc" or "basic_pam" openondemand_mapping_users: [] - ## Variables for `openondemand_auth=oidc` : openondemand_oidc_client_id: openondemand_oidc_client_secret: @@ -19,10 +18,10 @@ openondemand_ssl_cert_key: /etc/pki/tls/private/localhost.key # Dashboard and application config: openondemand_dashboard_docs_url: (undefined) openondemand_dashboard_support_url: (undefined) -openondemand_desktop_partition: '' +openondemand_desktop_partition: "" openondemand_desktop_screensaver: false openondemand_filesapp_paths: [] -openondemand_jupyter_partition: '' +openondemand_jupyter_partition: "" openondemand_dashboard_links: [] openondemand_rstudio_partition: '' openondemand_matlab_partition: '' @@ -33,11 +32,10 @@ openondemand_exporter: true # Synonyms for osc:ood role vars: openondemand_clusters: {} # synonym for osc.ood:clusters -openondemand_servername: '' +openondemand_servername: "" openondemand_host_regex: - # Other: -openondemand_node_proxy_directives: '' # Added to Apache directives for `node_uri` forwarding. +openondemand_node_proxy_directives: "" # Added to Apache directives for `node_uri` forwarding. 
openondemand_auth_defaults: # Defaults for OIDC auth - keys are osc.ood vars & can be overriden using the osc.ood var name in inventory @@ -54,23 +52,23 @@ openondemand_auth_defaults: OIDCScope: "{{ openondemand_oidc_scope }}" OIDCRemoteUserClaim: "{{ openondemand_oidc_remote_user_claim }}" httpd_auth: # ood_portal.yml.j2 - - 'AuthType openid-connect' - - 'Require valid-user' - - 'ProxyPreserveHost On' # see under https://grafana.com/blog/2022/02/08/grafana-7.5.15-and-8.3.5-released-with-moderate-severity-security-fixes/ + - "AuthType openid-connect" + - "Require valid-user" + - "ProxyPreserveHost On" # see under https://grafana.com/blog/2022/02/08/grafana-7.5.15-and-8.3.5-released-with-moderate-severity-security-fixes/ user_map_cmd: /opt/ood/ood_auth_map/bin/ood_auth_map.mapfile user_map_match: none - + # Defaults for basic/PAM auth - see https://osc.github.io/ood-documentation/latest/authentication/pam.html basic_pam: httpd_auth: # ood_portal.yml.j2 - - 'AuthType Basic' + - "AuthType Basic" - 'AuthName "Open OnDemand"' - - 'AuthBasicProvider PAM' - - 'AuthPAMService ood' - - 'Require valid-user' - - 'ProxyPreserveHost On' # see under https://grafana.com/blog/2022/02/08/grafana-7.5.15-and-8.3.5-released-with-moderate-severity-security-fixes/ + - "AuthBasicProvider PAM" + - "AuthPAMService ood" + - "Require valid-user" + - "ProxyPreserveHost On" # see under https://grafana.com/blog/2022/02/08/grafana-7.5.15-and-8.3.5-released-with-moderate-severity-security-fixes/ user_map_cmd: null - user_map_match: '.*' + user_map_match: ".*" # The below mapping is used to override osc.ood defaults. Keys are osc.ood variable names. # If you need to override *these* defaults (i.e. this role's vars are not sufficent) just set the corresponding osc.ood var as normal. @@ -94,7 +92,7 @@ openondemand_osc_ood_defaults: - SSLHonorCipherOrder On - SSLCompression off - SSLSessionTickets Off - + # User mapping: user_map_cmd: "{{ openondemand_auth_defaults[openondemand_auth | lower].user_map_cmd }}" user_map_match: "{{ openondemand_auth_defaults[openondemand_auth | lower].user_map_match }}" @@ -106,4 +104,4 @@ openondemand_osc_ood_defaults: openondemand_code_server_version: 4.102.2 openondemand_rstudio_version: 2025.05.1-513 -openondemand_matlab_version: '' \ No newline at end of file +openondemand_matlab_version: '' diff --git a/ansible/roles/openondemand/files/missing_home_directory.html b/ansible/roles/openondemand/files/missing_home_directory.html index db790c9..512fb92 100644 --- a/ansible/roles/openondemand/files/missing_home_directory.html +++ b/ansible/roles/openondemand/files/missing_home_directory.html @@ -1,49 +1,54 @@ - + - - Home Directory Not Found - - - -

[Diff body unreadable: the HTML markup of missing_home_directory.html was lost during extraction. The change only re-indents and re-wraps the page; its visible text is unchanged: the "Home directory not found" heading, a notice that the user's home directory appears to be missing and that a first login over SSH may be needed to trigger its creation, and the "Open Shell to create home directory" and "Restart Web Server" actions.]
+ diff --git a/ansible/roles/openondemand/tasks/codeserver_compute.yml b/ansible/roles/openondemand/tasks/codeserver_compute.yml index 7b39bf7..6f178c5 100644 --- a/ansible/roles/openondemand/tasks/codeserver_compute.yml +++ b/ansible/roles/openondemand/tasks/codeserver_compute.yml @@ -1,24 +1,25 @@ - name: Download Code Server RPM ansible.builtin.get_url: - url: "https://github.com/coder/code-server/releases/download/v{{ openondemand_code_server_version }}/code-server-{{ openondemand_code_server_version }}-amd64.rpm" + url: "https://github.com/coder/code-server/releases/download/v{{ openondemand_code_server_version }}/code-server-{{ openondemand_code_server_version }}-amd64.rpm" # noqa: yaml[line-length] dest: /tmp/code-server.rpm mode: '0644' - name: Install Code Server + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" ansible.builtin.dnf: name: /tmp/code-server.rpm state: present - disable_gpg_check: yes + disable_gpg_check: true - name: Create module directory for Code Server ansible.builtin.file: path: /opt/ohpc/pub/modulefiles/code-server state: directory mode: '0755' - recurse: yes + recurse: true - name: Create modulefile for Code Server - copy: + ansible.builtin.copy: dest: "/opt/ohpc/pub/modulefiles/code-server/{{ openondemand_code_server_version }}" mode: "0644" content: | diff --git a/ansible/roles/openondemand/tasks/config_changes.yml b/ansible/roles/openondemand/tasks/config_changes.yml index f83c670..835411d 100644 --- a/ansible/roles/openondemand/tasks/config_changes.yml +++ b/ansible/roles/openondemand/tasks/config_changes.yml @@ -1,5 +1,6 @@ +--- - name: Add Apache directives for node_uri forwarding - blockinfile: + ansible.builtin.blockinfile: path: /opt/ood/ood-portal-generator/templates/ood-portal.conf.erb block: "{{ openondemand_node_proxy_directives }}" insertafter: ' Header edit Set-Cookie "\^\(\[\^;\]\+\)" "\$1; Path=<%= @node_uri %>\/%{MATCH_HOST}e\/%{MATCH_PORT}e"' diff --git a/ansible/roles/openondemand/tasks/exporter.yml b/ansible/roles/openondemand/tasks/exporter.yml index e3c387a..f9100f7 100644 --- a/ansible/roles/openondemand/tasks/exporter.yml +++ b/ansible/roles/openondemand/tasks/exporter.yml @@ -1,10 +1,11 @@ +--- - name: Install ondemand prometheus exporter - yum: + ansible.builtin.dnf: name: ondemand_exporter when: openondemand_exporter - name: Start and enable ondemand prometheus exporter - service: + ansible.builtin.service: name: ondemand_exporter enabled: true state: started diff --git a/ansible/roles/openondemand/tasks/jupyter_compute.yml b/ansible/roles/openondemand/tasks/jupyter_compute.yml index a87d07d..6df0c78 100644 --- a/ansible/roles/openondemand/tasks/jupyter_compute.yml +++ b/ansible/roles/openondemand/tasks/jupyter_compute.yml @@ -1,32 +1,32 @@ +--- # Should be run on compute nodes you want to run jupyter notebook on # See https://osc.github.io/ood-documentation/latest/app-development/tutorials-interactive-apps/add-jupyter/software-requirements.html # - Will already have openssl and lmod - name: Ensure python3.9 installed - dnf: + ansible.builtin.dnf: name: python39 tags: install - name: Install jupyter venv # Requires separate step so that the upgraded pip is used to install packages - pip: + ansible.builtin.pip: name: pip - state: latest + state: latest # noqa: package-latest virtualenv: /opt/jupyter-py39 virtualenv_command: python3.9 -m venv tags: install - name: Copy jupyter requirements file - copy: + ansible.builtin.copy: src: jupyter_requirements.txt dest: 
/opt/jupyter-py39/jupyter_requirements.txt + mode: "0644" tags: install - name: Install jupyter package in venv - pip: + ansible.builtin.pip: virtualenv: /opt/jupyter-py39 virtualenv_command: python3.9 -m venv requirements: /opt/jupyter-py39/jupyter_requirements.txt tags: install - - diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml index bd5706e..783be89 100644 --- a/ansible/roles/openondemand/tasks/main.yml +++ b/ansible/roles/openondemand/tasks/main.yml @@ -1,7 +1,6 @@ --- - - name: Set osc.ood variables from this role's defaults if no overriding inventory var - set_fact: + ansible.builtin.set_fact: "{{ item.key }}": "{{ lookup('vars', item.key, default=item.value) }}" loop: "{{ openondemand_osc_ood_defaults | dict2items }}" when: (item.key in hostvars[inventory_hostname]) or (item.value) @@ -14,47 +13,48 @@ file: "{{ playbook_dir }}/roles/osc.ood/vars/Rocky/{{ ansible_distribution_major_version }}.yml" # if using PAM auth we need apache installed but NOT started so split the osc.ood role up: -- include_role: +- ansible.builtin.include_role: name: osc.ood tasks_from: install-package.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" when: appliances_mode != 'configure' # can't set vars: from a dict hence the workaround above -- include_tasks: +- ansible.builtin.include_tasks: file: pam_auth.yml when: openondemand_auth | lower == 'basic_pam' -- include_tasks: +- ansible.builtin.include_tasks: file: config_changes.yml # The configure.yml playbook needs vars from Rocky (for nginx) and main if using OIDC auth. However vars_from doensn't take a list. # include_vars doens't interpolate from role vars, so we use that for main.yml which only requires things we override in the appliance inventory # and use vars_from for Rocky which requires interpolation from role vars. 
-#- include_vars: -# file: roles/osc.ood/vars/main.yml +# - include_vars: +# file: roles/osc.ood/vars/main.yml -- include_role: +- ansible.builtin.include_role: name: osc.ood tasks_from: configure.yml vars_from: main.yml - public: yes + public: true -- include_role: +- ansible.builtin.include_role: name: osc.ood tasks_from: install-apps.yml when: ood_install_apps -- include_role: +- ansible.builtin.include_role: name: osc.ood tasks_from: apps.yml # vars_from: Rocky.yml when: ood_apps - name: Ensure post_tasks dirs exists - file: + ansible.builtin.file: path: "{{ item }}" state: directory + mode: "0755" loop: # - /etc/ood/config/apps/dashboard/initializers - /etc/ood/config/locales @@ -62,15 +62,15 @@ - /etc/ood/config/pun/html - name: Create dashboard additional config directory - file: + ansible.builtin.file: path: /etc/ood/config/apps/dashboard/initializers state: directory - recurse: yes + recurse: true owner: root mode: o=rwX,go=rX - name: Create additional shortcuts in Files app - template: + ansible.builtin.template: src: files_shortcuts.rb.j2 dest: /etc/ood/config/apps/dashboard/initializers/ood.rb owner: root @@ -78,21 +78,22 @@ when: openondemand_filesapp_paths - name: Create job template directory - file: + ansible.builtin.file: path: "/etc/ood/config/apps/myjobs/templates/" state: directory - recurse: True + recurse: true owner: root group: root mode: o=rwX,go=rX - name: Copy web page to let users create their home directory - copy: + ansible.builtin.copy: src: missing_home_directory.html dest: /etc/ood/config/pun/html/missing_home_directory.html + mode: "0644" - name: Create mapping directory - file: + ansible.builtin.file: path: /etc/grid-security state: directory owner: root @@ -101,7 +102,7 @@ when: openondemand_mapping_users - name: Create mapping file - template: + ansible.builtin.template: dest: /etc/grid-security/grid-mapfile src: grid-mapfile.j2 owner: root @@ -110,15 +111,17 @@ when: openondemand_mapping_users - name: Create app directories for dashboard links - file: + ansible.builtin.file: path: /var/www/ood/apps/sys/{{ item.app_name | default(item.name) }} state: directory + mode: "0755" loop: "{{ openondemand_dashboard_links }}" - name: Create app manifests for dashboard links - template: + ansible.builtin.template: src: dashboard_app_links.yml.j2 dest: /var/www/ood/apps/sys/{{ item.app_name | default(item.name) }}/manifest.yml + mode: "0644" loop: "{{ openondemand_dashboard_links }}" # - name: Ensure ondemand-dex is running and active @@ -137,13 +140,13 @@ # - /usr/share/ondemand-dex/web/themes/ - name: Keyscan login host - command: + ansible.builtin.command: cmd: "ssh-keyscan {{ openondemand_clusters.slurm.v2.login.host }}" register: _openondemand_login_key changed_when: false - name: Add login hostkeys to known hosts - blockinfile: + ansible.builtin.blockinfile: path: /etc/ssh/ssh_known_hosts create: true block: "{{ _openondemand_login_key.stdout_lines | sort | join('\n') }}" diff --git a/ansible/roles/openondemand/tasks/pam_auth.yml b/ansible/roles/openondemand/tasks/pam_auth.yml index 6bc4bda..2cf8a5b 100644 --- a/ansible/roles/openondemand/tasks/pam_auth.yml +++ b/ansible/roles/openondemand/tasks/pam_auth.yml @@ -1,31 +1,31 @@ # https://osc.github.io/ood-documentation/latest/authentication/pam.html --- - name: Install Apache PAM module # Extracted from start of roles/openondemand/tasks/pam_auth.yml to ensure only installed during build - yum: + ansible.builtin.dnf: name: mod_authnz_pam - name: Enable Apache PAM module - lineinfile: + 
ansible.builtin.lineinfile: path: /etc/httpd/conf.modules.d/55-authnz_pam.conf line: LoadModule authnz_pam_module modules/mod_authnz_pam.so regexp: ^LoadModule authnz_pam_module modules/mod_authnz_pam.so - name: Set PAM service # TODO: might need subsequent modification?? - command: + ansible.builtin.command: cmd: cp /etc/pam.d/sshd /etc/pam.d/ood creates: /etc/pam.d/ood - name: Allow the Apache user to read /etc/shadow - file: + ansible.builtin.file: path: /etc/shadow - mode: 0640 + mode: "0640" group: apache - name: Allow httpd access to PAM in SELinux ansible.posix.seboolean: name: httpd_mod_auth_pam - state: yes - persistent: yes + state: true + persistent: true when: ansible_facts.selinux.status == 'enabled' # TODO: do we need to restart OOD here?? diff --git a/ansible/roles/openondemand/tasks/rstudio_compute.yml b/ansible/roles/openondemand/tasks/rstudio_compute.yml index 99dd83a..8cb3c91 100644 --- a/ansible/roles/openondemand/tasks/rstudio_compute.yml +++ b/ansible/roles/openondemand/tasks/rstudio_compute.yml @@ -9,22 +9,23 @@ - name: Download RStudio Server RPM ansible.builtin.get_url: - url: "https://download2.rstudio.org/server/rhel{{ ansible_distribution_major_version }}/x86_64/rstudio-server-rhel-{{ openondemand_rstudio_version }}-x86_64.rpm" + url: "https://download2.rstudio.org/server/rhel{{ ansible_distribution_major_version }}/x86_64/rstudio-server-rhel-{{ openondemand_rstudio_version }}-x86_64.rpm" # noqa: yaml[line-length] dest: /tmp/rstudio-server.rpm mode: '0644' - name: Install RStudio Server + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" ansible.builtin.dnf: name: /tmp/rstudio-server.rpm state: present - disable_gpg_check: yes + disable_gpg_check: true - name: Create module directory for RStudio Server ansible.builtin.file: path: /opt/ohpc/pub/modulefiles/rstudio-server state: directory mode: '0755' - recurse: yes + recurse: true - name: Write modulefile for RStudio Server ansible.builtin.copy: diff --git a/ansible/roles/openondemand/tasks/validate.yml b/ansible/roles/openondemand/tasks/validate.yml index 92e83d3..b22f51b 100644 --- a/ansible/roles/openondemand/tasks/validate.yml +++ b/ansible/roles/openondemand/tasks/validate.yml @@ -1,4 +1,5 @@ +--- - name: Check Open Ondemand servername is defined - assert: + ansible.builtin.assert: that: openondemand_servername != '' fail_msg: "Variable `openondemand_servername` must be set on openondemand and (by default) grafana hosts. 
See ansible/roles/openondemand/README.md" diff --git a/ansible/roles/openondemand/tasks/vnc_compute.yml b/ansible/roles/openondemand/tasks/vnc_compute.yml index 8b6f6cd..1fba0cd 100644 --- a/ansible/roles/openondemand/tasks/vnc_compute.yml +++ b/ansible/roles/openondemand/tasks/vnc_compute.yml @@ -1,13 +1,15 @@ +--- # Should be run on compute nodes you want to run the graphical desktop on - name: Enable TurboVNC repo tags: install - get_url: + ansible.builtin.get_url: url: https://raw.githubusercontent.com/TurboVNC/repo/main/TurboVNC.repo dest: /etc/yum.repos.d/TurboVNC.repo + mode: "0644" - name: Install EPEL tags: install - yum: + ansible.builtin.dnf: name: epel-release - name: Check /etc/init.d @@ -28,7 +30,7 @@ - name: Install VNC-related packages tags: install - dnf: + ansible.builtin.dnf: name: - turbovnc-3.0.1 - nmap-ncat @@ -37,7 +39,7 @@ - name: Stop turbovnc service # This is not actually required - systemd: + ansible.builtin.systemd: name: tvncserver state: stopped enabled: false @@ -47,16 +49,18 @@ src: /etc/init.d.orig/ # trailing / to get contents dest: /etc/init.d remote_src: true + directory_mode: "preserve" + mode: "preserve" when: - init_d.stat.exists - not init_d.stat.islnk - name: Install Xfce desktop tags: install - yum: - name: '@Xfce' + ansible.builtin.dnf: + name: "@Xfce" when: appliances_mode != 'configure' # dnf group/module installs aren't idempotent so only run during build - + # - name: Ensure python3.9 installed # dnf: # name: python39 @@ -64,22 +68,22 @@ - name: Install websockify venv # Requires separate step so that the upgraded pip is used to install packages - pip: + ansible.builtin.pip: name: pip - state: latest + state: latest # noqa: package-latest virtualenv: /opt/websockify-py39 virtualenv_command: python3.9 -m venv tags: install - name: Install websockify package in venv - pip: + ansible.builtin.pip: name: websockify virtualenv: /opt/websockify-py39 virtualenv_command: python3 -m venv tags: install -- name: Symlink websockify to where Open Ondemand expects - file: "{{ item }}" +- name: Symlink websockify to where Open Ondemand expects # noqa: args[module] + ansible.builtin.file: "{{ item }}" loop: - path: /opt/websockify state: directory @@ -87,7 +91,7 @@ dest: /opt/websockify/run state: link - name: Disable screensaver # as users might not have passwords - yum: + ansible.builtin.dnf: name: xfce4-screensaver state: absent when: not (openondemand_desktop_screensaver | bool) diff --git a/ansible/roles/opensearch/defaults/main.yml b/ansible/roles/opensearch/defaults/main.yml index 69e7f9c..1b05521 100644 --- a/ansible/roles/opensearch/defaults/main.yml +++ b/ansible/roles/opensearch/defaults/main.yml @@ -1,9 +1,9 @@ --- # Used to set passwords -#opensearch_internal_users_path: +# opensearch_internal_users_path: opensearch_podman_user: "{{ ansible_user }}" -opensearch_version: '2.9.0' # https://hub.docker.com/r/opensearchproject/opensearch/tags +opensearch_version: "2.9.0" # https://hub.docker.com/r/opensearchproject/opensearch/tags opensearch_config_path: /usr/share/opensearch/config opensearch_data_path: /usr/share/opensearch/data opensearch_state: started # will be restarted if required diff --git a/ansible/roles/opensearch/handlers/main.yml b/ansible/roles/opensearch/handlers/main.yml index d3a040d..61f5bbf 100644 --- a/ansible/roles/opensearch/handlers/main.yml +++ b/ansible/roles/opensearch/handlers/main.yml @@ -1,7 +1,6 @@ --- - - name: Restart opensearch service - systemd: + ansible.builtin.systemd: name: opensearch.service state: "{{ 
'restarted' if 'started' in opensearch_state else opensearch_state }}" enabled: "{{ opensearch_systemd_service_enabled }}" diff --git a/ansible/roles/opensearch/tasks/archive_data.yml b/ansible/roles/opensearch/tasks/archive_data.yml index 298f66a..cb3403e 100644 --- a/ansible/roles/opensearch/tasks/archive_data.yml +++ b/ansible/roles/opensearch/tasks/archive_data.yml @@ -1,8 +1,9 @@ +--- # Remove data which was NOT indexed by Slurm Job ID # It will be re-ingested by filebeat from the slurmdbd, with that index - name: Ensure opensearch stopped - systemd: + ansible.builtin.systemd: name: opensearch state: stopped register: _opensearch_stop @@ -15,3 +16,4 @@ path: "{{ opensearch_data_path }}" dest: "{{ opensearch_data_path | dirname }}/data-{{ lookup('pipe', 'date --iso-8601=minutes') }}.tar.gz" remove: true + mode: "0644" diff --git a/ansible/roles/opensearch/tasks/certs.yml b/ansible/roles/opensearch/tasks/certs.yml index e40f652..4eee580 100644 --- a/ansible/roles/opensearch/tasks/certs.yml +++ b/ansible/roles/opensearch/tasks/certs.yml @@ -1,5 +1,6 @@ +--- - name: Ensure host certs directory exists - file: + ansible.builtin.file: path: "{{ opensearch_config_path }}/certs" state: directory owner: "{{ opensearch_podman_user }}" diff --git a/ansible/roles/opensearch/tasks/install.yml b/ansible/roles/opensearch/tasks/install.yml index 9a0ffd3..0ca5ebd 100644 --- a/ansible/roles/opensearch/tasks/install.yml +++ b/ansible/roles/opensearch/tasks/install.yml @@ -1,25 +1,28 @@ +--- # safe to use during build - name: Increase maximum number of virtual memory maps # see https://opensearch.org/docs/2.0/opensearch/install/important-settings/ ansible.posix.sysctl: name: vm.max_map_count - value: '262144' + value: "262144" state: present - reload: yes + reload: true - name: Create systemd unit file - template: + ansible.builtin.template: dest: /etc/systemd/system/opensearch.service src: opensearch.service.j2 + mode: "0644" register: _opensearch_unit - name: Pull container image containers.podman.podman_image: name: docker.io/opensearchproject/opensearch tag: "{{ opensearch_version }}" + become: true become_user: "{{ opensearch_podman_user }}" -- name: Reload opensearch unit file - command: systemctl daemon-reload - when: _opensearch_unit.changed +- name: Reload opensearch unit file # noqa: no-changed-when + ansible.builtin.command: systemctl daemon-reload # noqa: command-instead-of-module + when: _opensearch_unit.changed # noqa: no-handler diff --git a/ansible/roles/opensearch/tasks/migrate-opendistro.yml b/ansible/roles/opensearch/tasks/migrate-opendistro.yml index 7cb5c81..fd239bc 100644 --- a/ansible/roles/opensearch/tasks/migrate-opendistro.yml +++ b/ansible/roles/opensearch/tasks/migrate-opendistro.yml @@ -1,3 +1,4 @@ +--- # Migrate data from existing containerised opendistro v1.12.0 to containerised opensearch 2.1.0. 
# # This relies on: @@ -22,7 +23,7 @@ dest: "{{ opensearch_data_path | dirname }}/" # copying a directory, so need to specify the parent for destination owner: "{{ opensearch_podman_user }}" group: "{{ opensearch_podman_user }}" - mode: 0770 + mode: "0770" vars: # from environments/common/inventory/group_vars/all/opendistro.yml: _default_opendistro_data_path: "{{ appliances_state_dir | default('/usr/share') }}/elasticsearch/data" diff --git a/ansible/roles/opensearch/tasks/runtime.yml b/ansible/roles/opensearch/tasks/runtime.yml index 7fe197a..7247f15 100644 --- a/ansible/roles/opensearch/tasks/runtime.yml +++ b/ansible/roles/opensearch/tasks/runtime.yml @@ -1,55 +1,54 @@ --- - - name: Check for existing opendistro service - stat: + ansible.builtin.stat: path: /etc/systemd/system/opendistro.service register: _opensearch_opendistro_service - name: Migrate opendistro data - import_tasks: + ansible.builtin.import_tasks: file: migrate-opendistro.yml when: _opensearch_opendistro_service.stat.exists - name: Remove opendistro service - file: + ansible.builtin.file: path: /etc/systemd/system/opendistro.service state: absent - name: Enumerate files in data directory - find: + ansible.builtin.find: path: "{{ opensearch_data_path }}" register: _find_opensearch_data - name: Archive incorrectly indexed data - import_tasks: archive_data.yml + ansible.builtin.import_tasks: archive_data.yml when: - _find_opensearch_data.files | length > 0 - "'slurm_jobid_index' not in _find_opensearch_data.files | map(attribute='path') | map('basename')" - name: Ensure required opensearch host directories exist - file: + ansible.builtin.file: state: directory path: "{{ item }}" owner: "{{ opensearch_podman_user }}" group: "{{ opensearch_podman_user }}" - mode: 0770 + mode: "0770" become: true loop: - "{{ opensearch_config_path }}" - "{{ opensearch_data_path }}" - name: Set indexed data flag - copy: + ansible.builtin.copy: dest: "{{ opensearch_data_path }}/slurm_jobid_index" content: | This is a flag file to indicate that filebeat is pushing data indexed by Slurm JobID to prevent duplicate OpenSearch records owner: "{{ opensearch_podman_user }}" group: "{{ opensearch_podman_user }}" + mode: "0644" - name: Create certs - import_tasks: certs.yml - + ansible.builtin.import_tasks: certs.yml - name: Template general configuration ansible.builtin.template: src: opensearch.yml.j2 @@ -58,27 +57,26 @@ group: "{{ opensearch_podman_user }}" # NOTE: root user in container maps to user on host, so this will appear as # owned by root in the container. - mode: 0660 + mode: "0660" notify: Restart opensearch service become: true - name: Template internal user configuration - template: - src: "{{ opensearch_internal_users_path }}" - dest: "{{ opensearch_config_path }}/internal_users.yml" - owner: "{{ opensearch_podman_user }}" - group: "{{ opensearch_podman_user }}" - # NOTE: root user in container maps to user on host, so this will appear as - # owned by root in the container. - mode: 0660 + ansible.builtin.template: + src: "{{ opensearch_internal_users_path }}" + dest: "{{ opensearch_config_path }}/internal_users.yml" + owner: "{{ opensearch_podman_user }}" + group: "{{ opensearch_podman_user }}" + # NOTE: root user in container maps to user on host, so this will appear as + # owned by root in the container. 
+ mode: "0660" notify: Restart opensearch service become: true - name: Flush handlers - meta: flush_handlers - + ansible.builtin.meta: flush_handlers - name: Ensure opensearch service state - systemd: + ansible.builtin.systemd: name: opensearch.service state: "{{ opensearch_state }}" enabled: "{{ opensearch_systemd_service_enabled }}" diff --git a/ansible/roles/passwords/defaults/main.yml b/ansible/roles/passwords/defaults/main.yml index 95e3b6a..a848431 100644 --- a/ansible/roles/passwords/defaults/main.yml +++ b/ansible/roles/passwords/defaults/main.yml @@ -1,7 +1,9 @@ --- slurm_appliance_secrets: + # yamllint disable-line rule:line-length vault_grafana_admin_password: "{{ secrets_openhpc_grafana_admin_password | default(vault_grafana_admin_password | default(lookup('password', '/dev/null'))) }}" + # yamllint disable-line rule:line-length vault_elasticsearch_admin_password: "{{ secrets_openhpc_elasticsearch_admin_password | default(vault_elasticsearch_admin_password | default(lookup('password', '/dev/null'))) }}" vault_mysql_root_password: "{{ secrets_openhpc_mysql_root_password | default(vault_mysql_root_password | default(lookup('password', '/dev/null'))) }}" vault_mysql_slurm_password: "{{ secrets_openhpc_mysql_slurm_password | default(vault_mysql_slurm_password | default(lookup('password', '/dev/null'))) }}" @@ -16,4 +18,5 @@ slurm_appliance_secrets: secrets_openhpc_mungekey_default: content: "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') }}" +# yamllint disable-line rule:line-length openhpc_passwords_output_path: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') | default(undefined, true) | mandatory('You must define the APPLIANCES_ENVIRONMENT_ROOT environment variable') }}/inventory/group_vars/all/secrets.yml" diff --git a/ansible/roles/passwords/tasks/main.yml b/ansible/roles/passwords/tasks/main.yml index 743a6cd..cb41cbb 100644 --- a/ansible/roles/passwords/tasks/main.yml +++ b/ansible/roles/passwords/tasks/main.yml @@ -1,8 +1,8 @@ --- - - name: Template passwords - template: + ansible.builtin.template: src: passwords.yml dest: "{{ openhpc_passwords_output_path }}" + mode: "0644" delegate_to: localhost run_once: true diff --git a/ansible/roles/passwords/tasks/validate.yml b/ansible/roles/passwords/tasks/validate.yml index b30b069..6cde144 100644 --- a/ansible/roles/passwords/tasks/validate.yml +++ b/ansible/roles/passwords/tasks/validate.yml @@ -1,4 +1,5 @@ +--- - name: Assert secrets created - assert: + ansible.builtin.assert: that: (hostvars[inventory_hostname].keys() | select('contains', 'vault_') | length) > 1 # 1 as may have vault_demo_user_password defined in dev fail_msg: "No inventory variables 'vault_*' found: Has ansible/adhoc/generate-passwords.yml been run?" 
diff --git a/ansible/roles/persist_hostkeys/defaults/main.yml b/ansible/roles/persist_hostkeys/defaults/main.yml index 3c00004..0de0b71 100644 --- a/ansible/roles/persist_hostkeys/defaults/main.yml +++ b/ansible/roles/persist_hostkeys/defaults/main.yml @@ -1,2 +1,3 @@ +--- persist_hostkeys_state_server: "{{ groups['control'] | first }}" persist_hostkeys_state_dir: "{{ hostvars[persist_hostkeys_state_server]['appliances_state_dir'] }}/hostkeys" diff --git a/ansible/roles/persist_hostkeys/tasks/main.yml b/ansible/roles/persist_hostkeys/tasks/main.yml index deff112..139281a 100644 --- a/ansible/roles/persist_hostkeys/tasks/main.yml +++ b/ansible/roles/persist_hostkeys/tasks/main.yml @@ -1,47 +1,47 @@ --- - - name: Generate persistent hostkeys in state directory delegate_to: "{{ persist_hostkeys_state_server }}" block: - - name: Ensure hostkeys directory exists on persistent storage - file: - path: "{{ persist_hostkeys_state_dir }}" - state: directory - owner: root - group: root - mode: 0600 + - name: Ensure hostkeys directory exists on persistent storage + ansible.builtin.file: + path: "{{ persist_hostkeys_state_dir }}" + state: directory + owner: root + group: root + mode: "0600" + + - name: Check for existing hostkeys + ansible.builtin.find: + paths: "{{ persist_hostkeys_state_dir }}/" + register: _files_found - - name: Check for existing hostkeys - find: - paths: "{{ persist_hostkeys_state_dir }}/" - register: _files_found + - name: Generate hostkeys # noqa: no-changed-when + when: _files_found.matched == 0 + ansible.builtin.shell: + # ssh-keygen -A needs a directory with an /etc/ssh suffix to write hostkeys into + cmd: | + mkdir -p {{ persist_hostkeys_state_dir }}/etc/ssh + ssh-keygen -A -N '' -f {{ persist_hostkeys_state_dir }} + mv {{ persist_hostkeys_state_dir }}/etc/ssh/* {{ persist_hostkeys_state_dir }} + rm -rf {{ persist_hostkeys_state_dir }}/etc/ssh - - name: Generate hostkeys - when: _files_found.matched == 0 - shell: - # ssh-keygen -A needs a directory with an /etc/ssh suffix to write hostkeys into - cmd: | - mkdir -p {{ persist_hostkeys_state_dir }}/etc/ssh - ssh-keygen -A -N '' -f {{ persist_hostkeys_state_dir }} - mv {{ persist_hostkeys_state_dir }}/etc/ssh/* {{ persist_hostkeys_state_dir }} - rm -rf {{ persist_hostkeys_state_dir }}/etc/ssh - - - name: Get created key names - find: - path: "{{ persist_hostkeys_state_dir }}/" - register: _find_ssh_keys + - name: Get created key names + ansible.builtin.find: + path: "{{ persist_hostkeys_state_dir }}/" + register: _find_ssh_keys - - name: Create in-memory copies of keys - ansible.builtin.slurp: - src: "{{ item.path }}" - loop: "{{ _find_ssh_keys.files }}" - register: _slurp_keys + - name: Create in-memory copies of keys + ansible.builtin.slurp: + src: "{{ item.path }}" + loop: "{{ _find_ssh_keys.files }}" + register: _slurp_keys - name: Copy keys to hosts no_log: true - copy: + ansible.builtin.copy: content: "{{ item.content | b64decode }}" dest: "/etc/ssh/{{ item.source | regex_search('[^/]+$') }}" + mode: "preserve" loop: "{{ _slurp_keys.results }}" -- meta: reset_connection +- ansible.builtin.meta: reset_connection diff --git a/ansible/roles/persist_openhpc_secrets/tasks/main.yml b/ansible/roles/persist_openhpc_secrets/tasks/main.yml index e0f5865..dc12e2a 100644 --- a/ansible/roles/persist_openhpc_secrets/tasks/main.yml +++ b/ansible/roles/persist_openhpc_secrets/tasks/main.yml @@ -1,16 +1,16 @@ --- - name: Check if OpenHPC secrets exist in persistent storage - stat: + ansible.builtin.stat: path: "{{ appliances_state_dir 
}}/ansible.facts.d/openhpc_secrets.fact" register: openhpc_secrets_stat - name: Ensure Ansible facts directories exist - file: + ansible.builtin.file: path: "{{ item }}" state: directory owner: root - mode: 0600 + mode: "0600" loop: - "{{ appliances_state_dir }}/ansible.facts.d" - "/etc/ansible/facts.d" @@ -21,19 +21,19 @@ when: openhpc_secrets_stat.stat.exists - name: Write OpenHPC secrets - template: + ansible.builtin.template: src: openhpc_secrets.fact dest: "{{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact" owner: root - mode: 0600 + mode: "0600" - name: Symlink persistent facts to facts_path - file: + ansible.builtin.file: state: link src: "{{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact" dest: /etc/ansible/facts.d/openhpc_secrets.fact owner: root - + - name: Refresh facts to pick up any new secrets ansible.builtin.setup: filter: ansible_local diff --git a/ansible/roles/podman/defaults/main.yml b/ansible/roles/podman/defaults/main.yml index 8b3c9ef..fc76d06 100644 --- a/ansible/roles/podman/defaults/main.yml +++ b/ansible/roles/podman/defaults/main.yml @@ -1,2 +1,3 @@ +--- podman_users: - name: "{{ ansible_user }}" diff --git a/ansible/roles/podman/tasks/configure.yml b/ansible/roles/podman/tasks/configure.yml index 74cf1d5..962712f 100644 --- a/ansible/roles/podman/tasks/configure.yml +++ b/ansible/roles/podman/tasks/configure.yml @@ -1,7 +1,6 @@ --- - - name: Up default resource limits - copy: + ansible.builtin.copy: content: | # WARNING: This file is managed by ansible, do not modify. # This is so non-root containers can use more resources. This is useful @@ -11,6 +10,7 @@ * soft nofile 65536 * hard nofile 65536 dest: /etc/security/limits.d/custom.conf + mode: "0644" become: true - name: Up number of non-root kernel keys permitted per user @@ -36,17 +36,16 @@ value: '"cgroupfs"' become: true -- name: reset ssh connection to allow user changes to affect 'current login user' - meta: reset_connection - +- name: Reset ssh connection to allow user changes to affect 'current login user' + ansible.builtin.meta: reset_connection - name: Ensure podman users exist - user: "{{ item }}" + ansible.builtin.user: "{{ item }}" # noqa: args[module] with_items: "{{ podman_users }}" register: podman_user_info - become: yes + become: true - name: Clear up podman temporary files on startup - copy: + ansible.builtin.copy: content: | # Created by ansible # Delete ephemeral podman files to avoid issues where /tmp is not of type tmpfs and persists across reboots. 
@@ -59,5 +58,5 @@ dest: /etc/tmpfiles.d/podman-local.conf owner: root group: root - mode: 0660 + mode: "0660" become: true diff --git a/ansible/roles/podman/tasks/install.yml b/ansible/roles/podman/tasks/install.yml index 362d3a1..d7a4d86 100644 --- a/ansible/roles/podman/tasks/install.yml +++ b/ansible/roles/podman/tasks/install.yml @@ -1,8 +1,8 @@ --- - name: Install OS packages - yum: + ansible.builtin.dnf: name: - podman - python3 state: installed - become: true \ No newline at end of file + become: true diff --git a/ansible/roles/podman/tasks/main.yml b/ansible/roles/podman/tasks/main.yml index 2b65e84..2538c7f 100644 --- a/ansible/roles/podman/tasks/main.yml +++ b/ansible/roles/podman/tasks/main.yml @@ -1,2 +1,2 @@ -- import_tasks: install.yml -- import_tasks: configure.yml +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/proxy/defaults/main.yml b/ansible/roles/proxy/defaults/main.yml index f87f340..289e819 100644 --- a/ansible/roles/proxy/defaults/main.yml +++ b/ansible/roles/proxy/defaults/main.yml @@ -1,3 +1,4 @@ +--- # proxy_http_proxy: proxy_https_proxy: "{{ proxy_http_proxy }}" proxy_no_proxy_defaults: "{{ ['localhost', '127.0.0.1', '169.254.169.254'] + groups['all'] + hostvars.values() | map(attribute='ansible_host') }}" diff --git a/ansible/roles/proxy/tasks/main.yml b/ansible/roles/proxy/tasks/main.yml index b6d880f..be3898d 100644 --- a/ansible/roles/proxy/tasks/main.yml +++ b/ansible/roles/proxy/tasks/main.yml @@ -7,9 +7,9 @@ for convenience variables to set this. - name: Define configuration in /etc/environment tags: proxy - lineinfile: + ansible.builtin.lineinfile: path: "/etc/environment" - create: yes + create: true owner: root group: root mode: o=rw,go=r @@ -25,7 +25,7 @@ value: "{{ proxy_no_proxy }}" - name: Define dnf proxy - ini_file: + community.general.ini_file: path: /etc/dnf/dnf.conf section: main option: "proxy" @@ -38,7 +38,7 @@ when: proxy_dnf | bool - name: Create systemd configuration directory - file: + ansible.builtin.file: path: /etc/systemd/system.conf.d/ state: directory owner: root @@ -52,9 +52,9 @@ section: Manager option: DefaultEnvironment value: >- - "http_proxy={{ proxy_http_proxy }}" - "https_proxy={{ proxy_http_proxy }}" - "no_proxy={{ proxy_no_proxy }}" + "http_proxy={{ proxy_http_proxy }}" + "https_proxy={{ proxy_http_proxy }}" + "no_proxy={{ proxy_no_proxy }}" no_extra_spaces: true state: "{{ proxy_state }}" owner: root @@ -63,12 +63,11 @@ register: _copy_systemd_proxy when: proxy_systemd | bool -- name: Restart systemd - command: systemctl daemon-reexec - when: +- name: Restart systemd # noqa: no-changed-when + ansible.builtin.command: systemctl daemon-reexec # noqa: command-instead-of-module + when: - proxy_systemd | bool - _copy_systemd_proxy.changed | default(false) - name: Reset connection to get new /etc/environment - meta: reset_connection - # NB: conditionals not supported + ansible.builtin.meta: reset_connection diff --git a/ansible/roles/pulp_site/README.md b/ansible/roles/pulp_site/README.md index 3af801c..f860954 100644 --- a/ansible/roles/pulp_site/README.md +++ b/ansible/roles/pulp_site/README.md @@ -1,21 +1,18 @@ -pulp_site -========= +# pulp_site -Contains playbooks to deploy a Pulp server and sync its content with repo snapshots in +Contains playbooks to deploy a Pulp server and sync its content with repository snapshots in StackHPC's Ark Pulp server -Requirements ------------- +## Requirements Requires Ark credentials. 
The VM you are deploying Pulp on must allow ingress on `pulp_site_port` and not be externally accessible (as the Pulp server's content is unauthenticated). Rocky Linux 9 has been tested as the target VM for deploying Pulp. -Role Variables --------------- +## Role Variables -- `pulp_site_url`: Required str. The base url from which Pulp content will be hosted. Defaults to `{{ appliances_pulp_url }}`. - Value to set for ``appliances_pulp_url` will be generated and output by the deploy.yml playbook. +- `pulp_site_url`: Required str. The base URL from which Pulp content will be hosted. Defaults to `{{ appliances_pulp_url }}`. + Value to set for ``appliances_pulp_url` will be generated and output by the deploy.yml playbook. - `pulp_site_port`: Optional str. Port to serve Pulp server on. Defaults to `8080`. - `pulp_site_username`: Optional str. Admin username for the Pulp server. Defaults to `admin`. - `pulp_site_password`: Required str. Admin password for the Pulp server. Defaults to `{{ vault_pulp_admin_password }}`. @@ -24,13 +21,13 @@ Role Variables - `pulp_site_upstream_content_url`: Optional str. Content URL of upstream Ark Pulp. Defaults to `https://ark.stackhpc.com/pulp/content`. - `pulp_site_install_dir`: Optional str. Directory on Pulp host to install config and persistent state to be mounted into Pulp container. Defaults to `/home/rocky/pulp`. - `pulp_site_target_facts`: Optional str. The `ansible_facts` of a host which will be pulling from your Pulp server, allowing the role to auto-discover the necessary repos to pull. - defaults to `{{ hostvars[groups['pulp'][0]]['ansible_facts'] }}`. + defaults to `{{ hostvars[groups['pulp'][0]]['ansible_facts'] }}`. - `pulp_site_target_distribution_version`: Optional str. The Rocky Linux minor release to sync repos from Ark for. Defaults to `{{ pulp_site_target_facts['distribution_version'] }}`. -- `pulp_site_rpm_repo_defaults`: Optional dict. Contains key value pairs for fields which are common to all repo definition in `pulp_site_rpm_repos`. Includes values for `remote_username`, - `remote_password` and `policy` by default. -- `pulp_site_rpm_repos`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_repository`. Defaults to modified versions of repos defined in - `dnf_repos_all`. -- `pulp_site_rpm_publications`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_publication`. Defaults to list of publications for repos defined in - `dnf_repos_all`. -- `pulp_site_rpm_distributions`: Optional list of dicts. List of repo definitions in format required by the `stackhpc.pulp.pulp_distribution`. Defaults to list of distributions for repos defined in - `dnf_repos_all`. +- `pulp_site_rpm_repo_defaults`: Optional dict. Contains key-value pairs for fields which are common to all repository definition in `pulp_site_rpm_repos`. Includes values for `remote_username`, + `remote_password` and `policy` by default. +- `pulp_site_rpm_repos`: Optional list of dicts. List of repository definitions in format required by the `stackhpc.pulp.pulp_repository`. Defaults to modified versions of repos defined in + `dnf_repos_all`. +- `pulp_site_rpm_publications`: Optional list of dicts. List of repository definitions in format required by the `stackhpc.pulp.pulp_publication`. Defaults to list of publications for repos defined in + `dnf_repos_all`. +- `pulp_site_rpm_distributions`: Optional list of dicts. 
List of repository definitions in format required by the `stackhpc.pulp.pulp_distribution`. Defaults to list of distributions for repos defined in + `dnf_repos_all`. diff --git a/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py b/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py index 41e995c..76c62c9 100644 --- a/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py +++ b/ansible/roles/pulp_site/filter_plugins/pulp-list-filters.py @@ -1,19 +1,23 @@ +# pylint: disable=invalid-name, missing-module-docstring +# pylint: disable-next=missing-class-docstring, useless-object-inheritance class FilterModule(object): - def filters(self): + + def filters(self): # pylint: disable=missing-function-docstring return { - 'to_rpm_repos': self.to_rpm_repos, - 'to_rpm_pubs': self.to_rpm_pubs, - 'to_rpm_distros': self.to_rpm_distros, - 'select_repos': self.select_repos, + "to_rpm_repos": self.to_rpm_repos, + "to_rpm_pubs": self.to_rpm_pubs, + "to_rpm_distros": self.to_rpm_distros, + "select_repos": self.select_repos, } - + def select_repos(self, dnf_repos, target_distro_ver): - """ Filter dnf_repos to only those for a relevant distribution version (M.m or M). Returns a list of dicts. - Also adds pulp_repo_name field to give the repository a unique name in Pulp to be referenced by subsequent - filters + """Filter dnf_repos to only those for a relevant distribution version (M.m or M). + Returns a list of dicts. + Also adds pulp_repo_name field to give the repository a unique name in Pulp + to be referenced by subsequent filters """ - - target_distro_ver_major = target_distro_ver.split('.')[0] + + target_distro_ver_major = target_distro_ver.split(".")[0] rpm_repos = [] for repokey in dnf_repos: @@ -23,41 +27,54 @@ def select_repos(self, dnf_repos, target_distro_ver): elif target_distro_ver_major in dnf_repos[repokey]: selected_ver = target_distro_ver_major else: - raise ValueError(f'No key matching {target_distro_ver_major} or {target_distro_ver} found in f{repokey}') + raise ValueError( + # pylint: disable-next=line-too-long + f"No key matching {target_distro_ver_major} or {target_distro_ver} found in f{repokey}" + ) repo_data = dnf_repos[repokey][selected_ver] - repo_data['pulp_repo_name'] = f"{repokey}-{selected_ver}-{dnf_repos[repokey][selected_ver]['pulp_timestamp']}" + repo_data["pulp_repo_name"] = ( + f"{repokey}-{selected_ver}-{dnf_repos[repokey][selected_ver]['pulp_timestamp']}" + ) rpm_repos.append(repo_data) return rpm_repos def to_rpm_repos(self, rpm_info, content_url, repo_defaults): - """ Filter repo object list given by select_repos into dict required by the pulp_repository_rpm_repos variable - from stackhpc.pulp.pulp_repository role + """Filter repo object list given by select_repos into dict required by the + pulp_repository_rpm_repos variable from stackhpc.pulp.pulp_repository role """ rpm_repos = [] for repo_data in rpm_info: - rpm_data = repo_defaults.copy() # NB: this changes behaviour vs before, so now defaults can correctly be overriden - rpm_data['name'] = repo_data['pulp_repo_name'] - rpm_data['url'] = '/'.join([content_url, repo_data['pulp_path'], repo_data['pulp_timestamp']]) - rpm_data['state'] = 'present' + rpm_data = ( + repo_defaults.copy() + ) # NB: this changes behaviour vs before, so now defaults can correctly be overriden + rpm_data["name"] = repo_data["pulp_repo_name"] + rpm_data["url"] = "/".join( + [content_url, repo_data["pulp_path"], repo_data["pulp_timestamp"]] + ) + rpm_data["state"] = "present" rpm_repos.append(rpm_data) return rpm_repos - 
def to_rpm_pubs(self, list): - """ Filter repo object list given by select_repos into dict required by the pulp_publication_rpm variable - from stackhpc.pulp.pulp_publication role + def to_rpm_pubs(self, _list): + """Filter repo object list given by select_repos into dict required by the + pulp_publication_rpm variable from stackhpc.pulp.pulp_publication role """ - pub_list = map(lambda x: { - 'repository': x['pulp_repo_name'], - 'state': 'present' }, list) + pub_list = map( + lambda x: {"repository": x["pulp_repo_name"], "state": "present"}, _list + ) return pub_list - - def to_rpm_distros(self, list): - """ Filter repo object list given by select_repos into dict required by the pulp_distirubtion_rpm variable - from stackhpc.pulp.pulp_distribution role + + def to_rpm_distros(self, _list): + """Filter repo object list given by select_repos into dict required by the + pulp_distirubtion_rpm variable from stackhpc.pulp.pulp_distribution role """ - distro_list = map(lambda x: { - 'name': x['pulp_repo_name'], - 'repository': x['pulp_repo_name'], - 'base_path': '/'.join([x['pulp_path'],x['pulp_timestamp']]), - 'state': 'present' }, list) + distro_list = map( + lambda x: { + "name": x["pulp_repo_name"], + "repository": x["pulp_repo_name"], + "base_path": "/".join([x["pulp_path"], x["pulp_timestamp"]]), + "state": "present", + }, + _list, + ) return distro_list diff --git a/ansible/roles/pulp_site/tasks/install.yml b/ansible/roles/pulp_site/tasks/install.yml index 75b0f66..3be89d0 100644 --- a/ansible/roles/pulp_site/tasks/install.yml +++ b/ansible/roles/pulp_site/tasks/install.yml @@ -1,24 +1,25 @@ --- - - name: Install packages - dnf: + ansible.builtin.dnf: name: - - podman + - podman - name: Create install directories ansible.builtin.file: state: directory path: "{{ pulp_site_install_dir }}/{{ item }}" + mode: "0755" loop: - - settings/certs - - pulp_storage - - pgsql - - containers + - settings/certs + - pulp_storage + - pgsql + - containers - name: Template settings file ansible.builtin.template: src: settings.py.j2 dest: "{{ pulp_site_install_dir }}/settings/settings.py" + mode: "0644" - name: Install pulp podman container containers.podman.podman_container: @@ -26,30 +27,31 @@ publish: - "{{ pulp_site_port }}:80" volume: - - "{{ pulp_site_install_dir }}/settings:/etc/pulp{{ _pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/pulp_storage:/var/lib/pulp{{ _pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/pgsql:/var/lib/pgsql{{ _pulp_site_selinux_suffix }}" - - "{{ pulp_site_install_dir }}/containers:/var/lib/containers{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/settings:/etc/pulp{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/pulp_storage:/var/lib/pulp{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/pgsql:/var/lib/pgsql{{ _pulp_site_selinux_suffix }}" + - "{{ pulp_site_install_dir }}/containers:/var/lib/containers{{ _pulp_site_selinux_suffix }}" device: /dev/fuse image: docker.io/pulp/pulp:3.68.1 state: present - name: Create systemd file - copy: + ansible.builtin.copy: src: pulp.service dest: /etc/systemd/system/pulp.service + mode: "0644" register: _pulp_service - + - name: Start Pulp service - systemd: + ansible.builtin.systemd: name: pulp state: "{{ 'started' if _pulp_service.changed else 'restarted' }}" daemon_reload: "{{ _pulp_service.changed }}" enabled: true - -- name: Reset admin password once container has initialised + +- name: Reset admin password once container has initialised # noqa: no-changed-when 
no_log: true - ansible.builtin.shell: + ansible.builtin.command: cmd: "podman exec pulp bash -c 'pulpcore-manager reset-admin-password -p {{ pulp_site_password }}'" register: _admin_reset_output until: 0 == _admin_reset_output.rc diff --git a/ansible/roles/pulp_site/tasks/sync.yml b/ansible/roles/pulp_site/tasks/sync.yml index 9a2a932..670a940 100644 --- a/ansible/roles/pulp_site/tasks/sync.yml +++ b/ansible/roles/pulp_site/tasks/sync.yml @@ -1,5 +1,4 @@ --- - - ansible.builtin.assert: that: pulp_site_upstream_password != '' quiet: true @@ -9,13 +8,14 @@ ansible.builtin.file: path: ~/.config/pulp state: directory + mode: "0755" - name: Create config file no_log: true ansible.builtin.template: src: cli.toml.j2 dest: ~/.config/pulp/cli.toml - mode: '0644' + mode: "0644" - name: Wait for Pulp server pulp.squeezer.status: @@ -27,25 +27,28 @@ retries: 30 delay: 20 -- block: - - name: Ensure squeezer cache exists - ansible.builtin.file: - path: "{{ _cache_dir }}" - state: directory +- vars: + _cache_dir: "~/.cache/squeezer/{{ pulp_site_url | regex_replace(':|/', '_') }}" - - name: Check if squeezer cache is populated - ansible.builtin.stat: - path: "{{ _cache_dir }}/api.json" - register: _cache_stat + block: + - name: Ensure squeezer cache exists + ansible.builtin.file: + path: "{{ _cache_dir }}" + state: directory + mode: "0755" - - name: Prepopulate squeezer cache # workaround for race on the cache - ansible.builtin.get_url: - url: "{{ pulp_site_url }}/pulp/api/v3/docs/api.json" - dest: "{{ _cache_dir }}/api.json" - timeout: 40 - when: not _cache_stat.stat.exists - vars: - _cache_dir: "~/.cache/squeezer/{{ pulp_site_url | regex_replace( ':|/' , '_' ) }}" + - name: Check if squeezer cache is populated + ansible.builtin.stat: + path: "{{ _cache_dir }}/api.json" + register: _cache_stat + + - name: Prepopulate squeezer cache # workaround for race on the cache + ansible.builtin.get_url: + url: "{{ pulp_site_url }}/pulp/api/v3/docs/api.json" + dest: "{{ _cache_dir }}/api.json" + mode: "0644" + timeout: 40 + when: not _cache_stat.stat.exists - name: Get Pulp repos from release train ansible.builtin.include_role: diff --git a/ansible/roles/rebuild/README.md b/ansible/roles/rebuild/README.md index 4e4e87a..affc7b6 100644 --- a/ansible/roles/rebuild/README.md +++ b/ansible/roles/rebuild/README.md @@ -1,17 +1,14 @@ -rebuild -========= +# rebuild -Enables reboot tool from https://github.com/stackhpc/slurm-openstack-tools.git +Enables reboot tool from to be run from control node. -Requirements ------------- +## Requirements An OpenStack clouds.yaml file containing credentials for a cloud under the "openstack" key. -Role Variables --------------- +## Role Variables The below is only used by this role's `main.yml` task file, i.e. 
when running the `ansible/site.yml` or `ansible/slurm.yml` playbooks: diff --git a/ansible/roles/rebuild/defaults/main.yml b/ansible/roles/rebuild/defaults/main.yml index 9482836..16e2141 100644 --- a/ansible/roles/rebuild/defaults/main.yml +++ b/ansible/roles/rebuild/defaults/main.yml @@ -4,9 +4,9 @@ rebuild_clouds_path: ~/.config/openstack/clouds.yaml rebuild_job_partitions: rebuild rebuild_job_name: "rebuild-{{ item }}" # item is nodename -rebuild_job_command: 'sleep 5' +rebuild_job_command: "sleep 5" rebuild_job_reboot: true -rebuild_job_options: '' +rebuild_job_options: "" rebuild_job_user: root rebuild_job_template: >- sbatch @@ -20,4 +20,4 @@ rebuild_job_template: >- --output=/dev/null --wrap="{{ rebuild_job_command }}" {{ rebuild_job_options }} -#rebuild_job_hostlist: \ No newline at end of file +# rebuild_job_hostlist: diff --git a/ansible/roles/rebuild/tasks/configure.yml b/ansible/roles/rebuild/tasks/configure.yml index 78a3b7b..801e2ea 100644 --- a/ansible/roles/rebuild/tasks/configure.yml +++ b/ansible/roles/rebuild/tasks/configure.yml @@ -1,7 +1,7 @@ --- - name: Create /etc/openstack - file: + ansible.builtin.file: path: /etc/openstack state: directory owner: slurm @@ -9,7 +9,7 @@ mode: u=rX,g=rwX - name: Copy out clouds.yaml - copy: + ansible.builtin.copy: src: "{{ rebuild_clouds_path }}" dest: /etc/openstack/clouds.yaml owner: slurm diff --git a/ansible/roles/rebuild/tasks/install.yml b/ansible/roles/rebuild/tasks/install.yml index 1152426..1c1b63a 100644 --- a/ansible/roles/rebuild/tasks/install.yml +++ b/ansible/roles/rebuild/tasks/install.yml @@ -1,3 +1,3 @@ - name: Setup slurm tools - include_role: + ansible.builtin.include_role: name: slurm_tools diff --git a/ansible/roles/rebuild/tasks/main.yml b/ansible/roles/rebuild/tasks/main.yml index 79d326c..e5e0787 100644 --- a/ansible/roles/rebuild/tasks/main.yml +++ b/ansible/roles/rebuild/tasks/main.yml @@ -1,4 +1,4 @@ --- -- include_tasks: install.yml -- include_tasks: configure.yml +- ansible.builtin.include_tasks: install.yml +- ansible.builtin.include_tasks: configure.yml diff --git a/ansible/roles/rebuild/tasks/rebuild.yml b/ansible/roles/rebuild/tasks/rebuild.yml index 466951f..bc202df 100644 --- a/ansible/roles/rebuild/tasks/rebuild.yml +++ b/ansible/roles/rebuild/tasks/rebuild.yml @@ -1,11 +1,11 @@ +--- - name: Create rebuild jobs for partition - include_tasks: + ansible.builtin.include_tasks: file: rebuild_partition.yml args: apply: - become: yes + become: true become_user: "{{ rebuild_job_user }}" loop: "{{ rebuild_job_partitions | split(',') }}" loop_control: loop_var: _rebuild_job_current_partition - diff --git a/ansible/roles/rebuild/tasks/rebuild_partition.yml b/ansible/roles/rebuild/tasks/rebuild_partition.yml index 3b319e6..35c748a 100644 --- a/ansible/roles/rebuild/tasks/rebuild_partition.yml +++ b/ansible/roles/rebuild/tasks/rebuild_partition.yml @@ -1,4 +1,5 @@ -- name: Get list of nodes in partition +--- +- name: Get list of nodes in partition # noqa: no-changed-when ansible.builtin.command: cmd: >- sinfo @@ -9,13 +10,13 @@ register: _sinfo_partition when: rebuild_job_hostlist is not defined -- name: Expand rebuild_job_hostlist to host names +- name: Expand rebuild_job_hostlist to host names # noqa: no-changed-when ansible.builtin.command: cmd: "scontrol show hostnames {{ rebuild_job_hostlist }}" register: _scontrol_hostnames when: rebuild_job_hostlist is defined -- name: Submit rebuild jobs +- name: Submit rebuild jobs # noqa: no-changed-when ansible.builtin.command: cmd: "{{ rebuild_job_template }}" 
loop: "{{ _scontrol_hostnames.stdout_lines | default(_sinfo_partition.stdout_lines) }}" diff --git a/ansible/roles/resolv_conf/README.md b/ansible/roles/resolv_conf/README.md index 3746407..781ec49 100644 --- a/ansible/roles/resolv_conf/README.md +++ b/ansible/roles/resolv_conf/README.md @@ -3,9 +3,11 @@ Template out `/etc/resolv.conf`. ## Role variables + - `resolv_conf_nameservers`: List of up to 3 nameserver addresses. Notes: + - `NetworkManager` (if used) will be prevented from rewriting this file on boot. - If `/etc/resolv.conf` includes `127.0.0.1` (e.g. due to a FreeIPA server installation), then `resolv_conf_nameservers` is ignored and this role does not change `/etc/resolv.conf` - For hosts in the `resolv_conf` group, the `/etc/resolv.conf` created with `resolv_conf_nameservers` will diff --git a/ansible/roles/resolv_conf/defaults/main.yml b/ansible/roles/resolv_conf/defaults/main.yml index 37c97b7..44e2d85 100644 --- a/ansible/roles/resolv_conf/defaults/main.yml +++ b/ansible/roles/resolv_conf/defaults/main.yml @@ -1 +1,2 @@ +--- resolv_conf_nameservers: [] diff --git a/ansible/roles/resolv_conf/tasks/main.yml b/ansible/roles/resolv_conf/tasks/main.yml index 486ec18..41ef9c1 100644 --- a/ansible/roles/resolv_conf/tasks/main.yml +++ b/ansible/roles/resolv_conf/tasks/main.yml @@ -1,3 +1,4 @@ +--- - name: Read nameservers from /etc/resolv.conf ansible.builtin.slurp: src: /etc/resolv.conf @@ -27,4 +28,4 @@ ansible.builtin.systemd: name: NetworkManager state: reloaded - when: _copy_nm_config.changed | default(false) + when: _copy_nm_config.changed | default(false) # noqa: no-handler diff --git a/ansible/roles/slurm_exporter/README.md b/ansible/roles/slurm_exporter/README.md index 7ade273..3b42f13 100644 --- a/ansible/roles/slurm_exporter/README.md +++ b/ansible/roles/slurm_exporter/README.md @@ -1,37 +1,34 @@ -slurm_exporter -============== +# slurm_exporter -Build, install and configure a Prometheus exporter for metrics about Slurm itself: https://github.com/vpenso/prometheus-slurm-exporter/ +Build, install and configure a Prometheus exporter for metrics about Slurm itself: -Requirements ------------- +## Requirements Rocky Linux 8.5 host. -Role Variables --------------- +## Role Variables See `defaults/main.yml` -Dependencies ------------- +## Dependencies None. -Example Playbook ----------------- +## Example Playbook - - name: Deploy Slurm exporter - hosts: control - become: true - tags: slurm_exporter - tasks: - - import_role: - name: slurm_exporter +```yaml +- name: Deploy Slurm exporter + hosts: control + become: true + tags: slurm_exporter + tasks: + - import_role: + name: slurm_exporter +``` Prometheus scrape configuration for this might look like: -``` +```text - job_name: "slurm_exporter" scrape_interval: 30s scrape_timeout: 30s @@ -40,12 +37,10 @@ Prometheus scrape configuration for this might look like: - "{{ openhpc_slurm_control_host }}:9341" ``` -License -------- +## License Apache v2 -Author Information ------------------- +## Author Information StackHPC Ltd. 
diff --git a/ansible/roles/slurm_exporter/defaults/main.yml b/ansible/roles/slurm_exporter/defaults/main.yml index eda259b..d0b5a0f 100644 --- a/ansible/roles/slurm_exporter/defaults/main.yml +++ b/ansible/roles/slurm_exporter/defaults/main.yml @@ -1,5 +1,5 @@ --- # see https://github.com/stackhpc/prometheus-slurm-exporter/releases - version follows upstream, release is stackhpc build -slurm_exporter_version: '0.21' -slurm_exporter_release: '1' +slurm_exporter_version: "0.21" +slurm_exporter_release: "1" slurm_exporter_state: started diff --git a/ansible/roles/slurm_exporter/handlers/main.yml b/ansible/roles/slurm_exporter/handlers/main.yml index b55c9c6..33266fe 100644 --- a/ansible/roles/slurm_exporter/handlers/main.yml +++ b/ansible/roles/slurm_exporter/handlers/main.yml @@ -1,7 +1,7 @@ --- - name: Restart slurm exporter become: true - systemd: + ansible.builtin.systemd: daemon_reload: true name: prometheus-slurm-exporter state: restarted diff --git a/ansible/roles/slurm_exporter/tasks/configure.yml b/ansible/roles/slurm_exporter/tasks/configure.yml index e511be0..d8f2aae 100644 --- a/ansible/roles/slurm_exporter/tasks/configure.yml +++ b/ansible/roles/slurm_exporter/tasks/configure.yml @@ -1,5 +1,5 @@ - name: Ensure slurm exporter state - systemd: + ansible.builtin.systemd: name: prometheus-slurm-exporter state: "{{ slurm_exporter_state }}" enabled: true diff --git a/ansible/roles/slurm_exporter/tasks/install.yml b/ansible/roles/slurm_exporter/tasks/install.yml index cba7aa9..48196dd 100644 --- a/ansible/roles/slurm_exporter/tasks/install.yml +++ b/ansible/roles/slurm_exporter/tasks/install.yml @@ -1,8 +1,10 @@ +--- - name: Install slurm_exporter package - dnf: - name: "https://github.com/stackhpc/prometheus-slurm-exporter/releases/download/{{ slurm_exporter_version }}/prometheus-slurm-exporter-{{ slurm_exporter_version }}-{{slurm_exporter_release}}.el8.x86_64.rpm" - disable_gpg_check: yes + # checkov:skip=CKV2_ANSIBLE_4: "Ensure that packages with untrusted or missing GPG signatures are not used by dnf" + ansible.builtin.dnf: + # yamllint disable-line rule:line-length + name: "https://github.com/stackhpc/prometheus-slurm-exporter/releases/download/{{ slurm_exporter_version }}/prometheus-slurm-exporter-{{ slurm_exporter_version }}-{{ slurm_exporter_release }}.el8.x86_64.rpm" + disable_gpg_check: true notify: Restart slurm exporter -- meta: flush_handlers - +- ansible.builtin.meta: flush_handlers diff --git a/ansible/roles/slurm_exporter/tasks/main.yml b/ansible/roles/slurm_exporter/tasks/main.yml index 0171113..cc29fba 100644 --- a/ansible/roles/slurm_exporter/tasks/main.yml +++ b/ansible/roles/slurm_exporter/tasks/main.yml @@ -1,3 +1,3 @@ --- -- import_tasks: install.yml -- import_tasks: configure.yml +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/slurm_recompile/README.md b/ansible/roles/slurm_recompile/README.md index e42572a..27b162c 100644 --- a/ansible/roles/slurm_recompile/README.md +++ b/ansible/roles/slurm_recompile/README.md @@ -1,28 +1,22 @@ # slurm_recompile -================= Recompiles slurm from source RPMs and installs the packages that were built. -Requirements ------------- +## Requirements -Role Variables --------------- +## Role Variables See `defaults/main.yml`. 
-Dependencies ------------- +## Dependencies -Example Playbook ----------------- +## Example Playbook - hosts: compute tasks: - import_role: name: slurm_recompile -License -------- +## License Apache-2.0 diff --git a/ansible/roles/slurm_recompile/tasks/main.yml b/ansible/roles/slurm_recompile/tasks/main.yml index 4720a6a..22961d6 100644 --- a/ansible/roles/slurm_recompile/tasks/main.yml +++ b/ansible/roles/slurm_recompile/tasks/main.yml @@ -1,6 +1,6 @@ --- - name: Get facts about CUDA installation - import_role: + ansible.builtin.import_role: name: cuda tasks_from: facts.yml @@ -9,15 +9,16 @@ manager: auto - name: Set fact containing slurm package facts - set_fact: + ansible.builtin.set_fact: slurm_package: "{{ ansible_facts.packages['slurm-slurmd-ohpc'].0 }}" - name: Install build packages ansible.builtin.dnf: name: "{{ slurm_recompile_build_packages }}" -- name: Recompile and install slurm packages - shell: | +- name: Recompile and install slurm packages # noqa: no-changed-when + # yamllint disable rule:line-length + ansible.builtin.shell: | #!/bin/bash source /etc/profile set -eux @@ -27,17 +28,18 @@ dnf builddep -y slurm.spec rpmbuild -bb{% if slurm_recompile_with_nvml | bool %} -D "_with_nvml --with-nvml=/usr/local/cuda-{{ cuda_facts_version_short }}/targets/x86_64-linux/"{% endif %} slurm.spec dnf reinstall -y /root/rpmbuild/RPMS/x86_64/*.rpm + # yamllint enable rule:line-length become: true - name: Workaround missing symlink # Workaround path issue: https://groups.google.com/g/slurm-users/c/cvGb4JnK8BY - command: ln -s /lib64/libnvidia-ml.so.1 /lib64/libnvidia-ml.so + ansible.builtin.command: ln -s /lib64/libnvidia-ml.so.1 /lib64/libnvidia-ml.so args: creates: /lib64/libnvidia-ml.so when: slurm_recompile_with_nvml | bool -- name: Cleanup Dependencies - shell: | +- name: Cleanup Dependencies # noqa: no-changed-when + ansible.builtin.shell: | #!/bin/bash set -eux set -o pipefail diff --git a/ansible/roles/slurm_stats/README.md b/ansible/roles/slurm_stats/README.md index f8bd38c..c67e2c0 100644 --- a/ansible/roles/slurm_stats/README.md +++ b/ansible/roles/slurm_stats/README.md @@ -1,33 +1,25 @@ -stackhpc.slurm_openstack_tools.slurm-stats -========================================== +# stackhpc.slurm_openstack_tools.slurm-stats -Configures slurm-stats from https://github.com/stackhpc/slurm-openstack-tools.git which +Configures slurm-stats from which transforms sacct output into a form that is more amenable for importing into elasticsearch/loki. -Requirements ------------- +## Requirements -Role Variables --------------- +## Role Variables See `defaults/main.yml`. 
-Dependencies ------------- +## Dependencies -Example Playbook ----------------- +## Example Playbook - hosts: compute tasks: - import_role: name: slurm_stats - -License -------- +## License Apache-2.0 -Author Information ------------------- +## Author Information diff --git a/ansible/roles/slurm_stats/tasks/configure.yml b/ansible/roles/slurm_stats/tasks/configure.yml index 6bd87b2..e83c33f 100644 --- a/ansible/roles/slurm_stats/tasks/configure.yml +++ b/ansible/roles/slurm_stats/tasks/configure.yml @@ -1,13 +1,14 @@ --- - name: Create a directory to house the log files - file: + ansible.builtin.file: state: directory path: /var/log/slurm-stats + mode: "0755" become: true - name: Create cron job - cron: + ansible.builtin.cron: name: Generate slurm stats minute: "*/5" user: root @@ -17,7 +18,7 @@ become: true - name: Setup log rotate - copy: + ansible.builtin.copy: content: | # WARNING: This file is managed by ansible, do not modify. /var/log/slurm-stats/finished_jobs.json { @@ -27,4 +28,5 @@ delaycompress } dest: /etc/logrotate.d/slurm-stats + mode: "0644" become: true diff --git a/ansible/roles/slurm_stats/tasks/install.yml b/ansible/roles/slurm_stats/tasks/install.yml index 748272e..981bf84 100644 --- a/ansible/roles/slurm_stats/tasks/install.yml +++ b/ansible/roles/slurm_stats/tasks/install.yml @@ -1,5 +1,5 @@ --- - name: Setup slurm tools - include_role: + ansible.builtin.include_role: name: slurm_tools diff --git a/ansible/roles/slurm_stats/tasks/main.yml b/ansible/roles/slurm_stats/tasks/main.yml index 79d326c..e5e0787 100644 --- a/ansible/roles/slurm_stats/tasks/main.yml +++ b/ansible/roles/slurm_stats/tasks/main.yml @@ -1,4 +1,4 @@ --- -- include_tasks: install.yml -- include_tasks: configure.yml +- ansible.builtin.include_tasks: install.yml +- ansible.builtin.include_tasks: configure.yml diff --git a/ansible/roles/slurm_tools/README.md b/ansible/roles/slurm_tools/README.md index 9724c44..07911cb 100644 --- a/ansible/roles/slurm_tools/README.md +++ b/ansible/roles/slurm_tools/README.md @@ -1,10 +1,8 @@ -slurm_tools -========= +# slurm_tools -Install python-based tools from https://github.com/stackhpc/slurm-openstack-tools.git into `/opt/slurm-tools/bin/`. +Install python-based tools from into `/opt/slurm-tools/bin/`. -Role Variables --------------- +## Role Variables - `pytools_editable`: Optional bool. Whether to install the package using `pip`'s editable mode (installing source to `/opt/slurm-tools/src`). Default `false`. diff --git a/ansible/roles/slurm_tools/tasks/main.yml b/ansible/roles/slurm_tools/tasks/main.yml index deedb03..9f5eff0 100644 --- a/ansible/roles/slurm_tools/tasks/main.yml +++ b/ansible/roles/slurm_tools/tasks/main.yml @@ -1,33 +1,33 @@ --- -- name: install python3 - package: +- name: Install python3 + ansible.builtin.package: name: python3,git become: true - name: Create virtualenv directory - file: + ansible.builtin.file: path: /opt/slurm-tools owner: "{{ pytools_user }}" group: "{{ pytools_user }}" state: directory + mode: "0755" become: true -- block: - - name: Upgrade pip - # This needs to a separate step so that we use the updated version - # to install the packages below. 
- pip: - name: pip - - - name: Create virtualenv - pip: - name: "git+https://github.com/stackhpc/slurm-openstack-tools.git@{{ pytools_gitref }}#egg=slurm_openstack_tools" - editable: "{{ pytools_editable }}" - - module_defaults: +- module_defaults: ansible.builtin.pip: virtualenv: /opt/slurm-tools virtualenv_command: "{{ 'python3.9 -m venv' if ansible_distribution_major_version == '8' else 'python3 -m venv' }}" state: latest become: true become_user: "{{ pytools_user }}" + block: + - name: Upgrade pip + # This needs to a separate step so that we use the updated version + # to install the packages below. + ansible.builtin.pip: + name: pip + + - name: Create virtualenv + ansible.builtin.pip: + name: "git+https://github.com/stackhpc/slurm-openstack-tools.git@{{ pytools_gitref }}#egg=slurm_openstack_tools" + editable: "{{ pytools_editable }}" diff --git a/ansible/roles/squid/README.md b/ansible/roles/squid/README.md index e514c36..7b7b8db 100644 --- a/ansible/roles/squid/README.md +++ b/ansible/roles/squid/README.md @@ -35,5 +35,5 @@ Where noted these map to squid parameters of the same name without the `squid_` http_access allow localhost # Finally deny all other access to this proxy http_access deny all - + See squid parameter. diff --git a/ansible/roles/squid/defaults/main.yml b/ansible/roles/squid/defaults/main.yml index 7457bdc..b224d13 100644 --- a/ansible/roles/squid/defaults/main.yml +++ b/ansible/roles/squid/defaults/main.yml @@ -1,3 +1,4 @@ +--- squid_conf_template: squid.conf.j2 squid_started: true squid_enabled: true @@ -5,8 +6,8 @@ squid_enabled: true squid_cache_mem: "{{ undef(hint='squid_cache_mem required, e.g. \"12 GB\"') }}" squid_cache_dir: /var/spool/squid squid_cache_disk: "{{ undef(hint='squid_cache_disk (in MB) required, e.g. \"1024\"') }}" # always in MB -squid_maximum_object_size_in_memory: '64 MB' -squid_maximum_object_size: '200 MB' +squid_maximum_object_size_in_memory: "64 MB" +squid_maximum_object_size: "200 MB" squid_http_port: 3128 squid_acls: acl anywhere src all # rely on openstack security groups squid_http_access: | diff --git a/ansible/roles/squid/handlers/main.yml b/ansible/roles/squid/handlers/main.yml index 135d98d..7448a01 100644 --- a/ansible/roles/squid/handlers/main.yml +++ b/ansible/roles/squid/handlers/main.yml @@ -1,5 +1,6 @@ +--- - name: Restart squid - service: + ansible.builtin.service: name: squid state: restarted when: squid_started | bool diff --git a/ansible/roles/squid/tasks/configure.yml b/ansible/roles/squid/tasks/configure.yml index 0d4dec6..d1e49e3 100644 --- a/ansible/roles/squid/tasks/configure.yml +++ b/ansible/roles/squid/tasks/configure.yml @@ -1,5 +1,6 @@ +--- - name: Ensure squid cache directory exists - file: + ansible.builtin.file: path: "{{ squid_cache_dir }}" # based on what dnf package creates: owner: squid @@ -7,7 +8,7 @@ mode: u=rwx,g=rw,o= - name: Template squid configuration - template: + ansible.builtin.template: src: "{{ squid_conf_template }}" dest: /etc/squid/squid.conf owner: squid @@ -15,10 +16,9 @@ mode: ug=rwX,go= notify: Restart squid -- meta: flush_handlers - +- ansible.builtin.meta: flush_handlers - name: Ensure squid service state - systemd: + ansible.builtin.systemd: name: squid state: "{{ 'started' if squid_started | bool else 'stopped' }}" enabled: "{{ true if squid_enabled else false }}" diff --git a/ansible/roles/squid/tasks/install.yml b/ansible/roles/squid/tasks/install.yml index 672186c..d60af91 100644 --- a/ansible/roles/squid/tasks/install.yml +++ b/ansible/roles/squid/tasks/install.yml @@ -1,3 +1,4 
@@ +--- - name: Install squid package - dnf: + ansible.builtin.dnf: name: squid diff --git a/ansible/roles/squid/tasks/main.yml b/ansible/roles/squid/tasks/main.yml index 2b65e84..cc29fba 100644 --- a/ansible/roles/squid/tasks/main.yml +++ b/ansible/roles/squid/tasks/main.yml @@ -1,2 +1,3 @@ -- import_tasks: install.yml -- import_tasks: configure.yml +--- +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/sshd/defaults/main.yml b/ansible/roles/sshd/defaults/main.yml index c7a83b8..ca2f8c7 100644 --- a/ansible/roles/sshd/defaults/main.yml +++ b/ansible/roles/sshd/defaults/main.yml @@ -1,3 +1,4 @@ +--- sshd_password_authentication: false sshd_disable_forwarding: true sshd_conf_src: sshd.conf.j2 diff --git a/ansible/roles/sshd/handlers/main.yml b/ansible/roles/sshd/handlers/main.yml index e11aa78..e3e8b1c 100644 --- a/ansible/roles/sshd/handlers/main.yml +++ b/ansible/roles/sshd/handlers/main.yml @@ -1,4 +1,5 @@ +--- - name: Restart sshd - systemd: + ansible.builtin.systemd: name: sshd state: restarted diff --git a/ansible/roles/sshd/tasks/configure.yml b/ansible/roles/sshd/tasks/configure.yml index 359d782..f47d48c 100644 --- a/ansible/roles/sshd/tasks/configure.yml +++ b/ansible/roles/sshd/tasks/configure.yml @@ -1,17 +1,18 @@ +--- - name: Grab facts to determine distribution - setup: + ansible.builtin.setup: - name: Ensure drop in directory exists - file: + ansible.builtin.file: path: /etc/ssh/sshd_config.d/ state: directory owner: root group: root - mode: 700 + mode: "0700" become: true - name: Ensure drop in configuration is included - blockinfile: + ansible.builtin.blockinfile: dest: /etc/ssh/sshd_config content: | # To modify the system-wide sshd configuration, create .conf @@ -32,7 +33,7 @@ # Include /etc/ssh/sshd_config.d/*.conf # early on, which is generally held to be the correct approach, so adding # values to the end of that file won't work - template: + ansible.builtin.template: src: "{{ sshd_conf_src }}" dest: "{{ sshd_conf_dest }}" owner: root diff --git a/ansible/roles/sshd/tasks/export.yml b/ansible/roles/sshd/tasks/export.yml index 0c153ca..a21daee 100644 --- a/ansible/roles/sshd/tasks/export.yml +++ b/ansible/roles/sshd/tasks/export.yml @@ -1,6 +1,7 @@ +--- # Exclusively used for compute-init - name: Inject host specific config template - template: + ansible.builtin.template: src: "{{ sshd_conf_src }}" dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/sshd.conf" owner: root diff --git a/ansible/roles/sshd/tasks/main.yml b/ansible/roles/sshd/tasks/main.yml index 84f4934..ec83d2b 100644 --- a/ansible/roles/sshd/tasks/main.yml +++ b/ansible/roles/sshd/tasks/main.yml @@ -1 +1,2 @@ -- import_tasks: configure.yml +--- +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/sssd/README.md b/ansible/roles/sssd/README.md index 5c9b50e..ad6de4a 100644 --- a/ansible/roles/sssd/README.md +++ b/ansible/roles/sssd/README.md @@ -2,7 +2,6 @@ Install and configure [sssd](https://sssd.io/docs/introduction.html). - ## Role variables The only required configuration is to create a [sssd.conf](https://www.mankier.com/5/sssd.conf) template at the location specified by `sssd_conf_src`. 
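As the sssd README above notes, the only required configuration is an `sssd.conf` template at the location given by `sssd_conf_src`. A minimal illustrative sketch for an LDAP identity source follows; the domain name, server URI and search base are placeholders rather than appliance defaults, and `sssd_install_ldap: true` would be needed so the role installs the LDAP provider packages:

```ini
# sssd.conf.j2 - illustrative example only
[sssd]
services = nss, pam
domains = example

[domain/example]
id_provider = ldap
ldap_uri = ldaps://ldap.example.com
ldap_search_base = dc=example,dc=com
cache_credentials = true
```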
diff --git a/ansible/roles/sssd/defaults/main.yml b/ansible/roles/sssd/defaults/main.yml index 5bc58c9..605e746 100644 --- a/ansible/roles/sssd/defaults/main.yml +++ b/ansible/roles/sssd/defaults/main.yml @@ -1,3 +1,4 @@ +--- sssd_packages: - sssd-common sssd_install_ldap: false diff --git a/ansible/roles/sssd/handlers/main.yml b/ansible/roles/sssd/handlers/main.yml index 72c36e7..4965b15 100644 --- a/ansible/roles/sssd/handlers/main.yml +++ b/ansible/roles/sssd/handlers/main.yml @@ -1,5 +1,6 @@ +--- - name: Restart sssd - systemd: + ansible.builtin.systemd: name: sssd state: restarted when: sssd_started | bool diff --git a/ansible/roles/sssd/tasks/configure.yml b/ansible/roles/sssd/tasks/configure.yml index c8ebd82..66d86f6 100644 --- a/ansible/roles/sssd/tasks/configure.yml +++ b/ansible/roles/sssd/tasks/configure.yml @@ -1,5 +1,6 @@ +--- - name: Manage sssd.conf configuration - template: + ansible.builtin.template: src: "{{ sssd_conf_src }}" dest: "{{ sssd_conf_dest }}" owner: root @@ -7,29 +8,28 @@ mode: u=rw,go= notify: "Restart sssd" -- meta: flush_handlers - +- ansible.builtin.meta: flush_handlers - name: Ensure sssd service state - systemd: + ansible.builtin.systemd: name: sssd state: "{{ 'started' if sssd_started | bool else 'stopped' }}" enabled: "{{ sssd_enabled | bool }}" - name: Get current authselect configuration - command: authselect current --raw + ansible.builtin.command: authselect current --raw changed_when: false failed_when: - _authselect_current.rc != 0 - "'No existing configuration detected' not in _authselect_current.stdout" register: _authselect_current # stdout: sssd with-mkhomedir -- name: Configure nsswitch and PAM for SSSD - command: "authselect select sssd --force{% if sssd_enable_mkhomedir | bool %} with-mkhomedir{% endif %}" +- name: Configure nsswitch and PAM for SSSD # noqa: no-changed-when + ansible.builtin.command: "authselect select sssd --force{% if sssd_enable_mkhomedir | bool %} with-mkhomedir{% endif %}" when: "'sssd' not in _authselect_current.stdout" - name: "Ensure oddjob is started" - service: + ansible.builtin.service: name: oddjobd - state: 'started' + state: "started" enabled: true - when: sssd_enable_mkhomedir | bool \ No newline at end of file + when: sssd_enable_mkhomedir | bool diff --git a/ansible/roles/sssd/tasks/export.yml b/ansible/roles/sssd/tasks/export.yml index 0be6674..6078786 100644 --- a/ansible/roles/sssd/tasks/export.yml +++ b/ansible/roles/sssd/tasks/export.yml @@ -1,9 +1,10 @@ +--- # Exclusively used for compute-init - name: Inject host specific config template - template: + ansible.builtin.template: src: "{{ sssd_conf_src }}" dest: "/exports/cluster/hostconfig/{{ inventory_hostname }}/sssd.conf" owner: root group: root mode: u=rw,go= - delegate_to: "{{ groups['control'] | first }}" \ No newline at end of file + delegate_to: "{{ groups['control'] | first }}" diff --git a/ansible/roles/sssd/tasks/install.yml b/ansible/roles/sssd/tasks/install.yml index 97aa82a..b7c8f11 100644 --- a/ansible/roles/sssd/tasks/install.yml +++ b/ansible/roles/sssd/tasks/install.yml @@ -1,13 +1,14 @@ +--- - name: Ensure sssd packages are installed - dnf: + ansible.builtin.dnf: name: "{{ sssd_packages + sssd_ldap_packages if (sssd_install_ldap | bool) else [] }}" - name: Control if sssd should start on boot # Needs to be done here to prevent starting after image build, is enabled by default - systemd: + ansible.builtin.systemd: name: sssd enabled: "{{ sssd_enabled | bool }}" - name: Ensure mkhomedir packages are installed if required - dnf: + 
ansible.builtin.dnf: name: "{{ sssd_mkhomedir_packages }}" diff --git a/ansible/roles/sssd/tasks/main.yml b/ansible/roles/sssd/tasks/main.yml index 2b65e84..cc29fba 100644 --- a/ansible/roles/sssd/tasks/main.yml +++ b/ansible/roles/sssd/tasks/main.yml @@ -1,2 +1,3 @@ -- import_tasks: install.yml -- import_tasks: configure.yml +--- +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/systemd/README.md b/ansible/roles/systemd/README.md index e18599f..9ec8cb8 100644 --- a/ansible/roles/systemd/README.md +++ b/ansible/roles/systemd/README.md @@ -2,18 +2,17 @@ Create drop-in files for systemd services. -# Role Variables +## Role Variables + - `systemd_dropins`: Required. A mapping where keys = systemd service name, values are a dict as follows: - - `group`: Required str. Inventory group this drop-in applies to. - - `comment`: Optional str. Comment describing reason for drop-in. - - `content`: Required str. Content of drop-in file. -# systemd + - `group`: Required str. Inventory group this drop-in applies to. + - `comment`: Optional str. Comment describing reason for drop-in. + - `content`: Required str. Content of drop-in file. -Create drop-in files for systemd services. +## Role Variables - optional restart -# Role Variables - `systemd_dropins`: Required. A mapping where keys = systemd service name, values are a dict as follows: - - `group`: Required str. Inventory group this drop-in applies to. - - `comment`: Optional str. Comment describing reason for drop-in. - - `content`: Required str. Content of drop-in file. + - `group`: Required str. Inventory group this drop-in applies to. + - `comment`: Optional str. Comment describing reason for drop-in. + - `content`: Required str. Content of drop-in file. - `systemd_restart`: Optional bool. Whether to reload unit definitions and restart services. Default `false`. diff --git a/ansible/roles/systemd/defaults/main.yml b/ansible/roles/systemd/defaults/main.yml index 7ca54aa..29b9b75 100644 --- a/ansible/roles/systemd/defaults/main.yml +++ b/ansible/roles/systemd/defaults/main.yml @@ -1,4 +1,5 @@ -#systemd_dropins: +--- +# systemd_dropins: # : # group: # comment: diff --git a/ansible/roles/systemd/tasks/main.yml b/ansible/roles/systemd/tasks/main.yml index 822a676..8fa6f48 100644 --- a/ansible/roles/systemd/tasks/main.yml +++ b/ansible/roles/systemd/tasks/main.yml @@ -1,11 +1,12 @@ +--- # NB: As `systemd_TODO:` is defined in group_vars/all, all tasks here are conditional on group. 
- name: Make directory for unit dropins - file: + ansible.builtin.file: path: "/etc/systemd/system/{{ item.key }}.service.d/" state: directory owner: root group: root - mode: 0644 + mode: "0644" loop: "{{ systemd_dropins | dict2items }}" when: "item.value.group in group_names" @@ -17,14 +18,14 @@ dest: "/etc/systemd/system/{{ item.key }}.service.d/slurm_app.conf" owner: root group: root - mode: 0644 + mode: "0644" loop: "{{ systemd_dropins | dict2items }}" register: _systemd_dropins when: "item.value.group in group_names" -- name: Reload unit definitions - ansible.builtin.shell: - cmd: systemctl daemon-reload +- name: Reload unit definitions # noqa: no-changed-when + ansible.builtin.command: + cmd: systemctl daemon-reload # noqa: command-instead-of-module when: - _systemd_dropins.changed - systemd_restart | default(false) | bool diff --git a/ansible/roles/topology/README.md b/ansible/roles/topology/README.md index 0571344..6bdeaae 100644 --- a/ansible/roles/topology/README.md +++ b/ansible/roles/topology/README.md @@ -1,5 +1,4 @@ -topology -======== +# topology Templates out /etc/slurm/topology.conf file based on an OpenStack project for use by Slurm's [topology/tree plugin.](https://slurm.schedmd.com/topology.html) Models @@ -12,22 +11,23 @@ reconfigure an already running cluster after a `ansible/site.yml` run. You will to run the `ansible/adhoc/restart-slurm.yml` playbook for changes to topology.conf to be recognised. -Role Variables --------------- +## Role Variables - `topology_nodes:`: Required list of strs. List of inventory hostnames of nodes to include in topology tree. Must be set to include all compute nodes in Slurm cluster. Default `[]`. - `topology_conf_template`: Optional str. Path to Jinja2 template of topology.conf file. Default `templates/topology.conf.j2` -- `topology_above_rack_topology`: Optionally multiline str. Used to define topology above racks/AZs if - you wish to partition racks further under different logical switches. New switches above should be - defined as [SwitchName lines](https://slurm.schedmd.com/topology.html#hierarchical) referencing - rack Availability Zones under that switch in their `Switches fields`. These switches must themselves - be under a top level switch. e.g - ``` - topology_above_rack_topology: | - SwitchName=rack-group-1 Switches=rack-az-1,rack-az-2 - SwitchName=rack-group-2 Switches=rack-az-3,rack-az-4 - SwitchName=top-level Switches=rack-group-1,rack-group-2 - ``` - Defaults to an empty string, which causes all AZs to be put under a - single top level switch. \ No newline at end of file +- `topology_above_rack_topology`: Optionally multiline str. Used to define topology above racks/AZs if + you wish to partition racks further under different logical switches. New switches above should be + defined as [SwitchName lines](https://slurm.schedmd.com/topology.html#hierarchical) referencing + rack Availability Zones under that switch in their `Switches fields`. These switches must themselves + be under a top level switch. e.g + + ```yaml + topology_above_rack_topology: | + SwitchName=rack-group-1 Switches=rack-az-1,rack-az-2 + SwitchName=rack-group-2 Switches=rack-az-3,rack-az-4 + SwitchName=top-level Switches=rack-group-1,rack-group-2 + ``` + + Defaults to an empty string, which causes all AZs to be put under a + single top level switch. 
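For orientation, the topology role renders the standard Slurm `topology.conf` syntax, where leaf switches carry `Nodes=` lists and higher-level switches reference them via `Switches=`. A sketch of the kind of output produced, grouping nodes by availability zone and hypervisor host-id prefix (the switch names and node names below are illustrative, not the template's exact naming):

```text
# /etc/slurm/topology.conf - illustrative only
SwitchName=az-1 Switches=4f2a,4f2b
SwitchName=4f2a Nodes=mycluster-compute-[0-1]
SwitchName=4f2b Nodes=mycluster-compute-[2-3]
```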
diff --git a/ansible/roles/topology/defaults/main.yml b/ansible/roles/topology/defaults/main.yml index 6b62243..87801e8 100644 --- a/ansible/roles/topology/defaults/main.yml +++ b/ansible/roles/topology/defaults/main.yml @@ -5,4 +5,3 @@ topology_nodes: [] topology_conf_template: templates/topology.conf.j2 topology_above_rack_topology: "" - diff --git a/ansible/roles/topology/library/map_hosts.py b/ansible/roles/topology/library/map_hosts.py index 1961132..42f22ee 100644 --- a/ansible/roles/topology/library/map_hosts.py +++ b/ansible/roles/topology/library/map_hosts.py @@ -1,10 +1,11 @@ #!/usr/bin/python +# pylint: disable=missing-module-docstring # Copyright: (c) 2025, StackHPC # Apache 2 License -from ansible.module_utils.basic import AnsibleModule -import openstack +import openstack # pylint: disable=import-error +from ansible.module_utils.basic import AnsibleModule # pylint: disable=import-error DOCUMENTATION = """ --- @@ -47,35 +48,39 @@ - mycluster-compute-1 """ + def min_prefix(uuids, start=4): - """ Take a list of uuids and return the smallest length >= start which keeps them unique """ + """Take a list of uuids and return the smallest length >= start which keeps them unique""" for length in range(start, len(uuids[0])): prefixes = set(uuid[:length] for uuid in uuids) if len(prefixes) == len(uuids): return length + # Fallback to returning the full length + return len(uuids[0]) + -def run_module(): - module_args = dict( - compute_vms=dict(type='list', elements='str', required=True) - ) +def run_module(): # pylint: disable=missing-function-docstring + module_args = {"compute_vms": {"type": "list", "elements": "str", "required": True}} module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) conn = openstack.connection.from_config() - servers = [s for s in conn.compute.servers() if s["name"] in module.params["compute_vms"]] + servers = [ + s for s in conn.compute.servers() if s["name"] in module.params["compute_vms"] + ] topo = {} all_host_ids = [] for s in servers: - az = s['availability_zone'] - host_id = s['host_id'] - if host_id != '': # empty string if e.g. server is shelved + az = s["availability_zone"] + host_id = s["host_id"] + if host_id != "": # empty string if e.g. server is shelved all_host_ids.append(host_id) if az not in topo: topo[az] = {} if host_id not in topo[az]: topo[az][host_id] = [] - topo[az][host_id].append(s['name']) + topo[az][host_id].append(s["name"]) uuid_len = min_prefix(list(set(all_host_ids))) @@ -83,14 +88,14 @@ def run_module(): topo[az] = dict((k[:uuid_len], v) for (k, v) in topo[az].items()) result = { - "changed": False, + "changed": False, "topology": topo, } - + module.exit_json(**result) -def main(): +def main(): # pylint: disable=missing-function-docstring run_module() diff --git a/ansible/roles/topology/tasks/main.yml b/ansible/roles/topology/tasks/main.yml index 8debdde..3872a0c 100644 --- a/ansible/roles/topology/tasks/main.yml +++ b/ansible/roles/topology/tasks/main.yml @@ -13,4 +13,4 @@ dest: /etc/slurm/topology.conf owner: root group: root - mode: 0644 + mode: "0644" diff --git a/ansible/roles/tuned/README.md b/ansible/roles/tuned/README.md index 34885af..a4626c4 100644 --- a/ansible/roles/tuned/README.md +++ b/ansible/roles/tuned/README.md @@ -1,14 +1,11 @@ -tuned -========= +# tuned This role configures the TuneD tool for system tuning, ensuring optimal performance based on the profile settings defined. 
-Role Variables --------------- +## Role Variables See the [TuneD documentation](https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9/html/monitoring_and_managing_system_status_and_performance/getting-started-with-tuned_monitoring-and-managing-system-status-and-performance) for profile details. - - `tuned_profile_baremetal`: Optional str. Name of default profile for non-virtualised hosts. Default `hpc-compute`. - `tuned_profile_vm`: Optional str. Name of default profile for virtualised hosts. Default `virtual-guest`. - `tuned_profile`: Optional str. Name of profile to apply to host. Defaults to `tuned_profile_baremetal` or `tuned_profile_vm` as appropriate. diff --git a/ansible/roles/tuned/defaults/main.yml b/ansible/roles/tuned/defaults/main.yml index 1426bbe..8ddb139 100644 --- a/ansible/roles/tuned/defaults/main.yml +++ b/ansible/roles/tuned/defaults/main.yml @@ -4,4 +4,4 @@ tuned_profile_baremetal: hpc-compute tuned_profile_vm: virtual-guest tuned_profile: "{{ tuned_profile_baremetal if ansible_virtualization_role != 'guest' else tuned_profile_vm }}" tuned_enabled: true -tuned_started: true +tuned_started: true diff --git a/ansible/roles/tuned/tasks/configure.yml b/ansible/roles/tuned/tasks/configure.yml index cf122d1..fa10648 100644 --- a/ansible/roles/tuned/tasks/configure.yml +++ b/ansible/roles/tuned/tasks/configure.yml @@ -12,7 +12,7 @@ register: _tuned_profile_current changed_when: false -- name: Set TuneD profile +- name: Set TuneD profile # noqa: no-changed-when ansible.builtin.command: cmd: "tuned-adm profile {{ tuned_profile }}" when: diff --git a/ansible/roles/tuned/tasks/install.yml b/ansible/roles/tuned/tasks/install.yml index 0a2db4e..0890684 100644 --- a/ansible/roles/tuned/tasks/install.yml +++ b/ansible/roles/tuned/tasks/install.yml @@ -12,5 +12,6 @@ path: /usr/lib/tuned/hpc-compute/tuned.conf section: sysctl option: vm.min_free_kbytes - value: '>135168' + value: ">135168" no_extra_spaces: true + mode: "0644" diff --git a/ansible/roles/tuned/tasks/main.yml b/ansible/roles/tuned/tasks/main.yml index ef0bea2..cc29fba 100644 --- a/ansible/roles/tuned/tasks/main.yml +++ b/ansible/roles/tuned/tasks/main.yml @@ -1,3 +1,3 @@ --- -- import_tasks: install.yml -- import_tasks: configure.yml \ No newline at end of file +- ansible.builtin.import_tasks: install.yml +- ansible.builtin.import_tasks: configure.yml diff --git a/ansible/roles/zenith_proxy/defaults/main.yml b/ansible/roles/zenith_proxy/defaults/main.yml index 02267cb..748ad71 100644 --- a/ansible/roles/zenith_proxy/defaults/main.yml +++ b/ansible/roles/zenith_proxy/defaults/main.yml @@ -15,7 +15,7 @@ zenith_proxy_pod_name: "{{ zenith_proxy_service_name }}" zenith_proxy_client_container_name: "{{ zenith_proxy_client_service_name }}" zenith_proxy_mitm_container_name: "{{ zenith_proxy_mitm_service_name }}" -zenith_proxy_image_tag: '0.14.0' +zenith_proxy_image_tag: "0.14.0" zenith_proxy_client_image_repository: ghcr.io/azimuth-cloud/zenith-client zenith_proxy_client_image: "{{ zenith_proxy_client_image_repository }}:{{ zenith_proxy_image_tag }}" @@ -27,14 +27,12 @@ zenith_proxy_upstream_scheme: http zenith_proxy_upstream_host: "{{ undef(hint = 'zenith_proxy_upstream_host is required') }}" zenith_proxy_upstream_port: "{{ undef(hint = 'zenith_proxy_upstream_port is required') }}" zenith_proxy_upstream_read_timeout: - zenith_proxy_client_token: "{{ undef(hint = 'zenith_proxy_client_token is required') }}" zenith_proxy_client_auth_skip: false zenith_proxy_client_auth_params: {} - -zenith_proxy_mitm_enabled: no 
+zenith_proxy_mitm_enabled: false zenith_proxy_mitm_listen_port: 8080 -zenith_proxy_mitm_auth_inject: none # valid values are 'basic' and 'bearer' +zenith_proxy_mitm_auth_inject: none # valid values are 'basic' and 'bearer' zenith_proxy_mitm_auth_basic_username: >- {{ undef(hint = 'zenith_proxy_mitm_auth_basic_username is required') diff --git a/ansible/roles/zenith_proxy/files/podman-pod-infra-attach.sh b/ansible/roles/zenith_proxy/files/podman-pod-infra-attach.sh old mode 100644 new mode 100755 index aab232a..0cdfae2 --- a/ansible/roles/zenith_proxy/files/podman-pod-infra-attach.sh +++ b/ansible/roles/zenith_proxy/files/podman-pod-infra-attach.sh @@ -14,4 +14,4 @@ echo "[INFO] Finding infra container for pod '$1'" INFRA_CONTAINER_ID="$(podman pod inspect --format '{{.InfraContainerID}}' "$1")" echo "[INFO] Attaching to infra container '${INFRA_CONTAINER_ID}'" -exec podman container attach --no-stdin ${INFRA_CONTAINER_ID} +exec podman container attach --no-stdin "${INFRA_CONTAINER_ID}" diff --git a/ansible/roles/zenith_proxy/tasks/main.yml b/ansible/roles/zenith_proxy/tasks/main.yml index 1a42b04..7a4c034 100644 --- a/ansible/roles/zenith_proxy/tasks/main.yml +++ b/ansible/roles/zenith_proxy/tasks/main.yml @@ -1,68 +1,71 @@ --- - - name: Install script for attaching to pod infra containers - copy: + ansible.builtin.copy: src: podman-pod-infra-attach.sh dest: /usr/bin/ mode: +x become: true - name: Create systemd unit for Zenith pod - template: + ansible.builtin.template: src: pod.service.j2 dest: /etc/systemd/system/{{ zenith_proxy_service_name }}.service + mode: "0644" become: true register: zenith_proxy_pod_systemd_unit - name: Ensure Zenith pod is started and enabled - service: + ansible.builtin.service: name: "{{ zenith_proxy_service_name }}.service" state: "{{ 'restarted' if zenith_proxy_pod_systemd_unit is changed else 'started' }}" - enabled: yes + enabled: true daemon_reload: "{{ zenith_proxy_pod_systemd_unit is changed }}" become: true -- block: +- become: true + when: zenith_proxy_mitm_enabled + block: - name: Create systemd unit file for MITM proxy - template: + ansible.builtin.template: src: mitm.service.j2 dest: /etc/systemd/system/{{ zenith_proxy_mitm_service_name }}.service + mode: "0644" register: zenith_proxy_mitm_systemd_unit - name: Ensure MITM proxy is started and enabled - service: + ansible.builtin.service: name: "{{ zenith_proxy_mitm_service_name }}.service" state: "{{ 'restarted' if zenith_proxy_mitm_systemd_unit is changed else 'started' }}" - enabled: yes + enabled: true daemon_reload: "{{ zenith_proxy_mitm_systemd_unit is changed }}" - become: true - when: zenith_proxy_mitm_enabled - - name: Ensure Zenith config directory exists - file: + ansible.builtin.file: path: /etc/zenith/{{ zenith_proxy_service_name }} state: directory + mode: "0755" become: true - name: Write Zenith client configuration - template: + ansible.builtin.template: src: zenith-client.yaml.j2 dest: /etc/zenith/{{ zenith_proxy_service_name }}/client.yaml + mode: "0644" become: true register: zenith_proxy_client_config_file - name: Create directory to persist SSH key - file: + ansible.builtin.file: path: "{{ appliances_state_dir }}/{{ zenith_proxy_service_name }}-ssh" state: directory owner: "{{ zenith_proxy_podman_user }}" group: "{{ zenith_proxy_podman_user }}" + mode: "0755" become: true - name: Initialise Zenith client # Use a foreground command rather than the podman_container module as I could not # work out the combination of parameters that produced the desired behaviour :-( - command: >- 
+ ansible.builtin.command: >- podman run --name {{ zenith_proxy_service_name }}-init --replace @@ -79,14 +82,15 @@ "token has already been used" not in zenith_proxy_client_init.stderr - name: Create systemd unit file for Zenith client - template: + ansible.builtin.template: src: client.service.j2 dest: /etc/systemd/system/{{ zenith_proxy_client_service_name }}.service + mode: "0644" become: true register: zenith_proxy_client_systemd_unit - name: Ensure Zenith client is started and enabled - service: + ansible.builtin.service: name: "{{ zenith_proxy_client_service_name }}.service" state: >- {{ @@ -98,6 +102,6 @@ ) else 'started' }} - enabled: yes + enabled: true daemon_reload: "{{ zenith_proxy_client_systemd_unit is changed }}" become: true diff --git a/ansible/site.yml b/ansible/site.yml index faeca23..79b71e1 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,41 +1,38 @@ --- - - name: Run pre.yml hook vars: # hostvars not available here, so have to recalculate environment root: appliances_environment_root: "{{ ansible_inventory_sources | last | dirname }}" hook_path: "{{ appliances_environment_root }}/hooks/pre.yml" - import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" + ansible.builtin.import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists -- import_playbook: validate.yml +- ansible.builtin.import_playbook: validate.yml when: appliances_validate | default(true) -- import_playbook: bootstrap.yml +- ansible.builtin.import_playbook: bootstrap.yml - name: Run post-bootstrap.yml hook vars: # hostvars not available here, so have to recalculate environment root: appliances_environment_root: "{{ ansible_inventory_sources | last | dirname }}" hook_path: "{{ appliances_environment_root }}/hooks/post-bootstrap.yml" - import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" + ansible.builtin.import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists -- import_playbook: iam.yml -- import_playbook: filesystems.yml -- import_playbook: extras.yml -- import_playbook: slurm.yml -- import_playbook: portal.yml -- import_playbook: monitoring.yml +- ansible.builtin.import_playbook: iam.yml +- ansible.builtin.import_playbook: filesystems.yml +- ansible.builtin.import_playbook: extras.yml +- ansible.builtin.import_playbook: slurm.yml +- ansible.builtin.import_playbook: portal.yml +- ansible.builtin.import_playbook: monitoring.yml - name: Run post.yml hook vars: # hostvars not available here, so have to recalculate environment root: appliances_environment_root: "{{ ansible_inventory_sources | last | dirname }}" hook_path: "{{ appliances_environment_root }}/hooks/post.yml" - import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" + ansible.builtin.import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists -- import_playbook: final.yml - -... 
\ No newline at end of file +- ansible.builtin.import_playbook: final.yml diff --git a/ansible/slurm.yml b/ansible/slurm.yml index 3529755..d6d306e 100644 --- a/ansible/slurm.yml +++ b/ansible/slurm.yml @@ -1,22 +1,21 @@ --- - - name: Setup DB hosts: mysql become: true tags: - mysql tasks: - - include_role: - name: mysql + - ansible.builtin.include_role: + name: mysql - name: Setup slurm-driven rebuild hosts: rebuild:!builder - become: yes + become: true tags: - rebuild - openhpc tasks: - - include_role: + - ansible.builtin.include_role: name: rebuild tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}" @@ -24,59 +23,59 @@ hosts: - compute - login - become: yes + become: true tags: - openhpc tasks: - - name: set memory limits - lineinfile: + - name: Set memory limits + ansible.builtin.lineinfile: path: /etc/security/limits.conf - regexp: '\* soft memlock unlimited' + regexp: "\\* soft memlock unlimited" line: "* soft memlock unlimited" - name: Block ssh to compute nodes for non-privileged users without running jobs hosts: compute - become: yes + become: true tags: - openhpc tasks: - name: Configure sshd pam module - blockinfile: + ansible.builtin.blockinfile: path: /etc/pam.d/sshd - insertafter: 'account\s+required\s+pam_nologin.so' + insertafter: "account\\s+required\\s+pam_nologin.so" block: | account sufficient pam_access.so account required pam_slurm.so - name: Configure login access control - blockinfile: + ansible.builtin.blockinfile: path: /etc/security/access.conf block: | +:adm:ALL -:ALL:ALL - # vagrant uses (deprecated) ansible_ssh_user + # vagrant uses (deprecated) ansible_ssh_user - name: Setup slurm hosts: openhpc - become: yes + become: true tags: - openhpc tasks: - - include_role: + - ansible.builtin.include_role: name: topology # Gated on topology group having compute nodes but role also # needs to run on control and login nodes when: - appliances_mode == 'configure' - groups['topology'] | length > 0 - - include_role: + - ansible.builtin.include_role: name: stackhpc.openhpc tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}" - name: Setup Node Health Checks # Has to be done here as it requires openhpc repos etc for installation hosts: nhc:!builder - become: yes + become: true tags: nhc tasks: - - include_role: + - ansible.builtin.include_role: name: nhc diff --git a/ansible/validate.yml b/ansible/validate.yml index 034f469..2352fff 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -1,5 +1,4 @@ --- - # Fail early if configuration is invalid - name: Validate secrets created @@ -9,7 +8,7 @@ - validate - passwords tasks: - - import_role: + - ansible.builtin.import_role: name: passwords tasks_from: validate.yml @@ -26,7 +25,7 @@ # the actual installed version. # So this compares requirements.yml against a .last version produced by a # successful dev/setup-env.sh run. - - assert: + - ansible.builtin.assert: that: "{{ _requirements_current == _requirements_installed }}" fail_msg: | Ansible Galaxy installs are out of date: @@ -34,7 +33,12 @@ {% for req in _requirements_installed | difference(_requirements_current) %} {{ req }} {% endfor %} - + + _requirements_current: + .{{ _requirements_current }}. + _requirements_installed: + .{{ _requirements_installed }}. + Run dev/setup-env.sh to fix this. vars: # note difference filter requires lists, so need to rearrange yaml from files. 
@@ -53,7 +57,7 @@ - opentofu tasks: - name: Check templated groups - assert: + ansible.builtin.assert: that: - item in groups - groups[item] | length > 0 @@ -66,7 +70,7 @@ - compute - login - name: Check templated 'all' vars - assert: + ansible.builtin.assert: that: - openhpc_cluster_name is defined - cluster_domain_suffix is defined @@ -81,7 +85,7 @@ - validate - openhpc tasks: - - assert: + - ansible.builtin.assert: that: groups['control'] | length fail_msg: "no hosts found in group 'control' - has control node been deployed?" @@ -92,7 +96,7 @@ - validate - openhpc tasks: - - import_role: + - ansible.builtin.import_role: name: stackhpc.openhpc tasks_from: validate.yml @@ -103,7 +107,7 @@ - validate - filebeat tasks: - - import_role: + - ansible.builtin.import_role: name: filebeat tasks_from: validate.yml tags: validate @@ -119,17 +123,17 @@ - openondemand_server - grafana tasks: - - import_role: + - ansible.builtin.import_role: name: openondemand tasks_from: validate.yml - # This set of tasks will run if there are grafana hosts configured. - # It is a valid configuration to have a grafana group with hosts + # This set of tasks will run if there are grafana hosts configured. + # It is a valid configuration to have a grafana group with hosts # when *not* deploying openondemand. This would mean that openondemand # vars validated in the below task are not set in a way that passes # this set of validation tasks. To ensure that this validation does # not fail with a valid config, only run these tasks when the # openondemand group both exists *and* contains hosts. - when: + when: - "'openondemand' in groups" - groups['openondemand'] | length > 0 tags: @@ -143,7 +147,7 @@ - validate - freeipa tasks: - - import_role: + - ansible.builtin.import_role: name: freeipa tasks_from: validate.yml @@ -153,16 +157,16 @@ - validate - lustre tasks: - - import_role: + - ansible.builtin.import_role: name: lustre tasks_from: validate.yml - name: Validate vGPU configuration hosts: vgpu - become: yes - gather_facts: yes + become: true + gather_facts: true tags: vgpu tasks: - - include_role: + - ansible.builtin.include_role: name: stackhpc.linux.vgpu tasks_from: validate.yml diff --git a/cookiecutter/cookiecutter.json b/cookiecutter/cookiecutter.json index 93b8e7e..3eb7acf 100644 --- a/cookiecutter/cookiecutter.json +++ b/cookiecutter/cookiecutter.json @@ -1,4 +1,4 @@ { - "environment": "foo", - "description" : "Describe the environment here" + "environment": "foo", + "description": "Describe the environment here" } diff --git a/cookiecutter/{{cookiecutter.environment}}/README.md b/cookiecutter/{{cookiecutter.environment}}/README.md index 202ca67..89fe6b4 100644 --- a/cookiecutter/{{cookiecutter.environment}}/README.md +++ b/cookiecutter/{{cookiecutter.environment}}/README.md @@ -2,4 +2,4 @@ {{ cookiecutter.description }} -See the main README.md in the repo root for an overview and general install instructions. Any environment-specific instructions should be added here. \ No newline at end of file +See the main README.md in the repository root for an overview and general install instructions. Any environment-specific instructions should be added here. 
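The "Check templated 'all' vars" assertion shown above requires `openhpc_cluster_name` and `cluster_domain_suffix` to be defined on every host. These are normally templated into the environment's `hosts` file by the deployment automation; purely as an illustrative sketch, they could equally be supplied via group vars - the path and values below are assumptions, not part of this patch:

```yaml
# environments/$ENV/inventory/group_vars/all/cluster.yml (hypothetical path and values)
openhpc_cluster_name: mycluster
cluster_domain_suffix: internal
```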
diff --git a/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/basic_users.yml b/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/basic_users.yml index dc993c3..4b4287c 100644 --- a/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/basic_users.yml +++ b/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/basic_users.yml @@ -1,3 +1,4 @@ +--- basic_users_users: - name: demo_user password: "{% raw %}{{ vault_demo_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}{% endraw %}" # idempotent diff --git a/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/hpctests.yml b/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/hpctests.yml index e8cfcea..4724621 100644 --- a/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/hpctests.yml +++ b/cookiecutter/{{cookiecutter.environment}}/inventory/group_vars/all/hpctests.yml @@ -1 +1,2 @@ +--- hpctests_user: demo_user diff --git a/cookiecutter/{{cookiecutter.environment}}/tofu/main.tf b/cookiecutter/{{cookiecutter.environment}}/tofu/main.tf index 9aa4475..abbcf94 100644 --- a/cookiecutter/{{cookiecutter.environment}}/tofu/main.tf +++ b/cookiecutter/{{cookiecutter.environment}}/tofu/main.tf @@ -1,21 +1,23 @@ +# tflint-ignore: terraform_required_version + variable "environment_root" { - type = string - description = "Path to environment root, automatically set by activate script" + type = string + description = "Path to environment root, automatically set by activate script" } module "cluster" { - source = "../../site/tofu/" - environment_root = var.environment_root + source = "../../site/tofu/" + environment_root = var.environment_root - # Environment specific variables - # Note that some of the variables below may need to be moved to the site environment - # defaults e.g cluster_networks should be in site if your staging and prod - # environments use the same networks - cluster_name = - cluster_image_id = - control_node_flavor = - cluster_networks = - key_pair = - login = - compute = + # Environment specific variables + # Note that some of the variables below may need to be moved to the site environment + # defaults e.g cluster_networks should be in site if your staging and prod + # environments use the same networks + cluster_name = null + cluster_image_id = null + control_node_flavor = null + cluster_networks = null + key_pair = null + login = null + compute = null } diff --git a/dev/ansible-ssh b/dev/ansible-ssh index 1e7bf75..b2e13ff 100755 --- a/dev/ansible-ssh +++ b/dev/ansible-ssh @@ -1,23 +1,28 @@ #!/usr/bin/env python3 # This tool allows you to ssh into a host using the ansible inventory. 
-# Example: ansible-ssh compute[0] -o GlobalKnownHostsFile=/dev/null -o UserKnownHostsFile=/dev/null +# Example: ansible-ssh compute[0] -o GlobalKnownHostsFile=/dev/null -o +# UserKnownHostsFile=/dev/null -import sys -import subprocess -import shlex import json import os +import shlex +import subprocess +import sys from collections import defaultdict + def _optional_arg(prototype, *values): # returns empty string if any of the values are falsey filtered = [value for value in values if value] return prototype.format(*values) if len(values) == len(filtered) else "" + if __name__ == "__main__": if len(sys.argv) < 2: - msg = (f"Usage: {sys.argv[0]} [args to pass to ssh]") + msg = ( + f"Usage: { + sys.argv[0]} [args to pass to ssh]") print(msg, file=sys.stderr) sys.exit(-1) @@ -25,7 +30,8 @@ if __name__ == "__main__": host = shlex.quote(sys.argv[1]) try: - output = subprocess.check_output(f'ansible-inventory --host { host }', shell=True) + output = subprocess.check_output( + f'ansible-inventory --host {host}', shell=True) except (subprocess.CalledProcessError) as e: msg = (f"[ERROR]: Is {host} missing from the inventory?") print(msg, file=sys.stderr) @@ -56,7 +62,5 @@ if __name__ == "__main__": base = shlex.split(f'ssh {port} {identity} {opts}') extras = sys.argv[2:] cmd = base + extras + [host] - print(f"[INFO]: Running: { subprocess.list2cmdline(cmd) }") - os.execvp(cmd[0],cmd) - - + print(f"[INFO]: Running: {subprocess.list2cmdline(cmd)}") + os.execvp(cmd[0], cmd) diff --git a/dev/delete-cluster.py b/dev/delete-cluster.py index 05f53fb..f329e74 100755 --- a/dev/delete-cluster.py +++ b/dev/delete-cluster.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# pylint: disable=invalid-name """ Delete infrastructure for a cluster without using Terraform. Useful for CI clusters. @@ -10,41 +11,53 @@ If --force is provided, it will delete all resources without confirmation. 
""" -import sys, json, subprocess +import json +import subprocess +import sys +CLUSTER_RESOURCES = ["server", "port", "volume"] -CLUSTER_RESOURCES = ['server', 'port', 'volume'] +# pylint: disable-next=missing-function-docstring, redefined-outer-name def delete_cluster(cluster_prefix, force=False): + to_delete = {} for resource_type in CLUSTER_RESOURCES: to_delete[resource_type] = [] - resource_list = subprocess.run(f'openstack {resource_type} list --format json', stdout=subprocess.PIPE, shell=True) + resource_list = subprocess.run( # pylint: disable=subprocess-run-check + f"openstack {resource_type} list --format json", + stdout=subprocess.PIPE, + shell=True, + ) resources = json.loads(resource_list.stdout) for item in resources: try: - if item['Name'] is not None and item['Name'].startswith(cluster_prefix): - print(resource_type, item['Name'], item['ID']) + if item["Name"] is not None and item["Name"].startswith(cluster_prefix): + print(resource_type, item["Name"], item["ID"]) to_delete[resource_type].append(item) - except: + except BaseException: print(resource_type, item) raise - - if force or input('Delete these (y/n)?:') == 'y': + + if force or input("Delete these (y/n)?:") == "y": for resource_type in CLUSTER_RESOURCES: - items = [v['ID'] for v in to_delete[resource_type]] + items = [v["ID"] for v in to_delete[resource_type]] if items: # delete all resources of each type in a single call for speed: - subprocess.run(f"openstack {resource_type} delete {' '.join(items)}", stdout=subprocess.PIPE, shell=True) - print(f'Deleted {len(items)} {resource_type}s') + subprocess.run( # pylint: disable=subprocess-run-check + f"openstack {resource_type} delete {' '.join(items)}", + stdout=subprocess.PIPE, + shell=True, + ) + print(f"Deleted {len(items)} {resource_type}s") else: - print('Cancelled - no resources deleted') + print("Cancelled - no resources deleted") -if __name__ == '__main__': + +if __name__ == "__main__": if len(sys.argv) < 2 or len(sys.argv) > 3: - print('ERROR: Incorrect argument(s).\n' + __doc__) - exit(1) - force_flag = '--force' in sys.argv + print("ERROR: Incorrect argument(s).\n" + __doc__) + exit(1) # pylint: disable=consider-using-sys-exit + force_flag = "--force" in sys.argv cluster_prefix = sys.argv[1] delete_cluster(cluster_prefix, force_flag) - diff --git a/dev/extract_logs.py b/dev/extract_logs.py index 65df014..3aecd35 100644 --- a/dev/extract_logs.py +++ b/dev/extract_logs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -Process packer build workflow logs into CSV. Useful for timing +Process packer build workflow logs into CSV. Useful for timing dissemination. 
Usage: @@ -13,70 +13,94 @@ import csv import re -import os import sys -def convert_time_to_seconds(time_str): - h, m, s = time_str.split(':') + +def convert_time_to_seconds(time_str): # pylint: disable=missing-function-docstring + h, m, s = time_str.split(":") return int(h) * 3600 + int(m) * 60 + float(s) -def extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_directory): + +# pylint: disable-next=missing-function-docstring, too-many-locals +def extract_log_info_and_generate_csv( + # pylint: disable=redefined-outer-name + log_file_path, + output_csv_path, + target_directory, + # pylint: enable=redefined-outer-name +): data = [] - unwanted_chars = re.compile(r'(\x1B\[[0-9;]*m)|([^\x00-\x7F])') + unwanted_chars = re.compile(r"(\x1B\[[0-9;]*m)|([^\x00-\x7F])") - with open(log_file_path, 'r') as file: + with open(log_file_path, "r") as file: # pylint: disable=unspecified-encoding lines = file.readlines() previous_task = None - for i in range(len(lines)): + for i in range(len(lines)): # pylint: disable=consider-using-enumerate if "TASK [" in lines[i]: - task_name = lines[i].strip().split('TASK [')[1].split(']')[0] + task_name = lines[i].strip().split("TASK [")[1].split("]")[0] - full_task_path = lines[i + 1].strip().split('task path: ')[1] + full_task_path = lines[i + 1].strip().split("task path: ")[1] if target_directory in full_task_path: - start_index = full_task_path.find(target_directory) + len(target_directory) + start_index = full_task_path.find(target_directory) + len( + target_directory + ) partial_task_path = full_task_path[start_index:] else: partial_task_path = full_task_path - partial_task_path = unwanted_chars.sub('', partial_task_path).strip() + partial_task_path = unwanted_chars.sub("", partial_task_path).strip() - time_to_complete = lines[i + 2].strip().split('(')[1].split(')')[0] + time_to_complete = lines[i + 2].strip().split("(")[1].split(")")[0] if previous_task: - previous_task[2] = time_to_complete # Shift the time to the previous task + # pylint: disable-next=unsupported-assignment-operation + previous_task[2] = ( + time_to_complete # Shift the time to the previous task + ) data.append(previous_task) - previous_task = [task_name, partial_task_path, None] # Placeholder for the next time_to_complete + previous_task = [ + task_name, + partial_task_path, + None, + ] # Placeholder for the next time_to_complete if previous_task: - previous_task[2] = time_to_complete if time_to_complete else 'N/A' + previous_task[2] = time_to_complete if time_to_complete else "N/A" data.append(previous_task) for row in data: - if row[2] != 'N/A': + if row[2] != "N/A": row[2] = convert_time_to_seconds(row[2]) data.sort(key=lambda x: x[2], reverse=True) for row in data: if isinstance(row[2], float): - row[2] = f'{int(row[2] // 3600):02}:{int((row[2] % 3600) // 60):02}:{row[2] % 60:.3f}' + row[2] = ( + f"{int(row[2] // 3600):02}:{int((row[2] % 3600) // 60):02}:{row[2] % 60:.3f}" + ) - with open(output_csv_path, 'w', newline='') as csvfile: + # pylint: disable-next=unspecified-encoding + with open(output_csv_path, "w", newline="") as csvfile: csvwriter = csv.writer(csvfile) - csvwriter.writerow(['Task Name', 'Task Path', 'Time to Complete']) + csvwriter.writerow(["Task Name", "Task Path", "Time to Complete"]) csvwriter.writerows(data) print(f"Data extracted, sorted, and saved to {output_csv_path}") - + + if len(sys.argv) != 2: - print("Path to workflow log plain text file should be provided as the only arg to this script") + print( + "Path to workflow log plain text file 
should be provided as the only arg to this script" + ) sys.exit(1) -log_file_path = sys.argv[1] # Input workflow log name -output_csv_path = log_file_path.replace('.txt', '.csv') # Output CSV name -target_directory = '/ansible/' # Shared directory for task path +log_file_path = sys.argv[1] # Input workflow log name +output_csv_path = log_file_path.replace(".txt", ".csv") # Output CSV name +# pylint: disable-next=invalid-name +target_directory = "/ansible/" # Shared directory for task path extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_directory) diff --git a/dev/image-share.sh b/dev/image-share.sh index 93a57ca..f109f16 100755 --- a/dev/image-share.sh +++ b/dev/image-share.sh @@ -13,18 +13,18 @@ DEST=$2 IMAGE_NAME=$3 export OS_CLOUD=$SOURCE -SOURCE_PROJECT=$(openstack project show -c id -f value $SOURCE) +SOURCE_PROJECT=$(openstack project show -c id -f value "$SOURCE") export OS_CLOUD=$DEST -DEST_PROJECT=$(openstack project show -c id -f value $DEST) +DEST_PROJECT=$(openstack project show -c id -f value "$DEST") export OS_CLOUD=$SOURCE -IMAGE=$(openstack image show -c id -f value $IMAGE_NAME) +IMAGE=$(openstack image show -c id -f value "$IMAGE_NAME") echo "Sharing $IMAGE_NAME ($IMAGE) from $SOURCE ($SOURCE_PROJECT) ..." -openstack image set --shared $IMAGE +openstack image set --shared "$IMAGE" echo "Adding destination project $DEST ($DEST_PROJECT) ..." -openstack image add project $IMAGE $DEST_PROJECT +openstack image add project "$IMAGE" "$DEST_PROJECT" export OS_CLOUD=$DEST echo "Accepting share ..." -openstack image set --accept $IMAGE +openstack image set --accept "$IMAGE" echo "Done" diff --git a/dev/output_manifest.py b/dev/output_manifest.py index b68ed49..04c9ffe 100755 --- a/dev/output_manifest.py +++ b/dev/output_manifest.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# pylint: disable=missing-module-docstring +# pylint: disable=line-too-long # Set github workflow output parameters defining image IDs from a packer manifest. # Usage: # ./packer/read_manifest.py packer/packer-manifest.json @@ -10,14 +12,23 @@ # which can be used in subsequent workflow steps: [1] # # [1]: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#example-setting-a-value +# pylint: enable=line-too-long + +import json +import sys -import sys, json output = {} -with open(sys.argv[1]) as f: +with open(sys.argv[1]) as f: # pylint: disable=unspecified-encoding data = json.load(f) -for build in data['builds']: - node_type = build['custom_data']['source'] - image_id = build['artifact_id'] - output[node_type] = image_id # NB: this deliberately gets the LAST build for a node type +for build in data["builds"]: + node_type = build["custom_data"]["source"] + image_id = build["artifact_id"] + output[node_type] = ( + image_id # NB: this deliberately gets the LAST build for a node type + ) for node_type, image_id in output.items(): - print('::set-output name=NEW_%s_IMAGE_ID::%s' % (node_type.upper(), image_id)) + print( + # pylint: disable-next=consider-using-f-string + "::set-output name=NEW_%s_IMAGE_ID::%s" + % (node_type.upper(), image_id) + ) diff --git a/dev/setup-env.sh b/dev/setup-env.sh index c37978a..d0c14bb 100755 --- a/dev/setup-env.sh +++ b/dev/setup-env.sh @@ -5,33 +5,35 @@ set -euo pipefail PYTHON_VERSION=${PYTHON_VERSION:-} if [[ "$PYTHON_VERSION" == "" ]]; then - if [[ -f /etc/os-release ]]; then - . /etc/os-release - OS=$ID - OS_VERSION=$VERSION_ID - else - exit 1 - fi + if [[ -f /etc/os-release ]]; then + # shellcheck disable=SC1091 + . 
/etc/os-release + OS=$ID + OS_VERSION=$VERSION_ID + else + exit 1 + fi - MAJOR_VERSION=$(echo $OS_VERSION | cut -d. -f1) + MAJOR_VERSION=$(echo "$OS_VERSION" | cut -d. -f1) - if [[ "$OS" == "ubuntu" && "$MAJOR_VERSION" == "22" ]]; then - PYTHON_VERSION="/usr/bin/python3.10" - elif [[ "$OS" == "rocky" && "$MAJOR_VERSION" == "8" ]]; then - # python3.9+ doesn't have selinux bindings - PYTHON_VERSION="/usr/bin/python3.8" # use `sudo yum install python38` on Rocky Linux 8 to install this - elif [[ "$OS" == "rocky" && "$MAJOR_VERSION" == "9" ]]; then - PYTHON_VERSION="/usr/bin/python3.9" - else - echo "Unsupported OS version: $OS $MAJOR_VERSION" - exit 1 - fi + if [[ "$OS" == "ubuntu" && "$MAJOR_VERSION" == "22" ]]; then + PYTHON_VERSION="/usr/bin/python3.10" + elif [[ "$OS" == "rocky" && "$MAJOR_VERSION" == "8" ]]; then + # python3.9+ doesn't have selinux bindings + PYTHON_VERSION="/usr/bin/python3.8" # use `sudo yum install python38` on Rocky Linux 8 to install this + elif [[ "$OS" == "rocky" && "$MAJOR_VERSION" == "9" ]]; then + PYTHON_VERSION="/usr/bin/python3.9" + else + echo "Unsupported OS version: $OS $MAJOR_VERSION" + exit 1 + fi fi if [[ ! -d "venv" ]]; then - $PYTHON_VERSION -m venv venv + $PYTHON_VERSION -m venv venv fi +# shellcheck disable=SC1091 . venv/bin/activate pip install -U pip pip install -r requirements.txt diff --git a/docs/README.md b/docs/README.md index dfa9144..c66868a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # StackHPC Slurm Appliance Documentation -### Operator docs +## Operator docs [Image build](image-build.md) @@ -16,7 +16,7 @@ [Sequence diagrams](sequence.md) -### Configuration docs +## Configuration docs [Alerting](alerting.md) @@ -32,7 +32,7 @@ [Persistent state](persistent-state.md) -#### Experimental fetaures +### Experimental fetaures [Compute init](experimental/compute-init.md) @@ -40,6 +40,6 @@ [Slurm controlled rebuild](experimental/slurm-controlled-rebuild.md) -### Contributor docs +## Contributor docs [Adding functionality](adding-functionality.md) diff --git a/docs/adding-functionality.md b/docs/adding-functionality.md index 05bcbb5..da0b879 100644 --- a/docs/adding-functionality.md +++ b/docs/adding-functionality.md @@ -1,9 +1,10 @@ # Adding new functionality Please contact us for specific advice, but this generally involves: + - Adding a role. - Adding a play calling that role into an existing playbook in `ansible/`, or adding a new playbook there and updating `site.yml`. - Adding a new (empty) group named after the role into `environments/common/inventory/groups` and a non-empty example group into `environments/site/inventory/groups`. - Adding new default group vars into `environments/common/inventory/group_vars/all//`. - Updating the default Packer build variables in `environments/common/inventory/group_vars/builder/defaults.yml`. -- Updating READMEs. +- Updating readmes. diff --git a/docs/alerting.md b/docs/alerting.md index e030d23..38bfb05 100644 --- a/docs/alerting.md +++ b/docs/alerting.md @@ -4,10 +4,10 @@ The [prometheus.io docs](https://prometheus.io/docs/alerting/latest/overview/) describe the overall alerting process: > Alerting with Prometheus is separated into two parts. Alerting rules in - Prometheus servers send alerts to an Alertmanager. The Alertmanager then - manages those alerts, including silencing, inhibition, aggregation and - sending out notifications via methods such as email, on-call notification - systems, and chat platforms. +> Prometheus servers send alerts to an Alertmanager. 
The Alertmanager then +> manages those alerts, including silencing, inhibition, aggregation and +> sending out notifications via methods such as email, on-call notification +> systems, and chat platforms. The general Prometheus configuration is described in [monitoring-and-logging.md](./monitoring-and-logging.md#defaults-3) - note that @@ -21,37 +21,35 @@ must be configured to generate notifications. ## Enabling alertmanager 1. Ensure both the `prometheus` and `alertmanager` servers are deployed on the -control node - these are deployed by default in the site environment's groups: - - ```ini - # environments/site/groups: - [prometheus:children] - control - - [alertmanager:children] - control - ``` + control node - these are deployed by default in the site environment's groups: + +```ini +# environments/site/groups: +[prometheus:children] +control +[alertmanager:children] +control +``` -2. If the appliance was deployed before the alertmanager functionality was included, -generate a password for the alertmanager UI user: +2. If the appliance was deployed before the alertmanager functionality was included, generate a password for the alertmanager UI user: - ```shell - ansible-playbook ansible/adhoc/generate-passwords.yml - ``` +```shell +ansible-playbook ansible/adhoc/generate-passwords.yml +``` 3. Configure a receiver to generate notifications from alerts. Currently a Slack -integration is provided (see below) but alternative receivers could be defined -via overriding role defaults. - + integration is provided (see below) but alternative receivers could be defined + via overriding role defaults. + 4. If desired, any other [role defaults](../ansible/roles/alertmanager/README.md) -may be overriden in e.g. `environments/site/inventory/group_vars/all/alertmanager.yml`. + may be overriden in e.g. `environments/site/inventory/group_vars/all/alertmanager.yml`. 5. Run the `monitoring.yml` playbook (if the cluster is already up) to configure -both alertmanager and prometheus: + both alertmanager and prometheus: - ```shell - ansible-playbook ansible/monitoring.yml - ``` +```shell +ansible-playbook ansible/monitoring.yml +``` ## Access @@ -75,7 +73,7 @@ of alerts via Slack. 1. Create an app with a bot token: -- Go to https://api.slack.com/apps +- Go to - select "Create an App" - select "From scratch" - Set app name and workspace fields, select "Create" @@ -92,16 +90,20 @@ of alerts via Slack. - Uncomment `vault_alertmanager_slack_integration_app_creds` and add the token - Vault-encrypt that file: - ansible-vault encrypt environments/$ENV/inventory/group_vars/all/vault_alertmanager.yml +```shell +ansible-vault encrypt environments/$ENV/inventory/group_vars/all/vault_alertmanager.yml +``` - Open `environments/$ENV/inventory/group_vars/all/alertmanager.yml` - Uncomment the `alertmanager_slack_integration` mapping and set your alert channel name 3. Invite the bot to your alerts channel -- In the appropriate Slack channel type: - /invite @YOUR_BOT_NAME +- In the appropriate Slack channel type: +```text +/invite @YOUR_BOT_NAME +``` ## Alerting Rules @@ -111,15 +113,16 @@ which is defined for the appliance at Two [cloudalchemy.prometheus](https://github.com/cloudalchemy/ansible-prometheus) role variables are relevant: + - `prometheus_alert_rules_files`: Paths to check for files providing rules. Note these are copied to Prometheus config directly, so jinja expressions for Prometheus do not need escaping. - `prometheus_alert_rules`: Yaml-format rules. 
Jinja templating here will be -interpolated by Ansible, so templating intended for Prometheus must be escaped -using `{% raw %}`/`{% endraw %}` tags. + interpolated by Ansible, so templating intended for Prometheus must be escaped + using `{% raw %}`/`{% endraw %}` tags. By default, `prometheus_alert_rules_files` is set so that any `*.rules` files -in a directory `files/prometheus/rules` in the current environment or *any* +in a directory `files/prometheus/rules` in the current environment or _any_ parent environment are loaded. So usually, site-specific alerts should be added by creating additional rules files in `environments/site/files/prometheus/rules`. If the same file exists in more than one environment, the "child" file will take @@ -127,6 +130,7 @@ precedence and any rules in the "parent" file will be ignored. A set of default alert rule files is provided at `environments/common/files/prometheus/rules/`. These cover: + - Some node-exporter metrics for disk, filesystems, memory and clock. Note no alerts are triggered on memory for compute nodes due to the intended use of those nodes. @@ -136,6 +140,7 @@ These cover: When defining additional rules, note the [labels defined](./monitoring-and-logging.md#prometheus_node_exporter_targets) for node-exporter targets. In future more alerts may be added for: + - smartctl-exporter-based rules for baremetal nodes where there is no infrastructure-level smart monitoring - loss of "up" network interfaces diff --git a/docs/chrony.md b/docs/chrony.md index 0d6f8b1..a80cd40 100644 --- a/docs/chrony.md +++ b/docs/chrony.md @@ -4,7 +4,7 @@ Use variables from the [mrlesmithjr.chrony](https://github.com/mrlesmithjr/ansib For example in: `environments//inventory/group_vars/all/chrony`: -``` +```yaml --- chrony_ntp_servers: - server: ntp-0.example.org @@ -17,5 +17,4 @@ chrony_ntp_servers: - option: iburst - option: minpoll val: 8 - ``` diff --git a/docs/ci.md b/docs/ci.md index c6fa890..1352649 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -2,7 +2,6 @@ The `.github` directory contains a set of sample workflows which can be used by downstream site-specific configuration repositories to simplify ongoing maintainence tasks. These include: -- An [upgrade check](.github/workflows/upgrade-check.yml.sample) workflow which automatically checks this upstream stackhpc/ansible-slurm-appliance repo for new releases and proposes a pull request to the downstream site-specific repo when a new release is published. +- An [upgrade check](.github/workflows/upgrade-check.yml.sample) workflow which automatically checks this upstream stackhpc/ansible-slurm-appliance repository for new releases and proposes a pull request to the downstream site-specific repository when a new release is published. - An [image upload](.github/workflows/upload-s3-image.yml.sample) workflow which takes an image name, downloads it from StackHPC's public S3 bucket if available, and uploads it to the target OpenStack cloud. - diff --git a/docs/environments.md b/docs/environments.md index 183b775..ae23410 100644 --- a/docs/environments.md +++ b/docs/environments.md @@ -3,6 +3,7 @@ ## Overview An environment defines the configuration for a single instantiation of this Slurm appliance. Each environment is a directory in `environments/`, containing: + - Any deployment automation required - e.g. OpenTofu configuration or HEAT templates. - An Ansible `inventory/` directory. - An `activate` script which sets environment variables to point to this configuration. 
@@ -13,21 +14,23 @@ All environments load the inventory from the `common` environment first, with th ### Environment-specific inventory structure The ansible inventory for the environment is in `environments//inventory/`. It should generally contain: -- A `hosts` file. This defines the hosts in the appliance. Generally it should be templated out by the deployment automation so it is also a convenient place to define variables which depend on the deployed hosts such as connection variables, IP addresses, ssh proxy arguments etc. + +- A `hosts` file. This defines the hosts in the appliance. Generally it should be templated out by the deployment automation so it is also a convenient place to define variables which depend on the deployed hosts such as connection variables, IP addresses, SSH proxy arguments etc. - A `groups` file defining ansible groups, which essentially controls which features of the appliance are enabled and where they are deployed. This repository generally follows a convention where functionality is defined using ansible roles applied to a group -of the same name, e.g. `openhpc` or `grafana`. The meaning and use of each group is described in comments in `environments/common/inventory/groups`. As the groups defined there for the common environment are empty, functionality is disabled by default and must be -enabled in a specific environment's `groups` file. The `site` environment contains an ini file at `environments/site/inventory/groups` which enables groups for default appliance functionality across all environments. Additional groups should generally also be -enabled in this file to avoid divergence between staging and production environments. Note that enabling some groups may require a site-specific image build and Ark credentials (see [operations guide](operations.md)). + of the same name, e.g. `openhpc` or `grafana`. The meaning and use of each group is described in comments in `environments/common/inventory/groups`. As the groups defined there for the common environment are empty, functionality is disabled by default and must be + enabled in a specific environment's `groups` file. The `site` environment contains an ini file at `environments/site/inventory/groups` which enables groups for default appliance functionality across all environments. Additional groups should generally also be + enabled in this file to avoid divergence between staging and production environments. Note that enabling some groups may require a site-specific image build and Ark credentials (see [operations guide](operations.md)). - Optionally, group variable files in `group_vars//overrides.yml`, where the group names match the functional groups described above. These can be used to override the default configuration for each functionality, which are defined in `environments/common/inventory/group_vars/all/.yml` (the use of `all` here is due to ansible's precedence rules). Although most of the inventory uses the group convention described above there are a few special cases: + - The `control`, `login` and `compute` groups are special as they need to contain actual hosts rather than child groups, and so should generally be defined in the templated-out `hosts` file. - The cluster name must be set on all hosts using `openhpc_cluster_name`. Using an `[all:vars]` section in the `hosts` file is usually convenient. - `environments/common/inventory/group_vars/all/defaults.yml` contains some variables which are not associated with a specific role/feature. 
These are unlikely to need changing, but if necessary that could be done using a `environments//inventory/group_vars/all/overrides.yml` file. - The `ansible/adhoc/generate-passwords.yml` playbook sets secrets for all hosts in `environments//inventory/group_vars/all/secrets.yml`. - The Packer-based pipeline for building compute images creates a VM in groups `builder` and `compute`, allowing build-specific properties to be set in `environments/common/inventory/group_vars/builder/defaults.yml` or the equivalent inventory-specific path. - Each Slurm partition must have: - - An inventory group `_` defining the hosts it contains - these must be homogenous w.r.t CPU and memory. - - An entry in the `openhpc_slurm_partitions` mapping in `environments//inventory/group_vars/openhpc/overrides.yml`. + - An inventory group `_` defining the hosts it contains - these must be homogenous w.r.t CPU and memory. + - An entry in the `openhpc_slurm_partitions` mapping in `environments//inventory/group_vars/openhpc/overrides.yml`. See the [openhpc role documentation](https://github.com/stackhpc/ansible-role-openhpc#slurmconf) for more options. - On an OpenStack cloud, rebuilding/reimaging compute nodes from Slurm can be enabled by defining a `rebuild` group containing the relevant compute hosts (e.g. in the generated `hosts` file). diff --git a/docs/experimental/compute-init.md b/docs/experimental/compute-init.md index 8b5d5e3..dfad27b 100644 --- a/docs/experimental/compute-init.md +++ b/docs/experimental/compute-init.md @@ -2,7 +2,7 @@ See the role README.md -# Changes to image / tofu state +## Changes to image / tofu state When a compute group has the `ignore_image_changes` parameter set to true, changes to the `image_id` parameter (which defaults to `cluster_image_id`) are @@ -14,17 +14,21 @@ role templates out hostvars to the control node, which means the "target" image ID is then available on the control node. Subsequent work will use this to rebuild the node via slurm. -# CI workflow +## CI workflow The compute node rebuild is tested in CI after the tests for rebuilding the login and control nodes. The process follows 1. Compute nodes are reimaged: - ansible-playbook -v --limit compute ansible/adhoc/rebuild.yml +```shell +ansible-playbook -v --limit compute ansible/adhoc/rebuild.yml +``` 2. Ansible-init runs against newly reimaged compute nodes 3. Run sinfo and check nodes have expected slurm state - ansible-playbook -v ansible/ci/check_slurm.yml \ No newline at end of file +```shell +ansible-playbook -v ansible/ci/check_slurm.yml +``` diff --git a/docs/experimental/isolated-clusters.md b/docs/experimental/isolated-clusters.md index c136e99..5cf5a7b 100644 --- a/docs/experimental/isolated-clusters.md +++ b/docs/experimental/isolated-clusters.md @@ -11,68 +11,70 @@ all "default" features, i.e. roles/groups which are enabled either in the The full list of features and whether they are functional on such an "isolated" network is shown in the table below. Note that: -- Using [EESSI](https://www.eessi.io/docs/) necessarily requires outbound - network access for the CernVM File System. However this can be provided - via an authenticated proxy. While the proxy configuration on the cluster node - is readable by all users, this proxy could be limited via acls to only provide - access to EESSI's CVMFS Stratum 1 servers. +- Using [EESSI](https://www.eessi.io/docs/) necessarily requires outbound + network access for the CernVM File System. However this can be provided + via an authenticated proxy. 
While the proxy configuration on the cluster node + is readable by all users, this proxy could be limited via acls to only provide + access to EESSI's CVMFS Stratum 1 servers. ## Support by feature for isolated networks See above for definition of "Default" features. In the "Isolated?" column: + - "Y": Feature works without outbound internet access. - "N": Known not to work. - "?": Not investigated at present. -| Inventory group/role | Default? | Isolated? | -| ----------------------| -------- | --------- | -| alertmanager | Y | Y | -| ansible_init | Y | Y | -| basic_users | Y | Y | -| block_devices | Y | No (depreciated) | -| cacerts | - | Y | -| chrony | - | Y | -| compute_init | - | Y | -| cuda | - | ? | -| eessi | Y | Y - see above | -| etc_hosts | Y | Y | -| extra_packages | - | No | -| fail2ban | Y | Y | -| filebeat | Y | Y | -| firewalld | Y | Y | -| freeipa_client | - | Y - image build required | -| gateway | n/a | n/a - build only | -| grafana | Y | Y | -| hpctests | Y | Y | -| k3s_agent | - | ? | -| k3s_server | - | ? | -| k9s | - | ? | -| lustre | - | ? | -| manila | Y | Y | -| mysql | Y | Y | -| nfs | Y | Y | -| nhc | Y | Y | -| node_exporter | Y | Y | -| openhpc | Y | Y | -| openondemand | Y | Y | -| openondemand_desktop | Y | Y | -| openondemand_jupyter | Y | Y | -| opensearch | Y | Y | -| podman | Y | Y | -| persist_hostkeys | Y | Y | -| prometheus | Y | Y | -| proxy | - | Y | -| resolv_conf | - | ? | -| slurm_exporter | Y | Y | -| slurm_stats | Y | Y | -| squid | - | ? | -| sshd | - | ? | -| sssd | - | ? | -| systemd | Y | Y | -| tuned | - | Y | -| update | - | No | +| Inventory group/role | Default? | Isolated? | +| -------------------- | -------- | ------------------------ | +| alertmanager | Y | Y | +| ansible_init | Y | Y | +| basic_users | Y | Y | +| block_devices | Y | No (depreciated) | +| cacerts | - | Y | +| chrony | - | Y | +| compute_init | - | Y | +| cuda | - | ? | +| eessi | Y | Y - see above | +| etc_hosts | Y | Y | +| extra_packages | - | No | +| fail2ban | Y | Y | +| filebeat | Y | Y | +| firewalld | Y | Y | +| freeipa_client | - | Y - image build required | +| gateway | n/a | n/a - build only | +| grafana | Y | Y | +| hpctests | Y | Y | +| k3s_agent | - | ? | +| k3s_server | - | ? | +| k9s | - | ? | +| lustre | - | ? | +| manila | Y | Y | +| MySQL | Y | Y | +| nfs | Y | Y | +| nhc | Y | Y | +| node_exporter | Y | Y | +| openhpc | Y | Y | +| openondemand | Y | Y | +| openondemand_desktop | Y | Y | +| openondemand_jupyter | Y | Y | +| opensearch | Y | Y | +| podman | Y | Y | +| persist_hostkeys | Y | Y | +| prometheus | Y | Y | +| proxy | - | Y | +| resolv_conf | - | ? | +| slurm_exporter | Y | Y | +| slurm_stats | Y | Y | +| squid | - | ? | +| sshd | - | ? | +| sssd | - | ? | +| systemd | Y | Y | +| tuned | - | Y | +| update | - | No | ## Image build + A site image build may be required, either for features using packages not present in StackHPC images (e.g `freeipa_client`) or to [add additional packages](../operations.md#adding-additional-packages). Clearly in this case the build VM does require outbound internet access. For an @@ -82,7 +84,7 @@ proxy is available the image build can be configured to use that, e.g.: ```yaml # environments/$ENV/builder.pkrvars.hcl: -... 
+--- inventory_groups = 'proxy,freeipa_client' ``` @@ -96,7 +98,7 @@ proxy_http_address: squid.mysite.org ```yaml # environments/$ENV/group_vars/builder/vault_overrrides.yml: # NB: vault-encrypt this file -vault_proxy_basic_password: 'super-secret-password' +vault_proxy_basic_password: "super-secret-password" ``` See [ansible/roles/proxy/README.md](../../ansible/roles/proxy/README.md) and @@ -117,28 +119,32 @@ default security groups are less restrictive than these. Assuming nodes and the deploy host have a security group `isolated` applied then the following rules are required: - # allow outbound DNS - ALLOW IPv4 53/tcp to 0.0.0.0/0 - ALLOW IPv4 53/udp to 0.0.0.0/0 - - # allow everything within the cluster: - ALLOW IPv4 from isolated - ALLOW IPv4 to isolated - - # allow hosts to reach metadata server (e.g. for cloud-init keys): - ALLOW IPv4 80/tcp to 169.254.169.254/32 +```text +# allow outbound DNS +ALLOW IPv4 53/tcp to 0.0.0.0/0 +ALLOW IPv4 53/udp to 0.0.0.0/0 + +# allow everything within the cluster: +ALLOW IPv4 from isolated +ALLOW IPv4 to isolated - # optionally: allow hosts to reach squid proxy for EESSI: - ALLOW IPv4 3128/tcp to +# allow hosts to reach metadata server (e.g. for cloud-init keys): +ALLOW IPv4 80/tcp to 169.254.169.254/32 + +# optionally: allow hosts to reach squid proxy for EESSI: +ALLOW IPv4 3128/tcp to +``` Note that name resolution happens on the hosts, not on the proxy, hence DNS is required for nodes even with a proxy. -For nodes running OpenOndemand, inbound ssh and https are also required +For nodes running OpenOndemand, inbound SSH and https are also required (e.g. in a security group called `isolated-ssh-https`): - ALLOW IPv4 443/tcp from 0.0.0.0/0 - ALLOW IPv4 22/tcp from 0.0.0.0/0 +```text +ALLOW IPv4 443/tcp from 0.0.0.0/0 +ALLOW IPv4 22/tcp from 0.0.0.0/0 +``` If non-default security groups are required, then the OpenTofu variables `login_security_groups` and `nonlogin_security_groups` can be used to set diff --git a/docs/experimental/pulp.md b/docs/experimental/pulp.md index 582eec9..f0748c0 100644 --- a/docs/experimental/pulp.md +++ b/docs/experimental/pulp.md @@ -5,17 +5,18 @@ In order to ensure reproducible builds, the appliance can build images using rep ## Deploying/configuring Pulp Server ### Deploying a Pulp server + A playbook is provided to install and configure a Pulp server on a given host. Admin credentials for this server are automatically generated through the `ansible/adhoc/generate-passwords.yml` playbook. To use this, create an inventory file -defining a group `pulp_server` containing a single host, which requires at least 2 vCPUs and 4GB RAM. The group should be defined in your `site` environment's inventory so that a single Pulp server is shared between all environments and +defining a group `pulp_server` containing a single host, which requires at least 2 vCPUs and 4GB RAM. The group should be defined in your `site` environment's inventory so that a single Pulp server is shared between all environments and the same snapshots are tested in staging and production. -Deploying and syncing Pulp has been tested on an RL9 host. The hostvar `ansible_host` should be defined, giving the IP address Ansible should use for ssh. For example, you can create an ini file at `environments/site/inventory/pulp` with the contents: +Deploying and syncing Pulp has been tested on an RL9 host. The hostvar `ansible_host` should be defined, giving the IP address Ansible should use for SSH. 
For example, you can create an ini file at `environments/site/inventory/pulp` with the contents: -``` +```ini [pulp_server] pulp_host ansible_host= ``` -> [!WARNING] +> [!WARNING] > The inventory hostname cannot conflict with group names i.e can't be called `pulp_site` or `pulp_server`. Once complete, it will print a message giving a value to set for `appliances_pulp_url` (see example config below), assuming the `ansible_host` address is also the address the cluster @@ -24,6 +25,7 @@ should use to reach the Pulp server. Note access to this server's content isn't authenticated so this assumes the `pulp_server` host is not externally reachable. ### Using an existing Pulp server + An existing Pulp server can be used to host Ark repos by overriding `pulp_site_password` and `appliances_pulp_url` in the target environment. Note that this assumes the same configuration as the appliance deployed Pulp i.e no content authentication. ## Syncing Pulp content with Ark @@ -34,7 +36,7 @@ Content can also be synced by running `ansible/adhoc/sync-pulp.yml`. By default ## Example config in site variables -``` +```yaml # environments/site/inventory/group_vars/all/pulp_site.yml: appliances_pulp_url: "http://:8080" pulp_site_upstream_username: @@ -42,10 +44,11 @@ pulp_site_upstream_password: ``` ## Installing packages from Pulp at runtime + By default, system repos are overwritten to point at Pulp repos during [image builds,](../image-build.md) so using a site Pulp server will require a new fatimage. If you instead wish to install packages at runtime, you will need to add all host groups on which you will be installing packages to the `dnf_repos` group in `environments/site/inventory/groups` e.g: -``` +```yaml [dnf_repos:children] cluster ``` diff --git a/docs/experimental/slurm-controlled-rebuild.md b/docs/experimental/slurm-controlled-rebuild.md index 7f9efa2..fc654d3 100644 --- a/docs/experimental/slurm-controlled-rebuild.md +++ b/docs/experimental/slurm-controlled-rebuild.md @@ -9,6 +9,7 @@ This provides a way to upgrade nodes with less impact than the normal approach. > or usage may change with further development. In summary, the way this functionality works is as follows: + 1. The image references(s) are manually updated in the OpenTofu configuration in the normal way. 2. `tofu apply` is run which rebuilds the login and control nodes to the new @@ -20,7 +21,7 @@ In summary, the way this functionality works is as follows: and control nodes and the old image for the compute nodes. This playbook also: - Writes cluster configuration to the control node, using the - [compute_init](../../ansible/roles/compute_init/README.md) role. + [compute_init](../../ansible/roles/compute_init/README.md) role. - Configures an application credential and helper programs on the control node, using the [rebuild](../../ansible/roles/rebuild/README.md) role. 4. An admin submits Slurm jobs, one for each node, to a special "rebuild" @@ -34,7 +35,7 @@ In summary, the way this functionality works is as follows: configuration, and if it does not match, uses OpenStack to rebuild the node to the desired (updated) image. TODO: Describe the logic if they DO match -6. After a rebuild, the compute node runs various Ansible tasks during boot, +6. After a rebuild, the compute node runs various Ansible tasks during boot, controlled by the [compute_init](../../ansible/roles/compute_init/README.md) role, to fully configure the node again. It retrieves the required cluster configuration information from the control node via an NFS mount. 
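
As an illustration of step 5 above, the check performed by a rebuild job is conceptually similar to the sketch below. This is not the actual script installed by the `rebuild` role; the node name and desired image are hypothetical placeholders, and it assumes an OpenStack CLI that can authenticate with the application credential configured in step 3.

```bash
#!/usr/bin/env bash
# Conceptual sketch only - not the script shipped by the rebuild role.
# Assumes the OpenStack CLI can authenticate using the application credential
# configured on the control node. NODE and DESIRED_IMAGE are placeholders.
set -euo pipefail

NODE="$1"            # e.g. general-0
DESIRED_IMAGE="$2"   # image name or UUID from the OpenTofu configuration

# Compare the node's current image with the desired one...
current_image=$(openstack server show "$NODE" -f value -c image)

if [[ "$current_image" != *"$DESIRED_IMAGE"* ]]; then
    # ...and rebuild the node via OpenStack if they differ.
    openstack server rebuild --image "$DESIRED_IMAGE" --wait "$NODE"
fi
```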
@@ -47,7 +48,7 @@ In summary, the way this functionality works is as follows: To enable a compute node to rejoin the cluster after a rebuild, functionality must be built into the image. Before progressing you should check that all the functionality required for your cluster is currently supported by the -`compute_init` role. Review that role's [README](../../ansible/roles/compute_init/README.md) +`compute_init` role. Review that role's [Readme](../../ansible/roles/compute_init/README.md) against `environments/*/inventory/groups` files (and any similar files which define groups). Note that some functionality does not require support, e.g. because it does not run on compute nodes. @@ -55,9 +56,10 @@ because it does not run on compute nodes. ## Configuration The configuration of this is complex and involves: + - OpenTofu variables to stop tracking image changes on compute nodes - Definition of partition(s) to use for launching rebuild jobs -- Configuration of the [rebuild](../../ansible/roles/rebuild/README.md) role +- Configuration of the [rebuild](../../ansible/roles/rebuild/README.md) role to enable the Slurm controller to rebuild compute nodes via OpenStack. - Configuration of the [compute_init](../../ansible/roles/compute_init/README.md) role so that compute nodes rejoin the cluster after rebuilding - this is likely @@ -71,107 +73,110 @@ The configuration of this is complex and involves: relevant node group in the OpenTofu `compute` variable, set the parameter `ignore_image_changes: true`. E.g. - ```terraform - # environments/$ENV/main.tf: - ... - compute = { - general = { - nodes = ["general-0", "general-1"] - ignore_image_changes = true - ... - } - gpu = { - node = ["a100-0", "a100-1"] - ignore_image_changes = true - ... - } +```terraform +# environments/$ENV/main.tf: +... +compute = { + general = { + nodes = ["general-0", "general-1"] + ignore_image_changes = true + ... } - ... - ``` + gpu = { + node = ["a100-0", "a100-1"] + ignore_image_changes = true + ... + } +} +... +``` -3. Follow the [compute_init](../../ansible/roles/compute_init/README.md) README +3. Follow the [compute_init](../../ansible/roles/compute_init/README.md) readme to add OpenTofu and Ansible configuration for that role. The "rebootable" nodes should all be in the `compute_init` group with the `compute_init_enable` OpenTofu parameter set. -4. If the [compute_init](../../ansible/roles/compute_init/README.md) README +4. If the [compute_init](../../ansible/roles/compute_init/README.md) readme showed that a custom image is required for any entry in the `compute_init_enable` parameter, follow the usual process to build new images as required. 5. Update image references in the OpenTofu configuration. Normally these should be in: - - `environments/site/tofu/variables.tf`: `cluster_image_id` for the default - cluster image. - - `environments/$ENV/tofu/main.tf`: parameter `image_id` in node groups - defined in the `compute` or `login` variables, to override the default - image for specific node groups. -5. Ensure `openhpc_partitions` contains a partition covering the nodes to run + - `environments/site/tofu/variables.tf`: `cluster_image_id` for the default + cluster image. + - `environments/$ENV/tofu/main.tf`: parameter `image_id` in node groups + defined in the `compute` or `login` variables, to override the default + image for specific node groups. + +6. Ensure `openhpc_partitions` contains a partition covering the nodes to run rebuild jobs. 
The default definition in `environments/common/inventory/group_vars/all/openhpc.yml` will automatically include this via `openhpc_rebuild_partition` also in that file. If modifying this, note the important parameters are: - - - `name`: Partition name matching `rebuild` role variable `rebuild_partitions`, - default `rebuild`. - - `groups`: A list of nodegroup names, matching `openhpc_nodegroup` and - keys in the OpenTofu `compute` variable (see example in step 2 above). - Normally every compute node group should be listed here, unless - Slurm-controlled rebuild is not required for certain node groups. - - `default`: Must be set to `NO` so that it is not the default partition. - - `maxtime`: Maximum time to allow for rebuild jobs, in - [slurm.conf format](https://slurm.schedmd.com/slurm.conf.html#OPT_MaxTime). - The example here is 30 minutes, but see discussion below. - - `partition_params`: A mapping of additional parameters, which must be set - as follows: - - `PriorityJobFactor`: Ensures jobs in this partition (i.e. rebuild jobs) - are always scheduled before jobs in "normal" partitions on the same - nodes. This value is the highest which can be set. See - [slurm.conf docs](https://slurm.schedmd.com/slurm.conf.html#OPT_PriorityJobFactor). - Note this is used instead of `PriorityTier` as the latter (with the - default appliance configuration) allows rebuild jobs to preempt and - suspend running user jobs, which is probably undesirable. - - `Hidden`: Don't show this partition in e.g. `sinfo` for unpriviledged - users. - - `RootOnly`: Only allow the root user to submit jobs to this partition. - - `DisableRootJobs`: Don't disable the root user, in case this parameter - is set globally via `openhpc_config_extra`. - - `PreemptMode`: Don't allow reboot jobs to be preempted/suspended. - - `OverSubscribe`: Ensure that jobs run in this partition require the - entire node. This means they do not run on nodes as the same time as - user jobs running in partitions allowing non-exclusive use. - - The value for `maxtime` needs to be sufficent not just for a single node - to be rebuilt, but also to allow for any batching in either OpenTofu or - in Nova - see remarks in the [production docs](../production.md). - - If it is desirable to roll out changes more gradually, it is possible to - create multiple "rebuild" partitions, but it is necessary that: - - The rebuild partitions should not themselves overlap, else nodes may be - rebuilt more than once. - - Each rebuild partition should entirely cover one or more "normal" - partitions, to avoid the possibility of user jobs being scheduled to a - mix of nodes using old and new images. - -6. Configure the [rebuild](../../ansible/roles/rebuild/README.md) role: - - Add the `control` node into the `rebuild` group. - - Ensure an application credential to use for rebuilding nodes is available - on the deploy host (default location `~/.config/openstack/clouds.yaml`). - - If required, override `rebuild_clouds_path` or other variables in the site - environment. - -7. Run `tofu apply` as usual to apply the new OpenTofu configuration. - - > [!NOTE] - > If the cluster image references were updated at step 5, this will be - > a disruptive operation and should be planned as part of a normal upgrade - > cycle. - - > [!CAUTION] - > Due to OpenTofu/Terraform state limitations, this will plan to delete and - > recreate all compute nodes in node groups where `ignore_image_changes: true`. - > was not previously set. This is a one-time issue with adding this - > parameter, i.e. 
subsequent applys will not require this. + + - `name`: Partition name matching `rebuild` role variable `rebuild_partitions`, + default `rebuild`. + - `groups`: A list of nodegroup names, matching `openhpc_nodegroup` and + keys in the OpenTofu `compute` variable (see example in step 2 above). + Normally every compute node group should be listed here, unless + Slurm-controlled rebuild is not required for certain node groups. + - `default`: Must be set to `NO` so that it is not the default partition. + - `maxtime`: Maximum time to allow for rebuild jobs, in + [slurm.conf format](https://slurm.schedmd.com/slurm.conf.html#OPT_MaxTime). + The example here is 30 minutes, but see discussion below. + - `partition_params`: A mapping of additional parameters, which must be set + as follows: + - `PriorityJobFactor`: Ensures jobs in this partition (i.e. rebuild jobs) + are always scheduled before jobs in "normal" partitions on the same + nodes. This value is the highest which can be set. See + [slurm.conf docs](https://slurm.schedmd.com/slurm.conf.html#OPT_PriorityJobFactor). + Note this is used instead of `PriorityTier` as the latter (with the + default appliance configuration) allows rebuild jobs to preempt and + suspend running user jobs, which is probably undesirable. + - `Hidden`: Don't show this partition in e.g. `sinfo` for unpriviledged + users. + - `RootOnly`: Only allow the root user to submit jobs to this partition. + - `DisableRootJobs`: Don't disable the root user, in case this parameter + is set globally via `openhpc_config_extra`. + - `PreemptMode`: Don't allow reboot jobs to be preempted/suspended. + - `OverSubscribe`: Ensure that jobs run in this partition require the + entire node. This means they do not run on nodes as the same time as + user jobs running in partitions allowing non-exclusive use. + + The value for `maxtime` needs to be sufficent not just for a single node + to be rebuilt, but also to allow for any batching in either OpenTofu or + in Nova - see remarks in the [production docs](../production.md). + + If it is desirable to roll out changes more gradually, it is possible to + create multiple "rebuild" partitions, but it is necessary that: + + - The rebuild partitions should not themselves overlap, else nodes may be + rebuilt more than once. + - Each rebuild partition should entirely cover one or more "normal" + partitions, to avoid the possibility of user jobs being scheduled to a + mix of nodes using old and new images. + +7. Configure the [rebuild](../../ansible/roles/rebuild/README.md) role: + + - Add the `control` node into the `rebuild` group. + - Ensure an application credential to use for rebuilding nodes is available + on the deploy host (default location `~/.config/openstack/clouds.yaml`). + - If required, override `rebuild_clouds_path` or other variables in the site + environment. + +8. Run `tofu apply` as usual to apply the new OpenTofu configuration. + + > [!NOTE] + > If the cluster image references were updated at step 5, this will be + > a disruptive operation and should be planned as part of a normal upgrade + > cycle. + > + > [!CAUTION] + > Due to OpenTofu/Terraform state limitations, this will plan to delete and + > recreate all compute nodes in node groups where `ignore_image_changes: true`. + > was not previously set. This is a one-time issue with adding this + > parameter, i.e. subsequent applys will not require this. TODO: clarify whether, if the image is bumped at this point, the compute nodes actually get recreated on the new or the old image?? 
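
Before running step 8 on a production cluster, the proposed changes can be reviewed first. A minimal sketch, assuming the OpenTofu configuration lives in `environments/$ENV/tofu` as above and any site-specific variable files are already wired in:

```bash
cd environments/$ENV/tofu
# Write the plan to a file and check which resources would be destroyed,
# replaced or created before applying anything.
tofu plan -out=rebuild.tfplan
tofu show rebuild.tfplan | grep -E '(will be|must be) (destroyed|replaced|created)'
# Apply the saved plan only once it matches expectations.
tofu apply rebuild.tfplan
```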
@@ -193,7 +198,9 @@ However there is no need to drain compute nodes and create reservations etc. Triggering rebuild jobs is done using the following playbook: - ansible-playbook ansible/adhoc/rebuild-via-slurm.yml +```shell +ansible-playbook ansible/adhoc/rebuild-via-slurm.yml +``` This will create jobs to reimage every slurm-rebuildable node to the image currently defined in the OpenTofu configuration. @@ -204,17 +211,22 @@ example the following comand will run in a non-default partition and does not actually reboot/rebuild nodes, which may be useful for testing interactions with other priority or QOS settings: - ansible-playbook ansible/adhoc/rebuild-via-slurm.yml -e 'rebuild_job_partitions=test rebuild_job_reboot=false' +```shell +ansible-playbook ansible/adhoc/rebuild-via-slurm.yml -e 'rebuild_job_partitions=test rebuild_job_reboot=false' +``` ## Testing The below demonstrates testing this using the `.stackhpc` CI environment, using: + - A 2-node default "standard" partition. - A 2-node "extra" partition (note this does not usually have any nodes by default). In one terminal launch a watch of job state: - [root@RL9-control rocky]# clear && ~/ewatch/ewatch.py -n 1 -i '\d+:\d+' 'squeue --all --Format=PARTITION,NAME:25,USERNAME:11,STATE:12,NUMNODES:8,NODELIST' +```shell +[root@RL9-control rocky]# clear && ~/ewatch/ewatch.py -n 1 -i '\d+:\d+' 'squeue --all --Format=PARTITION,NAME:25,USERNAME:11,STATE:12,NUMNODES:8,NODELIST' +``` This uses [ewatch](https://github.com/sjpb/ewatch) to summarise changes in output. @@ -222,17 +234,24 @@ output. In a second terminal, launch 2x normal jobs into the default ("standard") partition: - [demo_user@RL9-login-0 ~]$ sbatch -N2 --job-name=JobA --wrap "sleep 20" && sbatch -N2 --job-name=JobB --wrap "sleep 10" +```shell +[demo_user@RL9-login-0 ~]$ sbatch -N2 --job-name=JobA --wrap "sleep 20" && sbatch -N2 --job-name=JobB --wrap "sleep 10" +``` In a third terminal, trigger rebuild jobs: - .stackhpc/ (venv) [rocky@steveb-dev slurm-app-rl9]$ ansible-playbook ansible/adhoc/rebuild-via-slurm.yml -e 'rebuild_job_reboot=false rebuild_job_command="sleep 30"' - +```shell +.stackhpc/ (venv) [rocky@steveb-dev slurm-app-rl9]$ ansible-playbook ansible/adhoc/rebuild-via-slurm.yml -e 'rebuild_job_reboot=false rebuild_job_command="sleep 30"' - +``` Back in the second terminal, submit more user jobs to either partition: - [demo_user@RL9-login-0 ~]$ sbatch -N2 --job-name=JobC --partition,standard,extra --wrap "sleep 10" +```shell +[demo_user@RL9-login-0 ~]$ sbatch -N2 --job-name=JobC --partition,standard,extra --wrap "sleep 10" +``` The output from the first terminal should show: + - Job A runs on submission in the default "standard" partition. - Job B pends for the default "standard" partition. 
- Rebuild jobs runs on submission in the "extra" partition and pend for the "standard" partition @@ -246,48 +265,49 @@ The output from the first terminal should show: - Job B runs in the "standard" partition Example output: -``` + +```text [2025-03-28T14:26:34.510466] -PARTITION NAME USER STATE NODES NODELIST -standard JobB demo_user PENDING 2 -standard JobA demo_user RUNNING 2 RL9-compute-[0-1] +PARTITION NAME USER STATE NODES NODELIST +standard JobB demo_user PENDING 2 +standard JobA demo_user RUNNING 2 RL9-compute-[0-1] [2025-03-28T14:26:38.530213] -PARTITION NAME USER STATE NODES NODELIST -rebuild rebuild-RL9-compute-1 root PENDING 1 -rebuild rebuild-RL9-compute-0 root PENDING 1 -rebuild rebuild-RL9-extra-0 root RUNNING 1 RL9-extra-0 -rebuild rebuild-RL9-extra-1 root RUNNING 1 RL9-extra-1 -standard JobB demo_user PENDING 2 -standard JobA demo_user RUNNING 2 RL9-compute-[0-1] -standard,extra JobC demo_user PENDING 2 +PARTITION NAME USER STATE NODES NODELIST +rebuild rebuild-RL9-compute-1 root PENDING 1 +rebuild rebuild-RL9-compute-0 root PENDING 1 +rebuild rebuild-RL9-extra-0 root RUNNING 1 RL9-extra-0 +rebuild rebuild-RL9-extra-1 root RUNNING 1 RL9-extra-1 +standard JobB demo_user PENDING 2 +standard JobA demo_user RUNNING 2 RL9-compute-[0-1] +standard,extra JobC demo_user PENDING 2 [2025-03-28T14:26:54.609651] -PARTITION NAME USER STATE NODES NODELIST -rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 -rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 -rebuild rebuild-RL9-extra-0 root RUNNING 1 RL9-extra-0 -rebuild rebuild-RL9-extra-1 root RUNNING 1 RL9-extra-1 -standard JobB demo_user PENDING 2 -standard,extra JobC demo_user PENDING 2 +PARTITION NAME USER STATE NODES NODELIST +rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 +rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 +rebuild rebuild-RL9-extra-0 root RUNNING 1 RL9-extra-0 +rebuild rebuild-RL9-extra-1 root RUNNING 1 RL9-extra-1 +standard JobB demo_user PENDING 2 +standard,extra JobC demo_user PENDING 2 [2025-03-28T14:28:39.091571] -PARTITION NAME USER STATE NODES NODELIST -extra JobC demo_user RUNNING 2 RL9-extra-[0-1] -rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 -rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 -standard JobB demo_user PENDING 2 +PARTITION NAME USER STATE NODES NODELIST +extra JobC demo_user RUNNING 2 RL9-extra-[0-1] +rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 +rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 +standard JobB demo_user PENDING 2 [2025-03-28T14:28:49.139349] -PARTITION NAME USER STATE NODES NODELIST -rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 -rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 -standard JobB demo_user PENDING 2 +PARTITION NAME USER STATE NODES NODELIST +rebuild rebuild-RL9-compute-0 root RUNNING 1 RL9-compute-0 +rebuild rebuild-RL9-compute-1 root RUNNING 1 RL9-compute-1 +standard JobB demo_user PENDING 2 [2025-03-28T14:28:55.168264] -PARTITION NAME USER STATE NODES NODELIST -standard JobB demo_user RUNNING 2 RL9-compute-[0-1] +PARTITION NAME USER STATE NODES NODELIST +standard JobB demo_user RUNNING 2 RL9-compute-[0-1] [2025-03-28T14:29:05.216346] -PARTITION NAME USER STATE NODES NODELIST +PARTITION NAME USER STATE NODES NODELIST ``` diff --git a/docs/filesystems.md b/docs/filesystems.md index 5509aef..14669f9 100644 --- a/docs/filesystems.md +++ b/docs/filesystems.md @@ -8,7 +8,7 @@ The Slurm appliance supports multiple ways of configuring shared filesystems, in - 
Lustre -# Manila +=# Manila The Slurm appliance supports mounting shared filesystems using [CephFS](https://docs.ceph.com/en/latest/cephfs/) via [OpenStack Manila](https://docs.openstack.org/manila/latest/). This section explains: @@ -24,65 +24,69 @@ The Slurm appliance requires that the Manila shares already exist on the system. If this is the first time Manila is being used on the system, a CephFS share type will need to be created. You will need admin credentials to do this. - ```bash - openstack share type create cephfs-type false --extra-specs storage_protocol=CEPHFS vendor_name=Ceph - ``` +```bash +openstack share type create cephfs-type false --extra-specs storage_protocol=CEPHFS vendor_name=Ceph +``` -Once this exists, create a share using credentials for the Slurm project. An access rule also needs to be created, where the `access_to` argument (`openstack share access create `) is a user that will be created in Ceph. This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. Ideally, this share should include your environment name. In this example, the name is "production". +Once this exists, create a share using credentials for the Slurm project. +An access rule also needs to be created, where the `access_to` argument +(`openstack share access create `) is a user that will be created in Ceph. +This needs to be globally unique in Ceph, so needs to be different for each OpenStack project. +Ideally, this share should include your environment name. In this example, the name is "production". - ```bash - openstack share create CephFS 300 --description 'Scratch dir for Slurm prod' --name slurm-production-scratch --share-type cephfs-type --wait - openstack share access create slurm-production-scratch cephx slurm-production - ``` +```bash +openstack share create CephFS 300 --description 'Scratch dir for Slurm prod' --name slurm-production-scratch --share-type cephfs-type --wait +openstack share access create slurm-production-scratch cephx slurm-production +``` ## Configuring the Slurm Appliance for Manila To mount shares onto hosts in a group, add them to the `manila` group. - ```ini - # environments/site/inventory/groups: - [manila:children]: - login - compute - ``` +```yaml +# environments/site/inventory/groups: +[manila:children]: +login +compute +``` If you are running a different version of Ceph from the defaults in the [os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount/blob/master/defaults/main.yml), you will need to update the package version by setting: - ```yaml - # environments/site/inventory/group_vars/manila.yml: - os_manila_mount_ceph_version: "18.2.4" - ``` +```yaml +# environments/site/inventory/group_vars/manila.yml: +os_manila_mount_ceph_version: "18.2.4" +``` -A [site-specific image](image-build.md) should be built which includes this package; add ``manila`` to the Packer ``inventory_groups`` variable. +A [site-specific image](image-build.md) should be built which includes this package; add `manila` to the Packer `inventory_groups` variable. Define the list of shares to be mounted, and the paths to mount them to. The example below parameterises the share name using the environment name. See the [stackhpc.os-manila-mount role](https://github.com/stackhpc/ansible-role-os-manila-mount) for further configuration options. 
- ```yaml - # environments/site/inventory/group_vars/manila.yml: - os_manila_mount_shares: - - share_name: "slurm-{{ appliances_environment_name }}-scratch" - mount_path: /scratch - ``` +```yaml +# environments/site/inventory/group_vars/manila.yml: +os_manila_mount_shares: + - share_name: "slurm-{{ appliances_environment_name }}-scratch" + mount_path: /scratch +``` ### Shared home directory -By default, the Slurm appliance configures the control node as an NFS server and exports a directory which is mounted on the other cluster nodes as `/home`. When using Manila + CephFS for the home directory instead, this will need to be disabled. To do this, set the tf var `home_volume_provisioning` to `None`. +By default, the Slurm appliance configures the control node as an NFS server and exports a directory which is mounted on the other cluster nodes as `/home`. When using Manila + CephFS for the home directory instead, this will need to be disabled. To do this, set the tf var `home_volume_provisioning` to `None`. Some `basic_users_homedir_*` parameters need overriding as the provided defaults are only satisfactory for the default root-squashed NFS share: - ```yaml - # environments/site/inventory/group_vars/all/basic_users.yml: - basic_users_homedir_server: "{{ groups['login'] | first }}" # if not mounting /home on control node - basic_users_homedir_server_path: /home - ``` +```yaml +# environments/site/inventory/group_vars/all/basic_users.yml: +basic_users_homedir_server: "{{ groups['login'] | first }}" # if not mounting /home on control node +basic_users_homedir_server_path: /home +``` Finally, add the home directory to the list of shares (the share should be already created in OpenStack). - ```yaml - # environments/site/inventory/group_vars/all/manila.yml: - os_manila_mount_shares: - - share_name: "slurm-{{ appliances_environment_name }}-scratch" - mount_path: /scratch - - share_name: "slurm-{{ appliances_environment_name }}-home" - mount_path: /home - ``` +```yaml +# environments/site/inventory/group_vars/all/manila.yml: +os_manila_mount_shares: + - share_name: "slurm-{{ appliances_environment_name }}-scratch" + mount_path: /scratch + - share_name: "slurm-{{ appliances_environment_name }}-home" + mount_path: /home +``` diff --git a/docs/image-build.md b/docs/image-build.md index dc968eb..71be030 100644 --- a/docs/image-build.md +++ b/docs/image-build.md @@ -3,59 +3,67 @@ The appliance contains code and configuration to use [Packer](https://developer.hashicorp.com/packer) with the [OpenStack builder](https://www.packer.io/plugins/builders/openstack) to build images. The Packer configuration defined here builds "fat images" which contain packages, binaries and container images but no cluster-specific configuration. Using these: + - Enables the image to be tested in CI before production use. - Ensures re-deployment of the cluster or deployment of additional nodes can be completed even if packages are changed in upstream repositories (e.g. due to RockyLinux or OpenHPC updates). - Improves deployment speed by reducing the number of package downloads to improve deployment speed. The fat images StackHPC builds and tests in CI are available from [GitHub releases](https://github.com/stackhpc/ansible-slurm-appliance/releases). However with some additional configuration it is also possible to: + 1. Build site-specific fat images from scratch. 2. Extend an existing fat image with additional functionality. 
- -# Usage +## Usage To build either a site-specific fat image from scratch, or to extend an existing StackHPC fat image: 1. Ensure the current OpenStack credentials have sufficient authorisation to upload images (this may or may not require the `member` role for an application credential, depending on your OpenStack configuration). 2. The provided dev credentials for StackHPC's "Ark" Pulp server must be added to the target environments. This is done by overriding `dnf_repos_username` and `dnf_repos_password` with your vault encrypted credentials in `environments//inventory/group_vars/all/pulp.yml`. See the [experimental docs](experimental/pulp.md) if you wish instead wish to use a local Pulp server. 3. Create a Packer [variable definition file](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/variables#assigning-values-to-input-variables) at e.g. `environments//builder.pkrvars.hcl` containing at a minimum: - - ```hcl - flavor = "general.v1.small" # VM flavor to use for builder VMs - networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # List of network UUIDs to attach the VM to - source_image_name = "Rocky-9-GenericCloud-Base-9.4" # Name of image to create VM with, i.e. starting image - inventory_groups = "control,login,compute" # Additional inventory groups to add build VM to - - ``` - - Note that: - - The network used for the Packer VM must provide outbound internet access but does not need to provide access to resources which the final cluster nodes require (e.g. Slurm control node, network filesystem servers etc.). - - The flavor used must have sufficent memory for the build tasks, but otherwise does not need to match the final cluster nodes. Usually 8GB is sufficent. By default, the build VM is volume-backed to allow control of the root disk size (and hence final image size) so the flavor disk size does not matter. - - The source image should be either a RockyLinux GenericCloud image for a site-specific image build from scratch, or a StackHPC fat image if extending an existing image. - - The `inventory_groups` variable takes a comma-separated list of Ansible inventory groups to add the build VM to. This is in addition to the `builder` group which it is always added to. This controls which Ansible roles and functionality run during build, and hence what gets added to the image. All possible groups are listed in `environments/common/groups` but common options for this variable will be: - - `update,control,login,compute`: The resultant image has all packages in the source image updated, and then packages for all types of nodes in the cluster are added. When using a GenericCloud image for `source_image_name` this builds a site-specific fat image from scratch. - - One or more specific groups which are not enabled in the appliance by default, e.g. `lustre`. When using a StackHPC fat image for `source_image_name` this extends the image with just this additional functionality. + +```hcl +flavor = "general.v1.small" # VM flavor to use for builder VMs +networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # List of network UUIDs to attach the VM to +source_image_name = "Rocky-9-GenericCloud-Base-9.4" # Name of image to create VM with, i.e. starting image +inventory_groups = "control,login,compute" # Additional inventory groups to add build VM to +``` + +Note that: + +- The network used for the Packer VM must provide outbound internet access but does not need to provide access to resources which the final cluster nodes require (e.g. 
Slurm control node, network filesystem servers etc.). +- The flavor used must have sufficent memory for the build tasks, but otherwise does not need to match the final cluster nodes. Usually 8GB is sufficent. By default, the build VM is volume-backed to allow control of the root disk size (and hence final image size) so the flavor disk size does not matter. +- The source image should be either a RockyLinux GenericCloud image for a site-specific image build from scratch, or a StackHPC fat image if extending an existing image. +- The `inventory_groups` variable takes a comma-separated list of Ansible inventory groups to add the build VM to. This is in addition to the `builder` group which it is always added to. This controls which Ansible roles and functionality run during build, and hence what gets added to the image. + All possible groups are listed in `environments/common/groups` but common options for this variable will be: + - `update,control,login,compute`: The resultant image has all packages in the source image updated, and then packages for all types of nodes in the cluster are added. When using a GenericCloud image for `source_image_name` this builds a site-specific fat image from scratch. + - One or more specific groups which are not enabled in the appliance by default, e.g. `lustre`. When using a StackHPC fat image for `source_image_name` this extends the image with just this additional functionality. 4. Activate the venv and the relevant environment. 5. Build images using the relevant variable definition file, e.g.: - cd packer/ - PACKER_LOG=1 /usr/bin/packer build -on-error=ask -var-file=$PKR_VAR_environment_root/builder.pkrvars.hcl openstack.pkr.hcl +```shell +cd packer/ +PACKER_LOG=1 /usr/bin/packer build -on-error=ask -var-file=$PKR_VAR_environment_root/builder.pkrvars.hcl openstack.pkr.hcl +``` - **NB:** If the build fails while creating the volume, check if the source image has the `signature_verified` property: +**NB:** If the build fails while creating the volume, check if the source image has the `signature_verified` property: - openstack image show $SOURCE_IMAGE +```shell +openstack image show $SOURCE_IMAGE +``` - If it does, remove this property: +If it does, remove this property: - openstack image unset --property signature_verified $SOURCE_IMAGE +```shell +openstack image unset --property signature_verified $SOURCE_IMAGE +``` - then delete the failed volume, select cancelling the build when Packer queries, and then retry. This is [OpenStack bug 1823445](https://bugs.launchpad.net/cinder/+bug/1823445). +then delete the failed volume, select cancelling the build when Packer queries, and then retry. This is [OpenStack bug 1823445](https://bugs.launchpad.net/cinder/+bug/1823445). -6. The built image will be automatically uploaded to OpenStack with a name prefixed `openhpc` and including a timestamp and a shortened git hash. +6. The built image will be automatically uploaded to OpenStack with a name prefixed `openhpc` and including a timestamp and a shortened Git hash. -# Build Process +## Build Process In summary, Packer creates an OpenStack VM, runs Ansible on that, shuts it down, then creates an image from the root disk. @@ -66,6 +74,7 @@ shows the use of the environment variable `$PKR_VAR_environment_root` (which its using a path in a "parent" environment is likely to be more appropriate (as builds should not be environment-specific to allow testing before deployment to a production environment). 
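
As a sketch of the above, a build can be pointed at a parent `site` environment by setting the variable directly; it is normally set when an environment is activated, and the path below is illustrative only:

```bash
# Point the build at the parent "site" environment rather than a
# deployment-specific one. Normally this variable is set on environment
# activation; it is exported directly here only for illustration.
export PKR_VAR_environment_root="$PWD/environments/site"
cd packer/
PACKER_LOG=1 /usr/bin/packer build -on-error=ask \
  -var-file="$PKR_VAR_environment_root/builder.pkrvars.hcl" openstack.pkr.hcl
```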
What is Slurm Appliance-specific are the details of how Ansible is run: + - The build VM is always added to the `builder` inventory group, which differentiates it from nodes in a cluster. This allows Ansible variables to be set differently during Packer builds, e.g. to prevent services starting. The defaults for this are in `environments/common/inventory/group_vars/builder/`, which could be extended or overriden for site-specific fat image builds using `builder` groupvars for the relevant environment. It also runs some builder-specific code (e.g. to clean up the image). - The default fat image builds also add the build VM to the "top-level" `compute`, `control` and `login` groups. This ensures @@ -76,9 +85,10 @@ What is Slurm Appliance-specific are the details of how Ansible is run: groupvars is not sufficient (e.g. a role always attempts to configure or start services). There are some things to be aware of when developing Ansible to run in a Packer build VM: - - Only some tasks make sense. E.g. any services with a reliance on the network cannot be started, and should not be enabled if, when creating an instance with the resulting image, the remote service will not be immediately present. - - Nothing should be written to the persistent state directory `appliances_state_dir`, as this is on the root filesystem rather than an OpenStack volume. - - Care should be taken not to leave data on the root filesystem which is not wanted in the final image (e.g secrets). - - Build VM hostnames are not the same as for equivalent "real" hosts and do not contain `login`, `control` etc. Therefore variables used by the build VM must be defined as groupvars not hostvars. - - Ansible may need to use a proxyjump to reach cluster nodes, which can be defined via Ansible's `ansible_ssh_common_args` variable. If Packer should not use the same proxy - to connect to build VMs (e.g. because build happens on a different network), this proxy configuration should not be added to the `all` group. + +- Only some tasks make sense. E.g. any services with a reliance on the network cannot be started, and should not be enabled if, when creating an instance with the resulting image, the remote service will not be immediately present. +- Nothing should be written to the persistent state directory `appliances_state_dir`, as this is on the root filesystem rather than an OpenStack volume. +- Care should be taken not to leave data on the root filesystem which is not wanted in the final image (e.g secrets). +- Build VM hostnames are not the same as for equivalent "real" hosts and do not contain `login`, `control` etc. Therefore variables used by the build VM must be defined as groupvars not hostvars. +- Ansible may need to use a proxyjump to reach cluster nodes, which can be defined via Ansible's `ansible_ssh_common_args` variable. If Packer should not use the same proxy + to connect to build VMs (e.g. because build happens on a different network), this proxy configuration should not be added to the `all` group. diff --git a/docs/k3s.README.md b/docs/k3s.README.md index 1b66511..500a789 100644 --- a/docs/k3s.README.md +++ b/docs/k3s.README.md @@ -1,8 +1,10 @@ # Overview -A K3s cluster is deployed with the Slurm cluster. Both an agent and server instance of K3s is installed during image build and the correct service (determined by OpenStack metadata) will be -enabled during boot. Nodes with the `k3s_server` metadata field defined will be configured as K3s agents (this field gives them the address of the server). 
The Slurm control node is currently configured as a server while all other nodes are configured as agents. Using multiple K3s servers isn't supported. Currently only the root user on the control node has + +A K3s cluster is deployed with the Slurm cluster. Both an agent and server instance of K3s is installed during image build and the correct service (determined by OpenStack metadata) will be +enabled during boot. Nodes with the `k3s_server` metadata field defined will be configured as K3s agents (this field gives them the address of the server). The Slurm control node is currently configured as a server while all other nodes are configured as agents. Using multiple K3s servers isn't supported. Currently only the root user on the control node has access to the Kubernetes API. The `k3s` role installs Helm for package management. K9s is also installed in the image and can be used by the root user. -# Idempotency +## Idempotency + K3s is intended to only be installed during image build as it is configured by the appliance on first boot with `azimuth_cloud.image_utils.linux_ansible_init`. Therefore, the `k3s` role isn't idempotent and changes to variables will not be reflected in the image when running `site.yml`. diff --git a/docs/mig.md b/docs/mig.md index 0d52f96..b8eeae8 100644 --- a/docs/mig.md +++ b/docs/mig.md @@ -10,9 +10,9 @@ This page details how to configure Multi Instance GPU (MIG) in Slurm. ## Inventory -Add relevant hosts to the ``vgpu`` group, for example in `environments/$ENV/inventory/groups`: +Add relevant hosts to the `vgpu` group, for example in `environments/$ENV/inventory/groups`: -``` +```yaml [vgpu:children] cuda ``` @@ -23,24 +23,24 @@ Use variables from the [stackhpc.linux.vgpu](https://github.com/stackhpc/ansible For example in: `environments//inventory/group_vars/all/vgpu`: -``` +```yaml --- vgpu_definitions: - - pci_address: "0000:17:00.0" - mig_devices: - "1g.10gb": 4 - "4g.40gb": 1 - - pci_address: "0000:81:00.0" - mig_devices: - "1g.10gb": 4 - "4g.40gb": 1 + - pci_address: "0000:17:00.0" + mig_devices: + "1g.10gb": 4 + "4g.40gb": 1 + - pci_address: "0000:81:00.0" + mig_devices: + "1g.10gb": 4 + "4g.40gb": 1 ``` -The appliance will use the driver installed via the ``cuda`` role. +The appliance will use the driver installed via the `cuda` role. -Use ``lspci`` to determine the PCI addresses e.g: +Use `lspci` to determine the PCI addresses e.g: -``` +```text [root@io-io-gpu-02 ~]# lspci -nn | grep -i nvidia 06:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1) 0c:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1) @@ -51,7 +51,7 @@ Use ``lspci`` to determine the PCI addresses e.g: The supported profiles can be discovered by consulting the [NVIDIA documentation](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-mig-profiles) or interactively by running the following on one of the compute nodes with GPU resources: -``` +```text [rocky@io-io-gpu-05 ~]$ sudo nvidia-smi -i 0 -mig 1 Enabled MIG Mode for GPU 00000000:06:00.0 All done. @@ -150,7 +150,7 @@ All done. ## compute_init configuration for slurm triggered rebuild (optional) You only need to configure this if you are using the slurm triggered rebuild -feature. Use the ``vgpu`` metadata option to enable creation of mig devices on +feature. Use the `vgpu` metadata option to enable creation of mig devices on rebuild. 
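
The profile names used in `vgpu_definitions` (e.g. `1g.10gb`) can also be checked interactively on a node once MIG mode has been enabled on a GPU. This is informational only and does not change any configuration:

```bash
# List the GPU instance profiles supported by GPU 0. Requires MIG mode to be
# enabled on that GPU (see above); names such as "1g.10gb" correspond to the
# keys used under mig_devices.
sudo nvidia-smi mig -i 0 -lgip
```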
## GRES configuration @@ -160,19 +160,19 @@ do this you need to determine the names of the GPU types as detected by slurm. F deploy slurm with the default nodegroup definitions to get a working cluster. Make a temporary copy of slurm.conf: -``` +```text cp /var/spool/slurm/conf-cache/slurm.conf /tmp/ ``` Then create a `/tmp/gres.conf` which enables autodetection: -``` +```text AutoDetect=nvml ``` You will then be able to run: `sudo slurmd -f /tmp/slurm.conf -G` on a compute node where GPU resources exist. An example is shown below: -``` +```text [rocky@io-io-gpu-02 ~]$ sudo slurmd -f /tmp/slurm.conf -G slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3 Count=1 Index=0 ID=7696487 File=/dev/nvidia0 Links=(null) Flags=HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI ,ENV_OPENCL,ENV_DEFAULT @@ -201,24 +201,23 @@ NOTE: If you have configured a Gres= line in slurm.conf already. You may have to GRES resources can then be configured manually. An example is shown below (`environments//inventory/group_vars/all/openhpc.yml`): -``` +```yaml openhpc_partitions: - name: cpu - name: gpu openhpc_nodegroups: - - name: cpu - - name: gpu - gres_autodetect: nvml - gres: - - conf: "gpu:nvidia_h100_80gb_hbm3:2" - - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2" - - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6" + - name: cpu + - name: gpu + gres_autodetect: nvml + gres: + - conf: "gpu:nvidia_h100_80gb_hbm3:2" + - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2" + - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6" openhpc_config: GresTypes: - gpu - ``` Making sure the types (the identifier after `gpu:`) match those collected with `slurmd -G`. Substrings diff --git a/docs/monitoring-and-logging.md b/docs/monitoring-and-logging.md index 46b405a..ad6fc0c 100644 --- a/docs/monitoring-and-logging.md +++ b/docs/monitoring-and-logging.md @@ -39,14 +39,15 @@ Where `role_name` is the name of the internal role. ## Customising variables -You should only customise the variables in `environments/common` if you are working on a feature that you intend to contribute back. Instead you should override the variables in the environment relevant to your deployment. This is possible since inventories later in the inheritance chain have greater precedence. Please see [README.md](../README.md#environments) for a more detailed explanation. This notice exists to avoid the need to need to keep repeating this point in the following sections. Where it is noted that you should customise a variable, it is implied that this change should be made to your own environment e.g `environments/production` in preference to `environments/common`, even when +You should only customise the variables in `environments/common` if you are working on a feature that you intend to contribute back. Instead you should override the variables in the environment relevant to your deployment. This is possible since inventories later in the inheritance chain have greater precedence. Please see [README.md](../README.md#environments) for a more detailed explanation. +This notice exists to avoid the need to need to keep repeating this point in the following sections. Where it is noted that you should customise a variable, it is implied that this change should be made to your own environment e.g `environments/production` in preference to `environments/common`, even when this is not explicitly stated. ## filebeat This section details the configuration of filebeat. 
-### Defaults +### filebeat defaults Filebeat is configured by the internal `filebeat` role which can be found here: @@ -56,7 +57,7 @@ The appliance defaults for the `filebeat` role can be found at the following loc > [environments/common/inventory/group_vars/all/filebeat.yml](../environments/common/inventory/group_vars/all/filebeat.yml) -### Overview +### filebeat overview Filebeat is configured to scrape the output of slurm stats. Slurm stats produces a json log file in the following location on the host: @@ -73,9 +74,9 @@ This file is configurable by the `filebeat_config_path` variable. It is not currently possible to partially override `filebeat.yml`. You will have to configure `filebeat_config_path` to refer to another file, copying the parts of the default configuration you want to keep. Pull requests are welcomed to add the functionality needed to allow for partial overrides. -### Placement +### filebeat placement -The `filebeat` group controls the placement of the `filebeat` service. The default configuration scrapes the `slurm_stats` service output. This requires a `filebeat` instance to be co-located with the `slurm_stats` service. +The `filebeat` group controls the placement of the `filebeat` service. The default configuration scrapes the `slurm_stats` service output. This requires a `filebeat` instance to be colocateed with the `slurm_stats` service. In the simplest configuration, a single host should be assigned to the `filebeat` and `slurm_stats` group. The host assigned to the `slurm_stats` group should the same host as assigned to the `filebeat` group. More advanced configurations are possible, but require overriding `filebeat_config_path` using `group` or `host` variables. @@ -83,18 +84,18 @@ In the simplest configuration, a single host should be assigned to the `filebeat This section details the configuration of grafana. -### Defaults +### grafana defaults Internally, we use the [cloudalchemy.grafana](https://github.com/cloudalchemy/ansible-grafana) role. You can customise any of the variables that the role supports. For a full list, please see the [upstream documentation](https://github.com/cloudalchemy/ansible-grafana). The appliance defaults can be found here: > [environments/common/inventory/group_vars/all/grafana.yml](../environments/common/inventory/group_vars/all/grafana.yml) -### Placement +### grafana placement The `grafana` group controls the placement of the grafana service. Load balancing is currently unsupported so it is important that you only assign one host to this group. -### Access +### grafana access If Open OnDemand is enabled then by default this is used to proxy Grafana, otherwise Grafana is accessed through the first . See `grafana_url` in [environments/common/inventory/group_vars/all/grafana.yml](../environments/common/inventory/group_vars/all/grafana.yml). The port used (variable `grafana_port`) defaults to `3000`. @@ -159,7 +160,7 @@ This can be customised with the `grafana_datasources` variable. This section details the configuration of OpenSearch. -### Defaults +### opensearch defaults The internal `opensearch` role is used to configure the service. The list of variables that can be customised can found in: @@ -169,11 +170,11 @@ The appliance defaults are in the following file: > [environments/common/inventory/group_vars/all/opensearch.yml](../environments/common/inventory/group_vars/all/opensearch.yml) -### Placement +### opensearch placement The `opensearch` group determines the placement of the OpenSearch service. 
Load balancing is currently unsupported so it is important that you only assign one host to this group. -### Access +### opensearch access By default, OpenSearch only listens on the loopback interface. It should therefore be placed on the same node as `filebeat` and `grafana` which need to access the OpenSearch API. @@ -185,9 +186,9 @@ The default set of users is defined in: This defines an the following accounts: -| username | password | purpose | -| ------------- | ------------------------------------------------|-------------------------------------------| -| admin | | User of highest privilege | +| username | password | purpose | +| -------- | ------------------------------------ | ------------------------- | +| admin | | User of highest privilege | Where the password field refers to a variable containing the actual password. These are generated by the `generate-passwords.yml` adhoc playbook (see [README.md](../README.md#creating-a-slurm-appliance)). @@ -208,7 +209,7 @@ found in: This section details the configuration of prometheus. -### Defaults +### Prometheus defaults Internally, we use the [cloudalchemy.prometheus](https://github.com/cloudalchemy/ansible-prometheus) role. You can customise any of the variables that the role supports. For a full list, please see the [upstream documentation](https://github.com/cloudalchemy/ansible-prometheus). The appliance defaults can be found here: @@ -217,19 +218,20 @@ Internally, we use the [cloudalchemy.prometheus](https://github.com/cloudalchemy Prometheus will be functional by default but the following variables should commonly be modified: + - `prometheus_web_external_url` - `prometheus_storage_retention` - `prometheus_storage_retention_size` -### Placement +### Prometheus placement The `prometheus` group determines the placement of the prometheus service. Load balancing is currently unsupported so it is important that you only assign one host to this group. -### Access +### Prometheus access Prometheus is exposed on port `9090` on all hosts in the prometheus group. Currently, the configuration assumes a single host. Following the reference layout in `environments/site/inventory/groups`, this will be set to the slurm `control` node, prometheus would then be accessible from: - > http://:9090 +> http://:9090 The port can customised by overriding the `prometheus_web_external_url` variable. @@ -268,7 +270,7 @@ The list can be customised by overriding the `collect[]` parameter of the `node` > [environments/common/inventory/group_vars/all/prometheus.yml](../environments/common/inventory/group_vars/all/prometheus.yml). -Variables in this file should *not* be customised directly, but should be overridden in your `environment`. See [README.md](../README.md#environments) which details the process of overriding default variables in more detail. +Variables in this file should _not_ be customised directly, but should be overridden in your `environment`. See [README.md](../README.md#environments) which details the process of overriding default variables in more detail. ### custom ansible filters @@ -276,12 +278,13 @@ Variables in this file should *not* be customised directly, but should be overri Groups prometheus targets. Metrics from `node_exporter` hosts have two labels applied: - - `env`: This is set from the Ansible variable `prometheus_env` if present - (e.g. from hostvars or groupvars), defaulting to `ungrouped`. This can be - used to group metrics by some arbitrary "environment", e.g. rack. 
- - `group`: This refers to the "top-level" inventory group for the host and - is one of `control`, `login`, `compute` or `other`. This can be used to - define rules for specific host functionalities. + +- `env`: This is set from the Ansible variable `prometheus_env` if present + (e.g. from hostvars or groupvars), defaulting to `ungrouped`. This can be + used to group metrics by some arbitrary "environment", e.g. rack. +- `group`: This refers to the "top-level" inventory group for the host and + is one of `control`, `login`, `compute` or `other`. This can be used to + define rules for specific host functionalities. ## slurm-stats @@ -291,16 +294,12 @@ Slurm stats periodically queries the slurm accounting database to gather informa The polling of this data is controlled by a cron job. The default is to scrape the data every 5 minutes. -### Defaults +### slurm-stats defaults -slurm-stats is configured `slurm-stats` role in the [slurm_openstack_tools collection](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools). Currently there is no customisation of this role in the common environment i.e we are just using role defaults. It is possible to override these by setting the relevant variable in your environment config. See [here](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools/tree/main/roles/slurm-stats) for a list of variables that can be set. +slurm-stats is configured `slurm-stats` role in the [slurm_openstack_tools collection](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools). Currently there is no customisation of this role in the common environment i.e we are just using role defaults. +It is possible to override these by setting the relevant variable in your environment config. See [here](https://github.com/stackhpc/ansible_collection_slurm_openstack_tools/tree/main/roles/slurm-stats) for a list of variables that can be set. - -### Placement +### slurm-stats placement The `slurm_stats` group controls the placement of the `slurm_stats` service. -This should be configured to be a group with a single host. That host must be co-located on the same host as the `filebeat` service that scrapes its output. - - - - +This should be configured to be a group with a single host. That host must be colocated on the same host as the `filebeat` service that scrapes its output. diff --git a/docs/networks.md b/docs/networks.md index 69b7ece..c2c1d12 100644 --- a/docs/networks.md +++ b/docs/networks.md @@ -2,6 +2,7 @@ The default OpenTofu configurations in the appliance do not provision networks, subnets or associated infrastructure such as routers. The requirements are that: + 1. At least one network exists. 2. The first network defined spans all nodes, referred to as the "access network". 3. Only one subnet per network is attached to nodes. @@ -36,6 +37,7 @@ Note that if an OpenStack subnet has a gateway IP defined then by default nodes with ports attached to that subnet get a default route set via that gateway. ## Single network + This is the simplest possible configuration. A single network and subnet is used for all nodes. The subnet provides outbound internet access via the default route defined by the subnet gateway (often an OpenStack router to an external @@ -52,6 +54,7 @@ cluster_networks = [ ``` ## Multiple homogenous networks + This is similar to the above, except each node has multiple networks. The first network, "netA" is the access network. 
Note that only one subnet must have a gateway defined, else default routes via both subnets will be present causing @@ -76,7 +79,6 @@ vnic_types = { ... ``` - ## Additional networks on some nodes This example shows how to modify variables for specific node groups. In this @@ -120,13 +122,14 @@ In some multiple network configurations it may be necessary to manage default routes rather than them being automatically created from a subnet gateway. This can be done using the tofu variable `gateway_ip` which can be set for the cluster and/or overriden on the compute and login groups. If this is set: + - a default route via that address will be created on the appropriate interface during boot if it does not exist - any other default routes will be removed For example the cluster configuration below has a "campus" network with a default gateway which provides inbound SSH / ondemand access and outbound -internet attached only to the login nodes, and a "data" network attached to +internet attached only to the login nodes, and a "data" network attached to all nodes. The "data" network has no gateway IP set on its subnet to avoid dual default routes and routing conflicts on the multi-homed login nodes, but does have outbound connectivity via a router: @@ -183,7 +186,7 @@ compute # environments/$SITE/inventory/group_vars/all/squid.yml: # these are just examples squid_cache_disk: 1024 # MB -squid_cache_mem: '12 GB' +squid_cache_mem: "12 GB" ``` Note that name resolution must still be possible and may require defining an diff --git a/docs/openondemand.md b/docs/openondemand.md index 5dd3029..cd33cd5 100644 --- a/docs/openondemand.md +++ b/docs/openondemand.md @@ -2,42 +2,46 @@ The appliance can deploy the Open OnDemand portal. This page describes how to enable this and the default appliance configuration/behaviour. Note that detailed configuration documentation is provided by: -- The README for the included `openondemand` role in this repo - [ansible/roles/openondemand/README.md](../ansible/roles/openondemand/README.md). -- The README and default variables for the underlying "official" role which the above wraps - [Open OnDemand Ansible Role](https://github.com/OSC/ood-ansible) +- The readme for the included `openondemand` role in this repository - [ansible/roles/openondemand/README.md](../ansible/roles/openondemand/README.md). +- The readme and default variables for the underlying "official" role which the above wraps - [Open OnDemand Ansible Role](https://github.com/OSC/ood-ansible) - The documentation for Open OnDemand [itself](https://osc.github.io/ood-documentation/latest/index.html) This appliance can deploy and configure: + - The Open OnDemand server itself (usually on a single login node). - User authentication using one of: - - An external OIDC provider. - - HTTP basic authentication and PAM. + - An external OIDC provider. + - HTTP basic authentication and PAM. - Virtual desktops on compute nodes. - Jupyter nodebook servers on compute nodes. - Proxying of Grafana (usually deployed on the control node) via the Open OnDemand portal. - Links to additional filesystems and pages from the Open OnDemand Dashboard. - A Prometheus exporter for the Open OnDemand server and related Grafana dashboard -For examples of all of the above see the `smslabs-example` environment in this repo. +For examples of all of the above see the `smslabs-example` environment in this repository. 
+ +## Enabling Open OnDemand -# Enabling Open OnDemand To enable the Open OnDemand server, add single host to the `openondemand` inventory group. Generally, this should be a node in the `login` group, as Open OnDemand must be able to access Slurm commands. -To enable compute nodes for virtual desktops, Jupyter notebooks, RStudio, VSCode, or MATLAB (accessed through the Open OnDemand portal), add nodes/groups to the `openondemand_desktop`, `openondemand_jupyter`, `openondemand_rstudio`, `openondemand_codeserver`, and `openondemand_matlab` inventory groups respectively. These may be all or a subset of the `compute` group. +To enable compute nodes for virtual desktops, Jupyter notebooks, RStudio, Visual Studio Code, or MATLAB (accessed through the Open OnDemand portal), add nodes/groups to the `openondemand_desktop`, `openondemand_jupyter`, `openondemand_rstudio`, `openondemand_codeserver`, and `openondemand_matlab` inventory groups respectively. These may be all or a subset of the `compute` group. The above functionality is configured by running the `ansible/portal.yml` playbook. This is automatically run as part of `ansible/site.yml`. ## MATLAB -*NB* Due to licensing, the MATLAB batch connect app requires a MATLAB intallation to be present on the relevant compute nodes. The MATLAB app is therefore disabled by default, and must be enabled by setting `openondemand_matlab_partition` in e.g. `environments/site/inventory/group_vars/all/openondemand.yml` to the name of the partition where MATLAB is available. + +_NB_ Due to licensing, the MATLAB batch connect app requires a MATLAB intallation to be present on the relevant compute nodes. The MATLAB app is therefore disabled by default, and must be enabled by setting `openondemand_matlab_partition` in e.g. `environments/site/inventory/group_vars/all/openondemand.yml` to the name of the partition where MATLAB is available. An Lmod modulefile also needs to be available on compute nodes - this is not provided by the appliance. See e.g.`roles/openondemand/tasks/rstudio_compute.yml` for an example. The modulefile must be named `matlab/$MATLAB_VERSION`, where the version matches thes `openondemand_matlab_version` variable. This variable is set to empty in the role default so must be defined in `environments/site/inventory/group_vars/all/openondemand.yml`. -As MATLAB requires a remote desktop, the TurboVNC and Xfce Desktop packages and configuration from the "openondemand_desktop" app will be automatically applied to nodes where the MATLAB app is enabled. +As MATLAB requires a remote desktop, the TurboVNC and Xfce Desktop packages and configuration from the "openondemand_desktop" app will be automatically applied to nodes where the MATLAB app is enabled. -# Default configuration +## Default configuration See the [ansible/roles/openondemand/README.md](../ansible/roles/openondemand/README.md) for more details on the variables described below. The following variables have been given default values to allow Open OnDemand to work in a newly created environment without additional configuration, but generally should be overridden in `environments/site/inventory/group_vars/all/` with site-specific values: + - `openondemand_servername` - this must be defined for both `openondemand` and `grafana` hosts (when Grafana is enabled). The default is `ansible_host` (i.e. the IP address) of the first host in the `openondemand` group. 
For production @@ -49,6 +53,7 @@ The following variables have been given default values to allow Open OnDemand to - `openondemand_desktop_partition`, `openondemand_jupyter_partition`, `openondemand_rstudio_partition`, and `openondemand_codeserver_partition` if the corresponding inventory groups are defined. Defaults to the first compute group defined in the `compute` OpenTofu variable in `environments/$ENV/tofu`. Note `openondemand_matlab_partition` is not set due to the additional requirements discussed above. It is also recommended to set: + - `openondemand_dashboard_support_url` - `openondemand_dashboard_docs_url` @@ -58,5 +63,8 @@ The appliance automatically configures Open OnDemand to proxy Grafana and adds a [^1]: Note that if `openondemand_auth` is `basic_pam` and anonymous Grafana login is enabled, the appliance will (by default) configure Open OnDemand's Apache server to remove the Authorisation header from proxying of all `node/` addresses. This is done as otherwise Grafana tries to use this header to authenticate, which fails with the default configuration where only the admin Grafana user `grafana` is created. Note that the removal of this header in this configuration means it cannot be used to authenticate proxied interactive applications - however the appliance-deployed remote desktop and Jupyter Notebook server applications use other authentication methods. An alternative if using `basic_pam` is not to enable anonymous Grafana login and to create Grafana users matching the local users (e.g. in `environments//hooks/post.yml`). -# Access -By default the appliance authenticates against OOD with basic auth through PAM. When creating a new environment, a new user with username `demo_user` will be created. Its password is found under `vault_openondemand_default_user` in the appliance secrets store in `environments/{ENV}/inventory/group_vars/all/secrets.yml`. Other users can be defined by overriding the `basic_users_users` variable in your environment (templated into `environments/{ENV}/inventory/group_vars/all/basic_users.yml` by default). +## Access + +By default the appliance authenticates against OOD with basic auth through PAM. When creating a new environment, a new user with username `demo_user` will be created. +Its password is found under `vault_openondemand_default_user` in the appliance secrets store in `environments/{ENV}/inventory/group_vars/all/secrets.yml`. +Other users can be defined by overriding the `basic_users_users` variable in your environment (templated into `environments/{ENV}/inventory/group_vars/all/basic_users.yml` by default). diff --git a/docs/operations.md b/docs/operations.md index 4c5c640..4064d44 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -3,6 +3,7 @@ This page describes the commands required for common operations. All subsequent sections assume that: + - Commands are run from the repository root, unless otherwise indicated by a `cd` command. - An Ansible vault secret is configured. - The correct private key is available to Ansible. @@ -15,24 +16,27 @@ All subsequent sections assume that: Review any [site-specific documentation](site/README.md) for more details on the above. -# Deploying a Cluster +## Deploying a Cluster This follows the same process as defined in the main [README.md](../README.md) for the default configuration. Note that tags as defined in the various sub-playbooks defined in `ansible/` may be used to only run part of the tasks in `site.yml`. 
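As an illustration of the above, a minimal sketch of a full run versus a tag-limited run is shown below. This assumes the environment has already been activated; `openhpc` is used purely as an example tag name - check the sub-playbooks under `ansible/` for the tags actually defined:

```shell
# Run the complete configuration:
ansible-playbook ansible/site.yml

# Re-run only tasks with a given tag, e.g. 'openhpc' (tag names are defined in the sub-playbooks):
ansible-playbook ansible/site.yml --tags openhpc

# List the available tags without running any tasks:
ansible-playbook ansible/site.yml --list-tags
```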
-# SSH to Cluster Nodes +## SSH to Cluster Nodes This depends on how the cluster is accessed. The script `dev/ansible-ssh` may generally be used to connect to a host specified by a `inventory_hostname` using the same connection details as Ansible. If this does not work: + - Instance IPs are normally defined in `ansible_host` variables in an inventory file `environments/$ENV/inventory/hosts{,.yml}`. -- The ssh user is defined by `ansible_user`, default is `rocky`. This may be overridden in your environment. +- The SSH user is defined by `ansible_user`, default is `rocky`. This may be overridden in your environment. - If a jump host is required the user and address may be defined in the above inventory file. -# Modifying general Slurm.conf parameters +## Modifying general Slurm.conf parameters + Parameters for [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) can be added to an `openhpc_config_extra` mapping in `environments/$SITE_ENV/inventory/group_vars/all/openhpc.yml`. Note that values in this mapping may be: + - A string, which will be inserted as-is. - A list, which will be converted to a comma-separated string. @@ -40,9 +44,9 @@ This allows specifying `slurm.conf` contents in an yaml-format Ansible-native wa **NB:** The appliance provides some default values in `environments/common/inventory/group_vars/all/openhpc.yml:openhpc_config_default` which is combined with the above. The `enable_configless` flag in the `SlurmCtldParameters` key this sets must not be overridden - a validation step checks this has not happened. -See [Reconfiguring Slurm](#Reconfiguring-Slurm) to apply changes. +See [Reconfiguring Slurm](#reconfiguring-slurm) to apply changes. -# Modifying Slurm Partition-specific Configuration +## Modifying Slurm Partition-specific Configuration Modify the `openhpc_slurm_partitions` mapping usually in `environments/$SITE_ENV/inventory/group_vars/all/openhpc.yml` as described for [stackhpc.openhpc:slurmconf](https://github.com/stackhpc/ansible-role-openhpc#slurmconf) (note the relevant version of this role is defined in the `requirements.yml`) @@ -50,23 +54,22 @@ Note an Ansible inventory group for the partition is required. This is generally **NB:** `default:NO` must be set on all non-default partitions, otherwise the last defined partition will always be set as the default. -See [Reconfiguring Slurm](#Reconfiguring-Slurm) to apply changes. +See [Reconfiguring Slurm](#reconfiguring-slurm) to apply changes. + +## Adding an Additional Partition -# Adding an Additional Partition This is a usually a two-step process: - If new nodes are required, define a new node group by adding an entry to the `compute` mapping in `environments/$ENV/tofu/main.tf` assuming the default OpenTofu configuration: - - The key is the partition name. - - The value should be a mapping, with the parameters defined in `environments/$SITE_ENV/tofu/compute/variables.tf`, but in brief will need at least `flavor` (name) and `nodes` (a list of node name suffixes). -- Add a new partition to the partition configuration as described under [Modifying Slurm Partition-specific Configuration](#Modifying-Slurm-Partition-specific-Configuration). + - The key is the partition name. + - The value should be a mapping, with the parameters defined in `environments/$SITE_ENV/tofu/compute/variables.tf`, but in brief will need at least `flavor` (name) and `nodes` (a list of node name suffixes). 
+- Add a new partition to the partition configuration as described under [Modifying Slurm Partition-specific Configuration](#modifying-slurm-partition-specific-configuration).
 
-Deploying the additional nodes and applying these changes requires rerunning both OpenTofu and the Ansible site.yml playbook - follow [Deploying a Cluster](#Deploying-a-Cluster).
+Deploying the additional nodes and applying these changes requires rerunning both OpenTofu and the Ansible site.yml playbook - follow [Deploying a Cluster](#deploying-a-cluster).
 
-# Enabling additional functionality
-Roles in the appliance which are disabled by default can be enabled by adding the appropriate groups as children of the role's corresponding group in `environments/site/inventory/groups`. For example,
-to install a Squid proxy on nodes in the login group, you would modify the `squid` group definition in `environments/site/inventory/groups` to:
+## Package Repositories
 
-```
+```ini
 [squid:children]
 # Hosts to run squid proxy
 login
@@ -80,8 +83,10 @@ disabled during runtime to prevent Ark credentials from being leaked. To enable
 
 In both cases, Ark credentials will be required.
 
-# Adding Additional Packages
+## Adding Additional Packages
+
 By default, the following utility packages are installed during the StackHPC image build:
+
 - htop
 - nano
 - screen
@@ -90,22 +95,23 @@ By default, the following utility packages are installed during the StackHPC ima
 - bind-utils
 - net-tools
 - postfix
-- git
+- Git
 - latest python version for system (3.6 for for Rocky 8.9 and 3.12 for Rocky 9.4)
 - s-nail
 
 Additional packages can be added during image builds by:
+
 - adding the `extra_packages` group to the build `inventory_groups` (see
-[docs/image-build.md](./image-build.md))
+  [docs/image-build.md](./image-build.md))
 - defining a list of packages in `appliances_extra_packages_other` in e.g.
-`environments/$SITE_ENV/inventory/group_vars/all/defaults.yml`. For example:
+  `environments/$SITE_ENV/inventory/group_vars/all/defaults.yml`. For example:
 
-  ```yaml
-  # environments/foo-base/inventory/group_vars/all/defaults.yml:
-  appliances_extra_packages_other:
-    - somepackage
-    - anotherpackage
-  ```
+```yaml
+# environments/foo-base/inventory/group_vars/all/defaults.yml:
+appliances_extra_packages_other:
+  - somepackage
+  - anotherpackage
+```
 
 For packages which come from repositories mirrored by StackHPC's "Ark" Pulp server
 (including rocky, EPEL and OpenHPC repositories), this will require either [Ark
@@ -118,69 +124,85 @@ the OpenHPC installation guide (linked from the
 "user-facing" OpenHPC packages such as compilers, MPI libraries etc. include
 corresponding `lmod` modules.
 
-Packages *may* also be installed during the site.yml, by adding the `cluster`
+Packages _may_ also be installed during the site.yml, by adding the `cluster`
 group into the `extra_packages` group. An error will occur if Ark credentials
 are defined in this case, as they are readable by unprivileged users in the
 `.repo` files and a local Pulp mirror must be used instead.
 
-If additional repositories are required, these could be added/enabled as necessary in a play added to `environments/$SITE_ENV/hooks/{pre,post}.yml` as appropriate. Note such a play should NOT exclude the builder group, so that the repositories are also added to built images. There are various Ansible modules which might be useful for this:
- - `ansible.builtin.yum_repository`: Add a repo from an URL providing a 'repodata' directory.
- - `ansible.builtin.rpm_key` : Add a GPG key to the RPM database.
- - `ansible.builtin.get_url`: Can be used to install a repofile directly from an URL (e.g. https://turbovnc.org/pmwiki/uploads/Downloads/TurboVNC.repo) - - `ansible.builtin.dnf`: Can be used to install 'release packages' providing repos, e.g. `epel-release`, `ohpc-release`. +If additional repositories are required, these could be added/enabled as necessary in a play added to `environments/$SITE_ENV/hooks/{pre,post}.yml` as appropriate. +Note such a play should NOT exclude the builder group, so that the repositories are also added to built images. +There are various Ansible modules which might be useful for this: -The packages to be installed from that repo could also be defined in that play. Note using the `dnf` module with a list for its `name` parameter is more efficient and allows better dependency resolution than calling the module in a loop. +- `ansible.builtin.yum_repository`: Add a repository from a URL providing a 'repodata' directory. +- `ansible.builtin.rpm_key` : Add a GPG key to the RPM database. +- `ansible.builtin.get_url`: Can be used to install a repofile directly from a URL (e.g. ) +- `ansible.builtin.dnf`: Can be used to install 'release packages' providing repos, e.g. `epel-release`, `ohpc-release`. +The packages to be installed from that repository could also be defined in that play. Note using the `dnf` module with a list for its `name` parameter is more efficient and allows better dependency resolution than calling the module in a loop. Adding these repos/packages to the cluster/image would then require running: - ansible-playbook environments/$SITE_ENV/hooks/{pre,post}.yml +```shell +ansible-playbook environments/$SITE_ENV/hooks/{pre,post}.yml +``` as appropriate. TODO: improve description about adding these to extra images. - -# Reconfiguring Slurm +## Reconfiguring Slurm At a minimum run: - ansible-playbook ansible/slurm.yml --tags openhpc - +```shell +ansible-playbook ansible/slurm.yml --tags openhpc +``` **NB:** This will restart all daemons if the `slurm.conf` has any changes, even if technically only a `scontrol reconfigure` is required. - -# Running the MPI Test Suite +## Running the MPI Test Suite See [ansible/roles/hpctests/README.md](ansible/roles/hpctests/README.md) for a description of these. They can be run using - ansible-playbook ansible/adhoc/hpctests.yml +```shell +ansible-playbook ansible/adhoc/hpctests.yml +``` Note that: + - The above role provides variables to select specific partitions, nodes and interfaces which may be required. If not set in inventory, these can be passed as extravars: - ansible-playbook ansible/adhoc/hpctests.yml -e hpctests_myvar=foo +```shell +ansible-playbook ansible/adhoc/hpctests.yml -e hpctests_myvar=foo +``` + - The HPL-based test is only reasonably optimised on Intel processors due the libraries and default parallelisation scheme used. For AMD processors it is recommended this -is skipped using: + is skipped using: - ansible-playbook ansible/adhoc/hpctests.yml --skip-tags hpl-solo. +```shell +ansible-playbook ansible/adhoc/hpctests.yml --skip-tags hpl-solo. +``` Review any [site-specific documentation](site/README.md) for more details. -# Running CUDA Tests +## Running CUDA Tests + This uses the [cuda-samples](https://github.com/NVIDIA/cuda-samples/) utilities "deviceQuery" and "bandwidthTest" to test GPU functionality. 
It automatically runs on any host in the `cuda` inventory group: - ansible-playbook ansible/adhoc/cudatests.yml +```shell +ansible-playbook ansible/adhoc/cudatests.yml +``` **NB:** This test is not launched through Slurm, so confirm nodes are free/out of service or use `--limit` appropriately. -# Ad-hoc Commands and Playbooks +## Ad-hoc Commands and Playbooks A set of utility playbooks for managing a running appliance are provided in `ansible/adhoc` - run these by activating the environment and using: - ansible-playbook ansible/adhoc/$PLAYBOOK +```shell +ansible-playbook ansible/adhoc/$PLAYBOOK +``` Currently they include the following (see each playbook for links to documentation): @@ -191,6 +213,8 @@ Currently they include the following (see each playbook for links to documentati The `ansible` binary [can be used](https://docs.ansible.com/ansible/latest/command_guide/intro_adhoc.html) to run arbitrary shell commands against inventory groups or hosts, for example: - ansible [--become] -m shell -a "" +```shell +ansible [--become] -m shell -a "" +``` This can be useful for debugging and development but any modifications made this way will be lost if nodes are rebuilt/reimaged. diff --git a/docs/persistent-state.md b/docs/persistent-state.md index f5d4852..922ad16 100644 --- a/docs/persistent-state.md +++ b/docs/persistent-state.md @@ -3,6 +3,7 @@ To enable cluster state to persist beyond individual node lifetimes (e.g. to survive a cluster deletion or rebuild) set `appliances_state_dir` to the path of a directory on persistent storage, such as an OpenStack volume. At present this will affect the following: + - `slurmctld` state, i.e. the Slurm queue. - The MySQL database for `slurmdbd`, i.e. Slurm accounting information as shown by the `sacct` command. - Prometheus database @@ -23,6 +24,7 @@ The `site` environment supports persistent state in the default OpenTofu (see `e **NB: The default OpenTofu is provided as a working example and for internal CI use - therefore this volume is deleted when running `tofu destroy` - this may not be appropriate for a production environment.** In general, the Prometheus data is likely to be the only sizeable state stored. The size of this can be influenced through [Prometheus role variables](https://github.com/cloudalchemy/ansible-prometheus#role-variables), e.g.: + - `prometheus_storage_retention` - [default](../environments/common/inventory/group_vars/all/prometheus.yml) 31d - `prometheus_storage_retention_size` - [default](../environments/common/inventory/group_vars/all/prometheus.yml) 100GB - `prometheus_global.scrape_interval` and `scrape_interval` for [specific scrape definitions](../environments/common/inventory/group_vars/all/prometheus.yml) diff --git a/docs/production.md b/docs/production.md index 8808a56..83587f9 100644 --- a/docs/production.md +++ b/docs/production.md @@ -2,49 +2,49 @@ This page will guide you on how to create production-ready deployments. While you can start right away with this guide, you may find it useful to try with a -demo deployment first, as described in the [main README](../README.md). +demo deployment first, as described in the [main readme](../README.md). ## Prerequisites Before starting ensure that: - - You have root access on the deploy host. +- You have root access on the deploy host. - - You can create instances from the [latest Slurm appliance - image](https://github.com/stackhpc/ansible-slurm-appliance/releases), - which already contains the required packages. This is built and tested in - StackHPC's CI. 
+- You can create instances from the [latest Slurm appliance + image](https://github.com/stackhpc/ansible-slurm-appliance/releases), + which already contains the required packages. This is built and tested in + StackHPC's CI. - - You have an SSH keypair defined in OpenStack, with the private key - available on the deploy host. +- You have an SSH keypair defined in OpenStack, with the private key + available on the deploy host. - - Created instances have access to internet (note proxies can be setup - through the appliance if necessary). +- Created instances have access to internet (note proxies can be setup + through the appliance if necessary). - - Created instances have accurate/synchronised time (for VM instances this is - usually provided by the hypervisor; if not or for bare metal instances it - may be necessary to configure a time service via the appliance). +- Created instances have accurate/synchronised time (for VM instances this is + usually provided by the hypervisor; if not or for bare metal instances it + may be necessary to configure a time service via the appliance). - - Three security groups are present: ``default`` allowing intra-cluster - communication, ``SSH`` allowing external access via SSH and ``HTTPS`` - allowing access for Open OnDemand. +- Three security groups are present: `default` allowing intra-cluster + communication, `SSH` allowing external access via SSH and `HTTPS` + allowing access for Open OnDemand. - - Usually, you'll want to deploy the Slurm Appliance into its own dedicated - project. It's recommended that your OpenStack credentials are defined in a - [clouds.yaml](https://docs.openstack.org/python-openstackclient/latest/configuration/index.html#clouds-yaml) - file in a default location with the default cloud name of `openstack`. +- Usually, you'll want to deploy the Slurm Appliance into its own dedicated + project. It's recommended that your OpenStack credentials are defined in a + [clouds.yaml](https://docs.openstack.org/python-openstackclient/latest/configuration/index.html#clouds-yaml) + file in a default location with the default cloud name of `openstack`. ### Setup deploy host The following operating systems are supported for the deploy host: - - Rocky Linux 9 +- Rocky Linux 9 - - Rocky Linux 8 +- Rocky Linux 8 These instructions assume the deployment host is running Rocky Linux 8: -```bash +```shell sudo yum install -y git python38 git clone https://github.com/stackhpc/ansible-slurm-appliance cd ansible-slurm-appliance @@ -58,24 +58,24 @@ You will also need to install ## Version control A production deployment should be set up under version control, so you should -create a fork of this repo. +create a fork of this repository. First make an empty Git repository using your service of choice (e.g. GitHub or GitLab), then execute the following commands to turn the new empty repository into a copy of the ansible-slurm-appliance repository. - ```bash - git clone https://github.com/stackhpc/ansible-slurm-appliance.git - cd ansible-slurm-appliance - ``` +```shell +git clone https://github.com/stackhpc/ansible-slurm-appliance.git +cd ansible-slurm-appliance +``` Maintain the existing origin remote as upstream, and create a new origin remote for the repository location. 
- ```bash - git remote rename origin upstream - git remote add origin git@/ansible-slurm-appliance.git - ``` +```shell +git remote rename origin upstream +git remote add origin git@/ansible-slurm-appliance.git +``` You should use the [latest tagged release](https://github.com/stackhpc/ansible-slurm-appliance/releases). v1.161 @@ -83,11 +83,11 @@ has been used as an example here, make sure to change this. Do not use the default main branch, as this may have features that are still works in progress. - ```bash - git checkout v1.161 - git checkout -b site/main - git push -u origin site/main - ``` +```shell +git checkout v1.161 +git checkout -b site/main +git push -u origin site/main +``` ## Environment setup @@ -98,8 +98,9 @@ requires instance deletion/recreation. At least two environments should be created using cookiecutter, which will derive from the `site` base environment: - - `production`: production environment - - `staging`: staging environment + +- `production`: production environment +- `staging`: staging environment A `dev` environment should also be created if considered required, or this can be left until later. @@ -116,29 +117,29 @@ environment-specific (e.g. DNS names for `openondemand_servername`). Where possible hooks should also be placed in `environments/site/hooks/` and referenced from the `production` and `staging` environments, e.g.: - ```yaml - # environments/production/hooks/pre.yml: - - name: Import parent hook - import_playbook: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/../site/hooks/pre.yml" - ``` +```yaml +# environments/production/hooks/pre.yml: +- name: Import parent hook + import_playbook: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/../site/hooks/pre.yml" +``` OpenTofu configurations are defined in the `site` environment and referenced as a module by the site-specific cookie-cutter generated configurations. This will have been generated for you already under -``environments/$ENV/tofu/main.tf``. +`environments/$ENV/tofu/main.tf`. ### Cookiecutter instructions - Run the following from the repository root to activate the venv: - ```bash + ```shell . venv/bin/activate ``` - Use the `cookiecutter` template to create a new environment to hold your configuration: - ```bash + ```shell cd environments cookiecutter ../cookiecutter ``` @@ -149,14 +150,14 @@ will have been generated for you already under - Go back to the root folder and activate the new environment: - ```bash + ```shell cd .. . 
environments/$ENV/activate ``` And generate secrets for it: - ```bash + ```shell ansible-playbook ansible/adhoc/generate-passwords.yml ``` @@ -164,33 +165,33 @@ will have been generated for you already under Create an OpenTofu variables file to define the required infrastructure, e.g.: - ``` - # environments/$ENV/tofu/terraform.tfvars - cluster_name = "mycluster" - cluster_networks = [ - { - network = "some_network" # * - subnet = "some_subnet" # * - } - ] - key_pair = "my_key" # * - control_node_flavor = "some_flavor_name" - login = { - # Arbitrary group name for these login nodes - interactive = { - nodes: ["login-0"] - flavor: "login_flavor_name" # * - } - } - cluster_image_id = "rocky_linux_9_image_uuid" - compute = { - # Group name used for compute node partition definition - general = { - nodes: ["compute-0", "compute-1"] - flavor: "compute_flavor_name" # * - } +```text +# environments/$ENV/tofu/terraform.tfvars +cluster_name = "mycluster" +cluster_networks = [ + { + network = "some_network" # * + subnet = "some_subnet" # * } - ``` +] +key_pair = "my_key" # * +control_node_flavor = "some_flavor_name" +login = { + # Arbitrary group name for these login nodes + interactive = { + nodes: ["login-0"] + flavor: "login_flavor_name" # * + } +} +cluster_image_id = "rocky_linux_9_image_uuid" +compute = { + # Group name used for compute node partition definition + general = { + nodes: ["compute-0", "compute-1"] + flavor: "compute_flavor_name" # * + } +} +``` Variables marked `*` refer to OpenStack resources which must already exist. @@ -199,29 +200,30 @@ The above is a minimal configuration - for all variables and descriptions see Note that: - - Environment-specific variables (`cluster_name`) should be hardcoded into - the cluster module block. +- Environment-specific variables (`cluster_name`) should be hardcoded into + the cluster module block. - - Environment-independent variables (e.g. maybe `cluster_net` if the same - is used for staging and production) should be set as *defaults* in - `environments/site/tofu/variables.tf`, and then don't need to be passed - in to the module. +- Environment-independent variables (e.g. maybe `cluster_net` if the same + is used for staging and production) should be set as _defaults_ in + `environments/site/tofu/variables.tf`, and then don't need to be passed + in to the module. The cluster image used should match the release which you are deploying with. Published images are described in the release notes -[here](https://github.com/stackhpc/ansible-slurm-appliance/releases). +[here](https://github.com/stackhpc/ansible-slurm-appliance/releases). By default, the site OpenTofu configuration provisions two volumes and attaches them to the control node: - - "$cluster_name-home" for NFS-shared home directories - - "$cluster_name-state" for monitoring and Slurm data -The volumes mean this data is persisted when the control node is rebuilt. -However if the cluster is destroyed with `tofu destroy` then the volumes will -also be deleted. This is undesirable for production environments and usually -also for staging environments. Therefore the volumes should be manually -created, e.g. via the CLI: - ``` +- "$cluster_name-home" for NFS-shared home directories +- "$cluster_name-state" for monitoring and Slurm data + The volumes mean this data is persisted when the control node is rebuilt. + However if the cluster is destroyed with `tofu destroy` then the volumes will + also be deleted. 
This is undesirable for production environments and usually + also for staging environments. Therefore the volumes should be manually + created, e.g. via the CLI: + + ```shell openstack volume create --size 200 mycluster-home # size in GB openstack volume create --size 100 mycluster-state ``` @@ -229,10 +231,10 @@ created, e.g. via the CLI: and OpenTofu configured to use those volumes instead of managing them itself by setting: - ``` - home_volume_provisioning = "attach" - state_volume_provisioning = "attach" - ``` +```text +home_volume_provisioning = "attach" +state_volume_provisioning = "attach" +``` either for a specific environment within the cluster module block in `environments/$ENV/tofu/main.tf`, or as the site default by changing the @@ -245,19 +247,19 @@ allows for multiple clusters to be created with this environment. If no home volume at all is required because the home directories are provided by a parallel filesystem (e.g. Manila) set - ``` - home_volume_provisioning = "none" - ``` +```text +home_volume_provisioning = "none" +``` In this case the NFS share for home directories is automatically disabled. **NB:** To apply "attach" options to existing clusters, first remove the volume(s) from the tofu state, e.g.: - ``` - tofu state list # find the volume(s) - tofu state rm 'module.cluster.openstack_blockstorage_volume_v3.state[0]' - ``` +```shell +tofu state list # find the volume(s) +tofu state rm 'module.cluster.openstack_blockstorage_volume_v3.state[0]' +``` This leaves the volume itself intact, but means OpenTofu "forgets" it. Then set the "attach" options and run `tofu apply` again - this should show there are no @@ -271,15 +273,16 @@ the IPs into the OpenTofu `login` definition. Consider enabling topology aware scheduling. This is currently only supported if your cluster does not include any baremetal nodes. This can be enabled by: - 1. Creating Availability Zones in your OpenStack project for each physical - rack - 2. Setting the `availability_zone` fields of compute groups in your OpenTofu - configuration - 3. Adding the `compute` group as a child of `topology` in - `environments/$ENV/inventory/groups` - 4. (Optional) If you are aware of the physical topology of switches above the - rack-level, override `topology_above_rack_topology` in your groups vars - (see [topology docs](../ansible/roles/topology/README.md) for more detail) + +1. Creating Availability Zones in your OpenStack project for each physical + rack +2. Setting the `availability_zone` fields of compute groups in your OpenTofu + configuration +3. Adding the `compute` group as a child of `topology` in + `environments/$ENV/inventory/groups` +4. (Optional) If you are aware of the physical topology of switches above the + rack-level, override `topology_above_rack_topology` in your groups vars + (see [topology docs](../ansible/roles/topology/README.md) for more detail) Consider whether mapping of baremetal nodes to ironic nodes is required. See [PR 485](https://github.com/stackhpc/ansible-slurm-appliance/pull/485). @@ -287,12 +290,12 @@ Consider whether mapping of baremetal nodes to ironic nodes is required. See To deploy this infrastructure, ensure the venv and the environment are [activated](#cookiecutter-instructions) and run: - ```bash - export OS_CLOUD=openstack - cd environments/$ENV/tofu/ - tofu init - tofu apply - ``` +```shell +export OS_CLOUD=openstack +cd environments/$ENV/tofu/ +tofu init +tofu apply +``` and follow the prompts. 
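It can also be useful to review the planned changes before applying them; a minimal sketch, assuming the same working directory and `OS_CLOUD` setup as above:

```shell
# Show what OpenTofu would create, change or destroy, without applying anything:
tofu plan

# Optionally, save the plan and later apply exactly that plan:
tofu plan -out=cluster.tfplan
tofu apply cluster.tfplan
```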
Note the OS_CLOUD environment variable assumes that OpenStack credentials are defined using a @@ -305,9 +308,9 @@ number of concurrent operations to 10. This means that for example only 10 ports or 10 instances can be deployed at once. This should be raised by modifying `environments/$ENV/activate` to add a line like: - ```bash - export TF_CLI_ARGS_apply="-parallelism=25" - ``` +```shell +export TF_CLI_ARGS_apply="-parallelism=25" +``` The value chosen should be the highest value demonstrated during testing. Note that any time spent blocked due to this parallelism limit does not count @@ -328,14 +331,15 @@ against the (un-overridable) internal OpenTofu timeout of 30 minutes instances) it may be necessary to [configure chrony](./chrony.md). - Consider whether Prometheus storage configuration is required. By default: + - A 200GB state volume is provisioned (but see above) - The common environment [sets](../environments/common/inventory/group_vars/all/prometheus.yml) a maximum retention of 100 GB and 31 days. - These may or may not be appropriate depending on the number of nodes, the - scrape interval, and other uses of the state volume (primarily the `slurmctld` - state and the `slurmdbd` database). See - [docs/monitoring-and-logging](./monitoring-and-logging.md) for more options. + These may or may not be appropriate depending on the number of nodes, the + scrape interval, and other uses of the state volume (primarily the `slurmctld` + state and the `slurmdbd` database). See + [docs/monitoring-and-logging](./monitoring-and-logging.md) for more options. - Configure Open OnDemand - see [specific documentation](openondemand.md) which notes specific variables required. @@ -383,17 +387,22 @@ against the (un-overridable) internal OpenTofu timeout of 30 minutes ### Applying configuration To configure the appliance, ensure the venv and the environment are -[activated](#create-a-new-environment) and run: - ```bash - ansible-playbook ansible/site.yml - ``` +```text +[activated](#create-a-new-environment) +``` + +and run: + +```shell +ansible-playbook ansible/site.yml +``` Once it completes you can log in to the cluster using: - ```bash - ./dev/ansible-ssh login - ``` +```shell +./dev/ansible-ssh login +``` For further information, including additional configuration guides and operations instructions, see the [docs](README.md) directory. diff --git a/docs/sequence.md b/docs/sequence.md index 8723674..8149290 100644 --- a/docs/sequence.md +++ b/docs/sequence.md @@ -1,10 +1,9 @@ # Slurm Appliance Sequences - - ## Image build This sequence applies to both: + - "fatimage" builds, starting from GenericCloud images and using control,login,compute inventory groups to install all packages, e.g. StackHPC CI builds @@ -86,9 +85,10 @@ sequenceDiagram This sequence applies to active clusters, after running the `site.yml` playbook for the first time. Slurm controlled rebuild requires that: + - Compute groups in the OpenTofu `compute` variable have: - - `ignore_image_changes: true` - - `compute_init_enable: ['compute', ... ]` + - `ignore_image_changes: true` + - `compute_init_enable: ['compute', ... ]` - The Ansible `rebuild` inventory group contains the `control` group. TODO: should also document how compute-init does NOT run if the `site.yml` @@ -126,8 +126,9 @@ sequenceDiagram end nodes->>nodes: srun task completes ``` + Notes: + 1. And/or login/compute group overrides 2. Running on control node 3. 
On hosts targeted by job - diff --git a/docs/site/README.md b/docs/site/README.md index ee14787..6597ea4 100644 --- a/docs/site/README.md +++ b/docs/site/README.md @@ -2,5 +2,4 @@ This document is a placeholder for any site-specific documentation, e.g. environment descriptions. -#TODO: list things which should commonly be specified here. - +## TODO: list things which should commonly be specified here diff --git a/docs/upgrades.md b/docs/upgrades.md index b44cae9..5cf4bbe 100644 --- a/docs/upgrades.md +++ b/docs/upgrades.md @@ -6,51 +6,61 @@ Generally, upstream releases will happen roughly monthly. Releases may contain n Any site-specific instructions in [docs/site/README.md](site/README.md) should be reviewed in tandem with this. This document assumes the deployment repository has: + 1. Remotes: - - `origin` referring to the site-specific remote repository. - - `stackhpc` referring to the StackHPC repository at https://github.com/stackhpc/ansible-slurm-appliance.git. + - `origin` referring to the site-specific remote repository. + - `stackhpc` referring to the StackHPC repository at . 2. Branches: - - `main` - following `main/origin`, the current site-specific code deployed to production. - - `upstream` - following `main/stackhpc`, i.e. the upstream `main` branch from `stackhpc`. + - `main` - following `main/origin`, the current site-specific code deployed to production. + - `upstream` - following `main/stackhpc`, i.e. the upstream `main` branch from `stackhpc`. 3. The following environments: - - `$PRODUCTION`: a production environment, as defined by e.g. `environments/production/`. - - `$STAGING`: a production environment, as defined by e.g. `environments/staging/`. - - `$SITE_ENV`: a base site-specific environment, as defined by e.g. `environments/mysite/`. + - `$PRODUCTION`: a production environment, as defined by e.g. `environments/production/`. + - `$STAGING`: a production environment, as defined by e.g. `environments/staging/`. + - `$SITE_ENV`: a base site-specific environment, as defined by e.g. `environments/mysite/`. **NB:** Commands which should be run on the Slurm login node are shown below prefixed `[LOGIN]$`. All other commands should be run on the Ansible deploy host. 1. Update the `upstream` branch from the `stackhpc` remote, including tags: - git fetch stackhpc main --tags +```shell +git fetch stackhpc main --tags +``` 1. Identify the latest release from the [Slurm appliance release page](https://github.com/stackhpc/ansible-slurm-appliance/releases). Below this release is shown as `vX.Y`. 1. Ensure your local site branch is up to date and create a new branch from it for the site-specfic release code: - git checkout main - git pull --prune - git checkout -b update/vX.Y +```shell +git checkout main +git pull --prune +git checkout -b update/vX.Y +``` 1. Merge the upstream code into your release branch: - git merge vX.Y +```shell +git merge vX.Y +``` + +It is possible this will introduce merge conflicts; fix these following the usual Git +prompts. Generally merge conflicts should only exist where functionality which was added +for your site (not in a hook) has subsequently been merged upstream. - It is possible this will introduce merge conflicts; fix these following the usual git - prompts. Generally merge conflicts should only exist where functionality which was added - for your site (not in a hook) has subsequently been merged upstream. 
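A minimal sketch of the usual conflict-resolution steps (standard Git commands, shown only as a reminder):

```shell
# See which files are conflicted:
git status

# Edit each conflicted file to keep the appropriate site-specific and upstream changes,
# then mark it resolved and complete the merge:
git add <conflicted-file>
git merge --continue
```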
+Note that if upgrading from a release prior to v2.3, you will likely have merge conflicts +with existing site OpenTofu configurations in `environments/site/tofu`. Generally - Note that if upgrading from a release prior to v2.3, you will likely have merge conflicts - with existing site OpenTofu configurations in `environments/site/tofu`. Generally - - Changes to `default` values in `environments/site/tofu.variables.tf` should be rejected. - - All other changes to the OpenTofu configuration should be accepted, unless they overwrite - site-specific additional resources. +- Changes to `default` values in `environments/site/tofu.variables.tf` should be rejected. +- All other changes to the OpenTofu configuration should be accepted, unless they overwrite + site-specific additional resources. 1. Push this branch and create a PR: - git push - # follow instructions +```shell +git push +# follow instructions +``` 1. Review the PR to see if any added/changed functionality requires alteration of site-specific configuration. In general changes to existing functionality will aim to be @@ -58,17 +68,19 @@ All other commands should be run on the Ansible deploy host. necessary to use new functionality or where functionality has been upstreamed as above. Note that the upstream `environments/site/inventory/groups` file contains all possible groups which can be used to enable features. This will be updated when pulling changes - from the StackHPC repo, and any new groups should be enabled/disabled as required for + from the StackHPC repository, and any new groups should be enabled/disabled as required for your site. - Make changes as necessary. +Make changes as necessary. 1. Identify image(s) from the relevant [Slurm appliance release](https://github.com/stackhpc/ansible-slurm-appliance/releases), and download using the link on the release plus the image name, e.g. for an image `openhpc-RL9-250708-1547-1494192e`: - wget https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images/openhpc-RL9-250708-1547-1494192e +```shell +wget https://leafcloud.store/swift/v1/AUTH_f39848421b2747148400ad8eeae8d536/openhpc-images/openhpc-RL9-250708-1547-1494192e +``` - Note that some releases may not include new images. In this case use the image from the latest previous release with new images. +Note that some releases may not include new images. In this case use the image from the latest previous release with new images. 1. If an "extra" image build with local modifications is required, update the Packer build configuration to use the above new image and run a build. See @@ -83,33 +95,42 @@ All other commands should be run on the Ansible deploy host. 1. Declare a future outage window to cluster users. A [Slurm reservation](https://slurm.schedmd.com/scontrol.html#lbAQ) can be used to prevent jobs running during that window, e.g.: - [LOGIN]$ sudo scontrol create reservation Flags=MAINT ReservationName="upgrade-vX.Y" StartTime=2024-10-16T08:00:00 EndTime=2024-10-16T10:00:00 Nodes=ALL Users=root +```shell +[LOGIN]$ sudo scontrol create reservation Flags=MAINT ReservationName="upgrade-vX.Y" StartTime=2024-10-16T08:00:00 EndTime=2024-10-16T10:00:00 Nodes=ALL Users=root +``` - Note a reservation cannot be created if it may overlap with currently running jobs (defined by job or partition time limits). +Note a reservation cannot be created if it may overlap with currently running jobs (defined by job or partition time limits). 1. 
At the outage window, check there are no jobs running: - [LOGIN]$ squeue +```shell +[LOGIN]$ squeue +``` 1. Deploy the branch created above to production, i.e. activate the production environment, run OpenTofu to reimage or -delete/recreate instances with the new images (depending on how the root disk is defined), and run Ansible's `site.yml` -playbook to reconfigure the cluster, e.g. as described in the main [README.md](../README.md). + delete/recreate instances with the new images (depending on how the root disk is defined), and run Ansible's `site.yml` + playbook to reconfigure the cluster, e.g. as described in the main [README.md](../README.md). 1. Check slurm is up: - [LOGIN]$ sinfo -R - - The `-R` shows the reason for any nodes being down. +```shell +[LOGIN]$ sinfo -R +``` + +The `-R` shows the reason for any nodes being down. 1. If the above shows nodes done for having been "unexpectedly rebooted", set them up again: - [LOGIN]$ sudo scontrol update state=RESUME nodename=$HOSTLIST_EXPR +```shell +[LOGIN]$ sudo scontrol update state=RESUME nodename=$HOSTLIST_EXPR +``` - where the hostlist expression might look like e.g. `general-[0-1]` to reset state for nodes 0 and 1 of the general partition. +where the hostlist expression might look like e.g. `general-[0-1]` to reset state for nodes 0 and 1 of the general partition. 1. Delete the reservation: - [LOGIN]$ sudo scontrol delete ReservationName="upgrade-slurm-v1.160" +```shell +[LOGIN]$ sudo scontrol delete ReservationName="upgrade-slurm-v1.160" +``` 1. Tell users the cluster is available again. - diff --git a/environments/.caas/README.md b/environments/.caas/README.md index 4a08433..8402845 100644 --- a/environments/.caas/README.md +++ b/environments/.caas/README.md @@ -3,9 +3,10 @@ Environment for default Azimuth Slurm. This is not intended to be manually deployed. Non-standard things for this environment: + - There is no activate script. -- `ansible.cgf` is provided in the repo root, as expected by the caas operator. -- `ANSIBLE_INVENTORY` is set in the cluster type template, using a path relative to the +- `ansible.cgf` is provided in the repository root, as expected by the caas operator. +- `ANSIBLE_INVENTORY` is set in the cluster type template, using a path relative to the runner project directory: azimuth_caas_stackhpc_slurm_appliance_template: @@ -13,6 +14,6 @@ Non-standard things for this environment: envVars: ANSIBLE_INVENTORY: environments/common/inventory,environments/.caas/inventory - Ansible then defines `ansible_inventory_sources` which contains absolute paths, and - that is used to derive the `appliances_environment_root` and - `appliances_repository_root`. + Ansible then defines `ansible_inventory_sources` which contains absolute paths, and + that is used to derive the `appliances_environment_root` and + `appliances_repository_root`. 
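For illustration only (this environment is not intended to be deployed manually), that inventory setting is roughly equivalent to exporting the variable before invoking Ansible from the repository root:

```shell
# Purely illustrative - the caas operator sets this via the cluster type template:
export ANSIBLE_INVENTORY=environments/common/inventory,environments/.caas/inventory
ansible-playbook ansible/site.yml
```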
diff --git a/environments/.caas/hooks/post.yml b/environments/.caas/hooks/post.yml index eaaeb23..cf606c7 100644 --- a/environments/.caas/hooks/post.yml +++ b/environments/.caas/hooks/post.yml @@ -1,9 +1,10 @@ +--- # Configure the Zenith clients that are required # Note zenith hosts are in podman group - hosts: grafana tasks: - name: Deploy the Zenith client for Grafana - include_role: + ansible.builtin.include_role: name: zenith_proxy vars: zenith_proxy_service_name: zenith-monitoring @@ -11,7 +12,7 @@ zenith_proxy_upstream_host: "{{ ansible_host }}" # IP zenith_proxy_upstream_port: "{{ grafana_port }}" zenith_proxy_client_token: "{{ zenith_token_monitoring }}" - zenith_proxy_mitm_enabled: yes + zenith_proxy_mitm_enabled: true zenith_proxy_mitm_auth_inject: basic zenith_proxy_mitm_auth_basic_username: "{{ grafana_security.admin_user }}" zenith_proxy_mitm_auth_basic_password: "{{ grafana_security.admin_password }}" @@ -20,7 +21,7 @@ - hosts: openondemand tasks: - name: Deploy the Zenith client for OOD - include_role: + ansible.builtin.include_role: name: zenith_proxy vars: zenith_proxy_service_name: zenith-ood @@ -29,7 +30,7 @@ zenith_proxy_upstream_host: "{{ ansible_host }}" # IP zenith_proxy_upstream_port: 443 zenith_proxy_client_token: "{{ zenith_token_ood }}" - zenith_proxy_mitm_enabled: yes + zenith_proxy_mitm_enabled: true zenith_proxy_mitm_auth_inject: basic zenith_proxy_mitm_auth_basic_username: azimuth zenith_proxy_mitm_auth_basic_password: "{{ vault_azimuth_user_password }}" @@ -40,14 +41,15 @@ become: false gather_facts: false tasks: - - import_role: + - ansible.builtin.import_role: name: hpctests when: cluster_run_validation | default(false) | bool # Write the outputs as the final task - hosts: localhost tasks: - - debug: var=outputs + - ansible.builtin.debug: + var: outputs vars: # Ansible has a fit when there are two 'hostvars' evaluations in a resolution chain, # so we have to repeat logic here unfortunately @@ -62,4 +64,4 @@ if zenith_fqdn_ood is not defined else {} ) - }} \ No newline at end of file + }} diff --git a/environments/.caas/hooks/pre.yml b/environments/.caas/hooks/pre.yml index 8924dca..8209b39 100644 --- a/environments/.caas/hooks/pre.yml +++ b/environments/.caas/hooks/pre.yml @@ -1,8 +1,9 @@ --- - # Generate k3s token - name: Generate k3s token - # NB: Although this generates a new token on each run, the actual token set in metadata is retrieved from a set-once tofu resource, hence only the first value ever generated is relevant. + # NB: Although this generates a new token on each run, the actual token set in + # metadata is retrieved from a set-once tofu resource, hence only the first + # value ever generated is relevant. hosts: openstack tasks: - ansible.builtin.set_fact: @@ -17,44 +18,44 @@ # Ensure that the secrets are generated and persisted on the control host - name: Generate and persist secrets hosts: control - gather_facts: no - become: yes + gather_facts: false + become: true roles: - persist_openhpc_secrets # validate.yml asserts presence of a control group which doesn't exist when # destroying infra, so only validate when we're not destroying - hosts: openstack - gather_facts: no - become: no + gather_facts: false + become: false tasks: - - set_fact: + - ansible.builtin.set_fact: appliances_validate: false when: "cluster_state | default('') == 'absent'" # TODO: FIXME: maybe by doing the user move in cloud-init? 
-# The first task in the bootstrap playbook causes the home directory of the rocky user to be moved on the first run +# The first task in the bootstrap playbook causes the home directory of the rocky user to be moved on the first run # This can disrupt the SSH connection, particularly because we use the login host as a jump host # So we move the home directory on the login node and reset the connections first - hosts: login gather_facts: false tasks: - - name: Set up Ansible user - user: "{{ (appliances_local_users_default | selectattr('user.name', 'eq', appliances_local_users_ansible_user_name))[0]['user'] }}" - become_method: "sudo" + - name: Set up Ansible user # noqa: args[module] + ansible.builtin.user: "{{ (appliances_local_users_default | selectattr('user.name', 'eq', appliances_local_users_ansible_user_name))[0]['user'] }}" + become_method: ansible.builtin.sudo # Need to change working directory otherwise we try to switch back to non-existent directory. - become_flags: '-i' + become_flags: "-i" become: true - hosts: cluster - gather_facts: no + gather_facts: false tasks: - name: Reset persistent SSH connections - meta: reset_connection + ansible.builtin.meta: reset_connection - hosts: localhost - gather_facts: no - become: no + gather_facts: false + become: false tasks: - name: Add hosts to dnf_repos group to enable repofiles ansible.builtin.add_host: @@ -68,9 +69,10 @@ # https://github.com/stackhpc/ansible-slurm-appliance/blob/ba9699267449fba58cd9c04c451759a914fd7144/ansible/validate.yml#L16 # doesn't break CaaS platforms - hosts: localhost - gather_facts: no + gather_facts: false tasks: - name: Prepare requirements.yml.last for galaxy validation - copy: + ansible.builtin.copy: src: "{{ appliances_repository_root }}/requirements.yml" dest: "{{ appliances_repository_root }}/requirements.yml.last" + mode: "0644" diff --git a/environments/.caas/inventory/group_vars/all/basic_users.yml b/environments/.caas/inventory/group_vars/all/basic_users.yml index 0e38148..ef19398 100644 --- a/environments/.caas/inventory/group_vars/all/basic_users.yml +++ b/environments/.caas/inventory/group_vars/all/basic_users.yml @@ -1,3 +1,4 @@ +--- basic_users_users: - name: azimuth # Hash the password with a salt that is different for each host diff --git a/environments/.caas/inventory/group_vars/all/cluster.yml b/environments/.caas/inventory/group_vars/all/cluster.yml index 14633c8..ea38e9f 100644 --- a/environments/.caas/inventory/group_vars/all/cluster.yml +++ b/environments/.caas/inventory/group_vars/all/cluster.yml @@ -1,3 +1,4 @@ +--- # Account for the fact we are running outside of the expected environment system: caas_inventory: "{{ ansible_inventory_sources | last }}" # ansible_inventory_sources is absolute appliances_environment_root: "{{ caas_inventory | dirname }}" diff --git a/environments/.caas/inventory/group_vars/all/grafana.yml b/environments/.caas/inventory/group_vars/all/grafana.yml index 10fdc92..d831467 100644 --- a/environments/.caas/inventory/group_vars/all/grafana.yml +++ b/environments/.caas/inventory/group_vars/all/grafana.yml @@ -1 +1,2 @@ +--- grafana_auth_anonymous: "{{ groups['openondemand'] | count > 0 }}" diff --git a/environments/.caas/inventory/group_vars/all/hpctests.yml b/environments/.caas/inventory/group_vars/all/hpctests.yml index a6a2c91..f4ade94 100644 --- a/environments/.caas/inventory/group_vars/all/hpctests.yml +++ b/environments/.caas/inventory/group_vars/all/hpctests.yml @@ -1,8 +1,9 @@ +--- # Skip plotting pingpong as matplotlib not in runner environment 
hpctests_pingpong_plot: false # In Azimuth, the Ansible controller is an ephemeral pod, so all that matters is that -# this is a location that is writable by the container user +# this is a location that is writable by the container user hpctests_outdir: "{{ playbook_dir }}/.tmp/hpctests" # hpctests run by default in Azimuth but not trying to stress-test the nodes diff --git a/environments/.caas/inventory/group_vars/all/manila.yml b/environments/.caas/inventory/group_vars/all/manila.yml index 226ac21..ebd1dde 100644 --- a/environments/.caas/inventory/group_vars/all/manila.yml +++ b/environments/.caas/inventory/group_vars/all/manila.yml @@ -1,3 +1,4 @@ +--- caas_manila_home: share_name: "{{ cluster_name }}-home" mount_path: /home @@ -14,4 +15,6 @@ caas_manila_project: mount_group: root mount_mode: ugo=rwX -os_manila_mount_shares: "{{ ([caas_manila_home] if cluster_home_manila_share | bool else []) + ([caas_manila_project] if cluster_project_manila_share | bool else []) }}" +# yamllint disable-line rule:line-length +os_manila_mount_shares: "{{ ([caas_manila_home] if cluster_home_manila_share | bool else []) + ([caas_manila_project] if cluster_project_manila_share | bool else + []) }}" diff --git a/environments/.caas/inventory/group_vars/all/nfs.yml b/environments/.caas/inventory/group_vars/all/nfs.yml index 74a42cd..0eca0c8 100644 --- a/environments/.caas/inventory/group_vars/all/nfs.yml +++ b/environments/.caas/inventory/group_vars/all/nfs.yml @@ -1,10 +1,11 @@ +--- nfs_server: "{{ nfs_server_default }}" caas_nfs_home: - comment: Export /exports/home from Slurm control node as /home nfs_enable: - server: "{{ inventory_hostname in groups['control'] }}" - clients: "{{ inventory_hostname in groups['cluster'] }}" + server: "{{ inventory_hostname in groups['control'] }}" + clients: "{{ inventory_hostname in groups['cluster'] }}" nfs_export: "/exports/home" # assumes default site TF is being used nfs_client_mnt_point: "/home" diff --git a/environments/.caas/inventory/group_vars/all/openhpc.yml b/environments/.caas/inventory/group_vars/all/openhpc.yml index 74f196c..56c8b90 100644 --- a/environments/.caas/inventory/group_vars/all/openhpc.yml +++ b/environments/.caas/inventory/group_vars/all/openhpc.yml @@ -1,3 +1,4 @@ +--- openhpc_cluster_name: "{{ cluster_name }}" # Provision a single "standard" compute nodegroup using the supplied diff --git a/environments/.caas/inventory/group_vars/all/openondemand.yml b/environments/.caas/inventory/group_vars/all/openondemand.yml index 4dc0b93..83b15a2 100644 --- a/environments/.caas/inventory/group_vars/all/openondemand.yml +++ b/environments/.caas/inventory/group_vars/all/openondemand.yml @@ -6,4 +6,3 @@ openondemand_desktop_partition: "{{ openhpc_partitions[0]['name'] }}" httpd_listen_addr_port: - 80 - 443 - diff --git a/environments/.caas/inventory/group_vars/all/zenith.yml b/environments/.caas/inventory/group_vars/all/zenith.yml index 56dd0ca..652f2da 100644 --- a/environments/.caas/inventory/group_vars/all/zenith.yml +++ b/environments/.caas/inventory/group_vars/all/zenith.yml @@ -1 +1,2 @@ +--- zenith_proxy_podman_user: podman diff --git a/environments/.caas/inventory/group_vars/openstack.yml b/environments/.caas/inventory/group_vars/openstack.yml index f76c050..83dff89 100644 --- a/environments/.caas/inventory/group_vars/openstack.yml +++ b/environments/.caas/inventory/group_vars/openstack.yml @@ -1,3 +1,4 @@ +--- # The default Terraform state key for backends that support it terraform_state_key: "cluster/{{ cluster_id }}/tfstate" diff --git 
a/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml b/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml index ab10eff..5a105bd 100644 --- a/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml +++ b/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml @@ -1,8 +1,8 @@ +--- name: "slurm" label: "Slurm" -description: >- - Batch cluster running the Slurm workload manager, the Open - OnDemand web interface, and custom monitoring. +description: >- + Batch cluster running the Slurm workload manager, the Open OnDemand web interface, and custom monitoring. logo: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Slurm_logo.svg/158px-Slurm_logo.svg.png requires_ssh_key: true @@ -49,7 +49,7 @@ parameters: count_parameter: compute_count min_ram: 2048 min_disk: 20 - + - name: home_volume_size label: Home volume size (GB) description: The size of the cloud volume to use for home directories. @@ -79,7 +79,7 @@ parameters: this volume, 10GB is set aside for cluster state and the remaining space is used to store cluster metrics. - The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be + The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be discarded to ensure that the database does not grow larger than this volume. kind: cloud.volume_size immutable: true @@ -98,6 +98,7 @@ parameters: options: checkboxLabel: Run post-configuration validation? +# yamllint disable rule:line-length usage_template: |- # Accessing the cluster using Open OnDemand @@ -137,6 +138,7 @@ usage_template: |- Other parts of the filesystem may be affected during a patch operation, including any packages that have been installed using `dnf`. +# yamllint enable rule:line-length services: - name: ood @@ -145,4 +147,3 @@ services: - name: monitoring label: Monitoring icon_url: https://raw.githubusercontent.com/cncf/artwork/master/projects/prometheus/icon/color/prometheus-icon-color.png - diff --git a/environments/.caas/ui-meta/slurm-infra-manila-home.yml b/environments/.caas/ui-meta/slurm-infra-manila-home.yml index 4a01bb6..6255f46 100644 --- a/environments/.caas/ui-meta/slurm-infra-manila-home.yml +++ b/environments/.caas/ui-meta/slurm-infra-manila-home.yml @@ -1,9 +1,9 @@ +--- # Exactly as for slurm-infra.yml but to allow for separate manila/non-manila home appliances name: "slurm-manila-home" label: "Slurm (CephFS home)" -description: >- - Batch cluster running the Slurm workload manager, the Open - OnDemand web interface, and custom monitoring. +description: >- + Batch cluster running the Slurm workload manager, the Open OnDemand web interface, and custom monitoring. This version uses CephFS for home directories. logo: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Slurm_logo.svg/158px-Slurm_logo.svg.png @@ -52,7 +52,7 @@ parameters: count_parameter: compute_count min_ram: 2048 min_disk: 20 - + - name: home_volume_size label: Home share size (GB) description: The size of the share to use for home directories. @@ -69,7 +69,7 @@ parameters: this volume, 10GB is set aside for cluster state and the remaining space is used to store cluster metrics. - The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be + The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be discarded to ensure that the database does not grow larger than this volume. 
kind: cloud.volume_size immutable: true @@ -88,6 +88,7 @@ parameters: options: checkboxLabel: Run post-configuration validation? +# yamllint disable rule:line-length usage_template: |- # Accessing the cluster using Open OnDemand @@ -127,6 +128,7 @@ usage_template: |- Other parts of the filesystem may be affected during a patch operation, including any packages that have been installed using `dnf`. +# yamllint enable rule:line-length services: - name: ood diff --git a/environments/.caas/ui-meta/slurm-infra.yml b/environments/.caas/ui-meta/slurm-infra.yml index 36b8928..b2d4383 100644 --- a/environments/.caas/ui-meta/slurm-infra.yml +++ b/environments/.caas/ui-meta/slurm-infra.yml @@ -1,8 +1,8 @@ +--- name: "slurm" label: "Slurm" -description: >- - Batch cluster running the Slurm workload manager, the Open - OnDemand web interface, and custom monitoring. +description: >- + Batch cluster running the Slurm workload manager, the Open OnDemand web interface, and custom monitoring. logo: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Slurm_logo.svg/158px-Slurm_logo.svg.png requires_ssh_key: true @@ -49,7 +49,7 @@ parameters: count_parameter: compute_count min_ram: 2048 min_disk: 20 - + - name: home_volume_size label: Home volume size (GB) description: The size of the cloud volume to use for home directories. @@ -66,7 +66,7 @@ parameters: this volume, 10GB is set aside for cluster state and the remaining space is used to store cluster metrics. - The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be + The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be discarded to ensure that the database does not grow larger than this volume. kind: cloud.volume_size immutable: true @@ -85,6 +85,7 @@ parameters: options: checkboxLabel: Run post-configuration validation? +# yamllint disable rule:line-length usage_template: |- # Accessing the cluster using Open OnDemand @@ -124,6 +125,7 @@ usage_template: |- Other parts of the filesystem may be affected during a patch operation, including any packages that have been installed using `dnf`. 
+# yamllint enable rule:line-length services: - name: ood diff --git a/environments/.stackhpc/hooks/post-bootstrap.yml b/environments/.stackhpc/hooks/post-bootstrap.yml index df39026..3e24212 100644 --- a/environments/.stackhpc/hooks/post-bootstrap.yml +++ b/environments/.stackhpc/hooks/post-bootstrap.yml @@ -1,17 +1,19 @@ +--- - hosts: podman:!builder - become: yes + become: true gather_facts: false tags: podman tasks: - name: Configure container image registry to avoid docker.io ratelimits - copy: + ansible.builtin.copy: dest: /etc/containers/registries.conf.d/003-arcus-mirror.conf content: | [[registry]] location="docker.io/library/" prefix="docker.io/library/" - + [[registry.mirror]] location = "{{ podman_registry_address }}" insecure = true + mode: "0644" when: "ci_cloud == 'ARCUS'" diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index 305713a..e810c20 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -1,17 +1,18 @@ +--- - hosts: control:!builder - become: yes + become: true gather_facts: false tasks: - name: Output OS version - command: cat /etc/redhat-release + ansible.builtin.command: cat /etc/redhat-release changed_when: false - name: Write CI-generated inventory and secrets for debugging ansible.builtin.copy: dest: /etc/ci-config/ src: "{{ item }}" - directory_mode: 0400 - mode: 0400 + directory_mode: "0400" + mode: "0400" owner: root group: root no_log: "{{ no_log | default(true) }}" diff --git a/environments/.stackhpc/inventory/group_vars/all/basic_users.yml b/environments/.stackhpc/inventory/group_vars/all/basic_users.yml index e2088ff..235814c 100644 --- a/environments/.stackhpc/inventory/group_vars/all/basic_users.yml +++ b/environments/.stackhpc/inventory/group_vars/all/basic_users.yml @@ -1,3 +1,6 @@ +--- + +# yamllint disable-line rule:line-length test_demo_user_password: "{{ lookup('env', 'DEMO_USER_PASSWORD') | default(vault_demo_user_password, true) }}" # CI uses env, debug can set vault_demo_user_password basic_users_users: diff --git a/environments/.stackhpc/inventory/group_vars/all/bastion.yml b/environments/.stackhpc/inventory/group_vars/all/bastion.yml index a1001e8..ea2ad00 100644 --- a/environments/.stackhpc/inventory/group_vars/all/bastion.yml +++ b/environments/.stackhpc/inventory/group_vars/all/bastion.yml @@ -1,3 +1,4 @@ +--- ci_cloud: "{{ lookup('env', 'CI_CLOUD') }}" bastion_config: ARCUS: diff --git a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml index 9a979ab..a92f011 100644 --- a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml +++ b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml @@ -1,3 +1,4 @@ +--- # This file provides examples of using freeipa role variables. These are NOT functional in CI as freeipa_{server,client} groups are not defined. 
# NB: Users defined this way have expired passwords diff --git a/environments/.stackhpc/inventory/group_vars/all/hpctests.yml b/environments/.stackhpc/inventory/group_vars/all/hpctests.yml index e8cfcea..4724621 100644 --- a/environments/.stackhpc/inventory/group_vars/all/hpctests.yml +++ b/environments/.stackhpc/inventory/group_vars/all/hpctests.yml @@ -1 +1,2 @@ +--- hpctests_user: demo_user diff --git a/environments/.stackhpc/inventory/group_vars/all/manila.yml b/environments/.stackhpc/inventory/group_vars/all/manila.yml index 59f9358..b37a130 100644 --- a/environments/.stackhpc/inventory/group_vars/all/manila.yml +++ b/environments/.stackhpc/inventory/group_vars/all/manila.yml @@ -1,3 +1,4 @@ +--- os_manila_mount_shares_arcus: - share_name: slurm-v2-home mount_path: /project diff --git a/environments/.stackhpc/inventory/group_vars/all/openhpc.yml b/environments/.stackhpc/inventory/group_vars/all/openhpc.yml index 5aac5f8..ae1342b 100644 --- a/environments/.stackhpc/inventory/group_vars/all/openhpc.yml +++ b/environments/.stackhpc/inventory/group_vars/all/openhpc.yml @@ -1,3 +1,4 @@ +--- openhpc_config_extra: SlurmctldDebug: debug SlurmdDebug: debug diff --git a/environments/.stackhpc/inventory/group_vars/all/openondemand.yml b/environments/.stackhpc/inventory/group_vars/all/openondemand.yml index a8f88e5..9779e96 100644 --- a/environments/.stackhpc/inventory/group_vars/all/openondemand.yml +++ b/environments/.stackhpc/inventory/group_vars/all/openondemand.yml @@ -1,8 +1,9 @@ +--- openondemand_auth: basic_pam openondemand_jupyter_partition: standard openondemand_desktop_partition: standard openondemand_rstudio_partition: standard openondemand_codeserver_partition: standard -#openondemand_dashboard_support_url: -#openondemand_dashboard_docs_url: -#openondemand_filesapp_paths: +# openondemand_dashboard_support_url: +# openondemand_dashboard_docs_url: +# openondemand_filesapp_paths: diff --git a/environments/.stackhpc/inventory/group_vars/all/podman.yml b/environments/.stackhpc/inventory/group_vars/all/podman.yml index b9d4109..02d7e7f 100644 --- a/environments/.stackhpc/inventory/group_vars/all/podman.yml +++ b/environments/.stackhpc/inventory/group_vars/all/podman.yml @@ -1,2 +1,3 @@ +--- arcus_podman_registry_address: 192.168.3.95:5000 podman_registry_address: "{{ arcus_podman_registry_address if ci_cloud == 'ARCUS' else '' }}" diff --git a/environments/.stackhpc/inventory/group_vars/all/tuned.yml b/environments/.stackhpc/inventory/group_vars/all/tuned.yml index f1cb034..a8074e7 100644 --- a/environments/.stackhpc/inventory/group_vars/all/tuned.yml +++ b/environments/.stackhpc/inventory/group_vars/all/tuned.yml @@ -1,2 +1,3 @@ +--- # Set profile which is not default (on VMs) for testing tuned_profile: hpc-compute diff --git a/environments/.stackhpc/inventory/group_vars/builder.yml b/environments/.stackhpc/inventory/group_vars/builder.yml index 10b15ad..788666a 100644 --- a/environments/.stackhpc/inventory/group_vars/builder.yml +++ b/environments/.stackhpc/inventory/group_vars/builder.yml @@ -1,4 +1,5 @@ -#update_enable: false # Can uncomment for speed debugging non-update related build issues +--- +# update_enable: false # Can uncomment for speed debugging non-update related build issues sssd_install_ldap: true # include sssd-ldap package in fatimage # update_enable: false # Can uncomment for speed debugging non-update related build issues diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 
6b294d1..2000b5e 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { - "cluster_image": { - "RL8": "openhpc-RL8-250820-0800-767addd8", - "RL9": "openhpc-RL9-250908-2047-d90ebd0e" - } + "cluster_image": { + "RL8": "openhpc-RL8-250918-0840-930223fb", + "RL9": "openhpc-RL9-250918-0840-930223fb" + } } diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index 82c963c..22113cd 100644 --- a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -4,99 +4,106 @@ terraform { required_version = ">= 0.14" required_providers { openstack = { - source = "terraform-provider-openstack/openstack" + source = "terraform-provider-openstack/openstack" version = "~>3.0.0" } } } variable "environment_root" { - type = string - description = "Path to environment root, automatically set by activate script" + type = string + description = "Path to environment root, automatically set by activate script" } variable "cluster_name" { - type = string - description = "Name for cluster, used as prefix for resources - set by environment var in CI" + type = string + description = "Name for cluster, used as prefix for resources - set by environment var in CI" } variable "os_version" { - type = string + type = string description = "RL8 or RL9" - default = "RL9" + default = "RL9" } variable "cluster_image" { - description = "single image for all cluster nodes, keyed by os_version - a convenience for CI" - type = map(string) + description = "single image for all cluster nodes, keyed by os_version - a convenience for CI" + type = map(string) } +# tflint-ignore: terraform_typed_variables variable "cluster_networks" {} +# tflint-ignore: terraform_typed_variables variable "vnic_types" { - default = {} + default = {} } -variable "state_volume_type"{ - default = null +# tflint-ignore: terraform_typed_variables +variable "state_volume_type" { + default = null } -variable "home_volume_type"{ - default = null +# tflint-ignore: terraform_typed_variables +variable "home_volume_type" { + default = null } +# tflint-ignore: terraform_typed_variables variable "control_node_flavor" {} +# tflint-ignore: terraform_typed_variables variable "other_node_flavor" {} +# tflint-ignore: terraform_typed_variables variable "volume_backed_instances" { - default = false + default = false } data "openstack_images_image_v2" "cluster" { - name = var.cluster_image[var.os_version] - most_recent = true + name = var.cluster_image[var.os_version] + most_recent = true } module "cluster" { - source = "../../site/tofu/" + source = "../../site/tofu/" - cluster_name = var.cluster_name - cluster_networks = var.cluster_networks - vnic_types = var.vnic_types - key_pair = "slurm-app-ci" - cluster_image_id = data.openstack_images_image_v2.cluster.id - control_node_flavor = var.control_node_flavor + cluster_name = var.cluster_name + cluster_networks = var.cluster_networks + vnic_types = var.vnic_types + key_pair = "slurm-app-ci" + cluster_image_id = data.openstack_images_image_v2.cluster.id + control_node_flavor = var.control_node_flavor + login = { login = { - login = { - nodes = ["login-0"] - flavor = var.other_node_flavor - } + nodes = ["login-0"] + flavor = var.other_node_flavor } - compute = { - standard = { # NB: can't call this default! 
- nodes = ["compute-0", "compute-1"] - flavor = var.other_node_flavor - compute_init_enable = ["compute", "chrony", "etc_hosts", "nfs", "basic_users", "eessi", "tuned", "cacerts", "nhc"] - ignore_image_changes = true - } - # Normally-empty partition for testing: - extra = { - nodes = [] - #nodes = ["extra-0", "extra-1"] - flavor = var.other_node_flavor - } + } + compute = { + standard = { # NB: can't call this default! + nodes = ["compute-0", "compute-1"] + flavor = var.other_node_flavor + compute_init_enable = ["compute", "chrony", "etc_hosts", "nfs", "basic_users", "eessi", "tuned", "cacerts", "nhc"] + ignore_image_changes = true + } + # Normally-empty partition for testing: + extra = { + nodes = [] + #nodes = ["extra-0", "extra-1"] + flavor = var.other_node_flavor } + } - volume_backed_instances = var.volume_backed_instances + volume_backed_instances = var.volume_backed_instances - environment_root = var.environment_root - # Can reduce volume size a lot for short-lived CI clusters: - state_volume_size = 10 - home_volume_size = 20 + environment_root = var.environment_root + # Can reduce volume size a lot for short-lived CI clusters: + state_volume_size = 10 + home_volume_size = 20 - state_volume_type = var.state_volume_type - home_volume_type = var.home_volume_type + state_volume_type = var.state_volume_type + home_volume_type = var.home_volume_type } diff --git a/environments/README.md b/environments/README.md index b6e2cf9..94a66e1 100644 --- a/environments/README.md +++ b/environments/README.md @@ -32,15 +32,14 @@ for usage instructions for that component. ### common Shared configuration for all environments. This is not -intended to be used as a standalone environment, hence the README does *not* detail -how to provision the infrastructure. This environment should not be edited, except as part of upstreaming new features or bug fixes. +intended to be used as a standalone environment, hence the readme does _not_ detail +how to provision the infrastructure. This environment should not be edited, except as part of upstreaming new features or bugfixes. ## site Provides the base configuration for all subsequent `cookiecutter` created environments, including OpenTofu configurations for infrastructure. In general, most local customisations should be made by adding to this environment. - ## Defining an environment To define an environment using cookiecutter: @@ -53,7 +52,7 @@ Once you have answered all questions, a new environment directory will be created. The directory will be named according to the answer you gave for `environment`. -Follow the README in the new directory to perform initial configuration. +Follow the readme in the new directory to perform initial configuration. ## Activating environments @@ -69,13 +68,12 @@ hosts from the associated group in the inventory. A pattern we use is to name th ansible inventory `group` after the name of the `role` that configures it. The playbook that runs this role targets hosts in that group. The `common` environment typically defines all groups as the empty group. You must explicly opt-in and add hosts to these these groups -to configure that service. For example, if you don't want to deploy and configure grafana, +to configure that service. For example, if you don't want to deploy and configure grafana, you simply do not add any hosts to the `grafana` group in the inventory. 
This allows us to -have a shared ansible code base as we can define playbooks to configure all things, +have a shared ansible codebase as we can define playbooks to configure all things, but these playbooks end up not being run if no host is in the associated group. -See also: - - `common/inventory/groups` for a list of all groups. +See also: - `common/inventory/groups` for a list of all groups. ## Overriding configuration diff --git a/environments/common/files/filebeat/filebeat.yml b/environments/common/files/filebeat/filebeat.yml index 0f7186b..7f19aa0 100644 --- a/environments/common/files/filebeat/filebeat.yml +++ b/environments/common/files/filebeat/filebeat.yml @@ -1,3 +1,4 @@ +--- filebeat.config: modules: path: ${path.config}/modules.d/*.yml diff --git a/environments/common/inventory/group_vars/all/alertmanager.yml b/environments/common/inventory/group_vars/all/alertmanager.yml index c677aaa..8f5ef0f 100644 --- a/environments/common/inventory/group_vars/all/alertmanager.yml +++ b/environments/common/inventory/group_vars/all/alertmanager.yml @@ -1,5 +1,5 @@ - -alertmanager_port: '9093' # defined here as required for prometheus +--- +alertmanager_port: "9093" # defined here as required for prometheus alertmanager_slack_receiver_name: slack-receiver alertmanager_slack_receiver_send_resolved: true diff --git a/environments/common/inventory/group_vars/all/ansible_init.yml b/environments/common/inventory/group_vars/all/ansible_init.yml index df4060f..0a5198b 100644 --- a/environments/common/inventory/group_vars/all/ansible_init.yml +++ b/environments/common/inventory/group_vars/all/ansible_init.yml @@ -1,3 +1,4 @@ +--- ansible_init_wait: 300 # seconds ansible_init_pip_packages: diff --git a/environments/common/inventory/group_vars/all/basic_users.yml b/environments/common/inventory/group_vars/all/basic_users.yml index d94d129..8d5f86a 100644 --- a/environments/common/inventory/group_vars/all/basic_users.yml +++ b/environments/common/inventory/group_vars/all/basic_users.yml @@ -3,7 +3,6 @@ # See ansible/roles/basic_users/README.md for variable definitions. 
basic_users_users: [] - # The following are defined for the purpose of compute-init basic_users_homedir_server: "{{ groups['control'] | first }}" -basic_users_homedir_client: "{{ groups['login'] | first }}" \ No newline at end of file +basic_users_homedir_client: "{{ groups['login'] | first }}" diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 8d629ff..027e407 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -5,7 +5,7 @@ appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it -#appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform +# appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure appliances_pulp_url: https://ark.stackhpc.com @@ -29,53 +29,53 @@ alertmanager_address: "{{ hostvars[groups['alertmanager'].0].api_address }}" appliances_local_users_ansible_user_name: "{{ ansible_ssh_user | default(ansible_user) }}" appliances_local_users_podman_uid: 1001 # UID for podman user - normally next UID after default user appliances_local_users_podman: # also used in environments/common/inventory/group_vars/all/podman.yml:podman_users - name: podman - comment: Used for running all containers - # Would like to set subuid so that we that we know what will appear in /etc/subuid - # See: https://github.com/ansible/ansible/issues/68199 - home: /var/lib/podman - uid: "{{ appliances_local_users_podman_uid }}" + name: podman + comment: Used for running all containers + # Would like to set subuid so that we that we know what will appear in /etc/subuid + # See: https://github.com/ansible/ansible/issues/68199 + home: /var/lib/podman + uid: "{{ appliances_local_users_podman_uid }}" appliances_local_users_default: - - user: - name: "{{ appliances_local_users_ansible_user_name }}" - home: /var/lib/{{ appliances_local_users_ansible_user_name }} - move_home: true - local: true + - user: + name: "{{ appliances_local_users_ansible_user_name }}" + home: /var/lib/{{ appliances_local_users_ansible_user_name }} + move_home: true + local: true - - user: "{{ appliances_local_users_podman }}" - enable: "{{ 'podman' in group_names }}" + - user: "{{ appliances_local_users_podman }}" + enable: "{{ 'podman' in group_names }}" - - user: - name: slurm - comment: SLURM resource manager - home: /etc/slurm - shell: /sbin/nologin - uid: 202 - system: true + - user: + name: slurm + comment: SLURM resource manager + home: /etc/slurm + shell: /sbin/nologin + uid: 202 + system: true - - group: - name: prometheus - gid: 976 - user: - name: prometheus - uid: 981 - home: "{{ prometheus_db_dir }}" - shell: /usr/sbin/nologin - system: true - enable: "{{ 'prometheus' in group_names }}" + - group: + name: prometheus + gid: 976 + user: + name: prometheus + uid: 981 + home: "{{ prometheus_db_dir }}" + shell: /usr/sbin/nologin + system: true + enable: "{{ 'prometheus' in group_names }}" - - 
group: - name: grafana - gid: 979 - user: - name: grafana - comment: grafana user - uid: 984 - home: /usr/share/grafana - shell: /sbin/nologin - system: true - enable: "{{ 'grafana' in group_names }}" + - group: + name: grafana + gid: 979 + user: + name: grafana + comment: grafana user + uid: 984 + home: /usr/share/grafana + shell: /sbin/nologin + system: true + enable: "{{ 'grafana' in group_names }}" # Overide this to add extra users whilst keeping the defaults. appliances_local_users_extra: [] # see format of appliances_local_users_default above @@ -84,18 +84,17 @@ appliances_local_users: "{{ (appliances_local_users_default + appliances_local_u ################## bootstrap: extra package installs ###################################### appliances_extra_packages_default: - - htop - - nano - - screen - - tmux - - wget - - bind-utils - - net-tools - - postfix - - git - - "{{ 'python36' if ansible_distribution_version == '8.9' else 'python312' }}" - - s-nail + - htop + - nano + - screen + - tmux + - wget + - bind-utils + - net-tools + - postfix + - git + - "{{ 'python36' if ansible_distribution_version == '8.9' else 'python312' }}" + - s-nail appliances_extra_packages_other: [] - appliances_extra_packages: "{{ (appliances_extra_packages_default + appliances_extra_packages_other) | select | list }}" diff --git a/environments/common/inventory/group_vars/all/filebeat.yml b/environments/common/inventory/group_vars/all/filebeat.yml index d268af1..4b91726 100644 --- a/environments/common/inventory/group_vars/all/filebeat.yml +++ b/environments/common/inventory/group_vars/all/filebeat.yml @@ -4,4 +4,4 @@ filebeat_config_path: "{{ appliances_repository_root }}/environments/common/files/filebeat/filebeat.yml" # User that runs the filebeat container -filebeat_podman_user: podman \ No newline at end of file +filebeat_podman_user: podman diff --git a/environments/common/inventory/group_vars/all/firewalld.yml b/environments/common/inventory/group_vars/all/firewalld.yml index 569428e..498ec62 100644 --- a/environments/common/inventory/group_vars/all/firewalld.yml +++ b/environments/common/inventory/group_vars/all/firewalld.yml @@ -1,3 +1,4 @@ +--- # See ansible/roles/firewalld/README.md # for variable definitions. @@ -9,14 +10,14 @@ firewalld_configs_default: # name: An arbitrary name or description # group: An ansible group name - this rule is applied if the fail2ban node is in this group # rule: A dict of parameters passed to the `ansible.posix.firewalld` module. - # FaiBy default we rely on openstack security groups so + # FaiBy default we rely on openstack security groups so - name: Make firewalld permissive group: openhpc rule: zone: public state: enabled target: ACCEPT - permanent: yes + permanent: true firewalld_configs_extra: [] # list of dicts with parameters as for firewalld_configs_default diff --git a/environments/common/inventory/group_vars/all/freeipa_server.yml b/environments/common/inventory/group_vars/all/freeipa_server.yml index 7f0fee7..64a1f7a 100644 --- a/environments/common/inventory/group_vars/all/freeipa_server.yml +++ b/environments/common/inventory/group_vars/all/freeipa_server.yml @@ -1,3 +1,4 @@ +--- # See ansible/roles/freeipa/README.md # These vars are only used when freeipa_server is enabled. 
They are not required when enabling only freeipa_client freeipa_realm: "{{ openhpc_cluster_name | upper }}.{{ cluster_domain_suffix | upper }}" diff --git a/environments/common/inventory/group_vars/all/grafana.yml b/environments/common/inventory/group_vars/all/grafana.yml index b03d16f..b428849 100644 --- a/environments/common/inventory/group_vars/all/grafana.yml +++ b/environments/common/inventory/group_vars/all/grafana.yml @@ -2,7 +2,7 @@ # See: https://github.com/cloudalchemy/ansible-grafana # for variable definitions. -grafana_version: '10.4.18' +grafana_version: "10.4.18" # need to copy some role defaults here so we can use in inventory: grafana_port: 3000 @@ -53,9 +53,9 @@ grafana_dashboards_default: grafana_dashboards: "{{ grafana_dashboards_default + (openondemand_dashboard if groups.get('openondemand') else []) }}" grafana_security: - admin_user: grafana - admin_password: "{{ vault_grafana_admin_password }}" - allow_embedding: true + admin_user: grafana + admin_password: "{{ vault_grafana_admin_password }}" + allow_embedding: true grafana_datasources: - name: prometheus @@ -77,7 +77,7 @@ grafana_datasources: timeField: "@timestamp" # Have to set flavor and version, but ansible/roles/opensearch/templates/opensearch.yml.j2 fakes version for filebeat # so need to set to fake version here: - version: '7.10.2' + version: "7.10.2" flavor: elasticsearch editable: true # readOnly: false @@ -99,7 +99,7 @@ grafana_server: # appliance specific: serve_from_sub_path: "{{ grafana_serve_from_sub_path }}" - +# yamllint disable-line rule:line-length grafana_auth_anonymous: false # Enable anonymous View-only login - see implications: https://grafana.com/docs/grafana/latest/administration/security/#implications-of-enabling-anonymous-access-to-dashboards _grafana_auth_anon_cfg: diff --git a/environments/common/inventory/group_vars/all/k3s.yml b/environments/common/inventory/group_vars/all/k3s.yml index a7ba0a0..aa7172f 100644 --- a/environments/common/inventory/group_vars/all/k3s.yml +++ b/environments/common/inventory/group_vars/all/k3s.yml @@ -1 +1,2 @@ +--- k3s_bootstrap_token: "{{ hostvars[groups['k3s_server'] | first].k3s_bootstrap_token | default('') }}" diff --git a/environments/common/inventory/group_vars/all/manila.yml b/environments/common/inventory/group_vars/all/manila.yml index cb015f9..9bc6941 100644 --- a/environments/common/inventory/group_vars/all/manila.yml +++ b/environments/common/inventory/group_vars/all/manila.yml @@ -1,3 +1,4 @@ +--- # Default configuration for manila file shares, see # https://github.com/stackhpc/ansible-role-os-manila-mount # for all variable definitions, and override in your environment. 
diff --git a/environments/common/inventory/group_vars/all/mysql.yml b/environments/common/inventory/group_vars/all/mysql.yml index d5245fe..2c320af 100644 --- a/environments/common/inventory/group_vars/all/mysql.yml +++ b/environments/common/inventory/group_vars/all/mysql.yml @@ -17,7 +17,7 @@ mysql_datadir: "{{ appliances_state_dir | default('/var/lib') }}/mysql" mysql_databases: - name: slurm_acct_db - config_file: '' + config_file: "" login_user: root login_password: "{{ mysql_root_password }}" login_host: "{{ mysql_host }}" diff --git a/environments/common/inventory/group_vars/all/nfs.yml b/environments/common/inventory/group_vars/all/nfs.yml index 398bde7..05ecd89 100644 --- a/environments/common/inventory/group_vars/all/nfs.yml +++ b/environments/common/inventory/group_vars/all/nfs.yml @@ -14,22 +14,22 @@ nfs_export_clients: "{{ _nfs_node_ips }}" nfs_configuration_home_volume: # volume-backed home directories - comment: Export /exports/home from Slurm control node as /home nfs_enable: - server: "{{ inventory_hostname in groups['control'] }}" - # Don't mount share on control node: - clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}" + server: "{{ inventory_hostname in groups['control'] }}" + # Don't mount share on control node: + clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}" nfs_server: "{{ nfs_server_default }}" nfs_export: "/exports/home" # assumes default site TF is being used nfs_client_mnt_point: "/home" # prevent tunnelling and setuid binaries: # NB: this is stackhpc.nfs role defaults but are set here to prevent being # accidently overriden via default options - nfs_export_options: 'rw,secure,root_squash' + nfs_export_options: "rw,secure,root_squash" nfs_configuration_compute_nodes: # cluster configuration for compute_init/slurm-controlled rebuild - comment: Export /exports/cluster from Slurm control node nfs_enable: - server: "{{ inventory_hostname in groups['control'] }}" - clients: false + server: "{{ inventory_hostname in groups['control'] }}" + clients: false nfs_export: "/exports/cluster" nfs_configurations_extra: [] # site-specific nfs shares diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index 0fed1c9..bf212cb 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -9,7 +9,7 @@ openhpc_enable: database: "{{ inventory_hostname in groups['control'] }}" runtime: true openhpc_slurm_service_enabled: true -openhpc_slurm_accounting_storage_type: 'accounting_storage/slurmdbd' +openhpc_slurm_accounting_storage_type: "accounting_storage/slurmdbd" openhpc_slurmdbd_mysql_database: slurm_acct_db openhpc_slurmdbd_mysql_password: "{{ vault_mysql_slurm_password }}" openhpc_slurmdbd_mysql_username: slurm @@ -18,17 +18,18 @@ openhpc_slurmdbd_host: "{{ openhpc_slurm_control_host }}" openhpc_rebuild_partition: # not a role var - could actually add more indirection here for things we're expecting to be modified, e.g. 
groups and maxtime name: rebuild nodegroups: "{{ cluster_compute_groups | default([]) }}" - default: NO + default: false maxtime: 30 partition_params: PriorityJobFactor: 65533 - Hidden: YES - RootOnly: YES - DisableRootJobs: NO - PreemptMode: 'OFF' + Hidden: true + RootOnly: true + DisableRootJobs: false + PreemptMode: "OFF" OverSubscribe: EXCLUSIVE openhpc_nodegroups: "{{ cluster_compute_groups | map('community.general.dict_kv', 'name') }}" # create nodegroup for each compute group openhpc_user_partitions: "{{ openhpc_nodegroups }}" # create partition for each nodegroup (actually role default) - this is what we'd expect to be changed +# yamllint disable-line rule:line-length openhpc_partitions: "{{ openhpc_user_partitions + ([openhpc_rebuild_partition] if groups['rebuild'] | length > 0 else []) }}" # auto-create rebuild partition if reqd. openhpc_packages_default: # system packages diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index bd8ba76..af7554a 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -1,11 +1,11 @@ --- -# See: ansible/roles/openondemand/README.md +# See: ansible/roles/openondemand/README.md # for variable definitions. # NB: Variables prefixed ood_ are all from https://github.com/OSC/ood-ansible -ondemand_package_version: '3.1.10' # used in ansible/cleanup.yml +ondemand_package_version: "3.1.10" # used in ansible/cleanup.yml ondemand_package: ondemand-"{{ ondemand_package_version }}" # osc.ood role var controlling installed package openondemand_servername: "{{ hostvars[groups['openondemand'].0].ansible_host if groups['openondemand'] else '' }}" @@ -20,7 +20,7 @@ openondemand_codeserver_partition: "{{ openhpc_partitions[0]['name'] }}" # Regex defining hosts which openondemand can proxy; the default regex is compute nodes (for apps) and grafana host, # e.g. if the group `compute` has hosts `compute-{0,1,2,..}` this will be '(compute-\d+)|(control)'. -# The autogenerated regex may need overriding if compute node names do not contain numbers in a consistent position +# The autogenerated regex may need overriding if compute node names do not contain numbers in a consistent position # or include regex special characters. 
openondemand_host_regex: "{{ (groups['compute'] + groups['grafana']) | to_ood_regex }}" @@ -146,6 +146,7 @@ openondemand_apps_desktop_default: - <%= "--nodelist=#{node}" %> openondemand_apps_desktop: "{{ {'bc_desktop':openondemand_apps_desktop_default} if openondemand_desktop_partition | default(none) else {} }}" +# yamllint disable-line rule:line-length # See https://osc.github.io/ood-documentation/latest/app-development/tutorials-interactive-apps/add-jupyter.html#app-development-tutorials-interactive-apps-add-jupyter openondemand_apps_jupyter_default: title: Jupyter Notebook @@ -188,7 +189,7 @@ openondemand_apps_rstudio_default: description: Request a RStudio server cluster: slurm attributes: - bc_queue: + bc_queue: value: "{{ openondemand_rstudio_partition | default(none) }}" rstudio_module: label: RStudio module @@ -197,10 +198,11 @@ openondemand_apps_rstudio_default: help: Choose your RStudio module widget: select options: - - ["RStudio v{{ openondemand_rstudio_version }}", "rstudio-server/{{ openondemand_rstudio_version }}}"] + - "RStudio v{{ openondemand_rstudio_version }}" + - "rstudio-server/{{ openondemand_rstudio_version }}}" extra_modules_script: label: Extra modules script - help: If you'd like to load additional modules alongside RStudio-Server, put the 'module load ...' commands into a text file (one 'module load...' per line) and specify its path here + help: If you'd like to load additional modules alongside RStudio-Server, put the 'module load ...' commands into a text file (one 'module load...' per line) and specify its path here # noqa: yaml[line-length] widget: text_field required: false cores: @@ -217,7 +219,7 @@ openondemand_apps_rstudio_default: label: RAM in GB help: How much RAM to reserve for your session. NB Ensure this is within the maximum allowed by your chosen partition min: 4 - max: 700 + max: 700 step: 1 value: 4 cachable: true @@ -286,7 +288,8 @@ openondemand_apps_matlab_default: help: Choose your MATLAB module widget: select options: - - ["MATLAB v{{ openondemand_matlab_version }}", "matlab/{{ openondemand_matlab_version }}"] + - "MATLAB v{{ openondemand_matlab_version }}" + - "matlab/{{ openondemand_matlab_version }}" cores: label: Number of CPU cores help: How many CPU cores to reserve for your session. NB Ensure this is within the maximum allowed by your chosen partition. @@ -301,7 +304,7 @@ openondemand_apps_matlab_default: label: RAM in GB help: How much RAM to reserve for your session. NB Ensure this is within the maximum allowed by your chosen partition min: 4 - max: 700 + max: 700 step: 1 value: 4 cachable: true @@ -346,7 +349,8 @@ openondemand_apps_codeserver_default: help: Choose your Code Server module widget: select options: - - ["Code Server v{{ openondemand_code_server_version}}", "code-server/{{ openondemand_code_server_version }}"] + - "Code Server v{{ openondemand_code_server_version}}" + - "code-server/{{ openondemand_code_server_version }}" bc_queue: value: "{{ openondemand_codeserver_partition | default(none) }}" cores: @@ -363,7 +367,7 @@ openondemand_apps_codeserver_default: label: RAM in GB help: How much RAM to reserve for your session. 
NB Ensure this is within the maximum allowed by your chosen partition min: 4 - max: 700 + max: 700 step: 1 value: 4 cachable: true @@ -394,8 +398,8 @@ openondemand_apps_codeserver_default: openondemand_apps_codeserver: "{{ {'codeserver':openondemand_apps_codeserver_default} if openondemand_codeserver_partition | default(none) else {} }}" # osc.ood:ood_apps - see https://github.com/OSC/ood-ansible#ood_apps -openondemand_dashboard_support_url: '' -openondemand_dashboard_docs_url: '' +openondemand_dashboard_support_url: "" +openondemand_dashboard_docs_url: "" openondemand_apps: files: env: @@ -430,11 +434,11 @@ openondemand_scrape_configs: scrape_timeout: 20s scrape_interval: 2m static_configs: - - targets: - - "{{ openondemand_address }}:9301" - labels: - environment: "{{ appliances_environment_name }}" - service: "openondemand" + - targets: + - "{{ openondemand_address }}:9301" + labels: + environment: "{{ appliances_environment_name }}" + service: "openondemand" openondemand_dashboard: - dashboard_id: 13465 @@ -443,8 +447,12 @@ openondemand_dashboard: replacement: prometheus revision_id: 1 -_opeonondemand_unset_auth: ' RequestHeader unset Authorization' +_opeonondemand_unset_auth: " RequestHeader unset Authorization" # Fix grafana proxying for basic auth if anonymous grafana access enabled: +# yamllint disable-line rule:line-length openondemand_node_proxy_directives: "{{ _opeonondemand_unset_auth if (openondemand_auth == 'basic_pam' and 'openondemand_host_regex' and groups['grafana'] | length > 0 and hostvars[ groups['grafana'] | first]._grafana_auth_is_anonymous) else '' }}" -# Reason: OOD server forwards headers to proxied servers, so when if using basic auth Grafana gets passed the Open Ondemand user. This probably isn't a Grafana user so it throws an auth error. If anonymous access is enabled we can work around this by not forwarding auth header. +# Reason: OOD server forwards headers to proxied servers, so when if using basic auth +# Grafana gets passed the Open Ondemand user. +# This probably isn't a Grafana user so it throws an auth error. +# If anonymous access is enabled we can work around this by not forwarding auth header. 
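(Editorial aside, not part of the patch: the comment above explains why the `Authorization` header is only stripped when Grafana allows anonymous access. As a sketch, a site that handles authentication elsewhere could set the directive unconditionally instead of relying on that detection — the variable name is taken from the hunk above, but the override location and value are assumptions.)

```yaml
# environments/site/inventory/group_vars/all/openondemand.yml (sketch)
# Always strip the Authorization header before the OOD server proxies to
# Grafana / compute nodes, instead of depending on anonymous-access detection:
openondemand_node_proxy_directives: " RequestHeader unset Authorization"
```

This trades the automatic conditional in the default expression for an explicit, always-on setting.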
diff --git a/environments/common/inventory/group_vars/all/podman.yml b/environments/common/inventory/group_vars/all/podman.yml index 8ca8eb1..a6d38f5 100644 --- a/environments/common/inventory/group_vars/all/podman.yml +++ b/environments/common/inventory/group_vars/all/podman.yml @@ -1 +1,2 @@ +--- podman_users: "{{ [appliances_local_users_podman] }}" # user to use for podman diff --git a/environments/common/inventory/group_vars/all/prometheus.yml b/environments/common/inventory/group_vars/all/prometheus.yml index f4587e6..6b33ce8 100644 --- a/environments/common/inventory/group_vars/all/prometheus.yml +++ b/environments/common/inventory/group_vars/all/prometheus.yml @@ -11,8 +11,8 @@ prometheus_db_dir: "{{ appliances_state_dir | default('/var/lib') }}/prometheus" prometheus_alertmanager_config_default: - static_configs: - - targets: - - "{{ alertmanager_address }}:{{ alertmanager_port }}" + - targets: + - "{{ alertmanager_address }}:{{ alertmanager_port }}" basic_auth: username: alertmanager # cloudalchemy.prometheus/preflight checks this config so it must be @@ -44,37 +44,38 @@ prometheus_targets: other: "{{ groups.get('node_exporter', []) | difference(groups['openhpc']) | prometheus_node_exporter_targets(hostvars, 'prometheus_env', 'other') }}" prometheus_scrape_configs_default: -- job_name: "prometheus" - metrics_path: "/metrics" - static_configs: - - targets: - - "{{ prometheus_address }}:9090" -- job_name: "grafana" - static_configs: - - targets: - - "{{ grafana_api_address }}:{{ grafana_port }}" -- job_name: "node" - file_sd_configs: - - files: - - /etc/prometheus/file_sd/control.yml - - /etc/prometheus/file_sd/login.yml - - /etc/prometheus/file_sd/compute.yml - - /etc/prometheus/file_sd/other.yml - relabel_configs: - # strip off port - - source_labels: ['__address__'] - separator: ':' - regex: '(.*):.*' - target_label: 'instance' - replacement: '${1}' - scrape_interval: 30s - scrape_timeout: 20s + - job_name: "prometheus" + metrics_path: "/metrics" + static_configs: + - targets: + - "{{ prometheus_address }}:9090" + - job_name: "grafana" + static_configs: + - targets: + - "{{ grafana_api_address }}:{{ grafana_port }}" + - job_name: "node" + file_sd_configs: + - files: + - /etc/prometheus/file_sd/control.yml + - /etc/prometheus/file_sd/login.yml + - /etc/prometheus/file_sd/compute.yml + - /etc/prometheus/file_sd/other.yml + relabel_configs: + # strip off port + - source_labels: + - '__address__' + separator: ':' + regex: '(.*):.*' + target_label: 'instance' + replacement: '${1}' + scrape_interval: 30s + scrape_timeout: 20s -- job_name: "slurm_exporter" - scrape_interval: 30s - scrape_timeout: 30s - static_configs: - - targets: - - "{{ openhpc_slurm_control_host }}:{{ slurm_exporter_port }}" + - job_name: "slurm_exporter" + scrape_interval: 30s + scrape_timeout: 30s + static_configs: + - targets: + - "{{ openhpc_slurm_control_host }}:{{ slurm_exporter_port }}" prometheus_scrape_configs: "{{ prometheus_scrape_configs_default + (openondemand_scrape_configs if groups['openondemand'] | count > 0 else []) }}" diff --git a/environments/common/inventory/group_vars/all/proxy.yml b/environments/common/inventory/group_vars/all/proxy.yml index 266ae45..1a0bdb4 100644 --- a/environments/common/inventory/group_vars/all/proxy.yml +++ b/environments/common/inventory/group_vars/all/proxy.yml @@ -1,3 +1,4 @@ +--- # If squid group is non-empty, default the proxy address to the hostname of # the first squid host, port 3128. 
Else empty string to avoid breaking hostvars diff --git a/environments/common/inventory/group_vars/all/pulp.yml b/environments/common/inventory/group_vars/all/pulp.yml index 22bb832..492a84a 100644 --- a/environments/common/inventory/group_vars/all/pulp.yml +++ b/environments/common/inventory/group_vars/all/pulp.yml @@ -1,3 +1,4 @@ +--- pulp_site_port: 8080 # If using Ark directly (no local Pulp server), override the following with Ark creds diff --git a/environments/common/inventory/group_vars/all/slurm_exporter.yml b/environments/common/inventory/group_vars/all/slurm_exporter.yml index 4902310..072c436 100644 --- a/environments/common/inventory/group_vars/all/slurm_exporter.yml +++ b/environments/common/inventory/group_vars/all/slurm_exporter.yml @@ -1,3 +1,4 @@ +--- slurm_exporter_port: 9341 # as defined by [1] and implemented in [2] -#[1]: https://github.com/prometheus/prometheus/wiki/Default-port-allocations -#[2]: https://github.com/stackhpc/prometheus-slurm-exporter/blob/master/lib/systemd/prometheus-slurm-exporter.service +# [1]: https://github.com/prometheus/prometheus/wiki/Default-port-allocations +# [2]: https://github.com/stackhpc/prometheus-slurm-exporter/blob/master/lib/systemd/prometheus-slurm-exporter.service diff --git a/environments/common/inventory/group_vars/all/squid.yml b/environments/common/inventory/group_vars/all/squid.yml index 5955729..4218c5c 100644 --- a/environments/common/inventory/group_vars/all/squid.yml +++ b/environments/common/inventory/group_vars/all/squid.yml @@ -1 +1,2 @@ +--- squid_http_port: 3128 # defined here for proxy role diff --git a/environments/common/inventory/group_vars/all/sshd.yaml b/environments/common/inventory/group_vars/all/sshd.yaml index 5d4ed22..cf22b12 100644 --- a/environments/common/inventory/group_vars/all/sshd.yaml +++ b/environments/common/inventory/group_vars/all/sshd.yaml @@ -1 +1,2 @@ +--- sshd_password_authentication: "{{ sssd_install_ldap | default(false) | bool }}" diff --git a/environments/common/inventory/group_vars/all/systemd.yml b/environments/common/inventory/group_vars/all/systemd.yml index ae72a78..bc267d5 100644 --- a/environments/common/inventory/group_vars/all/systemd.yml +++ b/environments/common/inventory/group_vars/all/systemd.yml @@ -1,3 +1,4 @@ +--- _systemd_requiresmount_statedir: | {% if appliances_state_dir is defined %} [Unit] diff --git a/environments/common/inventory/group_vars/all/update.yml b/environments/common/inventory/group_vars/all/update.yml index 715d418..a0b10ce 100644 --- a/environments/common/inventory/group_vars/all/update.yml +++ b/environments/common/inventory/group_vars/all/update.yml @@ -1,12 +1,13 @@ --- update_enable: false -# These variables define the packages updates and are passed to ansible's yum module parameters with the same names: https://docs.ansible.com/ansible/latest/collections/ansible/builtin/yum_module.html -update_name: '*' +# These variables define the packages updates and are passed to ansible's yum module parameters +# with the same names: https://docs.ansible.com/ansible/latest/collections/ansible/builtin/yum_module.html +update_name: "*" update_state: latest update_exclude: - grafana - apptainer # see https://github.com/stackhpc/ansible-slurm-appliance/pull/245 update_disablerepo: omit # Log changes during update here on localhost: -update_log_path: "{{ appliances_environment_root }}/logs/{{ inventory_hostname }}-updates.log" +update_log_path: "{{ appliances_environment_root }}/logs/{{ inventory_hostname }}-updates.log" diff --git 
a/environments/site/inventory/group_vars/all/grafana.yml b/environments/site/inventory/group_vars/all/grafana.yml index 521616a..3c49fd5 100644 --- a/environments/site/inventory/group_vars/all/grafana.yml +++ b/environments/site/inventory/group_vars/all/grafana.yml @@ -1 +1,2 @@ -grafana_auth_anonymous: true \ No newline at end of file +--- +grafana_auth_anonymous: true diff --git a/environments/site/inventory/group_vars/all/vault_alertmanager.yml b/environments/site/inventory/group_vars/all/vault_alertmanager.yml index 4375ed7..02abb00 100644 --- a/environments/site/inventory/group_vars/all/vault_alertmanager.yml +++ b/environments/site/inventory/group_vars/all/vault_alertmanager.yml @@ -1,3 +1,3 @@ # Add a bot token here THEN VAULT-ENCRYPT this file! -#vault_alertmanager_slack_integration_app_creds: '' +# vault_alertmanager_slack_integration_app_creds: '' diff --git a/environments/site/tofu/additional.tf b/environments/site/tofu/additional.tf index 872f957..1079ef9 100644 --- a/environments/site/tofu/additional.tf +++ b/environments/site/tofu/additional.tf @@ -4,41 +4,41 @@ module "additional" { for_each = var.additional_nodegroups # must be set for group: - nodes = each.value.nodes + nodes = each.value.nodes flavor = each.value.flavor # always taken from top-level value: - cluster_name = var.cluster_name + cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - key_pair = var.key_pair - environment_root = var.environment_root - config_drive = var.config_drive - + key_pair = var.key_pair + environment_root = var.environment_root + config_drive = var.config_drive + # can be set for group, defaults to top-level value: - image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + image_id = lookup(each.value, "image_id", var.cluster_image_id) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) - root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) - root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) - gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) - nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) - + root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) + gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) + nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) + # optionally set for group: networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) # here null means "use module var default" - extra_volumes = lookup(each.value, "extra_volumes", null) - fip_addresses = lookup(each.value, "fip_addresses", null) - fip_network = lookup(each.value, "fip_network", null) - match_ironic_node = lookup(each.value, "match_ironic_node", null) - availability_zone = lookup(each.value, "availability_zone", null) - ip_addresses = lookup(each.value, "ip_addresses", null) - security_group_ids = lookup(each.value, "security_group_ids", [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id]) - additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) + extra_volumes = lookup(each.value, "extra_volumes", null) + fip_addresses = lookup(each.value, "fip_addresses", 
null) + fip_network = lookup(each.value, "fip_network", null) + match_ironic_node = lookup(each.value, "match_ironic_node", null) + availability_zone = lookup(each.value, "availability_zone", null) + ip_addresses = lookup(each.value, "ip_addresses", null) + security_group_ids = lookup(each.value, "security_group_ids", [for o in data.openstack_networking_secgroup_v2.nonlogin : o.id]) + additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) additional_cloud_config_vars = lookup(each.value, "additional_cloud_config_vars", var.additional_cloud_config_vars) - server_group_id = lookup(each.value, "server_group_id", null) + server_group_id = lookup(each.value, "server_group_id", null) # can't be set for additional nodes - compute_init_enable = [] + compute_init_enable = [] ignore_image_changes = false # computed diff --git a/environments/site/tofu/baremetal-node-list.py b/environments/site/tofu/baremetal-node-list.py index 14bc3ce..c1747ec 100755 --- a/environments/site/tofu/baremetal-node-list.py +++ b/environments/site/tofu/baremetal-node-list.py @@ -1,32 +1,34 @@ #!/usr/bin/env python -""" opentofu external data program to list baremetal nodes +# pylint: disable=invalid-name +"""opentofu external data program to list baremetal nodes - Example usage: +Example usage: - data "external" "example" { - program = [this_file] - } + data "external" "example" { + program = [this_file] + } - The external data resource's result attribute then contains a mapping of - Ironic node names to their UUIDs. +The external data resource's result attribute then contains a mapping of +Ironic node names to their UUIDs. - An empty list is returned if: - - There are no baremetal nodes - - The listing fails for any reason, e.g. - - there is no baremetal service - - admin credentials are required and are not provided +An empty list is returned if: +- There are no baremetal nodes +- The listing fails for any reason, e.g. 
+ - there is no baremetal service + - admin credentials are required and are not provided """ -import openstack import json +import openstack # pylint: disable=import-error + nodes = [] -proxy = None +proxy = None # pylint: disable=invalid-name output = {} conn = openstack.connection.from_config() try: - proxy = getattr(conn, 'baremetal', None) -except Exception: + proxy = getattr(conn, "baremetal", None) +except Exception: # pylint: disable=broad-exception-caught pass if proxy is not None: nodes = proxy.nodes() diff --git a/environments/site/tofu/compute.tf b/environments/site/tofu/compute.tf index 35d62c6..54d3871 100644 --- a/environments/site/tofu/compute.tf +++ b/environments/site/tofu/compute.tf @@ -4,45 +4,45 @@ module "compute" { for_each = var.compute # must be set for group: - nodes = each.value.nodes + nodes = each.value.nodes flavor = each.value.flavor # always taken from top-level value: - cluster_name = var.cluster_name + cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - key_pair = var.key_pair - environment_root = var.environment_root - config_drive = var.config_drive - + key_pair = var.key_pair + environment_root = var.environment_root + config_drive = var.config_drive + # can be set for group, defaults to top-level value: - image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_types = lookup(each.value, "vnic_types", var.vnic_types) - volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) - root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) - root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) - gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) - nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) - additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) + image_id = lookup(each.value, "image_id", var.cluster_image_id) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) + root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) + gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) + nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) + additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) additional_cloud_config_vars = lookup(each.value, "additional_cloud_config_vars", var.additional_cloud_config_vars) # optionally set for group: networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) # here null means "use module var default" - extra_volumes = lookup(each.value, "extra_volumes", null) - compute_init_enable = lookup(each.value, "compute_init_enable", null) + extra_volumes = lookup(each.value, "extra_volumes", null) + compute_init_enable = lookup(each.value, "compute_init_enable", null) ignore_image_changes = lookup(each.value, "ignore_image_changes", null) - match_ironic_node = lookup(each.value, "match_ironic_node", null) - availability_zone = lookup(each.value, "availability_zone", null) - ip_addresses = lookup(each.value, "ip_addresses", null) - server_group_id = lookup(each.value, "server_group_id", null) + match_ironic_node = lookup(each.value, "match_ironic_node", null) + 
availability_zone = lookup(each.value, "availability_zone", null) + ip_addresses = lookup(each.value, "ip_addresses", null) + server_group_id = lookup(each.value, "server_group_id", null) # computed # not using openstack_compute_instance_v2.control.access_ip_v4 to avoid # updates to node metadata on deletion/recreation of the control node: - control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] - security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] - baremetal_nodes = data.external.baremetal_nodes.result - + control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] + security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin : o.id] + baremetal_nodes = data.external.baremetal_nodes.result + # input dict validation: group_name = each.key group_keys = keys(each.value) @@ -67,5 +67,5 @@ module "compute" { "additional_cloud_config_vars", "server_group_id" ] - + } diff --git a/environments/site/tofu/control.tf b/environments/site/tofu/control.tf index 19a41ae..7bfa13f 100644 --- a/environments/site/tofu/control.tf +++ b/environments/site/tofu/control.tf @@ -1,26 +1,26 @@ locals { control_volumes = concat( # convert maps to lists with zero or one entries: - [for v in data.openstack_blockstorage_volume_v3.state: v], - [for v in data.openstack_blockstorage_volume_v3.home: v] + [for v in data.openstack_blockstorage_volume_v3.state : v], + [for v in data.openstack_blockstorage_volume_v3.home : v] ) control_fqdn = templatestring( var.cluster_nodename_template, { - node = "control", - cluster_name = var.cluster_name, + node = "control", + cluster_name = var.cluster_name, cluster_domain_suffix = var.cluster_domain_suffix, - environment_name = basename(var.environment_root) + environment_name = basename(var.environment_root) } ) } resource "openstack_networking_port_v2" "control" { - for_each = {for net in var.cluster_networks: net.network => net} + for_each = { for net in var.cluster_networks : net.network => net } - name = "${var.cluster_name}-control-${each.key}" - network_id = data.openstack_networking_network_v2.cluster_net[each.key].id + name = "${var.cluster_name}-control-${each.key}" + network_id = data.openstack_networking_network_v2.cluster_net[each.key].id admin_state_up = "true" fixed_ip { @@ -29,7 +29,7 @@ resource "openstack_networking_port_v2" "control" { } no_security_groups = lookup(each.value, "no_security_groups", false) - security_group_ids = lookup(each.value, "no_security_groups", false) ? [] : [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] + security_group_ids = lookup(each.value, "no_security_groups", false) ? [] : [for o in data.openstack_networking_secgroup_v2.nonlogin : o.id] binding { vnic_type = lookup(var.vnic_types, each.key, "normal") @@ -37,37 +37,37 @@ resource "openstack_networking_port_v2" "control" { } resource "openstack_compute_instance_v2" "control" { - - name = split(".", local.control_fqdn)[0] - image_id = var.cluster_image_id + + name = split(".", local.control_fqdn)[0] + image_id = var.cluster_image_id flavor_name = var.control_node_flavor - key_pair = var.key_pair - + key_pair = var.key_pair + # root device: block_device { - uuid = var.cluster_image_id - source_type = "image" - destination_type = var.volume_backed_instances ? "volume" : "local" - volume_size = var.volume_backed_instances ? var.root_volume_size : null - volume_type = var.volume_backed_instances ? 
var.root_volume_type : null - boot_index = 0 - delete_on_termination = true + uuid = var.cluster_image_id + source_type = "image" + destination_type = var.volume_backed_instances ? "volume" : "local" + volume_size = var.volume_backed_instances ? var.root_volume_size : null + volume_type = var.volume_backed_instances ? var.root_volume_type : null + boot_index = 0 + delete_on_termination = true } dynamic "block_device" { for_each = local.control_volumes content { destination_type = "volume" - source_type = "volume" - boot_index = -1 - uuid = block_device.value.id # actually openstack_blockstorage_volume_v3 id + source_type = "volume" + boot_index = -1 + uuid = block_device.value.id # actually openstack_blockstorage_volume_v3 id } } dynamic "network" { - for_each = {for net in var.cluster_networks: net.network => net} + for_each = { for net in var.cluster_networks : net.network => net } content { - port = openstack_networking_port_v2.control[network.key].id + port = openstack_networking_port_v2.control[network.key].id access_network = network.key == var.cluster_networks[0].network } } @@ -81,8 +81,8 @@ resource "openstack_compute_instance_v2" "control" { metadata = { environment_root = var.environment_root - access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] - gateway_ip = var.gateway_ip + access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] + gateway_ip = var.gateway_ip } user_data = <<-EOF diff --git a/environments/site/tofu/data.tf b/environments/site/tofu/data.tf index 443c522..f90f2f0 100644 --- a/environments/site/tofu/data.tf +++ b/environments/site/tofu/data.tf @@ -1,5 +1,6 @@ +# tflint-ignore: terraform_required_providers data "external" "baremetal_nodes" { # returns an empty map if cannot list baremetal nodes program = ["${path.module}/baremetal-node-list.py"] - query = {} + query = {} } diff --git a/environments/site/tofu/inventory.tf b/environments/site/tofu/inventory.tf index 0e23323..fa7108b 100644 --- a/environments/site/tofu/inventory.tf +++ b/environments/site/tofu/inventory.tf @@ -1,16 +1,17 @@ +# tflint-ignore: terraform_required_providers resource "local_file" "hosts" { - content = templatefile("${path.module}/inventory.tpl", - { - "cluster_name": var.cluster_name, - "cluster_domain_suffix": var.cluster_domain_suffix - "control": openstack_compute_instance_v2.control - "control_fqdn": local.control_fqdn - "login_groups": module.login - "compute_groups": module.compute - "additional_groups": module.additional - "state_dir": var.state_dir - "cluster_home_volume": var.home_volume_provisioning != "none" - }, - ) + content = templatefile("${path.module}/inventory.tpl", + { + "cluster_name" : var.cluster_name, + "cluster_domain_suffix" : var.cluster_domain_suffix + "control" : openstack_compute_instance_v2.control + "control_fqdn" : local.control_fqdn + "login_groups" : module.login + "compute_groups" : module.compute + "additional_groups" : module.additional + "state_dir" : var.state_dir + "cluster_home_volume" : var.home_volume_provisioning != "none" + }, + ) filename = "../inventory/hosts.yml" } diff --git a/environments/site/tofu/login.tf b/environments/site/tofu/login.tf index 5ecc033..0f9bc83 100644 --- a/environments/site/tofu/login.tf +++ b/environments/site/tofu/login.tf @@ -4,48 +4,48 @@ module "login" { for_each = var.login # must be set for group: - nodes = each.value.nodes + nodes = each.value.nodes flavor = each.value.flavor # always taken from top-level value: - 
cluster_name = var.cluster_name + cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - key_pair = var.key_pair - environment_root = var.environment_root - config_drive = var.config_drive - + key_pair = var.key_pair + environment_root = var.environment_root + config_drive = var.config_drive + # can be set for group, defaults to top-level value: - image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_types = lookup(each.value, "vnic_types", var.vnic_types) - volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) - root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) - root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) - gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) - nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) - additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) + image_id = lookup(each.value, "image_id", var.cluster_image_id) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) + root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + root_volume_type = lookup(each.value, "root_volume_type", var.root_volume_type) + gateway_ip = lookup(each.value, "gateway_ip", var.gateway_ip) + nodename_template = lookup(each.value, "nodename_template", var.cluster_nodename_template) + additional_cloud_config = lookup(each.value, "additional_cloud_config", var.additional_cloud_config) additional_cloud_config_vars = lookup(each.value, "additional_cloud_config_vars", var.additional_cloud_config_vars) - + # optionally set for group: networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) # here null means "use module var default" - extra_volumes = lookup(each.value, "extra_volumes", null) - fip_addresses = lookup(each.value, "fip_addresses", null) - fip_network = lookup(each.value, "fip_network", null) + extra_volumes = lookup(each.value, "extra_volumes", null) + fip_addresses = lookup(each.value, "fip_addresses", null) + fip_network = lookup(each.value, "fip_network", null) match_ironic_node = lookup(each.value, "match_ironic_node", null) availability_zone = lookup(each.value, "availability_zone", null) - ip_addresses = lookup(each.value, "ip_addresses", null) - server_group_id = lookup(each.value, "server_group_id", null) + ip_addresses = lookup(each.value, "ip_addresses", null) + server_group_id = lookup(each.value, "server_group_id", null) # can't be set for login - compute_init_enable = [] + compute_init_enable = [] ignore_image_changes = false # computed # not using openstack_compute_instance_v2.control.access_ip_v4 to avoid # updates to node metadata on deletion/recreation of the control node: - control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] - security_group_ids = lookup(each.value, "security_group_ids", [for o in data.openstack_networking_secgroup_v2.login: o.id]) - baremetal_nodes = data.external.baremetal_nodes.result + control_address = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] + security_group_ids = lookup(each.value, "security_group_ids", [for o in data.openstack_networking_secgroup_v2.login : o.id]) + baremetal_nodes = data.external.baremetal_nodes.result # input dict 
validation: group_name = each.key @@ -72,5 +72,5 @@ module "login" { "security_group_ids", "server_group_id" ] - + } diff --git a/environments/site/tofu/main.tf b/environments/site/tofu/main.tf index dc639f7..e88ac1a 100644 --- a/environments/site/tofu/main.tf +++ b/environments/site/tofu/main.tf @@ -2,7 +2,7 @@ terraform { required_version = ">= 1.7" # templatestring() function required_providers { openstack = { - source = "terraform-provider-openstack/openstack" + source = "terraform-provider-openstack/openstack" version = "~>3.0.0" } } diff --git a/environments/site/tofu/network.tf b/environments/site/tofu/network.tf index 0a86b8f..43c2e5d 100644 --- a/environments/site/tofu/network.tf +++ b/environments/site/tofu/network.tf @@ -1,14 +1,14 @@ data "openstack_networking_network_v2" "cluster_net" { - for_each = {for net in var.cluster_networks: net.network => net} + for_each = { for net in var.cluster_networks : net.network => net } name = each.value.network } data "openstack_networking_subnet_v2" "cluster_subnet" { - for_each = {for net in var.cluster_networks: net.network => net} + for_each = { for net in var.cluster_networks : net.network => net } name = each.value.subnet } @@ -22,13 +22,13 @@ data "openstack_identity_auth_scope_v3" "scope" { data "openstack_networking_secgroup_v2" "login" { for_each = toset(var.login_security_groups) - name = each.key + name = each.key tenant_id = data.openstack_identity_auth_scope_v3.scope.project_id } data "openstack_networking_secgroup_v2" "nonlogin" { for_each = toset(var.nonlogin_security_groups) - name = each.key + name = each.key tenant_id = data.openstack_identity_auth_scope_v3.scope.project_id } diff --git a/environments/site/tofu/node_group/main.tf b/environments/site/tofu/node_group/main.tf index f298284..03fbec4 100644 --- a/environments/site/tofu/node_group/main.tf +++ b/environments/site/tofu/node_group/main.tf @@ -2,7 +2,7 @@ terraform { required_version = ">= 0.14" required_providers { openstack = { - source = "terraform-provider-openstack/openstack" + source = "terraform-provider-openstack/openstack" version = "~>3.0.0" } } diff --git a/environments/site/tofu/node_group/network.tf b/environments/site/tofu/node_group/network.tf index f5763b9..5a66d32 100644 --- a/environments/site/tofu/node_group/network.tf +++ b/environments/site/tofu/node_group/network.tf @@ -1,14 +1,14 @@ data "openstack_networking_network_v2" "network" { - for_each = {for net in var.networks: net.network => net} + for_each = { for net in var.networks : net.network => net } name = each.value.network } data "openstack_networking_subnet_v2" "subnet" { - for_each = {for net in var.networks: net.network => net} + for_each = { for net in var.networks : net.network => net } name = each.value.subnet } diff --git a/environments/site/tofu/node_group/nodes.tf b/environments/site/tofu/node_group/nodes.tf index 45cd449..d02028f 100644 --- a/environments/site/tofu/node_group/nodes.tf +++ b/environments/site/tofu/node_group/nodes.tf @@ -1,5 +1,5 @@ locals { - all_compute_volumes = {for v in setproduct(var.nodes, keys(var.extra_volumes)): "${v[0]}-${v[1]}" => {"node" = v[0], "volume" = v[1]}} + all_compute_volumes = { for v in setproduct(var.nodes, keys(var.extra_volumes)) : "${v[0]}-${v[1]}" => { "node" = v[0], "volume" = v[1] } } # e.g. with # var.nodes = ["compute-0", "compute-1"] # var.extra_volumes = { @@ -12,16 +12,16 @@ locals { # Workaround for lifecycle meta-argument only taking static values compute_instances = var.ignore_image_changes ? 
openstack_compute_instance_v2.compute_fixed_image : openstack_compute_instance_v2.compute - + # Define fully qualified nodenames here to avoid repetition fqdns = { - for n in var.nodes: n => templatestring( + for n in var.nodes : n => templatestring( var.nodename_template, { - node = n, - cluster_name = var.cluster_name, + node = n, + cluster_name = var.cluster_name, cluster_domain_suffix = var.cluster_domain_suffix, - environment_name = basename(var.environment_root) + environment_name = basename(var.environment_root) } ) } @@ -31,40 +31,40 @@ locals { resource "openstack_blockstorage_volume_v3" "compute" { - for_each = local.all_compute_volumes + for_each = local.all_compute_volumes - name = "${var.cluster_name}-${each.key}" - description = "Compute node ${each.value.node} volume ${each.value.volume}" - size = var.extra_volumes[each.value.volume].size - volume_type = var.extra_volumes[each.value.volume].volume_type + name = "${var.cluster_name}-${each.key}" + description = "Compute node ${each.value.node} volume ${each.value.volume}" + size = var.extra_volumes[each.value.volume].size + volume_type = var.extra_volumes[each.value.volume].volume_type } resource "openstack_compute_volume_attach_v2" "compute" { for_each = local.all_compute_volumes - instance_id = local.compute_instances["${each.value.node}"].id - volume_id = openstack_blockstorage_volume_v3.compute["${each.key}"].id + instance_id = local.compute_instances[each.value.node].id + volume_id = openstack_blockstorage_volume_v3.compute[each.key].id } resource "openstack_networking_port_v2" "compute" { - for_each = {for item in setproduct(var.nodes, var.networks): + for_each = { for item in setproduct(var.nodes, var.networks) : "${item[0]}-${item[1].network}" => { - node_idx = index(var.nodes, item[0]) - net = item[1] - } + node_idx = index(var.nodes, item[0]) + net = item[1] + } } - name = "${var.cluster_name}-${each.key}" - network_id = data.openstack_networking_network_v2.network[each.value.net.network].id + name = "${var.cluster_name}-${each.key}" + network_id = data.openstack_networking_network_v2.network[each.value.net.network].id admin_state_up = "true" fixed_ip { - subnet_id = data.openstack_networking_subnet_v2.subnet[each.value.net.network].id + subnet_id = data.openstack_networking_subnet_v2.subnet[each.value.net.network].id ip_address = try(var.ip_addresses[each.value.net.network][each.value.node_idx], null) } - + no_security_groups = lookup(each.value.net, "no_security_groups", false) security_group_ids = lookup(each.value.net, "no_security_groups", false) ? [] : var.security_group_ids @@ -77,28 +77,28 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { for_each = var.ignore_image_changes ? toset(var.nodes) : [] - name = split(".", local.fqdns[each.key])[0] - image_id = var.image_id + name = split(".", local.fqdns[each.key])[0] + image_id = var.image_id flavor_name = var.flavor - key_pair = var.key_pair + key_pair = var.key_pair dynamic "block_device" { - for_each = var.volume_backed_instances ? [1]: [] + for_each = var.volume_backed_instances ? 
[1] : [] content { - uuid = var.image_id - source_type = "image" - destination_type = "volume" - volume_size = var.root_volume_size - volume_type = var.root_volume_type - boot_index = 0 + uuid = var.image_id + source_type = "image" + destination_type = "volume" + volume_size = var.root_volume_size + volume_type = var.root_volume_type + boot_index = 0 delete_on_termination = true } } dynamic "network" { - for_each = {for net in var.networks: net.network => net} + for_each = { for net in var.networks : net.network => net } content { - port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id access_network = network.key == var.networks[0].network } } @@ -112,12 +112,12 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { metadata = merge( { - environment_root = var.environment_root - control_address = var.control_address - access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] - gateway_ip = var.gateway_ip + environment_root = var.environment_root + control_address = var.control_address + access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] + gateway_ip = var.gateway_ip }, - {for e in var.compute_init_enable: e => true} + { for e in var.compute_init_enable : e => true } ) user_data = <<-EOF @@ -144,29 +144,29 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { resource "openstack_compute_instance_v2" "compute" { for_each = var.ignore_image_changes ? [] : toset(var.nodes) - - name = split(".", local.fqdns[each.key])[0] - image_id = var.image_id + + name = split(".", local.fqdns[each.key])[0] + image_id = var.image_id flavor_name = var.flavor - key_pair = var.key_pair + key_pair = var.key_pair dynamic "block_device" { - for_each = var.volume_backed_instances ? [1]: [] + for_each = var.volume_backed_instances ? 
[1] : [] content { - uuid = var.image_id - source_type = "image" - destination_type = "volume" - volume_size = var.root_volume_size - volume_type = var.root_volume_type - boot_index = 0 + uuid = var.image_id + source_type = "image" + destination_type = "volume" + volume_size = var.root_volume_size + volume_type = var.root_volume_type + boot_index = 0 delete_on_termination = true } } - + dynamic "network" { - for_each = {for net in var.networks: net.network => net} + for_each = { for net in var.networks : net.network => net } content { - port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id access_network = network.key == var.networks[0].network } } @@ -180,12 +180,12 @@ resource "openstack_compute_instance_v2" "compute" { metadata = merge( { - environment_root = var.environment_root - control_address = var.control_address - access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] - gateway_ip = var.gateway_ip + environment_root = var.environment_root + control_address = var.control_address + access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] + gateway_ip = var.gateway_ip }, - {for e in var.compute_init_enable: e => true} + { for e in var.compute_init_enable : e => true } ) user_data = <<-EOF @@ -204,7 +204,7 @@ resource "openstack_compute_instance_v2" "compute" { } resource "openstack_networking_floatingip_associate_v2" "fip" { - for_each = {for idx in range(length(var.fip_addresses)): var.nodes[idx] => var.fip_addresses[idx]} # zip, fip_addresses can be shorter + for_each = { for idx in range(length(var.fip_addresses)) : var.nodes[idx] => var.fip_addresses[idx] } # zip, fip_addresses can be shorter floating_ip = each.value port_id = openstack_networking_port_v2.compute["${each.key}-${length(var.networks) == 1 ? 
var.networks[0].network : var.fip_network}"].id diff --git a/environments/site/tofu/node_group/variables.tf b/environments/site/tofu/node_group/variables.tf index 0a129ab..352b577 100644 --- a/environments/site/tofu/node_group/variables.tf +++ b/environments/site/tofu/node_group/variables.tf @@ -1,61 +1,61 @@ variable "nodes" { - type = list(string) - description = "List of node names for node group" + type = list(string) + description = "List of node names for node group" } variable "flavor" { - type = string - description = "Name of flavor for node group" + type = string + description = "Name of flavor for node group" } variable "cluster_name" { - type = string + type = string } variable "cluster_domain_suffix" { - type = string - default = "invalid" + type = string + default = "invalid" } variable "key_pair" { - type = string - description = "Name of an existing keypair in OpenStack" + type = string + description = "Name of an existing keypair in OpenStack" } variable "image_id" { - type = string - description = "ID of image for the node group" + type = string + description = "ID of image for the node group" } variable "environment_root" { - type = string - description = "Path to environment root, automatically set by activate script" + type = string + description = "Path to environment root, automatically set by activate script" } variable "vnic_types" { - type = map(string) - default = {} + type = map(string) + default = {} } variable "volume_backed_instances" { - description = "Whether to use volumes for root disks" - type = bool - default = false + description = "Whether to use volumes for root disks" + type = bool + default = false } variable "root_volume_size" { - description = "Size of volume for root volumes if using volume backed instances, in Gb" - type = number - default = 40 + description = "Size of volume for root volumes if using volume backed instances, in Gb" + type = number + default = 40 } variable "root_volume_type" { - type = string - default = null + type = string + default = null } variable "extra_volumes" { - description = <<-EOF + description = <<-EOF Mapping defining additional volumes to create and attach. Keys are unique volume name. Values are a mapping with: @@ -63,153 +63,154 @@ variable "extra_volumes" { volume_type: Optional. 
Type of volume, or cloud default **NB**: The order in /dev is not guaranteed to match the mapping EOF - type = map( - object({ - size = number - volume_type = optional(string) - }) - ) - default = {} - nullable = false + type = map( + object({ + size = number + volume_type = optional(string) + }) + ) + default = {} + nullable = false } variable "security_group_ids" { - type = list(string) - nullable = false + type = list(string) + nullable = false } variable "control_address" { - description = "Name/address of control node" - type = string + description = "Name/address of control node" + type = string } variable "compute_init_enable" { - type = list(string) - description = "Groups to activate for ansible-init compute rebuilds" - default = [] - nullable = false + type = list(string) + description = "Groups to activate for ansible-init compute rebuilds" + default = [] + nullable = false } variable "ignore_image_changes" { - type = bool - description = "Whether to ignore changes to the image_id parameter" - default = false - nullable = false + type = bool + description = "Whether to ignore changes to the image_id parameter" + default = false + nullable = false } variable "networks" { - type = list(map(string)) + type = list(map(string)) } variable "fip_addresses" { - type = list(string) - description = <<-EOT + type = list(string) + description = <<-EOT List of addresses of floating IPs to associate with nodes, in same order as nodes parameter. The floating IPs must already be allocated to the project. EOT - default = [] - nullable = false + default = [] + nullable = false } variable "fip_network" { - type = string - description = <<-EOT + type = string + description = <<-EOT Name of network containing ports to attach FIPs to. Only required if multiple networks are defined. EOT - default = "" - nullable = false + default = "" + nullable = false } variable "ip_addresses" { - type = map(list(string)) - description = <<-EOT + type = map(list(string)) + description = <<-EOT Mapping of list of fixed IP addresses for nodes, keyed by network name, in same order as nodes parameter. For any networks not specified here the cloud will select addresses. NB: Changing IP addresses after deployment may hit terraform provider bugs. 
EOT - default = {} - nullable = false - validation { - condition = length(setsubtract(keys(var.ip_addresses), var.networks[*].network)) == 0 - error_message = "Keys in ip_addresses for nodegroup \"${var.group_name}\" must match network names in var.cluster_networks" - } - validation { - condition = alltrue([for v in values(var.ip_addresses): length(v) == length(var.nodes)]) - error_message = "Values in ip_addresses for nodegroup \"${var.group_name}\" must be a list of the same length as var.nodes" - } + default = {} + nullable = false + validation { + condition = length(setsubtract(keys(var.ip_addresses), var.networks[*].network)) == 0 + error_message = "Keys in ip_addresses for nodegroup \"${var.group_name}\" must match network names in var.cluster_networks" + } + validation { + condition = alltrue([for v in values(var.ip_addresses) : length(v) == length(var.nodes)]) + error_message = "Values in ip_addresses for nodegroup \"${var.group_name}\" must be a list of the same length as var.nodes" + } } variable "match_ironic_node" { - type = bool - description = "Whether to launch instances on the Ironic node of the same name as each cluster node" - default = false - nullable = false + type = bool + description = "Whether to launch instances on the Ironic node of the same name as each cluster node" + default = false + nullable = false } variable "availability_zone" { - type = string - description = "Name of availability zone. If undefined, defaults to 'nova' if match_ironic_node is true, deferred to OpenStack otherwise" - default = null + type = string + description = "Name of availability zone. If undefined, defaults to 'nova' if match_ironic_node is true, deferred to OpenStack otherwise" + default = null } variable "baremetal_nodes" { - type = map(string) - default = {} + type = map(string) + default = {} } variable "gateway_ip" { - type = string - default = "" + type = string + default = "" } variable "nodename_template" { - type = string - default = "" + type = string + default = "" } variable "group_name" { - type = string + type = string } +# tflint-ignore: terraform_unused_declarations variable "group_keys" { - type = list - validation { - condition = length(setsubtract(var.group_keys, var.allowed_keys)) == 0 - error_message = <<-EOT + type = list(any) + validation { + condition = length(setsubtract(var.group_keys, var.allowed_keys)) == 0 + error_message = <<-EOT Node group '${var.group_name}' contains invalid key(s) ${ - join(", ", setsubtract(var.group_keys, var.allowed_keys))}. + join(", ", setsubtract(var.group_keys, var.allowed_keys))}. Valid keys are ${join(", ", var.allowed_keys)}. 
EOT - } +} } variable "allowed_keys" { - type = list - # don't provide a default here as allowed keys may depend on module use + type = list(any) + # don't provide a default here as allowed keys may depend on module use } variable "config_drive" { - type = bool + type = bool } variable "additional_cloud_config" { - type = string - default = "" - nullable = false + type = string + default = "" + nullable = false } variable "additional_cloud_config_vars" { - type = map(any) - default = {} - nullable = false + type = map(any) + default = {} + nullable = false } variable "server_group_id" { - type = string - default = null + type = string + default = null } diff --git a/environments/site/tofu/read-inventory-secrets.py b/environments/site/tofu/read-inventory-secrets.py index e3de2f4..85ac0a9 100755 --- a/environments/site/tofu/read-inventory-secrets.py +++ b/environments/site/tofu/read-inventory-secrets.py @@ -1,36 +1,43 @@ #!/usr/bin/env python -""" opentofu external data program to load inventory string variables from - a (possibly vault-encrypted) secrets file. +# pylint: disable=invalid-name +"""opentofu external data program to load inventory string variables from +a (possibly vault-encrypted) secrets file. - Example usage: +Example usage: - data "external" "example" { - program = [this_file] + data "external" "example" { + program = [this_file] - query = { - path = "${path.module}/../inventory/group_vars/all/secrets.yml" - } + query = { + path = "${path.module}/../inventory/group_vars/all/secrets.yml" } + } - The external data resource's result attribute then contains a mapping of - variable names to values. +The external data resource's result attribute then contains a mapping of +variable names to values. - NB: Only keys/values where values are strings are returned, in line with - the external program protocol. +NB: Only keys/values where values are strings are returned, in line with +the external program protocol. - NB: This approach is better than e.g. templating inventory vars as the - inventory doesn't need to be valid, which is helpful when opentofu will - template out hosts/groups. +NB: This approach is better than e.g. templating inventory vars as the +inventory doesn't need to be valid, which is helpful when opentofu will +template out hosts/groups. 
""" -import sys, json, subprocess, yaml -input = sys.stdin.read() -secrets_path = json.loads(input)['path'] +import json +import subprocess +import sys -with open(secrets_path) as f: +import yaml # pylint: disable=import-error + +input = sys.stdin.read() # pylint: disable=redefined-builtin +secrets_path = json.loads(input)["path"] + +with open(secrets_path) as f: # pylint: disable=unspecified-encoding header = f.readline() - if header.startswith('$ANSIBLE_VAULT'): - cmd = ['ansible-vault', 'view', secrets_path] + if header.startswith("$ANSIBLE_VAULT"): + cmd = ["ansible-vault", "view", secrets_path] + # pylint: disable-next=subprocess-run-check ansible = subprocess.run(cmd, capture_output=True, text=True) contents = ansible.stdout else: diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 3402c3a..98f364a 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -1,350 +1,351 @@ variable "cluster_name" { - type = string - description = "Name of cluster, used as part of domain name" + type = string + description = "Name of cluster, used as part of domain name" } variable "cluster_domain_suffix" { - type = string - description = "Domain suffix for cluster" - default = "internal" + type = string + description = "Domain suffix for cluster" + default = "internal" } variable "cluster_networks" { - type = list(map(string)) - description = <<-EOT - List of mappings defining networks. Mapping key/values: - network: Required. Name of existing network - subnet: Required. Name of existing subnet - no_security_groups: Optional. Bool (default: false). Disable security groups - EOT + type = list(map(string)) + description = <<-EOT + List of mappings defining networks. Mapping key/values: + network: Required. Name of existing network + subnet: Required. Name of existing subnet + no_security_groups: Optional. Bool (default: false). Disable security groups + EOT } variable "key_pair" { - type = string - description = "Name of an existing keypair in OpenStack" + type = string + description = "Name of an existing keypair in OpenStack" } variable "control_ip_addresses" { - type = map(string) - description = <<-EOT - Mapping of fixed IP addresses for control node, keyed by network name. - For any networks not specified here the cloud will select an address. - - NB: Changing IP addresses after deployment may hit terraform provider bugs. - EOT - default = {} - validation { - # check all keys are network names in cluster_networks - condition = length(setsubtract(keys(var.control_ip_addresses), var.cluster_networks[*].network)) == 0 - error_message = "Keys in var.control_ip_addresses must match network names in var.cluster_networks" - } + type = map(string) + description = <<-EOT + Mapping of fixed IP addresses for control node, keyed by network name. + For any networks not specified here the cloud will select an address. + + NB: Changing IP addresses after deployment may hit terraform provider bugs. 
+ EOT + default = {} + validation { + # check all keys are network names in cluster_networks + condition = length(setsubtract(keys(var.control_ip_addresses), var.cluster_networks[*].network)) == 0 + error_message = "Keys in var.control_ip_addresses must match network names in var.cluster_networks" + } } variable "control_node_flavor" { - type = string - description = "Flavor name for control node" + type = string + description = "Flavor name for control node" } variable "login" { - default = {} - description = <<-EOF - Mapping defining homogenous groups of login nodes. Multiple groups may - be useful for e.g. separating nodes for ssh and Open Ondemand usage, or - to define login nodes with different capabilities such as high-memory. - - Keys are names of groups. - Values are a mapping as follows: - - Required: - nodes: List of node names - flavor: String flavor name - Optional: - image_id: Overrides variable cluster_image_id - extra_networks: List of mappings in same format as cluster_networks - vnic_types: Overrides variable vnic_types - volume_backed_instances: Overrides variable volume_backed_instances - root_volume_size: Overrides variable root_volume_size - extra_volumes: Mapping defining additional volumes to create and attach - Keys are unique volume name. - Values are a mapping with: - size: Size of volume in GB - volume_type: Optional. Type of volume, or cloud default - **NB**: The order in /dev is not guaranteed to match the mapping - fip_addresses: List of addresses of floating IPs to associate with - nodes, in the same order as nodes parameter. The - floating IPs must already be allocated to the project. - fip_network: Name of network containing ports to attach FIPs to. Only - required if multiple networks are defined. - ip_addresses: Mapping of list of fixed IP addresses for nodes, keyed - by network name, in same order as nodes parameter. - For any networks not specified here the cloud will - select addresses. - match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node - availability_zone: Name of availability zone. If undefined, defaults to 'nova' - if match_ironic_node is true, defered to OpenStack otherwise - gateway_ip: Address to add default route via - nodename_template: Overrides variable cluster_nodename_template - server_group_id: String ID of server group to use for scheduler hint - EOF - - type = any + default = {} + description = <<-EOF + Mapping defining homogenous groups of login nodes. Multiple groups may + be useful for e.g. separating nodes for ssh and Open Ondemand usage, or + to define login nodes with different capabilities such as high-memory. + + Keys are names of groups. + Values are a mapping as follows: + + Required: + nodes: List of node names + flavor: String flavor name + Optional: + image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks + vnic_types: Overrides variable vnic_types + volume_backed_instances: Overrides variable volume_backed_instances + root_volume_size: Overrides variable root_volume_size + extra_volumes: Mapping defining additional volumes to create and attach + Keys are unique volume name. + Values are a mapping with: + size: Size of volume in GB + volume_type: Optional. Type of volume, or cloud default + **NB**: The order in /dev is not guaranteed to match the mapping + fip_addresses: List of addresses of floating IPs to associate with + nodes, in the same order as nodes parameter. 
The + floating IPs must already be allocated to the project. + fip_network: Name of network containing ports to attach FIPs to. Only + required if multiple networks are defined. + ip_addresses: Mapping of list of fixed IP addresses for nodes, keyed + by network name, in same order as nodes parameter. + For any networks not specified here the cloud will + select addresses. + match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node + availability_zone: Name of availability zone. If undefined, defaults to 'nova' + if match_ironic_node is true, defered to OpenStack otherwise + gateway_ip: Address to add default route via + nodename_template: Overrides variable cluster_nodename_template + server_group_id: String ID of server group to use for scheduler hint + EOF + + type = any } variable "cluster_image_id" { - type = string - description = "ID of default image for the cluster" + type = string + description = "ID of default image for the cluster" } variable "compute" { - default = {} - description = <<-EOF - Mapping defining homogenous groups of compute nodes. Groups are used - in Slurm partition definitions. - - Keys are names of groups. - Values are a mapping as follows: - - Required: - nodes: List of node names - flavor: String flavor name - Optional: - image_id: Overrides variable cluster_image_id - extra_networks: List of mappings in same format as cluster_networks - vnic_types: Overrides variable vnic_types - compute_init_enable: Toggles compute-init rebuild (see compute-init role docs) - ignore_image_changes: Ignore changes to the image_id parameter (see docs/experimental/compute-init.md) - volume_backed_instances: Overrides variable volume_backed_instances - root_volume_size: Overrides variable root_volume_size - extra_volumes: Mapping defining additional volumes to create and attach - Keys are unique volume name. - Values are a mapping with: - size: Size of volume in GB - volume_type: Optional. Type of volume, or cloud default - **NB**: The order in /dev is not guaranteed to match the mapping - ip_addresses: Mapping of list of fixed IP addresses for nodes, keyed - by network name, in same order as nodes parameter. - For any networks not specified here the cloud will - select addresses. - match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node - availability_zone: Name of availability zone. If undefined, defaults to 'nova' - if match_ironic_node is true, defered to OpenStack otherwise - gateway_ip: Address to add default route via - nodename_template: Overrides variable cluster_nodename_template - server_group_id: String ID of server group to use for scheduler hint - - Nodes are added to the following inventory groups: - - $group_name - - $cluster_name + '_' + $group_name - this is used for the stackhpc.openhpc role - - 'compute' - EOF - - type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + default = {} + description = <<-EOF + Mapping defining homogenous groups of compute nodes. Groups are used + in Slurm partition definitions. + + Keys are names of groups. 
+ Values are a mapping as follows: + + Required: + nodes: List of node names + flavor: String flavor name + Optional: + image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks + vnic_types: Overrides variable vnic_types + compute_init_enable: Toggles compute-init rebuild (see compute-init role docs) + ignore_image_changes: Ignore changes to the image_id parameter (see docs/experimental/compute-init.md) + volume_backed_instances: Overrides variable volume_backed_instances + root_volume_size: Overrides variable root_volume_size + extra_volumes: Mapping defining additional volumes to create and attach + Keys are unique volume name. + Values are a mapping with: + size: Size of volume in GB + volume_type: Optional. Type of volume, or cloud default + **NB**: The order in /dev is not guaranteed to match the mapping + ip_addresses: Mapping of list of fixed IP addresses for nodes, keyed + by network name, in same order as nodes parameter. + For any networks not specified here the cloud will + select addresses. + match_ironic_node: Set true to launch instances on the Ironic node of the same name as each cluster node + availability_zone: Name of availability zone. If undefined, defaults to 'nova' + if match_ironic_node is true, defered to OpenStack otherwise + gateway_ip: Address to add default route via + nodename_template: Overrides variable cluster_nodename_template + server_group_id: String ID of server group to use for scheduler hint + + Nodes are added to the following inventory groups: + - $group_name + - $cluster_name + '_' + $group_name - this is used for the stackhpc.openhpc role + - 'compute' + EOF + + type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings } +# tflint-ignore: terraform_typed_variables variable "additional_nodegroups" { - default = {} - description = <<-EOF - Mapping defining homogenous groups of nodes for arbitrary purposes. - These nodes are not in the compute or login inventory groups so they - will not run slurmd. - - Keys are names of groups. - Values are a mapping as for the "login" variable, with the addition of - the optional entry: + default = {} + description = <<-EOF + Mapping defining homogenous groups of nodes for arbitrary purposes. + These nodes are not in the compute or login inventory groups so they + will not run slurmd. + + Keys are names of groups. + Values are a mapping as for the "login" variable, with the addition of + the optional entry: - security_group_ids: List of strings giving IDs of security groups - to apply. If not specified the groups from the - variable nonlogin_security_groups are applied. - - Nodes are added to the following inventory groups: - - $group_name - - $cluster_name + '_' + $group_name - - 'additional' - EOF + security_group_ids: List of strings giving IDs of security groups + to apply. If not specified the groups from the + variable nonlogin_security_groups are applied. 
+ + Nodes are added to the following inventory groups: + - $group_name + - $cluster_name + '_' + $group_name + - 'additional' + EOF } variable "environment_root" { - type = string - description = "Path to environment root, automatically set by activate script" + type = string + description = "Path to environment root, automatically set by activate script" } variable "state_dir" { - type = string - description = "Path to state directory on control node" - default = "/var/lib/state" + type = string + description = "Path to state directory on control node" + default = "/var/lib/state" } variable "state_volume_size" { - type = number - description = "Size of state volume on control node, in GB" - default = 150 # GB + type = number + description = "Size of state volume on control node, in GB" + default = 150 # GB } variable "state_volume_type" { - type = string - description = "Type of state volume, if not default type" - default = null + type = string + description = "Type of state volume, if not default type" + default = null } variable "state_volume_provisioning" { - type = string - default = "manage" - description = <<-EOT - How to manage the state volume. Valid values are: - "manage": (Default) OpenTofu will create a volume "$cluster_name-state" - and delete it when the cluster is destroyed. A volume - with this name must not already exist. Use for demo and - dev environments. - "attach": A single volume named "$cluster_name-state" must already - exist. It is not managed by OpenTofu so e.g. is left - intact if the cluster is destroyed. Use for production - environments. - EOT - validation { - condition = contains(["manage", "attach"], var.state_volume_provisioning) - error_message = <<-EOT - state_volume_provisioning must be "manage" or "attach" + type = string + default = "manage" + description = <<-EOT + How to manage the state volume. Valid values are: + "manage": (Default) OpenTofu will create a volume "$cluster_name-state" + and delete it when the cluster is destroyed. A volume + with this name must not already exist. Use for demo and + dev environments. + "attach": A single volume named "$cluster_name-state" must already + exist. It is not managed by OpenTofu so e.g. is left + intact if the cluster is destroyed. Use for production + environments. + EOT + validation { + condition = contains(["manage", "attach"], var.state_volume_provisioning) + error_message = <<-EOT + state_volume_provisioning must be "manage" or "attach" EOT - } + } } variable "home_volume_size" { - type = number - description = "Size of state volume on control node, in GB." - default = 100 - validation { - condition = var.home_volume_provisioning == "manage" ? var.home_volume_size > 0 : true - error_message = <<-EOT - home_volume_size must be > 0 when var.home_volume_provisioning == "manage" - EOT - } + type = number + description = "Size of state volume on control node, in GB." + default = 100 + validation { + condition = var.home_volume_provisioning == "manage" ? var.home_volume_size > 0 : true + error_message = <<-EOT + home_volume_size must be > 0 when var.home_volume_provisioning == "manage" + EOT + } } variable "home_volume_type" { - type = string - default = null - description = "Type of home volume, if not default type" + type = string + default = null + description = "Type of home volume, if not default type" } variable "home_volume_provisioning" { - type = string - default = "manage" - description = <<-EOT - How to manage the home volume. 
Valid values are: - "manage": (Default) OpenTofu will create a volume "$cluster_name-home" - and delete it when the cluster is destroyed. A volume - with this name must not already exist. Use for demo and - dev environments. - "attach": A single volume named "$cluster_name-home" must already - exist. It is not managed by OpenTofu so e.g. is left - intact if the cluster is destroyed. Use for production - environments. - "none": No home volume is used. Use if /home is provided by - a parallel filesystem, e.g. manila. - EOT - validation { - condition = contains(["manage", "attach", "none"], var.home_volume_provisioning) - error_message = <<-EOT - home_volume_provisioning must be one of "manage", "attach" or "none" + type = string + default = "manage" + description = <<-EOT + How to manage the home volume. Valid values are: + "manage": (Default) OpenTofu will create a volume "$cluster_name-home" + and delete it when the cluster is destroyed. A volume + with this name must not already exist. Use for demo and + dev environments. + "attach": A single volume named "$cluster_name-home" must already + exist. It is not managed by OpenTofu so e.g. is left + intact if the cluster is destroyed. Use for production + environments. + "none": No home volume is used. Use if /home is provided by + a parallel filesystem, e.g. manila. + EOT + validation { + condition = contains(["manage", "attach", "none"], var.home_volume_provisioning) + error_message = <<-EOT + home_volume_provisioning must be one of "manage", "attach" or "none" EOT - } + } } variable "vnic_types" { - type = map(string) - description = <<-EOT - Default VNIC types, keyed by network name. See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type - If not given this defaults to the "normal" type. - EOT - default = {} + type = map(string) + description = <<-EOT + Default VNIC types, keyed by network name. See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type + If not given this defaults to the "normal" type. 
+ EOT + default = {} } variable "login_security_groups" { - type = list(string) - description = "Name of preexisting security groups to apply to login nodes" - default = [ - "default", # allow all in-cluster services - "SSH", # access via ssh - "HTTPS", # access OpenOndemand - ] + type = list(string) + description = "Name of preexisting security groups to apply to login nodes" + default = [ + "default", # allow all in-cluster services + "SSH", # access via ssh + "HTTPS", # access OpenOndemand + ] } variable "nonlogin_security_groups" { - type = list(string) - description = "Name of preexisting security groups to apply to non-login nodes" - default = [ - "default", # allow all in-cluster services - ] + type = list(string) + description = "Name of preexisting security groups to apply to non-login nodes" + default = [ + "default", # allow all in-cluster services + ] } variable "volume_backed_instances" { - description = "Whether to use volumes for root disks" - type = bool - default = false + description = "Whether to use volumes for root disks" + type = bool + default = false } variable "root_volume_size" { - description = "Size of volume for root volumes if using volume backed instances, in Gb" - type = number - default = 40 + description = "Size of volume for root volumes if using volume backed instances, in Gb" + type = number + default = 40 } variable "root_volume_type" { - description = "Type of root volume, if using volume backed instances. If unset, the target cloud default volume type is used." - type = string - default = null + description = "Type of root volume, if using volume backed instances. If unset, the target cloud default volume type is used." + type = string + default = null } variable "gateway_ip" { - description = "Address to add default route via" - type = string - default = "" + description = "Address to add default route via" + type = string + default = "" } variable "cluster_nodename_template" { - description = <<-EOT - Template for node fully-qualified names. The following interpolations - can be used: - $${cluster_name}: From var.cluster_name - $${cluster_domain_suffix}: From var.cluster_domain_suffix - $${node}: The current entry in the "nodes" parameter for nodes - defined by var.compute and var.login, or "control" for the control - node - $${environment_name}: The last element of the current environment's path - EOT - type = string - default = "$${cluster_name}-$${node}.$${cluster_name}.$${cluster_domain_suffix}" + description = <<-EOT + Template for node fully-qualified names. The following interpolations + can be used: + $${cluster_name}: From var.cluster_name + $${cluster_domain_suffix}: From var.cluster_domain_suffix + $${node}: The current entry in the "nodes" parameter for nodes + defined by var.compute and var.login, or "control" for the control + node + $${environment_name}: The last element of the current environment's path + EOT + type = string + default = "$${cluster_name}-$${node}.$${cluster_name}.$${cluster_domain_suffix}" } variable "config_drive" { - description = <<-EOT - Whether to enable Nova config drives on all nodes, which will attach a drive containing - information usually provided through the metadata service. - EOT - type = bool - default = null + description = <<-EOT + Whether to enable Nova config drives on all nodes, which will attach a drive containing + information usually provided through the metadata service. 
+ EOT + type = bool + default = null } variable "additional_cloud_config" { - description = <<-EOT - Multiline string to be appended to the node's cloud-init cloud-config user-data. - Must be in yaml format and not include the #cloud-config or any other user-data headers. - See https://cloudinit.readthedocs.io/en/latest/explanation/format.html#cloud-config-data. - Can be a templatestring parameterised by `additional_cloud_config_vars`. - The `boot-cmd`, `fqdn` and `mounts` modules must not be specified. - EOT - type = string - default = "" + description = <<-EOT + Multiline string to be appended to the node's cloud-init cloud-config user-data. + Must be in yaml format and not include the #cloud-config or any other user-data headers. + See https://cloudinit.readthedocs.io/en/latest/explanation/format.html#cloud-config-data. + Can be a templatestring parameterised by `additional_cloud_config_vars`. + The `boot-cmd`, `fqdn` and `mounts` modules must not be specified. + EOT + type = string + default = "" } variable "additional_cloud_config_vars" { - description = "Map of values passed to the `additional_cloud_config` templatestring" - type = map(any) - default = {} + description = "Map of values passed to the `additional_cloud_config` templatestring" + type = map(any) + default = {} } variable "control_server_group_id" { - description = "ID of server group to use for control node scheduler hint" - type = string - default = null + description = "ID of server group to use for control node scheduler hint" + type = string + default = null } diff --git a/environments/site/tofu/volumes.tf b/environments/site/tofu/volumes.tf index 18a6a09..46b63eb 100644 --- a/environments/site/tofu/volumes.tf +++ b/environments/site/tofu/volumes.tf @@ -1,59 +1,59 @@ resource "openstack_blockstorage_volume_v3" "state" { - # NB: Changes to this resource's "address" i.e. (label or for_each key) - # may lose state data for existing clusters using this volume + # NB: Changes to this resource's "address" i.e. (label or for_each key) + # may lose state data for existing clusters using this volume - count = var.state_volume_provisioning == "manage" ? 1 : 0 + count = var.state_volume_provisioning == "manage" ? 1 : 0 - name = "${var.cluster_name}-state" # last word used to label filesystem - description = "State for control node" - size = var.state_volume_size - volume_type = var.state_volume_type + name = "${var.cluster_name}-state" # last word used to label filesystem + description = "State for control node" + size = var.state_volume_size + volume_type = var.state_volume_type } data "openstack_blockstorage_volume_v3" "state" { -/* We use a data resource whether or not TF is managing the volume, so the + /* We use a data resource whether or not TF is managing the volume, so the logic is all in one place. But that means this needs a dependency on the actual resource to avoid a race. Because there may be no volume, this has to use for_each. */ - for_each = toset( - (var.state_volume_provisioning == "manage") ? - [for v in openstack_blockstorage_volume_v3.state: v.name] : - ["${var.cluster_name}-state"] - ) + for_each = toset( + (var.state_volume_provisioning == "manage") ? + [for v in openstack_blockstorage_volume_v3.state : v.name] : + ["${var.cluster_name}-state"] + ) - name = each.key + name = each.key } resource "openstack_blockstorage_volume_v3" "home" { - # NB: Changes to this resource's "address" i.e. 
(label or for_each key) - # may lose user data for existing clusters using this volume + # NB: Changes to this resource's "address" i.e. (label or for_each key) + # may lose user data for existing clusters using this volume - count = var.home_volume_provisioning == "manage" ? 1 : 0 + count = var.home_volume_provisioning == "manage" ? 1 : 0 - name = "${var.cluster_name}-home" # last word used to label filesystem - description = "Home for control node" - size = var.home_volume_size - volume_type = var.home_volume_type + name = "${var.cluster_name}-home" # last word used to label filesystem + description = "Home for control node" + size = var.home_volume_size + volume_type = var.home_volume_type } data "openstack_blockstorage_volume_v3" "home" { -/* Comments as for the state volume. */ + /* Comments as for the state volume. */ - for_each = toset( - (var.home_volume_provisioning == "manage") ? - [for v in openstack_blockstorage_volume_v3.home: v.name] : - (var.home_volume_provisioning == "attach") ? - ["${var.cluster_name}-home"] : - [] - ) + for_each = toset( + (var.home_volume_provisioning == "manage") ? + [for v in openstack_blockstorage_volume_v3.home : v.name] : + (var.home_volume_provisioning == "attach") ? + ["${var.cluster_name}-home"] : + [] + ) - name = each.key + name = each.key } diff --git a/packer/openhpc_extravars.yml b/packer/openhpc_extravars.yml index 66f6686..8bfcd59 100644 --- a/packer/openhpc_extravars.yml +++ b/packer/openhpc_extravars.yml @@ -1 +1,2 @@ -workaround_ansible_issue_61497: yes # extravars files can't be empty +--- +workaround_ansible_issue_61497: true # extravars files can't be empty diff --git a/requirements.yml b/requirements.yml index 8850c16..e577dd3 100644 --- a/requirements.yml +++ b/requirements.yml @@ -57,4 +57,3 @@ collections: version: 0.5.5 - name: stackhpc.linux version: 1.5.0 -... diff --git a/super-linter.env b/super-linter.env new file mode 100644 index 0000000..5362c92 --- /dev/null +++ b/super-linter.env @@ -0,0 +1,27 @@ +# Detect that default branch is devel when running locally +DEFAULT_BRANCH=main + +# Don't validate JSCPD +VALIDATE_JSCPD=false + +# Don't validate JS standard because it conflicts with JS prettier +VALIDATE_JAVASCRIPT_STANDARD=false + +# Don't validate Ansible because ansible-lint is more flexible +VALIDATE_ANSIBLE=false + +# Don't validate YAML prettier because yamllint is sufficient +VALIDATE_YAML_PRETTIER=false + +# Getting false positives with terrascan that seemingly can't be masked +VALIDATE_TERRAFORM_TERRASCAN=false + +# Doesn't seem possible to exclude files with terragrunt +VALIDATE_TERRAGRUNT=false + +# TODO: address the following. +# Temporarily disable these linters, +# there are select issues remaining with each that can be addressed individually +VALIDATE_GITHUB_ACTIONS=false +VALIDATE_SHELL_SHFMT=false +VALIDATE_YAML=false From fad0ff4a3d2f76eb4ae7bc010872ff9e9cb0636c Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Thu, 18 Sep 2025 15:57:40 +0200 Subject: [PATCH 18/50] Define login subgroups in Ansible inventory (#727) It resolves some limitations with login subgroups, such as difficulty to bind the Open OnDemand service to a specific node when naming of the nodes is not predictable. This replicates what is already done for compute subgroups. 
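For illustration, a minimal sketch of the inventory fragment the added template block is expected to render, assuming a cluster named `mycluster` with a single login group `interactive` (both names are hypothetical):

```yaml
# Sketch of the rendered environments/site/inventory/hosts.yml fragment (assumed names):
mycluster_interactive:        # existing per-group inventory group with the actual hosts
  hosts:
    mycluster-interactive-0:  # hypothetical node name
interactive:                  # new subgroup added by this change
  children:
    mycluster_interactive:
```

Site configuration can then target the `interactive` group directly (for example to bind the Open OnDemand server) without needing to know the generated node names.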
--- environments/site/tofu/inventory.tpl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/environments/site/tofu/inventory.tpl b/environments/site/tofu/inventory.tpl index 6c11b32..9920f9e 100644 --- a/environments/site/tofu/inventory.tpl +++ b/environments/site/tofu/inventory.tpl @@ -27,6 +27,11 @@ ${cluster_name}_${group_name}: networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} node_fqdn: ${login_groups[group_name]["fqdns"][nodename]} %{ endfor ~} + +${group_name}: + children: + ${cluster_name}_${group_name}: + %{ endfor ~} login: From eb1fb2dbcc51fc9df208d7401a1956715f507301 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Thu, 18 Sep 2025 18:17:48 +0200 Subject: [PATCH 19/50] Fix label in Jupyter Notebook form (#787) --- environments/common/inventory/group_vars/all/openondemand.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index af7554a..7727052 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -161,7 +161,7 @@ openondemand_apps_jupyter_default: - node attributes: # TODO num_cores: - label: Number of cores FOO + label: Number of cores value: 1 modules: "" extra_jupyter_args: "" From 0da4041e148c5a043885a0cbaee8a23f99daea9e Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:19:46 +0100 Subject: [PATCH 20/50] Ignore changes to port binding and dhcp options (#778) * ignore port binding info; fixes tf when admin * ignore port dhcp changes to fix networking-mlxn * ignore port binding/dhcp options for caas * fix TF linter errors --- .../cluster_infra/templates/resources.tf.j2 | 47 +++++++++++++++++++ environments/site/tofu/control.tf | 7 +++ environments/site/tofu/node_group/nodes.tf | 7 +++ 3 files changed, 61 insertions(+) diff --git a/ansible/roles/cluster_infra/templates/resources.tf.j2 b/ansible/roles/cluster_infra/templates/resources.tf.j2 index f342371..f46192c 100644 --- a/ansible/roles/cluster_infra/templates/resources.tf.j2 +++ b/ansible/roles/cluster_infra/templates/resources.tf.j2 @@ -219,6 +219,14 @@ resource "openstack_networking_port_v2" "login" { binding { vnic_type = "{{ cluster_vnic_type | default('normal') }}" } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } + } # Storage network @@ -235,6 +243,14 @@ resource "openstack_networking_port_v2" "login_storage" { binding { vnic_type = "{{ cluster_storage_vnic_type | default('normal') }}" } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } + } {% endif %} @@ -258,8 +274,15 @@ resource "openstack_networking_port_v2" "control" { binding { vnic_type = "{{ cluster_vnic_type | default('normal') }}" + } + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] } + } # Storage network @@ -276,6 +299,14 @@ resource "openstack_networking_port_v2" "control_storage" { binding { vnic_type = "{{ cluster_storage_vnic_type | default('normal') }}" } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } 
+ } {% endif %} @@ -301,6 +332,14 @@ resource "openstack_networking_port_v2" "{{ nodegroup.name }}" { binding { vnic_type = "{{ cluster_vnic_type | default('normal') }}" } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } + } # Storage network @@ -318,6 +357,14 @@ resource "openstack_networking_port_v2" "{{ nodegroup.name }}_storage" { binding { vnic_type = "{{ cluster_storage_vnic_type | default('normal') }}" } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } + } {% endif %} diff --git a/environments/site/tofu/control.tf b/environments/site/tofu/control.tf index 7bfa13f..87da2f7 100644 --- a/environments/site/tofu/control.tf +++ b/environments/site/tofu/control.tf @@ -34,6 +34,13 @@ resource "openstack_networking_port_v2" "control" { binding { vnic_type = lookup(var.vnic_types, each.key, "normal") } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } } resource "openstack_compute_instance_v2" "control" { diff --git a/environments/site/tofu/node_group/nodes.tf b/environments/site/tofu/node_group/nodes.tf index d02028f..4d874d1 100644 --- a/environments/site/tofu/node_group/nodes.tf +++ b/environments/site/tofu/node_group/nodes.tf @@ -71,6 +71,13 @@ resource "openstack_networking_port_v2" "compute" { binding { vnic_type = lookup(var.vnic_types, each.value.net.network, "normal") } + + lifecycle { + ignore_changes = [ + binding, # fixes running as admin + extra_dhcp_option # required for networking-mlnx neutron plugin + ] + } } resource "openstack_compute_instance_v2" "compute_fixed_image" { From 06857df9ae7cc1ea8efe80decfb63aab7c272744 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Thu, 18 Sep 2025 19:47:46 +0200 Subject: [PATCH 21/50] Expose vgpu group in site inventory (#786) * Fix various comments in Ansible group files * Expose vgpu group in site inventory --- environments/common/inventory/groups | 13 ++++++------- environments/site/inventory/groups | 15 +++++++++------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 6926355..c02226b 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -91,22 +91,22 @@ fail2ban # Add `openhpc` group to add slurm users via creation of users on each node. 
[openondemand] -# Host to run Open Ondemand server on - subset of login +# Host to run Open OnDemand server on - subset of login [openondemand_desktop] -# Subset of compute to run a interactive desktops on via Open Ondemand +# Subset of compute to run a interactive desktops on via Open OnDemand [openondemand_jupyter] -# Subset of compute to run a Jupyter Notebook servers on via Open Ondemand +# Subset of compute to run a Jupyter Notebook servers on via Open OnDemand [openondemand_rstudio] -# Subset of compute to run RStudio servers on via Open Ondemand +# Subset of compute to run RStudio servers on via Open OnDemand [openondemand_matlab] -# Subset of compute to run RStudio servers on via Open Ondemand +# Subset of compute to run a MATLAB interactive desktop on via Open OnDemand [openondemand_codeserver] -# Subset of compute to run a Codeserver VSCode instance on via Open Ondemand +# Subset of compute to run a Codeserver VSCode instance on via Open OnDemand [etc_hosts] # Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md @@ -217,4 +217,3 @@ extra_packages # separately from the appliance. e.g # pulp_host ansible_host= # Note the host name can't conflict with group names i.e can't be called `pulp` or `pulp_server` - diff --git a/environments/site/inventory/groups b/environments/site/inventory/groups index b78197d..930cf93 100644 --- a/environments/site/inventory/groups +++ b/environments/site/inventory/groups @@ -44,27 +44,27 @@ login openhpc [openondemand:children] -# Host to run Open Ondemand server on - subset of login +# Host to run Open OnDemand server on - subset of login login [openondemand_desktop:children] -# Subset of compute to run a interactive desktops on via Open Ondemand +# Subset of compute to run a interactive desktops on via Open OnDemand compute [openondemand_jupyter:children] -# Subset of compute to run a Jupyter Notebook servers on via Open Ondemand +# Subset of compute to run a Jupyter Notebook servers on via Open OnDemand compute [openondemand_rstudio:children] -# Subset of compute to run RStudio servers on via Open Ondemand +# Subset of compute to run RStudio servers on via Open OnDemand compute [openondemand_matlab:children] -# Subset of compute to run a MATLAB interactive desktop on via Open Ondemand +# Subset of compute to run a MATLAB interactive desktop on via Open OnDemand compute [openondemand_codeserver:children] -# Subset of compute to run a Codeserver VSCode instance on via Open Ondemand +# Subset of compute to run a Codeserver VSCode instance on via Open OnDemand compute [etc_hosts:children] @@ -81,6 +81,9 @@ cluster # Hosts to recompile Slurm for - allows supporting Slurm autodetection method 'nvml' cuda +[vgpu] +# Hosts where vGPU/MIG should be configured - see docs/mig.md + [eessi:children] # Hosts on which EESSI stack should be configured openhpc From 535528fb0089743d67a6332f559d672f4494f850 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Fri, 19 Sep 2025 09:57:53 +0100 Subject: [PATCH 22/50] Add documentation for OpenTofu remote state (#784) * wip: add TF remote state docs * wip s3 remote state * improve gitlab backend configuration * automate s3 creds * make s3 buckets clearer * fix linting * try to allow same headings at different levels in markdown * fix tf lint errors * fix prettier errors --- .markdownlint.json | 5 + docs/opentofu-remote-state.md | 184 ++++++++++++++++++ docs/production.md | 3 + .../site/tofu/example-backends/gitlab.tf | 42 ++++ 
environments/site/tofu/example-backends/s3.tf | 25 +++ 5 files changed, 259 insertions(+) create mode 100644 .markdownlint.json create mode 100644 docs/opentofu-remote-state.md create mode 100644 environments/site/tofu/example-backends/gitlab.tf create mode 100644 environments/site/tofu/example-backends/s3.tf diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..fba9b6e --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,5 @@ +{ + "no-duplicate-heading": { + "siblings_only": true + } +} diff --git a/docs/opentofu-remote-state.md b/docs/opentofu-remote-state.md new file mode 100644 index 0000000..c70a53f --- /dev/null +++ b/docs/opentofu-remote-state.md @@ -0,0 +1,184 @@ +# OpenTofu remote state + +OpenTofu supports a number of [remote state backends](https://opentofu.org/docs/language/state/remote/) +which can be used to persist state independently of where a deployment is run. +This allows deployments to be made from anywhere that can access the state +without corrupting or conflicting with any existing resources from previous +deployments. + +Using remote state is therefore strongly recommended for environments which +should only be instantiated once, e.g. `production` and `staging`. + +This page provides guidance for configuring remote states using backends +commonly available on OpenStack deployments. + +> [!IMPORTANT] +> In the below replace `$ENV` with the relevant environment name. + +## GitLab + +GitLab can be used with the [http backend](https://opentofu.org/docs/language/settings/backends/http/) +to store separate states for each environment within the GitLab project. +Access is protected by GitLab access tokens, which in the approach below are +persisted to local files. Therefore each repository checkout will need to +authenticate separately, using either a separate token or a shared token from +some external secret store. + +The below is based on the [official docs](https://docs.gitlab.com/user/infrastructure/iac/terraform_state/) +but includes some missing details and is modified for common appliance workflows. + +### Initial setup + +1. Create the backend file: + + ```shell + cp environments/site/tofu/example-backends/gitlab.tf environments/$ENV/tofu + ``` + +2. Modify `environments/$ENV/tofu/gitlab.tf` to set the default for the + project ID. This can be found by clicking the 3-dot menu at the top right of + the GitLab project page. + + ```terraform + # environments/$ENV/tofu/backend.tf: + terraform { + backend "http" {} + } + ``` + +3. Commit it. + +4. Follow the per-checkout steps below. + +### Per-checkout configuration + +1. Create an access token in the GitLab UI, using either: + + a. If project access tokens are available, create one via + Project > Settings > Access tokens. + The token must have `Maintainer` role and `api` scope. + + b. Otherwise create a personal access token via + User profile > Preferences > Access tokens. + The token must have `api` scope. + + Copy the generated secret and set an environment variable: + + ```shell + export TF_VAR_gitlab_access_token=$secret + ``` + +2. If using a personal access token, set the GitLab username as an environment variable: + + ```shell + export TF_VAR_gitlab_username=$your_username + ``` + +3. With the environment activated, initialise OpenTofu. + + If no local state exists run: + + ```shell + cd environments/$ENV/tofu/ + tofu init + ``` + + otherwise append `-migrate-state` to the `init` command to attempt to copy + local state to the new backend. 
+ +OpenTofu is now configured to use GitLab to store state for this environment. + +Repeat for each environment needing remote state. + +> [!CAUTION] +> The GitLab credentials are [persisted](https://opentofu.org/docs/language/settings/backends/configuration/#credentials-and-sensitive-data) +> into a file `environments/$ENV/tofu/.terraform/terraform.tfstate` and any +> plan files. These should therefore not be committed. + +### Token expiry + +If the project token expires repeat the per-checkout configuration, but using +`opentofu init -reconfigure` instead. + +## S3 + +For clouds with S3-compatible object storage (e.g. Ceph with [radosgw](https://docs.ceph.com/en/latest/radosgw/)) +the S3 backend can be used. This approach uses a bucket per environment and +derives credentials from OpenStack credentials, meaning no backend-specific +per-checkout configuration is required. + +### Initial setup + +1. Create an S3 bucket with a name `${cluster_name}-${environment_name}-tfstate` + where: + + - `CLUSTER_NAME` is defined in `environments/$ENV/tofu/main.tf` + - `$ENVIRONMENT_NAME` is the name of the environment directory + + e.g. + + ```shell + openstack container create research-staging-tfstate + ``` + +2. Create `ec2` credentials: + + ```shell + openstack ec2 credentials create + ``` + + and make a note of the `access` field returned. + +3. Create the backend file: + + ```shell + cp environments/site/tofu/example-backends/s3.tf environments/$ENV/tofu + ``` + +4. Modify `environments/$ENV/tofu/s3.tf` to set the default for `s3_backend_endpoint`. + This is the radosgw address. If not known it can be determined by creating a + public bucket, and then getting the URL using + Project > Containers > (your public bucket) > Link + which provides a URL of the form `https://$ENDPOINT/swift/...`. + +5. Add the following to `environments/$ENV/activate`: + + ```bash + export AWS_ACCESS_KEY_ID=$EC2_CREDENTIALS_ACCESS + export AWS_SECRET_ACCESS_KEY=$(openstack ec2 credentials show $AWS_ACCESS_KEY_ID -f value -c secret) + ``` + + replacing `$EC2_CREDENTIALS_ACCESS` with the `access` field of the created + credentials. + + This avoids these credentials being persisted in local files. + +6. Copy the lines above into your shell to set them for your current shell. + +7. With the environment activated, initialise OpenTofu. + + If no local state exists run: + + ```shell + cd environments/$ENV/tofu/ + tofu init + ``` + + otherwise append `-migrate-state` to the `init` command to attempt to copy + local state to the new backend. + +8. If this fails, try setting `use_path_style = true` in `environments/$ENV/tofu/s3.tf`. + +9. Once it works, commit `environments/$ENV/tofu/s3.tf` and `environments/$ENV/activate`. + +OpenTofu is now configured to use the cloud's S3-compatible storage to store +state for this environment. + +Repeat for each environment needing remote state. + +For more configuration options, see the OpenTofu [s3 backend docs](https://opentofu.org/docs/language/settings/backends/s3/). + +### Per-checkout configuration + +The ec2 credentials will automatically be loaded when activating the environment. +For a new checkout simply initialise OpenTofu as normal as described in step 7 above. diff --git a/docs/production.md b/docs/production.md index 83587f9..abebf4f 100644 --- a/docs/production.md +++ b/docs/production.md @@ -316,6 +316,9 @@ The value chosen should be the highest value demonstrated during testing. 
 Note that any time spent blocked due to this parallelism limit does not count
 against the (un-overridable) internal OpenTofu timeout of 30 minutes
 
+Consider configuring [OpenTofu remote state](./opentofu-remote-state.md) for any
+environments which should be unique, e.g. production and staging.
+
 ## Configure appliance
 
 ### Production configuration to consider
diff --git a/environments/site/tofu/example-backends/gitlab.tf b/environments/site/tofu/example-backends/gitlab.tf
new file mode 100644
index 0000000..722744a
--- /dev/null
+++ b/environments/site/tofu/example-backends/gitlab.tf
@@ -0,0 +1,42 @@
+variable "gitlab_username" {
+  type        = string
+  description = <<-EOF
+    Username of actual GitLab user, for personal access token only.
+    Default uses bot account name, for project access token.
+  EOF
+  default     = null
+}
+
+variable "gitlab_access_token" {
+  type        = string
+  description = <<-EOF
+    GitLab Project or Personal access token.
+    Must have Maintainer role (for Project token) and API scope
+  EOF
+}
+
+variable "gitlab_project_id" {
+  type        = string
+  description = "GitLab project ID - click 3-dot menu at the top right of project page"
+  #default = # add here
+}
+
+locals {
+  gitlab_username      = coalesce(var.gitlab_username, "project_${var.gitlab_project_id}_bot")
+  gitlab_state_name    = basename(var.environment_root)
+  gitlab_state_address = "https://gitlab.com/api/v4/projects/${var.gitlab_project_id}/terraform/state/${local.gitlab_state_name}"
+}
+
+# tflint-ignore: terraform_required_version
+terraform {
+  backend "http" {
+    address        = local.gitlab_state_address
+    lock_address   = "${local.gitlab_state_address}/lock"
+    unlock_address = "${local.gitlab_state_address}/lock"
+    username       = local.gitlab_username
+    password       = var.gitlab_access_token
+    lock_method    = "POST"
+    unlock_method  = "DELETE"
+    retry_wait_min = 5
+  }
+}
diff --git a/environments/site/tofu/example-backends/s3.tf b/environments/site/tofu/example-backends/s3.tf
new file mode 100644
index 0000000..d471135
--- /dev/null
+++ b/environments/site/tofu/example-backends/s3.tf
@@ -0,0 +1,25 @@
+variable "s3_backend_endpoint" {
+  type        = string
+  description = "radosgw address without protocol or path e.g. leafcloud.store"
+  #default = # add here
+}
+
+# tflint-ignore: terraform_required_version
+terraform {
+  backend "s3" {
+    endpoint = var.s3_backend_endpoint
+    bucket   = "${var.cluster_name}-${basename(var.environment_root)}-tfstate"
+    key      = "environment.tfstate"
+
+    # Region is required but not used in radosgw:
+    region                 = "dummy"
+    skip_region_validation = true
+
+    # Normally STS is not configured in radosgw:
+    skip_credentials_validation = true
+
+    # Enable path-style S3 URLs (https://<endpoint>/<bucket> instead of https://<bucket>.<endpoint>)
+ # may or may not be required depending on radosgw configuration + use_path_style = true + } +} From 5bedf73c63c3de474190863809fc50e35d1a4988 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Fri, 19 Sep 2025 10:45:46 +0100 Subject: [PATCH 23/50] Remove unused cloudalchemy alertmanager role (is in-repo role instead) (#781) --- requirements.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.yml b/requirements.yml index e577dd3..98785d7 100644 --- a/requirements.yml +++ b/requirements.yml @@ -12,8 +12,6 @@ roles: - src: https://github.com/cloudalchemy/ansible-prometheus.git version: 4d2c8d742de39e50387e0aa6d5510b21c7451343 # need fix in preceeding commit for rocky name: cloudalchemy.prometheus - - src: cloudalchemy.alertmanager - version: 0.19.1 - src: https://github.com/stackhpc/ansible-grafana.git name: cloudalchemy.grafana version: stackhpc-0.19.0 # fix grafana install From 3b4be853a17156d9ce2802dda5e8bed0ef6dcb44 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Tue, 23 Sep 2025 11:23:36 +0200 Subject: [PATCH 24/50] Fix various typos --- ansible/roles/firewalld/handlers/main.yml | 2 +- ansible/roles/firewalld/tasks/runtime.yml | 6 +++--- ansible/roles/nhc/README.md | 2 +- docs/sequence.md | 2 +- environments/common/inventory/groups | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ansible/roles/firewalld/handlers/main.yml b/ansible/roles/firewalld/handlers/main.yml index 0e8c3df..c498f70 100644 --- a/ansible/roles/firewalld/handlers/main.yml +++ b/ansible/roles/firewalld/handlers/main.yml @@ -1,5 +1,5 @@ --- -- name: Restart filewalld +- name: Restart firewalld ansible.builtin.service: name: firewalld state: restarted diff --git a/ansible/roles/firewalld/tasks/runtime.yml b/ansible/roles/firewalld/tasks/runtime.yml index 03a5356..4c3b8ec 100644 --- a/ansible/roles/firewalld/tasks/runtime.yml +++ b/ansible/roles/firewalld/tasks/runtime.yml @@ -1,11 +1,11 @@ --- -- name: Apply filewalld configs # noqa: args[module] +- name: Apply firewalld configs # noqa: args[module] ansible.posix.firewalld: "{{ item }}" - notify: Restart filewalld + notify: Restart firewalld loop: "{{ firewalld_configs }}" - ansible.builtin.meta: flush_handlers -- name: Ensure filewalld state +- name: Ensure firewalld state ansible.builtin.systemd: name: firewalld state: "{{ firewalld_state }}" diff --git a/ansible/roles/nhc/README.md b/ansible/roles/nhc/README.md index a826932..689f054 100644 --- a/ansible/roles/nhc/README.md +++ b/ansible/roles/nhc/README.md @@ -21,7 +21,7 @@ To enable node health checks, ensure the `nhc` group contains the `compute` grou compute ``` -When the `anisble/site.yml` playbook is run this will automatically: +When the `ansible/site.yml` playbook is run this will automatically: 1. Add NHC-related configuration to the `slurm.conf` Slurm configuration file. The default configuration is defined in `openhpc_config_nhc` diff --git a/docs/sequence.md b/docs/sequence.md index 8149290..6f3b779 100644 --- a/docs/sequence.md +++ b/docs/sequence.md @@ -8,7 +8,7 @@ This sequence applies to both: control,login,compute inventory groups to install all packages, e.g. StackHPC CI builds - "extra" builds, starting from StackHPC images and using selected inventory - groups to add specfic features for a site-specific image. + groups to add specific features for a site-specific image. Note that a generic Pulp server is shown in the below diagram. This may be StackHPC's Ark server or a local Pulp mirroring Ark. 
It is assumed a local Pulp diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index c02226b..ef24952 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -84,7 +84,7 @@ cluster # https://www.fail2ban.org/wiki/index.php/Main_Page [firewalld:children] -# Hosts to install firewalld on - see ansible/roles/filewalld +# Hosts to install firewalld on - see ansible/roles/firewalld fail2ban [basic_users] From 67b2658d75b389e8273fd426984e7e38aaf541b8 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Wed, 24 Sep 2025 11:01:22 +0100 Subject: [PATCH 25/50] Update dnf repo snapshots (+ source repos, removes RL8 Lustre build CI) (#792) * update dnf_repos_timestamps.yml * bump Ark timestamps * update again * make it possible NOT to clean up packer builds * fixup source repo path typo * add missing RL8 PowerTools source repo * correct RL8 source repo files * update timestamps * bump CI image * disable Lustre for RL8 extrabuild tests due to kernel mismatch --------- Co-authored-by: bertiethorpe --- .github/workflows/extra.yml | 2 +- .github/workflows/fatimage.yml | 3 +- .../tofu/cluster_image.auto.tfvars.json | 4 +- .../group_vars/all/dnf_repo_timestamps.yml | 66 ++++++++++++++++--- 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/.github/workflows/extra.yml b/.github/workflows/extra.yml index 1941064..b8531c1 100644 --- a/.github/workflows/extra.yml +++ b/.github/workflows/extra.yml @@ -25,7 +25,7 @@ jobs: build: - image_name: openhpc-extra-RL8 source_image_name_key: RL8 # key into environments/.stackhpc/tofu/cluster_image.auto.tfvars.json - inventory_groups: doca,cuda,lustre + inventory_groups: doca,cuda # lustre disabled due to https://github.com/stackhpc/ansible-slurm-appliance/pull/759 volume_size: 35 # needed for cuda - image_name: openhpc-extra-RL9 source_image_name_key: RL9 diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 407bd44..d9884ca 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -54,6 +54,7 @@ jobs: - name: Record settings for CI cloud run: | echo CI_CLOUD: ${{ env.CI_CLOUD }} + echo cleanup_on_failure: ${{ github.event.inputs.cleanup_on_failure }} - name: Setup ssh run: | @@ -91,7 +92,7 @@ jobs: packer init . 
PACKER_LOG=1 packer build \ - -on-error=${{ github.event.inputs.cleanup_on_failure && 'cleanup' || 'abort' }} \ + -on-error=${{ github.event.inputs.cleanup_on_failure == 'true' && 'cleanup' || 'abort' }} \ -var-file="$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl" \ -var "source_image_name=${{ matrix.build.source_image_name }}" \ -var "image_name=${{ matrix.build.image_name }}" \ diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 2000b5e..b0d1022 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250918-0840-930223fb", - "RL9": "openhpc-RL9-250918-0840-930223fb" + "RL8": "openhpc-RL8-250923-1321-5fcc36b0", + "RL9": "openhpc-RL9-250923-1321-5fcc36b0" } } diff --git a/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml index c80a85a..ef2e2d4 100644 --- a/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml +++ b/environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml @@ -29,7 +29,7 @@ dnf_repos_default: appstream: '8.10': pulp_path: rocky/8.10/AppStream/x86_64/os - pulp_timestamp: 20250614T013846 + pulp_timestamp: 20250923T022841 repo_file: Rocky-AppStream '9.4': pulp_path: rocky/9.4/AppStream/x86_64/os @@ -41,12 +41,21 @@ dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/AppStream/x86_64/os - pulp_timestamp: 20250902T060015 + pulp_timestamp: 20250923T031638 + repo_file: rocky + appstream-source: + '8.10': + pulp_path: rocky/8.10/AppStream/source/os + pulp_timestamp: 20250923T024945 + repo_file: Rocky-Sources + '9.6': + pulp_path: rocky/9.6/AppStream/source/os + pulp_timestamp: 20250923T043546 repo_file: rocky baseos: '8.10': pulp_path: rocky/8.10/BaseOS/x86_64/os - pulp_timestamp: 20250614T013846 + pulp_timestamp: 20250918T034501 repo_file: Rocky-BaseOS '9.4': pulp_path: rocky/9.4/BaseOS/x86_64/os @@ -58,12 +67,21 @@ dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/BaseOS/x86_64/os - pulp_timestamp: 20250902T094855 + pulp_timestamp: 20250923T045903 + repo_file: rocky + baseos-source: + '8.10': + pulp_path: rocky/8.10/BaseOS/source/os + pulp_timestamp: 20250918T040529 + repo_file: Rocky-Sources + '9.6': + pulp_path: rocky/9.6/BaseOS/source/os + pulp_timestamp: 20250923T043546 repo_file: rocky crb: '8.10': pulp_path: rocky/8.10/PowerTools/x86_64/os - pulp_timestamp: 20250614T013846 + pulp_timestamp: 20250918T034501 repo_file: Rocky-PowerTools repo_name: powertools '9.4': @@ -76,16 +94,35 @@ dnf_repos_default: repo_file: rocky '9.6': pulp_path: rocky/9.6/CRB/x86_64/os - pulp_timestamp: 20250902T060015 + pulp_timestamp: 20250923T031638 + repo_file: rocky + crb-source: + '8.10': + pulp_path: rocky/8.10/PowerTools/source/tree + pulp_timestamp: 20250923T125600 + repo_file: Rocky-Sources + repo_name: powertools-source + '9.6': + pulp_path: rocky/9.6/CRB/source/os + pulp_timestamp: 20250923T043546 repo_file: rocky epel: '8': pulp_path: epel/8/Everything/x86_64 - pulp_timestamp: 20250615T234151 + pulp_timestamp: 20250923T001717 repo_file: epel '9': pulp_path: epel/9/Everything/x86_64 - pulp_timestamp: 20250908T001730 + pulp_timestamp: 20250923T001717 + repo_file: epel + epel-source: + '8': + pulp_path: epel/8/Everything/source + pulp_timestamp: 20250923T001717 + repo_file: epel + '9': + pulp_path: 
epel/9/Everything/source + pulp_timestamp: 20250923T001717 repo_file: epel extras: '8.10': @@ -104,13 +141,22 @@ dnf_repos_default: pulp_path: rocky/9.6/extras/x86_64/os pulp_timestamp: 20250726T040613 repo_file: rocky-extras + extras-source: + '8.10': + pulp_path: rocky/8.10/extras/source/os + pulp_timestamp: 20250828T161842 + repo_file: Rocky-Sources + '9.6': + pulp_path: rocky/9.6/extras/source/os + pulp_timestamp: 20250828T161842 + repo_file: rocky-extras grafana: '8': pulp_path: grafana/oss/rpm - pulp_timestamp: 20250730T011314 + pulp_timestamp: 20250917T024714 repo_file: grafana timestamp: 20250615T005738 '9': pulp_path: grafana/oss/rpm - pulp_timestamp: 20250906T025340 + pulp_timestamp: 20250917T024714 repo_file: grafana From dbf142245aaeefc8a7b07754cf380396bf612368 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Wed, 24 Sep 2025 15:06:43 +0100 Subject: [PATCH 26/50] Validate nodegroup names (#793) * validate nodename groups * add validation for nodegroup name clashes * add validation for nodegroup name clashes * fix linter whinges * extend validation to cover additional_nodegroups * fix TF linting * fixup logic * fix logic * fix linter --- environments/.stackhpc/tofu/ARCUS.tfvars | 6 +-- .../.stackhpc/tofu/LEAFCLOUD-dev.tfvars | 14 +++---- environments/.stackhpc/tofu/LEAFCLOUD.tfvars | 14 +++---- environments/.stackhpc/tofu/SMS.tfvars | 10 ++--- environments/.stackhpc/tofu/main.tf | 2 +- environments/site/tofu/variables.tf | 41 +++++++++++++++++-- 6 files changed, 61 insertions(+), 26 deletions(-) diff --git a/environments/.stackhpc/tofu/ARCUS.tfvars b/environments/.stackhpc/tofu/ARCUS.tfvars index 6aec599..40daa51 100644 --- a/environments/.stackhpc/tofu/ARCUS.tfvars +++ b/environments/.stackhpc/tofu/ARCUS.tfvars @@ -1,4 +1,4 @@ -cluster_net = "portal-internal" -cluster_subnet = "portal-internal" +cluster_net = "portal-internal" +cluster_subnet = "portal-internal" control_node_flavor = "vm.ska.cpu.general.eighth" -other_node_flavor = "vm.ska.cpu.general.small" +other_node_flavor = "vm.ska.cpu.general.small" diff --git a/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars index 82e336d..b45a961 100644 --- a/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD-dev.tfvars @@ -1,10 +1,10 @@ cluster_networks = [ - { - network = "stackhpc-dev" - subnet = "stackhpc-dev" - } + { + network = "stackhpc-dev" + subnet = "stackhpc-dev" + } ] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment -other_node_flavor = "en1.xsmall" -state_volume_type = "unencrypted" -home_volume_type = "unencrypted" +other_node_flavor = "en1.xsmall" +state_volume_type = "unencrypted" +home_volume_type = "unencrypted" diff --git a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars index 135aadc..601910a 100644 --- a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars @@ -1,10 +1,10 @@ cluster_networks = [ - { - network = "slurmapp-ci" - subnet = "slurmapp-ci" - } + { + network = "slurmapp-ci" + subnet = "slurmapp-ci" + } ] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment -other_node_flavor = "en1.xsmall" -state_volume_type = "unencrypted" -home_volume_type = "unencrypted" +other_node_flavor = "en1.xsmall" +state_volume_type = "unencrypted" +home_volume_type = "unencrypted" diff --git 
a/environments/.stackhpc/tofu/SMS.tfvars b/environments/.stackhpc/tofu/SMS.tfvars index 808821b..6d14fc2 100644 --- a/environments/.stackhpc/tofu/SMS.tfvars +++ b/environments/.stackhpc/tofu/SMS.tfvars @@ -1,8 +1,8 @@ cluster_networks = [ - { - network = "stackhpc-ipv4-geneve" - subnet = "stackhpc-ipv4-geneve-subnet" - } + { + network = "stackhpc-ipv4-geneve" + subnet = "stackhpc-ipv4-geneve-subnet" + } ] control_node_flavor = "general.v1.small" -other_node_flavor = "general.v1.small" \ No newline at end of file +other_node_flavor = "general.v1.small" \ No newline at end of file diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index 22113cd..649f2f7 100644 --- a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -76,7 +76,7 @@ module "cluster" { control_node_flavor = var.control_node_flavor login = { - login = { + head = { nodes = ["login-0"] flavor = var.other_node_flavor } diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 98f364a..82358b2 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -52,7 +52,8 @@ variable "login" { be useful for e.g. separating nodes for ssh and Open Ondemand usage, or to define login nodes with different capabilities such as high-memory. - Keys are names of groups. + Keys are names of groups, and cannot be 'login', 'compute', 'control', or + keys in the compute or additional_nodegroups variables. Values are a mapping as follows: Required: @@ -88,6 +89,25 @@ variable "login" { EOF type = any + validation { + condition = length(setintersection(keys(var.login), ["login", "compute", "control"])) == 0 + error_message = <<-EOF + Login nodegroup names cannot be 'login', 'compute' or 'control'. Invalid var.login key(s): ${join(", ", setintersection(keys(var.login), ["login", "compute", "control"]))}. + EOF + } + validation { + condition = length(distinct(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups)))) == length(concat(keys(var.login), keys(var.compute), keys(var.additional_nodegroups))) + error_message = <<-EOF + Nodegroup names must be unique. Shared key(s) found in variables login, compute and/or additional_nodegroups: ${ + join(", ", setunion( + setintersection(keys(var.login), keys(var.compute)), + setintersection(keys(var.compute), keys(var.additional_nodegroups)), + setintersection(keys(var.additional_nodegroups), keys(var.login)) + )) + } + EOF + +} } variable "cluster_image_id" { @@ -101,7 +121,8 @@ variable "compute" { Mapping defining homogenous groups of compute nodes. Groups are used in Slurm partition definitions. - Keys are names of groups. + Keys are names of groups, and cannot be 'compute', 'login', 'control', 'default' + or keys in the login or additional_nodegroups variables. Values are a mapping as follows: Required: @@ -139,6 +160,12 @@ variable "compute" { EOF type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + validation { + condition = length(setintersection(keys(var.compute), ["login", "compute", "control", "default"])) == 0 + error_message = <<-EOF + Compute nodegroup names cannot be 'compute', 'default', 'login' or 'control'. Invalid var.compute key(s): ${join(", ", setintersection(keys(var.compute), ["login", "compute", "control", "default"]))}. 
+ EOF + } } # tflint-ignore: terraform_typed_variables @@ -149,7 +176,8 @@ variable "additional_nodegroups" { These nodes are not in the compute or login inventory groups so they will not run slurmd. - Keys are names of groups. + Keys are names of groups and cannot be 'login', 'compute, 'control', or + keys in the login or additional_nodegroups variables. Values are a mapping as for the "login" variable, with the addition of the optional entry: @@ -162,6 +190,13 @@ variable "additional_nodegroups" { - $cluster_name + '_' + $group_name - 'additional' EOF + type = any # can't do any better; TF type constraints can't cope with heterogeneous inner mappings + validation { + condition = length(setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"])) == 0 + error_message = <<-EOF + Additional nodegroup names cannot be 'compute', 'login' or 'control'. Invalid var.additional_nodegroups key(s): ${join(", ", setintersection(keys(var.additional_nodegroups), ["login", "compute", "control"]))}. + EOF + } } variable "environment_root" { From 4548b9b5962660121523a7e3c6d4a57ef87f76bb Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:40:06 +0100 Subject: [PATCH 27/50] Bump Open OnDemand to v4 & install apps in fatimage (#782) * bump OSC's OOD v4.0.1 * pin ondemand 4.0.7 in common env * install ood app packages in fatimage.yml * make packer volume 20 GB to manage ood app packages * fix typo * bump images * update ood cleanup paths triggering trivy errors * bump fatimages * noqa yaml[brackets] for OOD options * fix linter warnings about flow-style * remove wrong comment * Add module FQDN * pickup task name fixes from PR#794 * bump CI image --------- Co-authored-by: Steve Brasier Co-authored-by: Steve Brasier <33413598+sjpb@users.noreply.github.com> --- .yamllint.yml | 2 +- ansible/cleanup.yml | 4 ++-- ansible/fatimage.yml | 23 ++++++++++++++----- .../tofu/cluster_image.auto.tfvars.json | 4 ++-- .../inventory/group_vars/all/openondemand.yml | 11 ++++----- packer/openstack.pkr.hcl | 2 +- requirements.yml | 2 +- 7 files changed, 28 insertions(+), 20 deletions(-) diff --git a/.yamllint.yml b/.yamllint.yml index 3220260..650a27a 100644 --- a/.yamllint.yml +++ b/.yamllint.yml @@ -3,7 +3,7 @@ extends: default rules: brackets: - forbid: non-empty + forbid: false comments: # https://github.com/prettier/prettier/issues/6780 min-spaces-from-content: 1 diff --git a/ansible/cleanup.yml b/ansible/cleanup.yml index 6b495d7..b9a0d72 100644 --- a/ansible/cleanup.yml +++ b/ansible/cleanup.yml @@ -47,8 +47,8 @@ loop: # NB: items here MUST have a justification! 
# ondemand install: raised at https://github.com/OSC/ondemand/security/advisories/GHSA-f7j8-ppqm-m5vw # All declared not to be an issue by Open Ondemand as relevant packages not installed - - "/opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-2.7.0/test/dummy/Gemfile.lock" - - "/opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-4.5.0/demo/yarn.lock" + - "/opt/ood/ondemand/root/usr/share/gems/3.3/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-2.7.0/test/dummy/Gemfile.lock" + - "/opt/ood/ondemand/root/usr/share/gems/3.3/ondemand/{{ ondemand_package_version }}-1/gems/bootstrap_form-5.0.0/demo/yarn.lock" - /var/www/ood/apps/sys/dashboard/node_modules/data-confirm-modal/Gemfile.lock # chrony role: only used for role dev, venv never created on disk - /etc/ansible-init/playbooks/roles/mrlesmithjr.chrony/poetry.lock diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 8e8e58a..7565af6 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -121,33 +121,44 @@ when: "'openhpc' in group_names" # - import_playbook: portal.yml - - name: Open Ondemand server (packages) + - name: Open OnDemand server (packages) ansible.builtin.include_role: name: osc.ood tasks_from: install-package.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" when: "'openondemand' in group_names" - # # FUTURE: install-apps.yml - this is git clones - - name: Open Ondemand server (apps) + - name: Open OnDemand server (apps) ansible.builtin.include_role: name: osc.ood tasks_from: install-apps.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" when: "'openondemand' in group_names" - - name: Open Ondemand remote desktop + - name: Open OnDemand remote desktop # Used for plain desktop and matlab ansible.builtin.import_role: name: openondemand tasks_from: vnc_compute.yml - when: "'openondemand_desktop' in group_names" + when: "'openondemand_desktop' or 'openondemand_matlab' in group_names" - - name: Open Ondemand jupyter node + - name: Open OnDemand Jupyter node ansible.builtin.import_role: name: openondemand tasks_from: jupyter_compute.yml when: "'openondemand_jupyter' in group_names" + - name: Open OnDemand RStudio node + ansible.builtin.import_role: + name: openondemand + tasks_from: rstudio_compute.yml + when: "'openondemand_rstudio' in group_names" + + - name: Open OnDemand Code Server node + ansible.builtin.import_role: + name: openondemand + tasks_from: codeserver_compute.yml + when: "'openondemand_codeserver' in group_names" + - name: Install Apache PAM module # Extracted from start of roles/openondemand/tasks/pam_auth.yml to ensure only installed during build ansible.builtin.dnf: name: mod_authnz_pam diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index b0d1022..bcb56cf 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250923-1321-5fcc36b0", - "RL9": "openhpc-RL9-250923-1321-5fcc36b0" + "RL8": "openhpc-RL8-250924-1502-e9afbfe5", + "RL9": "openhpc-RL9-250924-1536-e9afbfe5" } } diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 7727052..ea88b08 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ 
b/environments/common/inventory/group_vars/all/openondemand.yml @@ -5,7 +5,7 @@ # NB: Variables prefixed ood_ are all from https://github.com/OSC/ood-ansible -ondemand_package_version: "3.1.10" # used in ansible/cleanup.yml +ondemand_package_version: "4.0.7" # used in ansible/cleanup.yml ondemand_package: ondemand-"{{ ondemand_package_version }}" # osc.ood role var controlling installed package openondemand_servername: "{{ hostvars[groups['openondemand'].0].ansible_host if groups['openondemand'] else '' }}" @@ -198,8 +198,7 @@ openondemand_apps_rstudio_default: help: Choose your RStudio module widget: select options: - - "RStudio v{{ openondemand_rstudio_version }}" - - "rstudio-server/{{ openondemand_rstudio_version }}}" + - ["RStudio v{{ openondemand_rstudio_version }}", "rstudio-server/{{ openondemand_rstudio_version }}"] extra_modules_script: label: Extra modules script help: If you'd like to load additional modules alongside RStudio-Server, put the 'module load ...' commands into a text file (one 'module load...' per line) and specify its path here # noqa: yaml[line-length] @@ -288,8 +287,7 @@ openondemand_apps_matlab_default: help: Choose your MATLAB module widget: select options: - - "MATLAB v{{ openondemand_matlab_version }}" - - "matlab/{{ openondemand_matlab_version }}" + - ["MATLAB v{{ openondemand_matlab_version }}", "matlab/{{ openondemand_matlab_version }}"] cores: label: Number of CPU cores help: How many CPU cores to reserve for your session. NB Ensure this is within the maximum allowed by your chosen partition. @@ -349,8 +347,7 @@ openondemand_apps_codeserver_default: help: Choose your Code Server module widget: select options: - - "Code Server v{{ openondemand_code_server_version}}" - - "code-server/{{ openondemand_code_server_version }}" + - ["Code Server v{{ openondemand_code_server_version}}", "code-server/{{ openondemand_code_server_version }}"] bc_queue: value: "{{ openondemand_codeserver_partition | default(none) }}" cores: diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 3f93d50..9faf4bb 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -125,7 +125,7 @@ variable "volume_type" { variable "volume_size" { type = number - default = 15 + default = 20 } variable "image_disk_format" { diff --git a/requirements.yml b/requirements.yml index 98785d7..27dbcbe 100644 --- a/requirements.yml +++ b/requirements.yml @@ -17,7 +17,7 @@ roles: version: stackhpc-0.19.0 # fix grafana install - src: https://github.com/OSC/ood-ansible.git name: osc.ood - version: v3.1.5 + version: v4.0.1 - src: https://github.com/stackhpc/ansible-role-os-manila-mount.git name: stackhpc.os-manila-mount version: v25.3.1 From ab4a5ae62c0d90b606b7be20f832f70f09d82e35 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Thu, 25 Sep 2025 20:48:15 +0100 Subject: [PATCH 28/50] Support software raid root disks in stackhpc images (#785) * support raid root disks in stackhpc-built images * clarify image requirements * bump CI image * fixup grub for RL8 * fix linter issues * fix raid kernel commandline configuration for RL8 [no ci] * bump CI image * fix handler ansible-lint errors * bump CI image --- ansible/.gitignore | 2 ++ ansible/extras.yml | 9 ++++++++- ansible/roles/raid/README.md | 17 +++++++++++++++++ ansible/roles/raid/handlers/main.yml | 3 +++ ansible/roles/raid/tasks/main.yml | 17 +++++++++++++++++ environments/.stackhpc/inventory/extra_groups | 4 ++++ .../tofu/cluster_image.auto.tfvars.json | 4 ++-- 
 environments/common/inventory/groups          | 3 +++
 environments/site/inventory/groups            | 3 +++
 9 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 ansible/roles/raid/README.md
 create mode 100644 ansible/roles/raid/handlers/main.yml
 create mode 100644 ansible/roles/raid/tasks/main.yml

diff --git a/ansible/.gitignore b/ansible/.gitignore
index 62c9a54..b5b3572 100644
--- a/ansible/.gitignore
+++ b/ansible/.gitignore
@@ -98,3 +98,5 @@ roles/*
 !roles/eessi/**
 !roles/topology/
 !roles/topology/**
+!roles/raid/
+!roles/raid/**
diff --git a/ansible/extras.yml b/ansible/extras.yml
index 02b0d40..3c790be 100644
--- a/ansible/extras.yml
+++ b/ansible/extras.yml
@@ -1,4 +1,11 @@
----
+- hosts: raid
+  become: true
+  tags: raid
+  gather_facts: true
+  tasks:
+    - ansible.builtin.include_role:
+        name: raid
+
 - hosts: k3s_server:!builder
   become: true
   tags: k3s
diff --git a/ansible/roles/raid/README.md b/ansible/roles/raid/README.md
new file mode 100644
index 0000000..4774a46
--- /dev/null
+++ b/ansible/roles/raid/README.md
@@ -0,0 +1,17 @@
+# raid
+
+Configure an image to support software raid (via [mdadm](https://github.com/md-raid-utilities/mdadm)).
+
+RockyLinux genericcloud images already have the necessary `mdraid` dracut
+module installed, as well as kernel modules for `raid0`, `raid1`, `raid10` and
+`raid456` [^1]. This covers all raid modes [supported by Ironic](https://docs.openstack.org/ironic/latest/admin/raid.html#software-raid)
+hence this role does not support extending this.
+
+This role changes the command line for the current kernel. It does not reboot
+the instance so generally is only useful during image builds.
+
+Note that the `rootfs_uuid` image property described in the [Ironic raid documentation](https://docs.openstack.org/ironic/latest/admin/raid.html#image-requirements)
+is not required; the root partition is the first (non-boot) partition and this
+is sufficient for Ironic to find the root file system.
+ +[^1]: As shown by `lsinitrd /boot/initramfs-$(uname -r).img | grep raid` diff --git a/ansible/roles/raid/handlers/main.yml b/ansible/roles/raid/handlers/main.yml new file mode 100644 index 0000000..02867f7 --- /dev/null +++ b/ansible/roles/raid/handlers/main.yml @@ -0,0 +1,3 @@ +- name: Update GRUB configuration file + ansible.builtin.command: "grub2-mkconfig -o /boot/grub2/grub.cfg {{ '--update-bls-cmdline' if ansible_distribution_major_version == '9' else '' }}" + changed_when: true diff --git a/ansible/roles/raid/tasks/main.yml b/ansible/roles/raid/tasks/main.yml new file mode 100644 index 0000000..3ea61db --- /dev/null +++ b/ansible/roles/raid/tasks/main.yml @@ -0,0 +1,17 @@ +- name: Enable autoassembly of mdraid devices + # adds rd.auto=1 - see `man dracut.cmdline` + ansible.builtin.lineinfile: + path: /etc/default/grub + regexp: > + ^{{ grub_cmdline_var[ansible_distribution_major_version] }}="((?:(?!rd.auto=1).)*?)"$ + line: > + {{ grub_cmdline_var[ansible_distribution_major_version] }}="\1 rd.auto=1" + backup: true + backrefs: true + register: update_grub + notify: + - Update GRUB configuration file + vars: + grub_cmdline_var: + '8': GRUB_CMDLINE_LINUX + '9': GRUB_CMDLINE_LINUX_DEFAULT diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups index f3a9964..29d9d93 100644 --- a/environments/.stackhpc/inventory/extra_groups +++ b/environments/.stackhpc/inventory/extra_groups @@ -50,3 +50,7 @@ cluster [compute_init:children] compute + +[raid:children] +# Configure fatimage for raid +builder diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index bcb56cf..9650ccd 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250924-1502-e9afbfe5", - "RL9": "openhpc-RL9-250924-1536-e9afbfe5" + "RL8": "openhpc-RL8-250925-1639-62d67ae3", + "RL9": "openhpc-RL9-250925-1639-62d67ae3" } } diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index ef24952..2c67c4a 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -217,3 +217,6 @@ extra_packages # separately from the appliance. e.g # pulp_host ansible_host= # Note the host name can't conflict with group names i.e can't be called `pulp` or `pulp_server` + +[raid] +# Add `builder` to configure image for software raid diff --git a/environments/site/inventory/groups b/environments/site/inventory/groups index 930cf93..85d7e36 100644 --- a/environments/site/inventory/groups +++ b/environments/site/inventory/groups @@ -169,3 +169,6 @@ compute # separately from the appliance. e.g # pulp_host ansible_host= # Note inventory host name cannot conflict with group names i.e can't be called `pulp` or `pulp_server`. 
+ +[raid] +# Add `builder` to configure image for software raid From 00c044ff56045110c6d89ec78b61ca8939d9b4c4 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Mon, 29 Sep 2025 15:16:16 +0100 Subject: [PATCH 29/50] Fix .caas secrets not persisting post-reimage + skip failing validation check for .caas --- ansible/roles/persist_openhpc_secrets/tasks/main.yml | 8 ++++++++ ansible/validate.yml | 1 + environments/.caas/inventory/group_vars/all/defaults.yml | 1 + environments/common/inventory/group_vars/all/defaults.yml | 1 + 4 files changed, 11 insertions(+) create mode 100644 environments/.caas/inventory/group_vars/all/defaults.yml diff --git a/ansible/roles/persist_openhpc_secrets/tasks/main.yml b/ansible/roles/persist_openhpc_secrets/tasks/main.yml index dc12e2a..35fd045 100644 --- a/ansible/roles/persist_openhpc_secrets/tasks/main.yml +++ b/ansible/roles/persist_openhpc_secrets/tasks/main.yml @@ -15,6 +15,14 @@ - "{{ appliances_state_dir }}/ansible.facts.d" - "/etc/ansible/facts.d" +- name: Symlink to persisted facts if present + ansible.builtin.file: + state: link + src: "{{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact" + dest: /etc/ansible/facts.d/openhpc_secrets.fact + owner: root + when: openhpc_secrets_stat.stat.exists + - name: Load existing OpenHPC secrets if present ansible.builtin.setup: filter: ansible_local diff --git a/ansible/validate.yml b/ansible/validate.yml index 2352fff..e1d03a2 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -77,6 +77,7 @@ - cluster_home_volume is defined - cluster_compute_groups is defined fail_msg: "One or more expected variables are missing: is OpenTofu inventory template up to date?" + when: not appliances_caas_skip_validate_vars - name: Ensure control node is in inventory hosts: all diff --git a/environments/.caas/inventory/group_vars/all/defaults.yml b/environments/.caas/inventory/group_vars/all/defaults.yml new file mode 100644 index 0000000..7ec96c0 --- /dev/null +++ b/environments/.caas/inventory/group_vars/all/defaults.yml @@ -0,0 +1 @@ +appliances_caas_skip_validate_vars: true diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 027e407..c11cc44 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -8,6 +8,7 @@ appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in gen # appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure appliances_pulp_url: https://ark.stackhpc.com +appliances_caas_skip_validate_vars: false # Address(ip/dns) for internal communication between services. This is # normally traffic you do no want to expose to users. 
From 9fb0bf81ed743e58bc445aa8ce9dae7b47b70f44 Mon Sep 17 00:00:00 2001 From: wtripp180901 <78219569+wtripp180901@users.noreply.github.com> Date: Tue, 30 Sep 2025 12:38:42 +0100 Subject: [PATCH 30/50] Pin bcrypt to 4.3.0 to avoid passlib bug (#801) --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7e596f4..4e816af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ python-openstackclient==8.0.0 python-manilaclient python-ironicclient jmespath +bcrypt==4.3.0 passlib[bcrypt]==1.7.4 cookiecutter selinux # this is a shim to avoid having to use --system-site-packages, you still need sudo yum install libselinux-python3 From d8b3cf6480883c54f182c524e9af6a766486e23c Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Tue, 30 Sep 2025 12:46:37 +0100 Subject: [PATCH 31/50] changed variable name --- ansible/validate.yml | 2 +- environments/.caas/inventory/group_vars/all/defaults.yml | 2 +- environments/common/inventory/group_vars/all/defaults.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/validate.yml b/ansible/validate.yml index e1d03a2..b43eb7e 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -77,7 +77,7 @@ - cluster_home_volume is defined - cluster_compute_groups is defined fail_msg: "One or more expected variables are missing: is OpenTofu inventory template up to date?" - when: not appliances_caas_skip_validate_vars + when: appliances_validate_tofu_vars | bool - name: Ensure control node is in inventory hosts: all diff --git a/environments/.caas/inventory/group_vars/all/defaults.yml b/environments/.caas/inventory/group_vars/all/defaults.yml index 7ec96c0..ded58fe 100644 --- a/environments/.caas/inventory/group_vars/all/defaults.yml +++ b/environments/.caas/inventory/group_vars/all/defaults.yml @@ -1 +1 @@ -appliances_caas_skip_validate_vars: true +appliances_validate_tofu_vars: false diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index c11cc44..e9852af 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -8,7 +8,7 @@ appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in gen # appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform appliances_mode: configure appliances_pulp_url: https://ark.stackhpc.com -appliances_caas_skip_validate_vars: false +appliances_validate_tofu_vars: true # Address(ip/dns) for internal communication between services. This is # normally traffic you do no want to expose to users. 
From 7e88f5a5b31472ff3654c788a7dd5ce7097691c7 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 30 Sep 2025 19:18:24 +0000 Subject: [PATCH 32/50] move image download/conversion to runner's /mnt [no ci] --- .github/workflows/s3-image-sync.yml | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/.github/workflows/s3-image-sync.yml b/.github/workflows/s3-image-sync.yml index 43adf50..f73885c 100644 --- a/.github/workflows/s3-image-sync.yml +++ b/.github/workflows/s3-image-sync.yml @@ -93,33 +93,24 @@ jobs: echo "TARGET_IMAGE=${TARGET_IMAGE}" >> "$GITHUB_ENV" shell: bash - - name: Clear up some space on runner - run: | - df -h - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo apt-get clean - df -h - - name: Download image to runner run: | . venv/bin/activate - openstack image save --file "${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" + df -h + openstack image save --file "/mnt/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" df -h shell: bash - name: Convert image to QCOW2 run: | . venv/bin/activate - qemu-img convert -f raw -O qcow2 -c "${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" + qemu-img convert -f raw -O qcow2 -c "/mnt/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" shell: bash - name: Upload Image to S3 run: | echo "Uploading Image: ${{ env.TARGET_IMAGE }} to S3..." - s3cmd --multipart-chunk-size-mb=150 put ${{ env.TARGET_IMAGE }} s3://${{ env.S3_BUCKET }} + s3cmd --multipart-chunk-size-mb=150 put "/mnt/${{ env.TARGET_IMAGE }}" s3://${{ env.S3_BUCKET }} shell: bash image_sync: From 4c36537ff3775429ece8c791f5c3be8c03296ff6 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 30 Sep 2025 20:10:40 +0000 Subject: [PATCH 33/50] make image dir --- .github/workflows/s3-image-sync.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/s3-image-sync.yml b/.github/workflows/s3-image-sync.yml index f73885c..b8629eb 100644 --- a/.github/workflows/s3-image-sync.yml +++ b/.github/workflows/s3-image-sync.yml @@ -9,6 +9,7 @@ on: env: S3_BUCKET: openhpc-images-prerelease IMAGE_PATH: environments/.stackhpc/tofu/cluster_image.auto.tfvars.json + RUNNER_IMAGE_DIR: /mnt/images permissions: contents: read @@ -97,20 +98,22 @@ jobs: run: | . venv/bin/activate df -h - openstack image save --file "/mnt/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" + sudo mkdir ${{ env.RUNNER_IMAGE_DIR }} + sudo chmod ugo=rwX ${{ env.RUNNER_IMAGE_DIR }} + openstack image save --file "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" df -h shell: bash - name: Convert image to QCOW2 run: | . venv/bin/activate - qemu-img convert -f raw -O qcow2 -c "/mnt/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" + qemu-img convert -f raw -O qcow2 -c "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" shell: bash - name: Upload Image to S3 run: | echo "Uploading Image: ${{ env.TARGET_IMAGE }} to S3..." 
- s3cmd --multipart-chunk-size-mb=150 put "/mnt/${{ env.TARGET_IMAGE }}" s3://${{ env.S3_BUCKET }} + s3cmd --multipart-chunk-size-mb=150 put "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}" s3://${{ env.S3_BUCKET }} shell: bash image_sync: From 63d918b7b0bc5c0ae0f534dd08c80199c24b15bf Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Oct 2025 08:23:39 +0000 Subject: [PATCH 34/50] fix image upload --- .github/workflows/s3-image-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/s3-image-sync.yml b/.github/workflows/s3-image-sync.yml index b8629eb..3489dc6 100644 --- a/.github/workflows/s3-image-sync.yml +++ b/.github/workflows/s3-image-sync.yml @@ -107,7 +107,7 @@ jobs: - name: Convert image to QCOW2 run: | . venv/bin/activate - qemu-img convert -f raw -O qcow2 -c "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}.raw" "${{ env.TARGET_IMAGE }}" + qemu-img convert -f raw -O qcow2 -c "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}.raw" "${{ env.RUNNER_IMAGE_DIR }}/${{ env.TARGET_IMAGE }}" shell: bash - name: Upload Image to S3 From f351b9dcdd5bc3a062873f8d678c05709cb009d2 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:17:42 -0700 Subject: [PATCH 35/50] bump codeserver app version (#806) --- environments/common/inventory/group_vars/all/openondemand.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index ea88b08..1f7859a 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -100,7 +100,7 @@ openondemand_install_app_matlab: openondemand_install_app_codeserver: codeserver: repo: https://github.com/stackhpc/bc_osc_codeserver.git - version: 2025.08.1 + version: 2025.09.1 # osc:ood role var (NB only active when not in configure): ood_install_apps: >- {{ From 69f71fe41024953fb57cd22a0c00f48550f1a6a1 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Wed, 1 Oct 2025 16:18:17 +0200 Subject: [PATCH 36/50] Use (group) syntax in access.conf (#804) From access.conf(5): The second field, the users/group field, should be a list of one or more login names, group names, or ALL (which always matches). To differentiate user entries from group entries, group entries should be written with brackets, e.g. (group). 
--- ansible/roles/compute_init/files/compute-init.yml | 2 +- ansible/slurm.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 0ff647a..81dedf8 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -346,7 +346,7 @@ ansible.builtin.blockinfile: path: /etc/security/access.conf block: | - +:adm:ALL + +:(adm):ALL -:ALL:ALL - name: Ensure slurmd service state diff --git a/ansible/slurm.yml b/ansible/slurm.yml index d6d306e..345b361 100644 --- a/ansible/slurm.yml +++ b/ansible/slurm.yml @@ -50,7 +50,7 @@ ansible.builtin.blockinfile: path: /etc/security/access.conf block: | - +:adm:ALL + +:(adm):ALL -:ALL:ALL # vagrant uses (deprecated) ansible_ssh_user From 81a25814b697024d2f1f37eb323f9904adeec07b Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Wed, 1 Oct 2025 16:18:50 +0200 Subject: [PATCH 37/50] Remove extra lines in activate scripts (#803) --- cookiecutter/{{cookiecutter.environment}}/activate | 2 -- environments/.stackhpc/activate | 2 -- environments/site/activate | 2 -- 3 files changed, 6 deletions(-) diff --git a/cookiecutter/{{cookiecutter.environment}}/activate b/cookiecutter/{{cookiecutter.environment}}/activate index 2a58b40..c9bb527 100644 --- a/cookiecutter/{{cookiecutter.environment}}/activate +++ b/cookiecutter/{{cookiecutter.environment}}/activate @@ -18,5 +18,3 @@ echo "Setting PKR_VAR_repo_root to $PKR_VAR_repo_root" if [ -f "$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg" ]; then export ANSIBLE_CONFIG=$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg fi - - diff --git a/environments/.stackhpc/activate b/environments/.stackhpc/activate index 2a58b40..c9bb527 100644 --- a/environments/.stackhpc/activate +++ b/environments/.stackhpc/activate @@ -18,5 +18,3 @@ echo "Setting PKR_VAR_repo_root to $PKR_VAR_repo_root" if [ -f "$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg" ]; then export ANSIBLE_CONFIG=$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg fi - - diff --git a/environments/site/activate b/environments/site/activate index 2a58b40..c9bb527 100644 --- a/environments/site/activate +++ b/environments/site/activate @@ -18,5 +18,3 @@ echo "Setting PKR_VAR_repo_root to $PKR_VAR_repo_root" if [ -f "$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg" ]; then export ANSIBLE_CONFIG=$APPLIANCES_ENVIRONMENT_ROOT/ansible.cfg fi - - From 82c814e0283c41ab4d8ba3b298ae474ccbb626c3 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Thu, 2 Oct 2025 14:21:04 +0100 Subject: [PATCH 38/50] bump new fatimages (#808) --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 9650ccd..4137567 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250925-1639-62d67ae3", - "RL9": "openhpc-RL9-250925-1639-62d67ae3" + "RL8": "openhpc-RL8-251001-1515-81a25814", + "RL9": "openhpc-RL9-251001-1424-81a25814" } } From b11696e669d47cfe220e9cb982bcc774a67163d6 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:40:53 +0100 Subject: [PATCH 39/50] Improve build group definitions (#788) * support raid root disks in 
stackhpc-built images * clarify image requirements * bump CI image * remove default build groups * fixup doca/cuda inventory groups * add fatimage inventory group * update docs for image build * minor docs tweaks * fixup fatimage group definition * fix build groups * bump CI image * minor docs tweak * fix linter markdown error * fix linter markdown error * swap example site image build to normal case * fix borked merge * fixes after self-review * bump CI image --- .github/workflows/fatimage.yml | 4 +- docs/image-build.md | 165 +++++++++++++----- docs/operations.md | 43 ++--- environments/.stackhpc/inventory/extra_groups | 20 +-- .../tofu/cluster_image.auto.tfvars.json | 4 +- environments/common/inventory/groups | 20 ++- environments/site/inventory/groups | 35 +++- 7 files changed, 195 insertions(+), 96 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index d9884ca..66f2819 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -36,10 +36,10 @@ jobs: build: - image_name: openhpc-RL8 source_image_name: Rocky-8-GenericCloud-Base-8.10-20240528.0.x86_64.raw - inventory_groups: control,compute,login,update + inventory_groups: fatimage - image_name: openhpc-RL9 source_image_name: Rocky-9-GenericCloud-Base-9.6-20250531.0.x86_64.qcow2 - inventory_groups: control,compute,login,update + inventory_groups: fatimage env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack diff --git a/docs/image-build.md b/docs/image-build.md index 71be030..533bc62 100644 --- a/docs/image-build.md +++ b/docs/image-build.md @@ -1,67 +1,136 @@ # Packer-based image build -The appliance contains code and configuration to use [Packer](https://developer.hashicorp.com/packer) with the [OpenStack builder](https://www.packer.io/plugins/builders/openstack) to build images. - -The Packer configuration defined here builds "fat images" which contain packages, binaries and container images but no cluster-specific configuration. Using these: - -- Enables the image to be tested in CI before production use. -- Ensures re-deployment of the cluster or deployment of additional nodes can be completed even if packages are changed in upstream repositories (e.g. due to RockyLinux or OpenHPC updates). -- Improves deployment speed by reducing the number of package downloads to improve deployment speed. - -The fat images StackHPC builds and tests in CI are available from [GitHub releases](https://github.com/stackhpc/ansible-slurm-appliance/releases). However with some additional configuration it is also possible to: - -1. Build site-specific fat images from scratch. -2. Extend an existing fat image with additional functionality. +The appliance contains configuration to use [Packer](https://developer.hashicorp.com/packer) +with the [OpenStack builder](https://www.packer.io/plugins/builders/openstack) +to build images. Using images: + +- Enables the image to be tested in a `staging` environment before deployment + to the `production` environment. +- Ensures re-deployment of the cluster or deployment of additional nodes is + repeatable. +- Improves deployment speed by reducing the number of package installation. + +The Packer configuration here can be used to build two types of images: + +1. "Fat images" which contain packages, binaries and container images but no + cluster-specific configuration. These start from a RockyLinux GenericCloud + (or compatible) image. 
The fat images StackHPC builds and tests in CI are + available from [GitHub releases](https://github.com/stackhpc/ansible-slurm-appliance/releases). + However site-specific fat images can also be built from a different source + image e.g. if a different partition layout is required. +2. "Extra-build" images which extend a fat image to create a site-specific + image with with additional packages or functionality. For example the NVIDIA + `cuda` packages cannot be redistributed hence require an "extra" build. ## Usage -To build either a site-specific fat image from scratch, or to extend an existing StackHPC fat image: - -1. Ensure the current OpenStack credentials have sufficient authorisation to upload images (this may or may not require the `member` role for an application credential, depending on your OpenStack configuration). -2. The provided dev credentials for StackHPC's "Ark" Pulp server must be added to the target environments. This is done by overriding `dnf_repos_username` and `dnf_repos_password` with your vault encrypted credentials in `environments//inventory/group_vars/all/pulp.yml`. See the [experimental docs](experimental/pulp.md) if you wish instead wish to use a local Pulp server. -3. Create a Packer [variable definition file](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/variables#assigning-values-to-input-variables) at e.g. `environments//builder.pkrvars.hcl` containing at a minimum: - -```hcl -flavor = "general.v1.small" # VM flavor to use for builder VMs -networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # List of network UUIDs to attach the VM to -source_image_name = "Rocky-9-GenericCloud-Base-9.4" # Name of image to create VM with, i.e. starting image -inventory_groups = "control,login,compute" # Additional inventory groups to add build VM to -``` - -Note that: - -- The network used for the Packer VM must provide outbound internet access but does not need to provide access to resources which the final cluster nodes require (e.g. Slurm control node, network filesystem servers etc.). -- The flavor used must have sufficent memory for the build tasks, but otherwise does not need to match the final cluster nodes. Usually 8GB is sufficent. By default, the build VM is volume-backed to allow control of the root disk size (and hence final image size) so the flavor disk size does not matter. -- The source image should be either a RockyLinux GenericCloud image for a site-specific image build from scratch, or a StackHPC fat image if extending an existing image. -- The `inventory_groups` variable takes a comma-separated list of Ansible inventory groups to add the build VM to. This is in addition to the `builder` group which it is always added to. This controls which Ansible roles and functionality run during build, and hence what gets added to the image. - All possible groups are listed in `environments/common/groups` but common options for this variable will be: - - `update,control,login,compute`: The resultant image has all packages in the source image updated, and then packages for all types of nodes in the cluster are added. When using a GenericCloud image for `source_image_name` this builds a site-specific fat image from scratch. - - One or more specific groups which are not enabled in the appliance by default, e.g. `lustre`. When using a StackHPC fat image for `source_image_name` this extends the image with just this additional functionality. +For either a site-specific fat-image build or an extra-build: + +1. 
Ensure the current OpenStack credentials have sufficient authorisation to + upload images (this may or may not require the `member` role for an + application credential, depending on your OpenStack configuration). +2. If package installs are required, add the provided dev credentials for + StackHPC's "Ark" Pulp server to the `site` environment: + + ```yaml + # environments/site/inventory/group_vars/all/dnf_repos.yml: + dnf_repos_username: your-ark-username + dnf_repos_password: "{{ vault_dnf_repos_password }}" + ``` + + ```yaml + # environments/site/inventory/group_vars/all/dnf_repos.yml: + dnf_repos_password: "your-ark-password" + ``` + + > [!IMPORTANT] + > The latter file should be vault-encrypted. + + Alternatively, configure a [local Pulp mirror](experimental/pulp.md). + +3. Create a Packer [variable definition file](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/variables#assigning-values-to-input-variables). It must specify at least the + the following variables: + + ```hcl + # environments/site/builder.pkrvars.hcl: + flavor = "general.v1.small" # VM flavor to use for builder VMs + networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # List of network UUIDs to attach the VM to + source_image_name = "Rocky-9-GenericCloud-Base-9.4" # Name of image to create VM with, i.e. starting image + inventory_groups = "doca,cuda,extra_packages" # Build VM inventory groups => functionality to add to image + ``` + + See the top of [packer/openstack.pkr.hcl](../packer/openstack.pkr.hcl) + for all possible variables which can be set. + + Note that: + + - Normally the network must provide outbound internet access. However it + does not need to provide access to resources used by the actual cluster + nodes (e.g. Slurm control node, network filesystem servers etc.). + - The flavor used must have sufficent memory for the build tasks (usually + 8GB), but otherwise does not need to match the actual cluster node + flavor(s). + - By default, the build VM is volume-backed to allow control of the root + disk size (and hence final image size), so the flavor's disk size does not + matter. The default volume size is not sufficent if enabling `cuda` and/or + `doca` and should be increased: + ```terraform + volume_size = 35 # GB + ``` + - The source image should be either: + - For a site-specific fatimage build: A RockyLinux GenericCloud or + compatible image. + - For an extra-build image: Usually the appropriate StackHPC fat image, + as defined in `environments/.stackhpc/tofu/cluster_image.auto.tfvars.json` at the + checkout's current commit. See the [GitHub release page](https://github.com/stackhpc/ansible-slurm-appliance/releases) + for download links. In some cases extra builds may be chained, e.g. + one extra build adds a Lustre client, and the resulting image is used + as the source image for an extra build adding GPU support. + - The `inventory_groups` variable takes a comma-separated list of Ansible + inventory groups to add the build VM to (in addition to the `builder` + group which is it always in). This controls which Ansible roles and + functionality run during build, and hence what gets added to the image. + All possible groups are listed in `environments/common/groups` but common + options for this variable will be: + + - For a fatimage build: `fatimage`: This is defined in `enviroments/site/inventory/groups` + and results in an update of all packages in the source image, plus + installation of packages for default control, login and compute nodes. 
+ + - For an extra-built image, one or more specific groups. This extends the + source image with just this additional functionality. The example above + installs NVIDIA DOCA network drivers, NVIDIA GPU drivers/Cuda packages + and also enables installation of packages defined in the + `appliances_extra_packages_other` variable (see + [docs/operations.md](./operations.md#adding-additional-packages)). 4. Activate the venv and the relevant environment. 5. Build images using the relevant variable definition file, e.g.: -```shell -cd packer/ -PACKER_LOG=1 /usr/bin/packer build -on-error=ask -var-file=$PKR_VAR_environment_root/builder.pkrvars.hcl openstack.pkr.hcl -``` + ```shell + cd packer/ + PACKER_LOG=1 /usr/bin/packer build -on-error=ask -var-file=../environments/site/builder.pkrvars.hcl openstack.pkr.hcl + ``` -**NB:** If the build fails while creating the volume, check if the source image has the `signature_verified` property: + **NB:** If the build fails while creating the volume, check if the source image has the `signature_verified` property: -```shell -openstack image show $SOURCE_IMAGE -``` + ```shell + openstack image show $SOURCE_IMAGE + ``` -If it does, remove this property: + If it does, remove this property: -```shell -openstack image unset --property signature_verified $SOURCE_IMAGE -``` + ```shell + openstack image unset --property signature_verified $SOURCE_IMAGE + ``` -then delete the failed volume, select cancelling the build when Packer queries, and then retry. This is [OpenStack bug 1823445](https://bugs.launchpad.net/cinder/+bug/1823445). + then delete the failed volume, select cancelling the build when Packer asks, + and then retry. This is [OpenStack bug 1823445](https://bugs.launchpad.net/cinder/+bug/1823445). -6. The built image will be automatically uploaded to OpenStack with a name prefixed `openhpc` and including a timestamp and a shortened Git hash. +6. The built image will be automatically uploaded to OpenStack. By default it + will have a name prefixed `openhpc` and including a timestamp and a shortened + Git hash. ## Build Process diff --git a/docs/operations.md b/docs/operations.md index 4064d44..4127300 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -83,7 +83,7 @@ disabled during runtime to prevent Ark credentials from being leaked. To enable In both cases, Ark credentials will be required. -=# Adding Additional Packages +## Adding Additional Packages By default, the following utility packages are installed during the StackHPC image build: @@ -101,22 +101,27 @@ By default, the following utility packages are installed during the StackHPC ima Additional packages can be added during image builds by: -- adding the `extra_packages` group to the build `inventory_groups` (see - [docs/image-build.md](./image-build.md)) -- defining a list of packages in `appliances_extra_packages_other` in e.g. - `environments/$SITE_ENV/inventory/group_vars/all/defaults.yml`. For example: +1. Configuring an [image build](./image-build.md) to enable the + `extra_packages` group: -```yaml -# environments/foo-base/inventory/group_vars/all/defaults.yml: -appliances_extra_packages_other: - - somepackage - - anotherpackage -``` + ```terraform + # environments/site/builder.pkrvars.hcl: + ... + inventory_groups = "extra_packages" + ... + ``` + +2. 
Defining a list of packages in `appliances_extra_packages_other`, for example: -For packages which come from repositories mirrored by StackHPC's "Ark" Pulp server -(including rocky, EPEL and OpenHPC repositories), this will require either [Ark -credentials](./image-build.md)) or a [local Pulp mirror](./experimental/pulp.md) -to be configured. This includes rocky, EPEL and OpenHPC repos. + ```yaml + # environments/site/inventory/group_vars/all/defaults.yml: + appliances_extra_packages_other: + - somepackage + - anotherpackage + ``` + +3. Either adding [Ark credentials](./image-build.md) or a [local Pulp mirror](./experimental/pulp.md) + to provide access to the required [repository snapshots](../environments/common/inventory/group_vars/all/dnf_repo_timestamps.yml). The packages available from the OpenHPC repos are described in Appendix E of the OpenHPC installation guide (linked from the @@ -125,9 +130,9 @@ the OpenHPC installation guide (linked from the corresponding `lmod` modules. Packages _may_ also be installed during the site.yml, by adding the `cluster` -group into the `extra_packages` group. An error will occur if Ark credentials -are defined in this case, as they are readable by unprivileged users in the -`.repo` files and a local Pulp mirror must be used instead. +group as a child of the `extra_packages` group. An error will occur if Ark +credential are defined in this case, as they are readable by unprivileged users +in the `.repo` files and a local Pulp mirror must be used instead. If additional repositories are required, these could be added/enabled as necessary in a play added to `environments/$SITE_ENV/hooks/{pre,post}.yml` as appropriate. Note such a play should NOT exclude the builder group, so that the repositories are also added to built images. @@ -148,8 +153,6 @@ ansible-playbook environments/$SITE_ENV/hooks/{pre,post}.yml as appropriate. -TODO: improve description about adding these to extra images. 
- ## Reconfiguring Slurm At a minimum run: diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups index 29d9d93..0d7fb53 100644 --- a/environments/.stackhpc/inventory/extra_groups +++ b/environments/.stackhpc/inventory/extra_groups @@ -1,3 +1,5 @@ +# Unless noted otherwise features enabled here are tested by CI site.yml playbook + [basic_users:children] cluster @@ -20,7 +22,7 @@ cluster # --- end of FreeIPA example --- [manila:children] -# Allows demo; also installs manila client in fat image +# Not actully tested but allows demo using this environment login compute @@ -28,20 +30,8 @@ compute cluster [tuned:children] -# Install tuned into fat image -# NB: builder has tuned_enabled and tuned_started false so does not configure it -builder -# Also test tuned during site playbook cluster -[squid:children] -# Install squid into fat image -builder - -[sssd:children] -# Install sssd into fat image -builder - [rebuild:children] control @@ -50,7 +40,3 @@ cluster [compute_init:children] compute - -[raid:children] -# Configure fatimage for raid -builder diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 4137567..585cfe2 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-251001-1515-81a25814", - "RL9": "openhpc-RL9-251001-1424-81a25814" + "RL8": "openhpc-RL8-251002-1537-1d21952c", + "RL9": "openhpc-RL9-251002-1456-1d21952c" } } diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 2c67c4a..558bad1 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -1,3 +1,12 @@ +# This file +# 1. Ensures all groups in the appliance are always defined - even if empty +# 2. Defines dependencies between groups - child groups require & enables parent +# +# IMPORTANT +# --------- +# All groups and child groups here MUST be empty, as other environments cannot +# remove hosts/groups. + [login] # All Slurm login nodes. Combined control/login nodes are not supported. @@ -129,6 +138,9 @@ prometheus freeipa_server freeipa_client +[doca] +# Add `builder` to install NVIDIA DOCA during image build + [cuda] # Hosts to install NVIDIA CUDA on - see ansible/roles/cuda/README.md @@ -193,9 +205,10 @@ k3s_agent [dnf_repos:children] # Hosts to replace system repos with Pulp repos -# Warning: when using Ark directly rather than a local Pulp server, adding hosts other than `builder` will leak Ark creds to users -builder +# Roles/groups listed here *always* do installs: extra_packages +doca +# TODO: can't express: if cuda and builder, enable dnf_repos [pulp_site] # Add builder to this group to automatically sync pulp during image build @@ -220,3 +233,6 @@ extra_packages [raid] # Add `builder` to configure image for software raid + +[fatimage] +# Add build VM into this group to enable all features with this as child diff --git a/environments/site/inventory/groups b/environments/site/inventory/groups index 85d7e36..c40928c 100644 --- a/environments/site/inventory/groups +++ b/environments/site/inventory/groups @@ -1,3 +1,15 @@ +[login:children] +# All Slurm login nodes. Combined control/login nodes are not supported. +fatimage + +[control:children] +# A single Slurm control node. Multiple (high availability) control nodes are not supported. 
+fatimage + +[compute:children] +# All Slurm compute nodes (in all partitions). +fatimage + [nfs:children] openhpc @@ -31,6 +43,7 @@ slurm_stats # NB: [rebuild] not defined here as likely to need features not currently supported [update:children] +fatimage [fail2ban:children] # Hosts to install fail2ban on to protect SSH @@ -74,6 +87,9 @@ cluster [freeipa_client] # Hosts to be a FreeIPA client. See ansible/roles/freeipa/README.md +[doca] +# Add `builder` to install NVIDIA DOCA during image build + [cuda] # Hosts to install NVIDIA CUDA on - see ansible/roles/cuda/README.md @@ -102,18 +118,21 @@ openhpc login openondemand -[squid] +[squid:children] # Hosts to run squid proxy +fatimage [tuned:children] # Hosts to run TuneD configuration +fatimage [ansible_init:children] # Hosts to run linux-ansible-init cluster -[sssd] +[sssd:children] # Hosts to configure sssd on +fatimage [sshd] # Hosts where the OpenSSH server daemon should be configured @@ -137,8 +156,13 @@ cluster [lustre] # Hosts to run lustre client -[extra_packages:children] +[extra_packages] # Hosts to install specified additional packages on + +[dnf_repos:children] +# Hosts to replace system repos with Pulp repos +# Some roles do installs when in install mode/on build VM only: +fatimage builder [cacerts] @@ -149,7 +173,7 @@ builder [gateway:children] # Add builder to this group to install gateway ansible-init playbook into image -builder +fatimage [nhc:children] # Hosts to configure for node health checks @@ -170,5 +194,6 @@ compute # pulp_host ansible_host= # Note inventory host name cannot conflict with group names i.e can't be called `pulp` or `pulp_server`. -[raid] +[raid:children] # Add `builder` to configure image for software raid +fatimage From b504f10581c8831f520f5d84dd2176945bf42235 Mon Sep 17 00:00:00 2001 From: Claudia Watson Date: Fri, 3 Oct 2025 14:54:38 +0100 Subject: [PATCH 40/50] Expose FIPs in inventory hosts file (#807) * Expose FIPs in inventory hosts file * adding output for "fip_address" * changing 'fip_address' to 'nodegroup_fips' --- environments/site/tofu/inventory.tpl | 2 ++ environments/site/tofu/node_group/nodes.tf | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/environments/site/tofu/inventory.tpl b/environments/site/tofu/inventory.tpl index 9920f9e..ec17711 100644 --- a/environments/site/tofu/inventory.tpl +++ b/environments/site/tofu/inventory.tpl @@ -26,6 +26,7 @@ ${cluster_name}_${group_name}: image_id: ${ node.image_id } networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} node_fqdn: ${login_groups[group_name]["fqdns"][nodename]} + node_fip: ${login_groups[group_name]["nodegroup_fips"][nodename]} %{ endfor ~} ${group_name}: @@ -77,6 +78,7 @@ ${cluster_name}_${group_name}: instance_id: ${ node.id } networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} node_fqdn: ${additional_groups[group_name]["fqdns"][nodename]} + node_fip: ${additional_groups[group_name]["nodegroup_fips"][nodename]} %{ endfor ~} ${group_name}: children: diff --git a/environments/site/tofu/node_group/nodes.tf b/environments/site/tofu/node_group/nodes.tf index 4d874d1..4ff1fd0 100644 --- a/environments/site/tofu/node_group/nodes.tf +++ b/environments/site/tofu/node_group/nodes.tf @@ -25,6 +25,11 @@ locals { } ) } + # Map node names to floating IPs from the list var.fip_addresses by index + nodegroup_fips = { + for idx, n in var.nodes : + n => length(var.fip_addresses) > idx ? 
var.fip_addresses[idx] : "" + } baremetal_az = var.availability_zone != null ? var.availability_zone : "nova" } @@ -229,3 +234,7 @@ output "image_id" { output "fqdns" { value = local.fqdns } + +output "nodegroup_fips" { + value = local.nodegroup_fips +} \ No newline at end of file From 72aff75fbd296e7d42280dea4aba8fceb27df501 Mon Sep 17 00:00:00 2001 From: Pierre Riteau Date: Sat, 4 Oct 2025 21:22:13 +0200 Subject: [PATCH 41/50] Allow VS Code Remote SSH while blocking NFS mounts (#799) --- ansible/roles/sshd/README.md | 1 + ansible/roles/sshd/defaults/main.yml | 1 + ansible/roles/sshd/templates/sshd.conf.j2 | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/ansible/roles/sshd/README.md b/ansible/roles/sshd/README.md index a47f602..3b25360 100644 --- a/ansible/roles/sshd/README.md +++ b/ansible/roles/sshd/README.md @@ -6,5 +6,6 @@ Configure sshd. - `sshd_password_authentication`: Optional bool. Whether to enable password login. Default `false`. - `sshd_disable_forwarding`: Optional bool. Whether to disable all forwarding features (X11, ssh-agent, TCP and StreamLocal). Default `true`. +- `sshd_allow_local_forwarding`: Optional bool. Whether to allow limited forwarding for the Visual Studio Code Remote - SSH extension. Use together with `sshd_disable_forwarding: false`. NOTE THIS MAY BE INSECURE! Default `false`. - `sshd_conf_src`: Optional string. Path to sshd configuration template. Default is in-role template. - `sshd_conf_dest`: Optional string. Path to destination for sshd configuration file. Default is `/etc/ssh/sshd_config.d/10-ansible.conf` which overrides `50-{cloud-init,redhat}` files, if present. diff --git a/ansible/roles/sshd/defaults/main.yml b/ansible/roles/sshd/defaults/main.yml index ca2f8c7..0228dd3 100644 --- a/ansible/roles/sshd/defaults/main.yml +++ b/ansible/roles/sshd/defaults/main.yml @@ -1,5 +1,6 @@ --- sshd_password_authentication: false sshd_disable_forwarding: true +sshd_allow_local_forwarding: false sshd_conf_src: sshd.conf.j2 sshd_conf_dest: /etc/ssh/sshd_config.d/10-ansible.conf diff --git a/ansible/roles/sshd/templates/sshd.conf.j2 b/ansible/roles/sshd/templates/sshd.conf.j2 index 862e263..d409c37 100644 --- a/ansible/roles/sshd/templates/sshd.conf.j2 +++ b/ansible/roles/sshd/templates/sshd.conf.j2 @@ -1,3 +1,7 @@ # {{ ansible_managed }} PasswordAuthentication {{ 'yes' if sshd_password_authentication | bool else 'no' }} DisableForwarding {{ 'yes' if sshd_disable_forwarding | bool else 'no' }} +{% if sshd_allow_local_forwarding %} +AllowTcpForwarding local +PermitOpen 127.0.0.1:* +{% endif %} From d9c5d8f67cc72943a3bb9b4f18b4b1e2d5cbc6c7 Mon Sep 17 00:00:00 2001 From: Steve Brasier <33413598+sjpb@users.noreply.github.com> Date: Tue, 7 Oct 2025 10:41:28 +0100 Subject: [PATCH 42/50] Delete build VMs in CI nightly cleanup (#777) * delete build VMs in CI nightly cleanup * name build volumes and include in nightly cleanup * simplify cleanup of volumes and include fatimage build VMs --------- Co-authored-by: bertiethorpe Co-authored-by: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> --- .github/workflows/nightly-cleanup.yml | 53 +++++---------------------- packer/openstack.pkr.hcl | 6 +++ 2 files changed, 16 insertions(+), 43 deletions(-) diff --git a/.github/workflows/nightly-cleanup.yml b/.github/workflows/nightly-cleanup.yml index 5bec96d..4c2fd01 100644 --- a/.github/workflows/nightly-cleanup.yml +++ b/.github/workflows/nightly-cleanup.yml @@ -46,53 +46,20 @@ jobs: echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > 
~/.config/openstack/clouds.yaml shell: bash - - name: Find CI clusters + - name: Delete all CI clusters run: | . venv/bin/activate - CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true) - echo "DEBUG: Raw CI clusters: $CI_CLUSTERS" - - if [[ -z "$CI_CLUSTERS" ]]; then - echo "No matching CI clusters found." - else - # Flatten multiline value so can be passed as env var - CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//') - echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED" - echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> "$GITHUB_ENV" - fi + ./dev/delete-cluster.py slurmci-RL --force shell: bash - - - name: Delete CI clusters + + - name: Delete all CI extra build VMs and volumes run: | . venv/bin/activate - if [[ -z ${ci_clusters} ]]; then - echo "No clusters to delete." - exit 0 - fi - - for cluster_prefix in ${ci_clusters} - do - echo "Processing cluster: $cluster_prefix" - - # Get all servers with the matching name for control node - CONTROL_SERVERS=$(openstack server list --name "${cluster_prefix}-control" --format json) - - # Get unique server names to avoid duplicate cleanup - UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq) - for name in $UNIQUE_NAMES; do - echo "Deleting cluster with control node: $name" - - # Get the first matching server ID by name - server=$(echo "$CONTROL_SERVERS" | jq -r '.[] | select(.Name=="'"$name"'") | .ID' | head -n1) - - # Make sure server still exists (wasn't deleted earlier) - if ! openstack server show "$server" &>/dev/null; then - echo "Server $server no longer exists, skipping $name." - continue - fi + ./dev/delete-cluster.py openhpc-extra-RL --force + shell: bash - echo "Deleting cluster $cluster_prefix (server $server)..." - ./dev/delete-cluster.py "$cluster_prefix" --force - done - done + - name: Delete all fatimage build VMs and volumes + run: | + . 
venv/bin/activate + ./dev/delete-cluster.py openhpc-RL --force shell: bash diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 9faf4bb..b6f570e 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -128,6 +128,11 @@ variable "volume_size" { default = 20 } +variable "volume_name" { + type = string + default = null +} + variable "image_disk_format" { type = string default = "raw" @@ -162,6 +167,7 @@ source "openstack" "openhpc" { use_blockstorage_volume = var.use_blockstorage_volume volume_type = var.volume_type volume_size = var.volume_size + volume_name = "${var.image_name}${local.image_name_version}" metadata = var.metadata instance_metadata = { ansible_init_disable = "true" From c40a383eaab9b6f38e6371410daaeafd6dc5d200 Mon Sep 17 00:00:00 2001 From: wtripp180901 <78219569+wtripp180901@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:38:09 +0100 Subject: [PATCH 43/50] Export state directory to OnDemand nodes in CaaS environment (#809) * export state directory to ondemand nodes for caas * fixed caas config --- ansible/roles/zenith_proxy/tasks/main.yml | 2 ++ environments/.caas/inventory/group_vars/all/nfs.yml | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ansible/roles/zenith_proxy/tasks/main.yml b/ansible/roles/zenith_proxy/tasks/main.yml index 7a4c034..360e77d 100644 --- a/ansible/roles/zenith_proxy/tasks/main.yml +++ b/ansible/roles/zenith_proxy/tasks/main.yml @@ -61,6 +61,8 @@ group: "{{ zenith_proxy_podman_user }}" mode: "0755" become: true + delegate_to: "{{ groups['control'] | first }}" + run_once: true - name: Initialise Zenith client # Use a foreground command rather than the podman_container module as I could not diff --git a/environments/.caas/inventory/group_vars/all/nfs.yml b/environments/.caas/inventory/group_vars/all/nfs.yml index 0eca0c8..7d617df 100644 --- a/environments/.caas/inventory/group_vars/all/nfs.yml +++ b/environments/.caas/inventory/group_vars/all/nfs.yml @@ -9,4 +9,12 @@ caas_nfs_home: nfs_export: "/exports/home" # assumes default site TF is being used nfs_client_mnt_point: "/home" -nfs_configurations: "{{ caas_nfs_home if not cluster_home_manila_share | bool else [] }}" +caas_ood_zenith_state_dir: + - comment: Export /var/lib/state from Slurm control node + nfs_enable: + server: "{{ inventory_hostname in groups['control'] }}" + clients: "{{ inventory_hostname in groups['openondemand'] }}" + nfs_export: "/var/lib/state" + nfs_client_mnt_point: "/var/lib/state" + +nfs_configurations: "{{ caas_ood_zenith_state_dir + ( caas_nfs_home if not cluster_home_manila_share | bool else [] ) }}" From bc4e6ee9a8e0c3e0c50809b895c9960bead7d7fa Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Fri, 7 Nov 2025 11:16:58 +0000 Subject: [PATCH 44/50] Bump image for v2.7 --- environments/site/builder.pkrvars.hcl | 2 +- environments/site/tofu/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/site/builder.pkrvars.hcl b/environments/site/builder.pkrvars.hcl index 8163baf..9029117 100644 --- a/environments/site/builder.pkrvars.hcl +++ b/environments/site/builder.pkrvars.hcl @@ -1,6 +1,6 @@ flavor = "ec1.large" # VM flavor to use for builder VMs networks = ["84205817-e75c-47c7-a57e-0f14ee8de257"] # List of network UUIDs to attach the VM to - workshop-internal -source_image_name = "openhpc-RL9-250808-1727-faa44755" # Name of image to create VM with, i.e. 
starting image +source_image_name = "openhpc-RL9-251002-1456-1d21952c" # Name of image to create VM with, i.e. starting image volume_size = "15" # in GB volume_type = "unencrypted" inventory_groups = "extra_packages" # Additional inventory groups to add build VM to diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index f5e7dd0..f4679b9 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -126,7 +126,7 @@ variable "login" { variable "cluster_image_id" { type = string description = "ID of default image for the cluster" - default = "7ca99016-c342-4557-8a8d-9a856e934b58" # openhpc-RL9-250808-1727-faa44755 + default = "d62d93df-0a1f-473f-81ef-d89538dd6cef" # openhpc-RL9-251002-1456-1d21952c } variable "compute" { From d954cbc72e9d84977ff2e4dd1a39aab199227550 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Mon, 10 Nov 2025 14:20:14 +0000 Subject: [PATCH 45/50] Don't use login as name for login node object --- environments/site/tofu/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index f4679b9..4579a7c 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -55,7 +55,7 @@ variable "control_node_flavor" { variable "login" { default = { - login = { + head = { nodes = ["login-0"] flavor = "en1.xsmall" } From cd96d9d203791199470f73d5908652cfcf38658c Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:01:12 +0000 Subject: [PATCH 46/50] Fix structure of ansible-ssh --- dev/ansible-ssh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev/ansible-ssh b/dev/ansible-ssh index b2e13ff..6a1839b 100755 --- a/dev/ansible-ssh +++ b/dev/ansible-ssh @@ -21,8 +21,7 @@ def _optional_arg(prototype, *values): if __name__ == "__main__": if len(sys.argv) < 2: msg = ( - f"Usage: { - sys.argv[0]} [args to pass to ssh]") + f"Usage: {sys.argv[0]} [args to pass to ssh]") print(msg, file=sys.stderr) sys.exit(-1) From 5812791956c1d766126fe224a83b2d286fdc5ed6 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Mon, 10 Nov 2025 19:45:55 +0000 Subject: [PATCH 47/50] Add script to do all env related config at once --- dev/activate-env.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100755 dev/activate-env.sh diff --git a/dev/activate-env.sh b/dev/activate-env.sh new file mode 100755 index 0000000..0982709 --- /dev/null +++ b/dev/activate-env.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +. venv/bin/activate +. 
environments/staging/activate +export OS_CLOUD=openstack +export ANSIBLE_VAULT_PASSWORD_FILE=/home/lab/vault.password From 06dede48fe76fa6c4c5c13e94786594ae726f560 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Mon, 10 Nov 2025 20:22:45 +0000 Subject: [PATCH 48/50] Change image to use id and volume size for v2.7 image build (packer) --- environments/site/builder.pkrvars.hcl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/environments/site/builder.pkrvars.hcl b/environments/site/builder.pkrvars.hcl index 9029117..7ce3795 100644 --- a/environments/site/builder.pkrvars.hcl +++ b/environments/site/builder.pkrvars.hcl @@ -1,7 +1,8 @@ flavor = "ec1.large" # VM flavor to use for builder VMs networks = ["84205817-e75c-47c7-a57e-0f14ee8de257"] # List of network UUIDs to attach the VM to - workshop-internal -source_image_name = "openhpc-RL9-251002-1456-1d21952c" # Name of image to create VM with, i.e. starting image -volume_size = "15" # in GB +# source_image_name = "openhpc-RL9-251002-1456-1d21952c" # Name of image to create VM with, i.e. starting image +source_image = "d62d93df-0a1f-473f-81ef-d89538dd6cef" # Use image ID instead of name to avoid ambiguity +volume_size = "25" # in GB volume_type = "unencrypted" inventory_groups = "extra_packages" # Additional inventory groups to add build VM to From 079dd05fefe5472ce18babebf77ce169a11a309b Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Tue, 11 Nov 2025 14:32:28 +0000 Subject: [PATCH 49/50] Change volume size for workshop --- environments/site/tofu/variables.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/site/tofu/variables.tf b/environments/site/tofu/variables.tf index 4579a7c..a5c5050 100644 --- a/environments/site/tofu/variables.tf +++ b/environments/site/tofu/variables.tf @@ -227,7 +227,7 @@ variable "state_dir" { variable "state_volume_size" { type = number description = "Size of state volume on control node, in GB" - default = 150 # GB + default = 75 # GB } variable "state_volume_type" { @@ -261,7 +261,7 @@ variable "state_volume_provisioning" { variable "home_volume_size" { type = number description = "Size of state volume on control node, in GB." - default = 100 + default = 75 validation { condition = var.home_volume_provisioning == "manage" ? var.home_volume_size > 0 : true error_message = <<-EOT From ea2aea212129f1b6a3912c9716340a81533b2ed8 Mon Sep 17 00:00:00 2001 From: technowhizz <7688823+technowhizz@users.noreply.github.com> Date: Wed, 12 Nov 2025 14:15:32 +0000 Subject: [PATCH 50/50] Change volume size for packer --- environments/site/builder.pkrvars.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/site/builder.pkrvars.hcl b/environments/site/builder.pkrvars.hcl index 7ce3795..ea65759 100644 --- a/environments/site/builder.pkrvars.hcl +++ b/environments/site/builder.pkrvars.hcl @@ -2,7 +2,7 @@ flavor = "ec1.large" # VM flavor to use for builder VM networks = ["84205817-e75c-47c7-a57e-0f14ee8de257"] # List of network UUIDs to attach the VM to - workshop-internal # source_image_name = "openhpc-RL9-251002-1456-1d21952c" # Name of image to create VM with, i.e. starting image source_image = "d62d93df-0a1f-473f-81ef-d89538dd6cef" # Use image ID instead of name to avoid ambiguity -volume_size = "25" # in GB +volume_size = "20" # in GB volume_type = "unencrypted" inventory_groups = "extra_packages" # Additional inventory groups to add build VM to
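The final commits pin the workshop builder to the v2.7 fat image by ID and settle the build volume at 20 GB. A minimal sketch of an extra-packages image build with these settings — assuming `dev/activate-env.sh` is sourced from the repository root (it hard-codes the staging environment, `OS_CLOUD=openstack` and a lab-specific vault password path) and that `packer` is on the PATH, with the invocation otherwise following `docs/image-build.md`:

```bash
# Sketch: site image build using the updated builder variables.
# Adjust or skip the activation helper for non-workshop setups.
. dev/activate-env.sh    # venv + environments/staging/activate + OS_CLOUD + vault password file
cd packer/
PACKER_LOG=1 packer build -on-error=ask \
  -var-file=../environments/site/builder.pkrvars.hcl openstack.pkr.hcl
```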