Skip to content

Commit 98f978f

Browse files
authored
Merge pull request #201 from stackhpc/feat/template-mpi-conf
Add support for mpi.conf templating
2 parents 34d3996 + 3b50208 commit 98f978f

File tree

4 files changed

+31
-0
lines changed

4 files changed

+31
-0
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ used to supplement or override the template defaults. Templated parameters can
126126
also be removed by setting the value to the literal string `'omit'` - note
127127
that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit).
128128

129+
`openhpc_mpi_config`: Optional. Mapping of additional parameters and values for
130+
[mpi.conf](https://slurm.schedmd.com/mpi.conf.html). Keys are mpi.conf
131+
parameter names and values are lists or strings as appropriate. This can be
132+
used to supplement or override the template defaults. Templated parameters can
133+
also be removed by setting the value to the literal string `'omit'` - note
134+
that this is *not the same* as the Ansible `omit` [special variable](https://docs.ansible.com/ansible/latest/reference_appendices/special_variables.html#term-omit).
135+
129136
`openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overridden on a per-partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set.
130137

131138
`openhpc_state_save_location`: Optional. Absolute path for Slurm controller state (`slurm.conf` parameter [StateSaveLocation](https://slurm.schedmd.com/slurm.conf.html#OPT_StateSaveLocation))

defaults/main.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ openhpc_cgroup_config: {}
5151
openhpc_gres_template: gres.conf.j2
5252
openhpc_cgroup_template: cgroup.conf.j2
5353

54+
openhpc_mpi_template: mpi.conf.j2
55+
openhpc_mpi_config: {}
56+
5457
openhpc_state_save_location: /var/spool/slurm
5558
openhpc_slurmd_spool_dir: /var/spool/slurm
5659
openhpc_slurm_conf_path: /etc/slurm/slurm.conf

tasks/runtime.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,20 @@
103103
register: ohpc_cgroup_conf
104104
# NB uses restart rather than reload as this is needed in some cases
105105

106+
# Renders the template named by openhpc_mpi_template to mpi.conf in the same
# directory as openhpc_slurm_conf_path. Runs only when openhpc_enable.control
# is truthy and openhpc_mpi_config is non-empty. The result is registered as
# ohpc_mpi_conf and a change notifies the slurmctld restart handler.
- name: Template mpi.conf
107+
template:
108+
src: "{{ openhpc_mpi_template }}"
109+
dest: "{{ openhpc_slurm_conf_path | dirname }}/mpi.conf"
110+
owner: root
111+
group: root
112+
mode: "0644"
113+
when:
114+
- openhpc_enable.control | default(false)
115+
- openhpc_mpi_config | length > 0
116+
notify:
117+
- Restart slurmctld service
118+
register: ohpc_mpi_conf
119+
106120
# Workaround for https://bugs.rockylinux.org/view.php?id=10165
107121
- name: Fix permissions on /etc for Munge service
108122
ansible.builtin.file:
@@ -134,6 +148,7 @@
134148
when:
135149
- openhpc_slurm_control_host in ansible_play_hosts
136150
- hostvars[openhpc_slurm_control_host].ohpc_slurm_conf.changed or
151+
hostvars[openhpc_slurm_control_host].ohpc_mpi_conf.changed or
137152
hostvars[openhpc_slurm_control_host].ohpc_cgroup_conf.changed or
138153
hostvars[openhpc_slurm_control_host].ohpc_gres_conf.changed # noqa no-handler
139154
notify:

templates/mpi.conf.j2

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# {{ ansible_managed }}
2+
{% for k, v in openhpc_mpi_config.items() %}{# items() must be called: iterating the bare method object is an error #}
3+
{% if v != "omit" %}{# allow removing items using setting key: omit #}
4+
{{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
5+
{% endif %}
6+
{% endfor %}

0 commit comments

Comments
 (0)