From d1da946bdd07d86eb0e16223809e0dd9ba784338 Mon Sep 17 00:00:00 2001 From: Anthony Ramirez Date: Wed, 17 Dec 2025 18:07:09 -0500 Subject: [PATCH 1/4] add minimal config for slurm support on OLCF CADES Baseline --- python/solid_dmft/dft_managers/mpi_helpers.py | 8 ++++++++ python/solid_dmft/io_tools/verify_input_params.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/solid_dmft/dft_managers/mpi_helpers.py b/python/solid_dmft/dft_managers/mpi_helpers.py index a1376eb0..73a36d39 100644 --- a/python/solid_dmft/dft_managers/mpi_helpers.py +++ b/python/solid_dmft/dft_managers/mpi_helpers.py @@ -68,6 +68,7 @@ def create_hostfile(number_cores, cluster_name): mask_hostfile = {'openmpi': '{} slots={}', # OpenMPI format 'openmpi-intra': '{} slots={}', # OpenMPI format 'mpich': '{}:{}', # MPICH format + 'slurm': '{}', # SLURM format }[cluster_name] hostfile = 'dft.hostfile' @@ -148,6 +149,13 @@ def get_mpi_arguments(mpi_profile, mpi_exe, number_cores, dft_exe, hostfile): return [mpi_exe, '-launcher', 'ssh', '-hostfile', hostfile, '-np', str(number_cores), '-envlist', 'PATH'] + shlex.split(dft_exe) + if mpi_profile == 'slurm': + return [ + mpi_exe, '-w', hostfile, '-n', str(number_cores), '--export=PATH', + '-N', os.getenv("SLURM_JOB_NUM_NODES"), '-A', os.getenv("SLURM_JOB_ACCOUNT"), + '-p', os.getenv("SLURM_JOB_PARTITION") + ] + shlex.split(dft_exe) + return None diff --git a/python/solid_dmft/io_tools/verify_input_params.py b/python/solid_dmft/io_tools/verify_input_params.py index 5f0b2ea3..3e646faa 100644 --- a/python/solid_dmft/io_tools/verify_input_params.py +++ b/python/solid_dmft/io_tools/verify_input_params.py @@ -72,7 +72,7 @@ def _verify_input_params_dft(params: FullConfig) -> None: if params['dft']['dft_code'] not in ('vasp', 'qe', None): raise ValueError(f'Invalid "dft.dft_code" = {params["dft"]["dft_code"]}.') - if params['dft']['mpi_env'] not in ('default', 'openmpi', 'openmpi-intra', 'mpich'): + if params['dft']['mpi_env'] not in ('default', 'openmpi', 'openmpi-intra', 'mpich', 'slurm'): raise ValueError(f'Invalid "dft.mpi_env" = {params["dft"]["mpi_env"]}.') if params['dft']['projector_type'] not in ('w90', 'plo'): From 0479d71e43f5f0fa47951cd547441a9a1f2ce576 Mon Sep 17 00:00:00 2001 From: Anthony Ramirez Date: Wed, 17 Dec 2025 18:21:33 -0500 Subject: [PATCH 2/4] slurm hostfile path fix --- python/solid_dmft/dft_managers/mpi_helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/solid_dmft/dft_managers/mpi_helpers.py b/python/solid_dmft/dft_managers/mpi_helpers.py index 73a36d39..94abb841 100644 --- a/python/solid_dmft/dft_managers/mpi_helpers.py +++ b/python/solid_dmft/dft_managers/mpi_helpers.py @@ -151,9 +151,10 @@ def get_mpi_arguments(mpi_profile, mpi_exe, number_cores, dft_exe, hostfile): if mpi_profile == 'slurm': return [ - mpi_exe, '-w', hostfile, '-n', str(number_cores), '--export=PATH', + mpi_exe, '-n', str(number_cores), '--export=PATH', '-N', os.getenv("SLURM_JOB_NUM_NODES"), '-A', os.getenv("SLURM_JOB_ACCOUNT"), - '-p', os.getenv("SLURM_JOB_PARTITION") + '-p', os.getenv("SLURM_JOB_PARTITION"), + '-w', f"./{hostfile}", ] + shlex.split(dft_exe) return None From 68459f383d92bca0b109066816394907ce7c7b49 Mon Sep 17 00:00:00 2001 From: Anthony Ramirez Date: Wed, 17 Dec 2025 18:26:56 -0500 Subject: [PATCH 3/4] add walltime to slurm config --- python/solid_dmft/dft_managers/mpi_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/solid_dmft/dft_managers/mpi_helpers.py b/python/solid_dmft/dft_managers/mpi_helpers.py index 94abb841..f94aff1f 100644 --- a/python/solid_dmft/dft_managers/mpi_helpers.py +++ b/python/solid_dmft/dft_managers/mpi_helpers.py @@ -153,7 +153,7 @@ def get_mpi_arguments(mpi_profile, mpi_exe, number_cores, dft_exe, hostfile): return [ mpi_exe, '-n', str(number_cores), '--export=PATH', '-N', os.getenv("SLURM_JOB_NUM_NODES"), '-A', os.getenv("SLURM_JOB_ACCOUNT"), - '-p', os.getenv("SLURM_JOB_PARTITION"), + '-p', os.getenv("SLURM_JOB_PARTITION"), '-t', '05:00', #TODO: decide way to get time limit '-w', f"./{hostfile}", ] + shlex.split(dft_exe) From 2fafc057d49feeb4de6d2b9230d0d6c21346e939 Mon Sep 17 00:00:00 2001 From: Anthony Ramirez Date: Wed, 17 Dec 2025 18:44:34 -0500 Subject: [PATCH 4/4] fix hostnames for baseline --- python/solid_dmft/dft_managers/mpi_helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/solid_dmft/dft_managers/mpi_helpers.py b/python/solid_dmft/dft_managers/mpi_helpers.py index f94aff1f..fa461747 100644 --- a/python/solid_dmft/dft_managers/mpi_helpers.py +++ b/python/solid_dmft/dft_managers/mpi_helpers.py @@ -59,6 +59,9 @@ def create_hostfile(number_cores, cluster_name): return None hostnames = mpi.world.gather(socket.gethostname(), root=0) + if cluster_name == 'slurm': + slurm_hostnames = [hostname.split('.')[0] for hostname in hostnames] # TODO: please find a better solution + hostnames = slurm_hostnames if mpi.is_master_node(): # create hostfile based on first number_cores ranks hosts = defaultdict(int)