From b764de4564295f0b32a2eb5b2d7d2b0c8c755968 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 9 Feb 2022 08:53:48 -0500 Subject: [PATCH] Now writes .out and .err files to local /tmp dir and then copies once the simulation finishes. Disables job array status files. --- hercules/constants.py | 2 +- hercules/hexbug | 2 +- hercules/simulation.py | 27 +++++++++++++++++++++------ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/hercules/constants.py b/hercules/constants.py index 3fd7d02..11112a4 100644 --- a/hercules/constants.py +++ b/hercules/constants.py @@ -17,7 +17,7 @@ #container is running linux #-> make sure it's PosixPath when run from windows HEXBUG_DIR_CONTAINER = PosixPath('/') / 'tmp' -OUTPUT_DIR_CONTAINER = PosixPath('/') / 'home' +OUTPUT_DIR_CONTAINER = PosixPath('/') / 'home' LOCUST_CONFIG_NAME_P2 = 'LocustPhase2Template.json' KASS_CONFIG_NAME_P2 = 'Project8Phase2_electrons.xml' LOCUST_CONFIG_NAME_P3 = 'LocustPhase3Template.json' diff --git a/hercules/hexbug b/hercules/hexbug index 373cecc..ec881e5 160000 --- a/hercules/hexbug +++ b/hercules/hexbug @@ -1 +1 @@ -Subproject commit 373cecc7a89dca0e707880efd54bffea7acfeb94 +Subproject commit ec881e5c2ba842a7e5032c76f3f80eeffcad6698 diff --git a/hercules/simulation.py b/hercules/simulation.py index e306925..157cadc 100644 --- a/hercules/simulation.py +++ b/hercules/simulation.py @@ -342,6 +342,8 @@ def __init__(self, working_dir, direct=True): """ AbstractKassLocustP3.__init__(self, working_dir, direct) + + self._tmp_dir = Path('/tmp', self._working_dir.parts[-1]) def __call__(self, config_list): """This method overrides :meth:`AbstractKassLocustP3.__call__`. @@ -373,7 +375,8 @@ def _submit_job(self): job_limit = '--max-jobs ' + CONFIG.job_limit job_memory = '--mem-per-cpu ' + CONFIG.job_memory +'m' job_timelimit = '-t ' + CONFIG.job_timelimit - job_status = '--status-dir ' + str(self._working_dir) + #job_status = '--status-dir ' + str(self._working_dir) + job_status = '--suppress-stats-file' job_output = '--output /dev/null' cmd = _char_concatenate(' ', module, dsq, job_file, job_partition, @@ -389,6 +392,8 @@ def _add_job(self, sim_config: SimConfig): output_dir = self._working_dir / sim_config.sim_name output_dir.mkdir(parents=True, exist_ok=True) + + tmp_output_dir = self._tmp_dir / sim_config.sim_name locust_file = output_dir / LOCUST_CONFIG_NAME kass_file = output_dir / KASS_CONFIG_NAME @@ -398,33 +403,43 @@ def _add_job(self, sim_config: SimConfig): sim_config.to_json(config_dump) self._gen_locust_script(output_dir) - cmd = self._assemble_command(output_dir) + cmd = self._assemble_command(output_dir, tmp_output_dir) with open(self._joblist, 'a+') as out_file: out_file.write(cmd) - def _assemble_command(self, output_dir): + def _assemble_command(self, output_dir, tmp_output_dir): #Assemble the singularity command that runs the KassLocust simulation #in the p8compute singularity container + + create_log_dir = f'mkdir -p {str(tmp_output_dir)}' singularity_exec = 'singularity exec --no-home' share_output_dir = _gen_shared_dir_string_singularity(output_dir, OUTPUT_DIR_CONTAINER) share_hexbug_dir = _gen_shared_dir_string_singularity(HEXBUG_DIR, HEXBUG_DIR_CONTAINER) + container = str(self._singularity) run_script = str(OUTPUT_DIR_CONTAINER/self._command_script_name) - log = '>' + str(output_dir) + '/run_singularity.out' - err = '2>' + str(output_dir) + '/run_singularity.err' + #log = '>' + str(output_dir) + '/run_singularity.out' + #err = '2>' + str(output_dir) + '/run_singularity.err' + log = '>' + str(tmp_output_dir) + '/run_singularity.out' + err = '2>' + str(tmp_output_dir) + '/run_singularity.err' singularity_cmd = _char_concatenate(' ', singularity_exec, share_output_dir, share_hexbug_dir, container, run_script, log, err) check_failure = "if [ $? -gt 1 ];then scontrol requeue $SLURM_JOB_ID;fi;" + + mv_log_file = f"mv {str(tmp_output_dir)}/run_singularity.out {str(output_dir)}" + mv_err_file = f"mv {str(tmp_output_dir)}/run_singularity.err {str(output_dir)}" + mv_files = mv_log_file + ';' + mv_err_file - final_command = singularity_cmd + ';' + check_failure +'\n' + final_command = create_log_dir + ';' + singularity_cmd + ';' + mv_files + ';' + check_failure +'\n' + #final_command = create_log_dir + ';' + singularity_cmd + ';' + check_failure +'\n' return final_command