Skip to content

Commit 0c22d97

Browse files
committed
Merge pull request #87 from mgermain/job_logs
Job logs
2 parents 0dd91d8 + 915a7f2 commit 0c22d97

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

scripts/smart_dispatch.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ def main():
9999
'mem_per_command': None # args.memPerCommand
100100
}
101101

102-
job_generator = job_generator_factory(queue, commands, command_params, CLUSTER_NAME)
102+
job_generator = job_generator_factory(queue, commands, command_params, CLUSTER_NAME, path_job)
103103
pbs_filenames = job_generator.write_pbs_files(path_job_commands)
104104

105105
# Launch the jobs
@@ -175,6 +175,8 @@ def get_job_folders(jobname):
175175
os.makedirs(path_job_logs)
176176
if not os.path.exists(os.path.join(path_job_logs, "worker")):
177177
os.makedirs(os.path.join(path_job_logs, "worker"))
178+
if not os.path.exists(os.path.join(path_job_logs, "job")):
179+
os.makedirs(os.path.join(path_job_logs, "job"))
178180

179181
return path_job, path_job_logs, path_job_commands
180182

@@ -189,6 +191,7 @@ def create_job_folders(jobname):
189191
if not os.path.exists(path_job_logs):
190192
os.makedirs(path_job_logs)
191193
os.makedirs(os.path.join(path_job_logs, "worker"))
194+
os.makedirs(os.path.join(path_job_logs, "job"))
192195

193196
return path_job, path_job_logs, path_job_commands
194197

smartdispatch/job_generator.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,15 @@
66
from smartdispatch import utils
77

88

9-
def job_generator_factory(queue, commands, command_params={}, cluster_name=None):
9+
def job_generator_factory(queue, commands, command_params={}, cluster_name=None, base_path="./"):
1010
if cluster_name == "guillimin":
11-
return GuilliminJobGenerator(queue, commands, command_params)
11+
return GuilliminJobGenerator(queue, commands, command_params, base_path)
1212
elif cluster_name == "mammouth":
13-
return MammouthJobGenerator(queue, commands, command_params)
13+
return MammouthJobGenerator(queue, commands, command_params, base_path)
1414
elif cluster_name == "helios":
15-
return HeliosJobGenerator(queue, commands, command_params)
15+
return HeliosJobGenerator(queue, commands, command_params, base_path)
1616

17-
return JobGenerator(queue, commands, command_params)
17+
return JobGenerator(queue, commands, command_params, base_path)
1818

1919

2020
class JobGenerator(object):
@@ -31,9 +31,10 @@ class JobGenerator(object):
3131
information about the commands
3232
"""
3333

34-
def __init__(self, queue, commands, command_params={}):
34+
def __init__(self, queue, commands, command_params={}, base_path="./"):
3535
self.commands = commands
3636
self.queue = queue
37+
self.job_log_filename = '"{base_path}/logs/job/"$PBS_JOBID".{{ext}}"'.format(base_path=base_path)
3738

3839
self.nb_cores_per_command = command_params.get('nb_cores_per_command', 1)
3940
self.nb_gpus_per_command = command_params.get('nb_gpus_per_command', 1)
@@ -51,6 +52,9 @@ def generate_pbs(self):
5152
for i, commands in enumerate(utils.chunks(self.commands, n=nb_commands_per_node)):
5253
pbs = PBS(self.queue.name, self.queue.walltime)
5354

55+
# TODO Move the add_options into the JobManager once created.
56+
pbs.add_options(o=self.job_log_filename.format(ext='out'), e=self.job_log_filename.format(ext='err'))
57+
5458
# Set resource: nodes
5559
resource = "1:ppn={ppn}".format(ppn=len(commands) * self.nb_cores_per_command)
5660
if self.queue.nb_gpus_per_node > 0:
@@ -126,6 +130,6 @@ def generate_pbs(self):
126130
# Nb of GPUs has to be a multiple of 2
127131
nb_gpus = int(re.findall("gpus=([0-9]+)", pbs.resources['nodes'])[0])
128132
if nb_gpus % 2 != 0:
129-
pbs.resources['nodes'] = re.sub("gpus=[0-9]+", "gpus={0}".format(nb_gpus+1), pbs.resources['nodes'])
133+
pbs.resources['nodes'] = re.sub("gpus=[0-9]+", "gpus={0}".format(nb_gpus + 1), pbs.resources['nodes'])
130134

131135
return pbs_list

0 commit comments

Comments
 (0)