Skip to content

Commit 3f82d39

Browse files
committed
Merge pull request #80 from mgermain/failed_command_management
Failed command management
2 parents 811ee90 + e64a375 commit 3f82d39

File tree

6 files changed

+126
-10
lines changed

6 files changed

+126
-10
lines changed

scripts/smart_dispatch.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33

44
import os
55
import argparse
6+
import time as t
67
import numpy as np
78
from subprocess import check_output
9+
from textwrap import dedent
810

911
from smartdispatch.command_manager import CommandManager
1012

@@ -59,6 +61,24 @@ def main():
5961
if args.mode == "launch":
6062
command_manager.set_commands_to_run(commands)
6163
else:
64+
# Check whether any commands previously ended in a failed state
65+
failed_commands = command_manager.get_failed_commands()
66+
if len(failed_commands) > 0:
67+
FAILED_COMMAND_MESSAGE = dedent("""\
68+
{nb_failed} command(s) are in a failed state. They won't be resumed.
69+
Failed commands:
70+
{failed_commands}
71+
The actual errors can be found in the log folder under:
72+
{failed_commands_err_file}""")
73+
utils.print_boxed(FAILED_COMMAND_MESSAGE.format(
74+
nb_failed=len(failed_commands),
75+
failed_commands=''.join(failed_commands),
76+
failed_commands_err_file='\n'.join([utils.generate_uid_from_string(c[:-1])+'.err' for c in failed_commands])
77+
))
78+
79+
if not utils.yes_no_prompt("Do you want to continue?", 'n'):
80+
exit()
81+
6282
command_manager.reset_running_commands()
6383
nb_commands = command_manager.get_nb_commands_to_run()
6484

@@ -88,16 +108,17 @@ def main():
88108

89109
# Launch the jobs
90110
print "## {nb_commands} command(s) will be executed in {nb_jobs} job(s) ##".format(nb_commands=nb_commands, nb_jobs=len(pbs_filenames))
91-
print "Batch UID:\n {batch_uid}".format(batch_uid=jobname)
111+
print "Batch UID:\n{batch_uid}".format(batch_uid=jobname)
92112
if not args.doNotLaunch:
93113
jobs_id = []
94114
for pbs_filename in pbs_filenames:
95115
qsub_output = check_output('{launcher} {pbs_filename}'.format(launcher=LAUNCHER if args.launcher is None else args.launcher, pbs_filename=pbs_filename), shell=True)
96-
jobs_id += [qsub_output.rstrip()]
116+
jobs_id += [qsub_output.strip()]
97117

98118
with utils.open_with_lock(os.path.join(path_job, "jobs_id.txt"), 'a') as jobs_id_file:
99-
jobs_id_file.writelines("\n".join(jobs_id))
100-
print "\nJobs id:\n {jobs_id}".format(jobs_id=" ".join(jobs_id))
119+
jobs_id_file.writelines(t.strftime("## %Y-%m-%d %H:%M:%S ##\n"))
120+
jobs_id_file.writelines("\n".join(jobs_id) + "\n")
121+
print "\nJobs id:\n{jobs_id}".format(jobs_id=" ".join(jobs_id))
101122
print "\nLogs, command, and jobs id related to this batch will be in:\n {smartdispatch_folder}".format(smartdispatch_folder=path_job)
102123

103124

scripts/smart_worker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ def main():
5151
stdout_file.flush()
5252
stderr_file.flush()
5353

54-
subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)
54+
error_code = subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)
5555

56-
command_manager.set_running_command_as_finished(command)
56+
command_manager.set_running_command_as_finished(command, error_code)
5757

5858
if __name__ == '__main__':
5959
main()

smartdispatch/command_manager.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def __init__(self, commands_filename):
99

1010
self._running_commands_filename = os.path.join(base_path, "running_" + filename)
1111
self._finished_commands_filename = os.path.join(base_path, "finished_" + filename)
12+
self._failed_commands_filename = os.path.join(base_path, "failed_" + filename)
1213
self._commands_filename = commands_filename
1314

1415
def _move_line_between_files(self, file1, file2, line):
@@ -40,9 +41,21 @@ def get_nb_commands_to_run(self):
4041
with open(self._commands_filename, 'r') as commands_file:
4142
return len(commands_file.readlines())
4243

43-
def set_running_command_as_finished(self, command):
44+
def get_failed_commands(self):
    """Return the lines of the failed-commands file.

    Each returned entry is one raw line of the file, trailing newline
    included. An empty list is returned when the file does not exist.
    """
    # Guard clause: no failed-commands file means nothing has been recorded.
    if not os.path.isfile(self._failed_commands_filename):
        return []

    with open(self._failed_commands_filename, 'r') as failed_file:
        return failed_file.readlines()
50+
51+
def set_running_command_as_finished(self, command, error_code=0):
    """Move `command` out of the running-commands file.

    The command line is moved to the finished-commands file when
    `error_code` is 0, and to the failed-commands file otherwise.
    """
    if error_code == 0:
        destination = self._finished_commands_filename
    else:
        destination = self._failed_commands_filename

    # Both files are opened through the locking helper while the line is moved.
    with utils.open_with_lock(self._running_commands_filename, 'r+') as running_file:
        with utils.open_with_lock(destination, 'a') as destination_file:
            self._move_line_between_files(running_file, destination_file, command + '\n')
4760

4861
def reset_running_commands(self):

smartdispatch/tests/test_command_manager.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def setUp(self):
1515
self.command2 = "2\n"
1616
self.command3 = "3\n"
1717

18-
command_filename = os.path.join(self._base_dir, "commant.txt")
18+
command_filename = os.path.join(self._base_dir, "commands.txt")
1919

2020
with open(command_filename, "w+") as commands_file:
2121
commands_file.write(self.command1 + self.command2 + self.command3)
@@ -40,6 +40,25 @@ def test_set_commands_to_run(self):
4040

4141
assert_true(not os.path.isfile(self.command_manager._finished_commands_filename))
4242

43+
def test_get_failed_commands(self):
44+
# Setup
45+
command = self.command_manager.get_command_to_run()
46+
self.command_manager.set_running_command_as_finished(command, 1)
47+
48+
# The function to test
49+
failed_commands = self.command_manager.get_failed_commands()
50+
51+
# Test validation
52+
assert_equal(len(failed_commands), 1)
53+
assert_equal(failed_commands[0], self.command1)
54+
55+
def test_get_failed_commands_empty(self):
56+
# The function to test
57+
failed_commands = self.command_manager.get_failed_commands()
58+
59+
# Test validation
60+
assert_equal(len(failed_commands), 0)
61+
4362
def test_get_command_to_run(self):
4463
# The function to test
4564
command = self.command_manager.get_command_to_run()
@@ -75,6 +94,28 @@ def test_set_running_command_as_finished(self):
7594
with open(self.command_manager._finished_commands_filename, "r") as finished_commands_file:
7695
assert_equal(finished_commands_file.read(), self.command1)
7796

97+
assert_true(not os.path.isfile(self.command_manager._failed_commands_filename))
98+
99+
def test_set_running_command_as_failed(self):
100+
# SetUp
101+
command = self.command_manager.get_command_to_run()
102+
error_code = 1
103+
104+
# The function to test
105+
self.command_manager.set_running_command_as_finished(command, error_code)
106+
107+
# Test validation
108+
with open(self.command_manager._commands_filename, "r") as commands_file:
109+
assert_equal(commands_file.read(), self.command2 + self.command3)
110+
111+
with open(self.command_manager._running_commands_filename, "r") as running_commands_file:
112+
assert_equal(running_commands_file.read(), "")
113+
114+
with open(self.command_manager._failed_commands_filename, "r") as failed_commands_file:
115+
assert_equal(failed_commands_file.read(), self.command1)
116+
117+
assert_true(not os.path.isfile(self.command_manager._finished_commands_filename))
118+
78119
def test_reset_running_commands(self):
79120
# SetUp
80121
self.command_manager.get_command_to_run()

smartdispatch/tests/test_utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import time
44
import tempfile
55
import shutil
6+
import unittest
67

78
from subprocess import Popen, PIPE
89

@@ -12,12 +13,25 @@
1213
from numpy.testing import assert_array_equal
1314

1415

16+
class PrintBoxedTests(unittest.TestCase):
17+
18+
def setUp(self):
19+
self.empty = ''
20+
self.text = "This is weird test for a visual thing.\nWell maybe it's fine to test it's working."
21+
22+
def test_print_boxed(self):
23+
utils.print_boxed(self.text)
24+
25+
def test_print_boxed_empty(self):
26+
utils.print_boxed(self.empty)
27+
28+
1529
def test_chunks():
1630
sequence = range(10)
1731

1832
for n in range(1, 11):
1933
expected = []
20-
for start, end in zip(range(0, len(sequence), n), range(n, len(sequence)+n, n)):
34+
for start, end in zip(range(0, len(sequence), n), range(n, len(sequence) + n, n)):
2135
expected.append(sequence[start:end])
2236

2337
assert_array_equal(list(utils.chunks(sequence, n)), expected, "n:{0}".format(n))

smartdispatch/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,37 @@
55
import unicodedata
66
import json
77

8+
from distutils.util import strtobool
89
from subprocess import Popen, PIPE
910
from contextlib import contextmanager
1011

1112

13+
def print_boxed(string):
    """Print `string` surrounded by a box.

    The text is split on newlines; every line is left-justified to the
    width of the longest one and framed with one space of padding on each
    side.

    Unicode box-drawing characters are used for the frame. If standard
    output cannot encode them (for instance when the output is piped or the
    locale is plain ASCII), the frame is drawn with '+', '-' and '|'
    instead of letting the UnicodeEncodeError escape.
    """
    lines = string.split('\n')
    width = max(map(len, lines))

    def render(top_left, top_right, bottom_left, bottom_right, horizontal, vertical):
        # Assemble the whole box as a single string so that it is emitted
        # (or rejected by the encoder) in one piece.
        border = horizontal * (width + 2)
        rows = [u"{0} {1} {0}".format(vertical, line.ljust(width)) for line in lines]
        return u"\n".join([top_left + border + top_right] + rows + [bottom_left + border + bottom_right])

    try:
        # Single-argument print(...) is valid in both Python 2 and 3.
        print(render(u"\u250c", u"\u2510", u"\u2514", u"\u2518", u"\u2500", u"\u2502"))
    except UnicodeEncodeError:
        # Only the frame is downgraded; text that is itself not encodable
        # will still raise.
        print(render(u"+", u"+", u"+", u"+", u"-", u"|"))
22+
23+
24+
def yes_no_prompt(query, default=None):
    """Ask a yes/no question on standard input until a usable answer is given.

    Parameters
    ----------
    query : str
        Question shown to the user; a " [y/n] " style hint is appended.
    default : {None, 'y', 'n'}
        Answer assumed when the user just presses Enter. With None, an
        empty answer is not accepted and the question is asked again.

    Returns
    -------
    bool
        True for an affirmative answer, False for a negative one (any
        spelling accepted by `strtobool`).

    Raises
    ------
    ValueError
        If `default` is not one of None, 'y' or 'n'.
    """
    available_prompts = {None: " [y/n] ", 'y': " [Y/n] ", 'n': " [y/N] "}

    if default not in available_prompts:
        raise ValueError("Invalid default: '{}'".format(default))

    prompt = "{0}{1}".format(query, available_prompts[default])

    while True:
        # Read outside the try block so the except clause below can only be
        # triggered by an unparsable answer; strip so "y " is accepted.
        answer = raw_input(prompt).strip()

        if answer == '' and default is not None:
            answer = default

        try:
            # strtobool returns 0/1; expose a real boolean to callers.
            return bool(strtobool(answer))
        except ValueError:
            # Unrecognised answer: ask again.
            continue
37+
38+
1239
def chunks(sequence, n):
1340
""" Yield successive n-sized chunks from sequence. """
1441
for i in xrange(0, len(sequence), n):

0 commit comments

Comments
 (0)