Merge pull request #80 from mgermain/failed_command_management

MarcCote · MarcCote · commit 3f82d39b3f77 · 2015-03-04T11:22:47.000-05:00
Failed command management
diff --git a/scripts/smart_dispatch.py b/scripts/smart_dispatch.py
@@ -3,8 +3,10 @@
 
 import os
 import argparse
+import time as t
 import numpy as np
 from subprocess import check_output
+from textwrap import dedent
 
 from smartdispatch.command_manager import CommandManager
 
@@ -59,6 +61,24 @@ def main():
         if args.mode == "launch":
             command_manager.set_commands_to_run(commands)
         else:
+            # Verify whether there are any failed commands
+            failed_commands = command_manager.get_failed_commands()
+            if len(failed_commands) > 0:
+                FAILED_COMMAND_MESSAGE = dedent("""\
+                {nb_failed} command(s) are in a failed state. They won't be resumed.
+                Failed commands:
+                {failed_commands}
+                The actual errors can be found in the log folder under:
+                {failed_commands_err_file}""")
+                utils.print_boxed(FAILED_COMMAND_MESSAGE.format(
+                    nb_failed=len(failed_commands),
+                    failed_commands=''.join(failed_commands),
+                    failed_commands_err_file='\n'.join([utils.generate_uid_from_string(c[:-1])+'.err' for c in failed_commands])
+                ))
+
+                if not utils.yes_no_prompt("Do you want to continue?", 'n'):
+                    exit()
+
             command_manager.reset_running_commands()
             nb_commands = command_manager.get_nb_commands_to_run()
 
@@ -88,16 +108,17 @@ def main():
 
     # Launch the jobs
     print "## {nb_commands} command(s) will be executed in {nb_jobs} job(s) ##".format(nb_commands=nb_commands, nb_jobs=len(pbs_filenames))
-    print "Batch UID:\n {batch_uid}".format(batch_uid=jobname)
+    print "Batch UID:\n{batch_uid}".format(batch_uid=jobname)
     if not args.doNotLaunch:
         jobs_id = []
         for pbs_filename in pbs_filenames:
             qsub_output = check_output('{launcher} {pbs_filename}'.format(launcher=LAUNCHER if args.launcher is None else args.launcher, pbs_filename=pbs_filename), shell=True)
-            jobs_id += [qsub_output.rstrip()]
+            jobs_id += [qsub_output.strip()]
 
         with utils.open_with_lock(os.path.join(path_job, "jobs_id.txt"), 'a') as jobs_id_file:
-            jobs_id_file.writelines("\n".join(jobs_id))
-        print "\nJobs id:\n {jobs_id}".format(jobs_id=" ".join(jobs_id))
+            jobs_id_file.writelines(t.strftime("## %Y-%m-%d %H:%M:%S ##\n"))
+            jobs_id_file.writelines("\n".join(jobs_id) + "\n")
+        print "\nJobs id:\n{jobs_id}".format(jobs_id=" ".join(jobs_id))
     print "\nLogs, command, and jobs id related to this batch will be in:\n {smartdispatch_folder}".format(smartdispatch_folder=path_job)
 
 
diff --git a/scripts/smart_worker.py b/scripts/smart_worker.py
@@ -51,9 +51,9 @@ def main():
                 stdout_file.flush()
                 stderr_file.flush()
 
-                subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)
+                error_code = subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)
 
-        command_manager.set_running_command_as_finished(command)
+        command_manager.set_running_command_as_finished(command, error_code)
 
 if __name__ == '__main__':
     main()
diff --git a/smartdispatch/command_manager.py b/smartdispatch/command_manager.py
@@ -9,6 +9,7 @@ def __init__(self, commands_filename):
 
         self._running_commands_filename = os.path.join(base_path, "running_" + filename)
         self._finished_commands_filename = os.path.join(base_path, "finished_" + filename)
+        self._failed_commands_filename = os.path.join(base_path, "failed_" + filename)
         self._commands_filename = commands_filename
 
     def _move_line_between_files(self, file1, file2, line):
@@ -40,9 +41,21 @@ def get_nb_commands_to_run(self):
         with open(self._commands_filename, 'r') as commands_file:
             return len(commands_file.readlines())
 
-    def set_running_command_as_finished(self, command):
+    def get_failed_commands(self):
+        commands = []
+        if os.path.isfile(self._failed_commands_filename):
+            with open(self._failed_commands_filename, 'r') as commands_file:
+                commands = commands_file.readlines()
+        return commands
+
+    def set_running_command_as_finished(self, command, error_code=0):
+        if error_code == 0:
+            file_name = self._finished_commands_filename
+        else:
+            file_name = self._failed_commands_filename
+
         with utils.open_with_lock(self._running_commands_filename, 'r+') as running_commands_file:
-            with utils.open_with_lock(self._finished_commands_filename, 'a') as finished_commands_file:
+            with utils.open_with_lock(file_name, 'a') as finished_commands_file:
                 self._move_line_between_files(running_commands_file, finished_commands_file, command + '\n')
 
     def reset_running_commands(self):
diff --git a/smartdispatch/tests/test_command_manager.py b/smartdispatch/tests/test_command_manager.py
@@ -15,7 +15,7 @@ def setUp(self):
         self.command2 = "2\n"
         self.command3 = "3\n"
 
-        command_filename = os.path.join(self._base_dir, "commant.txt")
+        command_filename = os.path.join(self._base_dir, "commands.txt")
 
         with open(command_filename, "w+") as commands_file:
             commands_file.write(self.command1 + self.command2 + self.command3)
@@ -40,6 +40,25 @@ def test_set_commands_to_run(self):
 
         assert_true(not os.path.isfile(self.command_manager._finished_commands_filename))
 
+    def test_get_failed_commands(self):
+        # Setup
+        command = self.command_manager.get_command_to_run()
+        self.command_manager.set_running_command_as_finished(command, 1)
+
+        # The function to test
+        failed_commands = self.command_manager.get_failed_commands()
+
+        # Test validation
+        assert_equal(len(failed_commands), 1)
+        assert_equal(failed_commands[0], self.command1)
+
+    def test_get_failed_commands_empty(self):
+        # The function to test
+        failed_commands = self.command_manager.get_failed_commands()
+
+        # Test validation
+        assert_equal(len(failed_commands), 0)
+
     def test_get_command_to_run(self):
         # The function to test
         command = self.command_manager.get_command_to_run()
@@ -75,6 +94,28 @@ def test_set_running_command_as_finished(self):
         with open(self.command_manager._finished_commands_filename, "r") as finished_commands_file:
             assert_equal(finished_commands_file.read(), self.command1)
 
+        assert_true(not os.path.isfile(self.command_manager._failed_commands_filename))
+
+    def test_set_running_command_as_failed(self):
+        # SetUp
+        command = self.command_manager.get_command_to_run()
+        error_code = 1
+
+        # The function to test
+        self.command_manager.set_running_command_as_finished(command, error_code)
+
+        # Test validation
+        with open(self.command_manager._commands_filename, "r") as commands_file:
+            assert_equal(commands_file.read(), self.command2 + self.command3)
+
+        with open(self.command_manager._running_commands_filename, "r") as running_commands_file:
+            assert_equal(running_commands_file.read(), "")
+
+        with open(self.command_manager._failed_commands_filename, "r") as failed_commands_file:
+            assert_equal(failed_commands_file.read(), self.command1)
+
+        assert_true(not os.path.isfile(self.command_manager._finished_commands_filename))
+
     def test_reset_running_commands(self):
         # SetUp
         self.command_manager.get_command_to_run()
diff --git a/smartdispatch/tests/test_utils.py b/smartdispatch/tests/test_utils.py
@@ -3,6 +3,7 @@
 import time
 import tempfile
 import shutil
+import unittest
 
 from subprocess import Popen, PIPE
 
@@ -12,12 +13,25 @@
 from numpy.testing import assert_array_equal
 
 
+class PrintBoxedTests(unittest.TestCase):
+
+    def setUp(self):
+        self.empty = ''
+        self.text = "This is weird test for a visual thing.\nWell maybe it's fine to test it's working."
+
+    def test_print_boxed(self):
+        utils.print_boxed(self.text)
+
+    def test_print_boxed_empty(self):
+        utils.print_boxed(self.empty)
+
+
 def test_chunks():
     sequence = range(10)
 
     for n in range(1, 11):
         expected = []
-        for start, end in zip(range(0, len(sequence), n), range(n, len(sequence)+n, n)):
+        for start, end in zip(range(0, len(sequence), n), range(n, len(sequence) + n, n)):
             expected.append(sequence[start:end])
 
         assert_array_equal(list(utils.chunks(sequence, n)), expected, "n:{0}".format(n))
diff --git a/smartdispatch/utils.py b/smartdispatch/utils.py
@@ -5,10 +5,37 @@
 import unicodedata
 import json
 
+from distutils.util import strtobool
 from subprocess import Popen, PIPE
 from contextlib import contextmanager
 
 
+def print_boxed(string):
+    splitted_string = string.split('\n')
+    max_len = max(map(len, splitted_string))
+    box_line = u"\u2500" * (max_len + 2)
+
+    out = u"\u250c" + box_line + u"\u2510\n"
+    out += '\n'.join([u"\u2502 {} \u2502".format(line.ljust(max_len)) for line in splitted_string])
+    out += u"\n\u2514" + box_line + u"\u2518"
+    print out
+
+
+def yes_no_prompt(query, default=None):
+    available_prompts = {None: " [y/n] ", 'y': " [Y/n] ", 'n': " [y/N] "}
+
+    if default not in available_prompts:
+        raise ValueError("Invalid default: '{}'".format(default))
+
+    while True:
+        try:
+            answer = raw_input("{0}{1}".format(query, available_prompts[default]))
+            return strtobool(answer)
+        except ValueError:
+            if answer == '' and default is not None:
+                return strtobool(default)
+
+
 def chunks(sequence, n):
     """ Yield successive n-sized chunks from sequence. """
     for i in xrange(0, len(sequence), n):