diff --git a/src/metaswitch/clearwater/etcd_shared/plugin_utils.py b/src/metaswitch/clearwater/etcd_shared/plugin_utils.py index 2ff8b58b..32a92c9f 100644 --- a/src/metaswitch/clearwater/etcd_shared/plugin_utils.py +++ b/src/metaswitch/clearwater/etcd_shared/plugin_utils.py @@ -15,45 +15,70 @@ _log = logging.getLogger("etcd_shared.plugin_utils") -def run_command(command_args, namespace=None, log_error=True): - """Runs the given shell command, logging the output and return code. +def run_commands(list_of_command_args, namespace=None, log_error=True): + """Runs the given shell commands in parallel, logging the output and return + code. If a namespace is supplied the command is run in the specified namespace. Note that this runs the provided array of command arguments in a subprocess call without shell, to avoid shell injection. Ensure the command is passed in as an array instead of a string. + + Returns 0 if all commands succeeded, and the return code of one of the + failed commands otherwise. """ - if namespace: - command_args[0:0] = ['ip', 'netns', 'exec', namespace] + namespace_prefix = ['ip', 'netns', 'exec', namespace] if namespace else [] + list_of_namespaced_command_args = [namespace_prefix + c for c in list_of_command_args] # Pass the close_fds argument to avoid the pidfile lock being held by # child processes - p = subprocess.Popen(command_args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) - stdout, stderr = p.communicate() - if p.returncode != 0: - # it failed, log the return code and output - if log_error: - _log.error("Command {} failed with return code {}, " - "stdout {!r}, and stderr {!r}".format(' '.join(command_args), - p.returncode, - stdout, - stderr)) - return p.returncode - else: - # it succeeded, log out stderr of the command run if present - if stderr: - _log.warning("Command {} succeeded, with stderr output {!r}". - format(' '.join(command_args), stderr)) + processes = [(subprocess.Popen(c, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True), c) + for c in list_of_namespaced_command_args] + + error_returncodes = [] + for p, command_args in processes: + stdout, stderr = p.communicate() + # We log: + # - everything (return code, stdout, stderr) in failure cases + # - only stderr in success cases + if p.returncode != 0: + if log_error: + _log.error("Command {} failed with return code {}\n" + " stdout {!r}\n stderr {!r}".format(' '.join(command_args), + p.returncode, + stdout, + stderr)) + error_returncodes.append(p.returncode) else: - _log.debug("Command {} succeeded".format(' '.join(command_args))) + if stderr: + _log.warning("Command {} succeeded, with stderr output {!r}". + format(' '.join(command_args), stderr)) + else: + _log.debug("Command {} succeeded".format(' '.join(command_args))) + # Return 0, unless any nonzero return codes are present, in which case + # arbitrarily return the first one. + if error_returncodes: + return error_returncodes[0] + else: return 0 +# Wrapper around run_commands which only runs a single command instead of +# multiple commands. +# +# It's structured this way because run_commands runs all the provided commands +# in parallel, and it's easier to have that as the standard function and then +# write a serial wrapper around it than the reverse. +def run_command(command_args, **kwargs): + """Runs the given shell command. See run_commands for full documentation""" + return run_commands([command_args], **kwargs) + + def safely_write(filename, contents, permissions=0644): """Writes a file without race conditions, by writing to a temporary file and then atomically renaming it""" diff --git a/src/metaswitch/clearwater/plugin_tests/test_apply_config_plugin.py b/src/metaswitch/clearwater/plugin_tests/test_apply_config_plugin.py index 3372c64a..11526639 100644 --- a/src/metaswitch/clearwater/plugin_tests/test_apply_config_plugin.py +++ b/src/metaswitch/clearwater/plugin_tests/test_apply_config_plugin.py @@ -21,119 +21,146 @@ # simulate run_command("check_node_health.py") succeeding ir failing. # The success function is not strictly necessary, but ensures symmetry. -def run_command_all_succeed(command): +def run_commands_all_succeed(command, **kwargs): return 0 -def run_command_check_node_health_fails(command): - if (command == \ +def run_commands_check_node_health_fails(command, **kwargs): + if (command[0] == \ ["/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py"]): return 1 else: return 0 +mock_run_commands_success = mock.MagicMock(side_effect=run_commands_all_succeed) +mock_run_commands_unhealthy = mock.MagicMock(side_effect=run_commands_check_node_health_fails) -class TestApplyConfigPlugin(unittest.TestCase): - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.subprocess.check_output') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.run_command',\ - side_effect=run_command_all_succeed) - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.path.exists') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.listdir') - def test_front_of_queue(self, mock_os_listdir, mock_os_path_exists, - mock_run_command, mock_subproc_check_output): - """Test Queue Manager front_of_queue function""" - - mock_subproc_check_output.return_value = "apply_config_key" - # Create the plugin - plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='Y')) +mock_subproc_check_output = mock.MagicMock() +mock_subproc_check_output.return_value = "apply_config_key" - # Set up the mock environment and expectations - mock_os_path_exists.return_value = True - mock_os_listdir.return_value = ["test_restart_script"] +mock_os_path_exists = mock.MagicMock() +mock_os_listdir = mock.MagicMock() +mock_os_path_exists.return_value = True +mock_os_listdir.return_value = ["test_restart_script", "test_restart_script2"] - expected_command_call_list = \ - [mock.call(['service', 'clearwater-infrastructure', 'restart']), - mock.call(['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script']), - mock.call(['/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py']), - mock.call(['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ - 'remove_success', 'apply_config_key'])] +PLUGIN_MODULE = "clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin" - # Call the plugin hook +class TestApplyConfigPlugin(unittest.TestCase): + def setUp(self): + mock_os_path_exists.reset_mock() + mock_os_listdir.reset_mock() + mock_run_commands_success.reset_mock() + mock_run_commands_unhealthy.reset_mock() + + + @mock.patch(PLUGIN_MODULE + '.subprocess.check_output', new=mock_subproc_check_output) + @mock.patch(PLUGIN_MODULE + '.os.path.exists', new=mock_os_path_exists) + @mock.patch(PLUGIN_MODULE + '.os.listdir', new=mock_os_listdir) + @mock.patch(PLUGIN_MODULE + '.run_commands', new=mock_run_commands_success) + @mock.patch('metaswitch.clearwater.etcd_shared.plugin_utils.run_commands', new=mock_run_commands_success) + def test_front_of_queue(self): + # Create the plugin, and tell it we're at the front of the restart queue. + plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='Y')) plugin.at_front_of_queue() - # Test our assertions + # The plugin will look for the restart scripts directory, and + # mock_os_path_exists and mock_os_listdir tell it there are two + # scripts. mock_os_path_exists.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") + ("/usr/share/clearwater/infrastructure/scripts/restart/") mock_os_listdir.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") - mock_run_command.assert_has_calls(expected_command_call_list) - - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.subprocess.check_output') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.run_command',\ - side_effect=run_command_check_node_health_fails) - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.path.exists') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.listdir') - def test_front_of_queue_fail_node_health(self, mock_os_listdir, - mock_os_path_exists, - mock_run_command, - mock_subproc_check_output): - """Test Queue Manager when check_node_health fails""" - - mock_subproc_check_output.return_value = "apply_config_key" - # Create the plugin - plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='Y')) + ("/usr/share/clearwater/infrastructure/scripts/restart/") + + expected_commands = [] - # Set up the mock environment and expectations - mock_os_path_exists.return_value = True - mock_os_listdir.return_value = ["test_restart_script"] + # It then restarts clearwater-infrastructure + expected_commands.append([['service', 'clearwater-infrastructure', 'restart']]) - expected_command_call_list = \ - [mock.call(['service', 'clearwater-infrastructure', 'restart']), - mock.call(['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script']), - mock.call(['/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py']), - mock.call(['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ - 'remove_failure', u'apply_config_key'])] + # It then runs all the restart scripts in parallel + expected_commands.append([ + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script'], + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script2']]) - # Call the plugin hook + # It then checks the health of the node - mock_run_commands_success tells us it is healthy + expected_commands.append([['/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py']]) + + # Lastly, it reports success + expected_commands.append([['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ + 'remove_success', 'apply_config_key']]) + + expected_command_call_list = [mock.call(x) for x in expected_commands] + mock_run_commands_success.assert_has_calls(expected_command_call_list) + + + @mock.patch(PLUGIN_MODULE + '.subprocess.check_output', new=mock_subproc_check_output) + @mock.patch(PLUGIN_MODULE + '.os.path.exists', new=mock_os_path_exists) + @mock.patch(PLUGIN_MODULE + '.os.listdir', new=mock_os_listdir) + @mock.patch(PLUGIN_MODULE + '.run_commands', new=mock_run_commands_unhealthy) + @mock.patch('metaswitch.clearwater.etcd_shared.plugin_utils.run_commands', new=mock_run_commands_unhealthy) + def test_front_of_queue_fail_node_health(self): + # Create the plugin, and tell it we're at the front of the restart queue. + plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='Y')) plugin.at_front_of_queue() - # Test our assertions + # The plugin will look for the restart scripts directory, and + # mock_os_path_exists and mock_os_listdir tell it there are two + # scripts. mock_os_path_exists.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") + ("/usr/share/clearwater/infrastructure/scripts/restart/") mock_os_listdir.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") - mock_run_command.assert_has_calls(expected_command_call_list) - - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.subprocess.check_output') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.run_command',\ - side_effect=run_command_all_succeed) - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.path.exists') - @mock.patch('clearwater_etcd_plugins.clearwater_queue_manager.apply_config_plugin.os.listdir') - def test_front_of_queue_no_health_check(self, mock_os_listdir, - mock_os_path_exists, - mock_run_command, - mock_subproc_check_output): - """Test Queue Manager when we're not checking node health""" - - mock_subproc_check_output.return_value = "apply_config_key" - # Create the plugin - plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='N')) + ("/usr/share/clearwater/infrastructure/scripts/restart/") - # Set up the mock environment and expectations - mock_os_path_exists.return_value = True - mock_os_listdir.return_value = ["test_restart_script"] + expected_commands = [] - expected_command_call_list = \ - [mock.call(['service', 'clearwater-infrastructure', 'restart']), - mock.call(['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script']), - mock.call(['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ - 'remove_success', 'apply_config_key'])] + # It then restarts clearwater-infrastructure + expected_commands.append([['service', 'clearwater-infrastructure', 'restart']]) - # Call the plugin hook + # It then runs all the restart scripts in parallel + expected_commands.append([ + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script'], + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script2']]) + + # It then checks the health of the node - mock_run_commands_unhealthy simulates a failure here + expected_commands.append([['/usr/share/clearwater/clearwater-queue-manager/scripts/check_node_health.py']]) + + # Therefore, it reports failure + expected_commands.append([['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ + 'remove_failure', 'apply_config_key']]) + + expected_command_call_list = [mock.call(x) for x in expected_commands] + mock_run_commands_unhealthy.assert_has_calls(expected_command_call_list) + + + @mock.patch(PLUGIN_MODULE + '.subprocess.check_output', new=mock_subproc_check_output) + @mock.patch(PLUGIN_MODULE + '.os.path.exists', new=mock_os_path_exists) + @mock.patch(PLUGIN_MODULE + '.os.listdir', new=mock_os_listdir) + @mock.patch(PLUGIN_MODULE + '.run_commands', new=mock_run_commands_success) + @mock.patch('metaswitch.clearwater.etcd_shared.plugin_utils.run_commands', new=mock_run_commands_success) + def test_front_of_queue_no_health_check(self): + # Create the plugin, and tell it we're at the front of the restart queue. + plugin = ApplyConfigPlugin(PluginParams(wait_plugin_complete='N')) plugin.at_front_of_queue() - # Test our assertions + # The plugin will look for the restart scripts directory, and + # mock_os_path_exists and mock_os_listdir tell it there are two + # scripts. mock_os_path_exists.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") + ("/usr/share/clearwater/infrastructure/scripts/restart/") mock_os_listdir.assert_called_once_with\ - ("/usr/share/clearwater/infrastructure/scripts/restart") - mock_run_command.assert_has_calls(expected_command_call_list) + ("/usr/share/clearwater/infrastructure/scripts/restart/") + + expected_commands = [] + + # It then restarts clearwater-infrastructure + expected_commands.append([['service', 'clearwater-infrastructure', 'restart']]) + + # It then runs all the restart scripts in parallel + expected_commands.append([ + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script'], + ['/usr/share/clearwater/infrastructure/scripts/restart/test_restart_script2']]) + + # Because we disabled health checks, it immediately reports success + expected_commands.append([['/usr/share/clearwater/clearwater-queue-manager/scripts/modify_nodes_in_queue', \ + 'remove_success', 'apply_config_key']]) + + expected_command_call_list = [mock.call(x) for x in expected_commands] + mock_run_commands_success.assert_has_calls(expected_command_call_list)