From c928a8056ae45053970bbd7cf3aa51cd1c21c11d Mon Sep 17 00:00:00 2001 From: Kumabuchi Kenji Date: Thu, 12 Sep 2019 11:37:19 +0900 Subject: [PATCH 1/4] Add fence_heuristics_resource agent Signed-off-by: Kumabuchi Kenji --- .../fence_heuristics_resource.py | 125 ++++++++++++++++++ configure.ac | 1 + fence-agents.spec.in | 14 ++ make/fencebuild.mk | 1 + .../metadata/fence_heuristics_resource.xml | 105 +++++++++++++++ 5 files changed, 246 insertions(+) create mode 100755 agents/heuristics_resource/fence_heuristics_resource.py create mode 100644 tests/data/metadata/fence_heuristics_resource.xml diff --git a/agents/heuristics_resource/fence_heuristics_resource.py b/agents/heuristics_resource/fence_heuristics_resource.py new file mode 100755 index 000000000..84f98649a --- /dev/null +++ b/agents/heuristics_resource/fence_heuristics_resource.py @@ -0,0 +1,125 @@ +#!/usr/libexec/platform-python -tt + +import io +import re +import subprocess +import shlex +import sys, stat +import logging +import os +import atexit +import time +sys.path.append("/usr/share/fence") +from fencing import fail_usage, run_command, fence_action, all_opt +from fencing import atexit_handler, check_input, process_input, show_docs +from fencing import run_delay + +def heuristics_resource(con, options): + + if options["--action"] == "on": + return True + + if not "--resource" in options or options["--resource"] == "": + logging.error("resource parameter required") + return False + + crm_resource_path = options["--crm-resource-path"] + resource = options["--resource"] + standby_wait = int(options["--standby-wait"]) + p = None + cmd = "%s -r %s -W" % (crm_resource_path, resource) + search_str = re.compile(r"\s%s$" % os.uname()[1]) + + logging.info("Running command: %s", cmd) + try: + p = subprocess.Popen(shlex.split(cmd), + stdout=subprocess.PIPE); + except OSError: + logging.error("Command failed on OS level"); + return False + + if p != None: + p.wait() + if p.returncode == 0: + for line in p.stdout: + searchres = search_str.search(line.decode().strip()) + if searchres: + # This node is ACT! Continue fencing. + return True + logging.info("Resource %s NOT found on this node" % resource); + else: + logging.error("Command failed. rc=%s" % p.returncode); + + if standby_wait > 0: + # The SBY node waits for fencing from the ACT node, and + # tries to fencing to the ACT node when waking up from sleep. + logging.info("Standby wait %s sec" % standby_wait); + time.sleep(standby_wait) + return True + + return False + + +def define_new_opts(): + all_opt["resource"] = { + "getopt" : ":", + "longopt" : "resource", + "required" : "1", + "help" : "--resource=[resource-id] ID of the resource that should be running in the ACT node", + "shortdesc" : "Resource ID", + "default" : "", + "order" : 1 + } + all_opt["standby_wait"] = { + "getopt" : ":", + "longopt" : "standby-wait", + "required" : "0", + "help" : "--standby-wait=[seconds] Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", + "shortdesc" : "Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", + "default" : "0", + "order" : 1 + } + all_opt["crm_resource_path"] = { + "getopt" : ":", + "longopt" : "crm-resource-path", + "required" : "0", + "help" : "--crm-resource-path=[path] Path to crm_resource", + "shortdesc" : "Path to crm_resource", + "default" : "@CRM_RESOURCE_PATH@", + "order" : 1 + } + + +def main(): + device_opt = ["no_status", "no_password", "resource", "standby_wait", "crm_resource_path", "method"] + define_new_opts() + atexit.register(atexit_handler) + + all_opt["method"]["default"] = "cycle" + all_opt["method"]["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)" + + options = check_input(device_opt, process_input(device_opt)) + + docs = {} + docs["shortdesc"] = "Fence agent for resource-heuristic based fencing" + docs["longdesc"] = "fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level.\ +\n.P\n\ +This is not a fence agent by itself! \ +Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource." + docs["vendorurl"] = "" + show_docs(options, docs) + + run_delay(options) + + result = fence_action(\ + None, \ + options, \ + None, \ + None, \ + reboot_cycle_fn = heuristics_resource, + sync_set_power_fn = heuristics_resource) + + sys.exit(result) + +if __name__ == "__main__": + main() diff --git a/configure.ac b/configure.ac index 9b88d5f62..830a05dee 100644 --- a/configure.ac +++ b/configure.ac @@ -279,6 +279,7 @@ AC_PATH_PROG([SNMPSET_PATH], [snmpset], [/usr/bin/snmpset]) AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget]) AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova]) AC_PATH_PROG([POWERMAN_PATH], [powerman], [/usr/bin/powerman]) +AC_PATH_PROG([CRM_RESOURCE_PATH], [crm_resource], [/usr/sbin/crm_resource]) AC_PATH_PROG([PING_CMD], [ping]) AC_PATH_PROG([PING6_CMD], [ping6]) diff --git a/fence-agents.spec.in b/fence-agents.spec.in index 9be8a9440..aed2f97b1 100644 --- a/fence-agents.spec.in +++ b/fence-agents.spec.in @@ -50,6 +50,7 @@ fence-agents-emerson \\ fence-agents-eps \\ fence-agents-hds-cb \\ fence-agents-heuristics-ping \\ +fence-agents-heuristics-resource \\ fence-agents-hpblade \\ fence-agents-ibmblade \\ fence-agents-ifmib \\ @@ -536,6 +537,19 @@ ping-heuristics. %{_sbindir}/fence_heuristics_ping %{_mandir}/man8/fence_heuristics_ping.8* +%package heuristics-resource +License: GPLv2+ and LGPLv2+ +Summary: Pseudo fence agent to affect other agents based on resource-heuristics +Requires: fence-agents-common = %{version}-%{release} +BuildArch: noarch +Obsoletes: fence-agents +%description heuristics-resource +Fence pseudo agent used to affect other agents based on +resource-heuristics. +%files heuristics-resource +%{_sbindir}/fence_heuristics_resource +%{_mandir}/man8/fence_heuristics_resource.8* + %package hpblade License: GPLv2+ and LGPLv2+ Summary: Fence agent for HP BladeSystem devices diff --git a/make/fencebuild.mk b/make/fencebuild.mk index 819e03e6b..bf754e03f 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -28,6 +28,7 @@ define gen_agent_from_py -e 's#@''SNMPGET_PATH@#${SNMPGET_PATH}#g' \ -e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \ -e 's#@''POWERMAN_PATH@#${POWERMAN_PATH}#g' \ + -e 's#@''CRM_RESOURCE_PATH@#${CRM_RESOURCE_PATH}#g' \ -e 's#@''PING_CMD@#${PING_CMD}#g' \ -e 's#@''PING6_CMD@#${PING6_CMD}#g' \ -e 's#@''PING4_CMD@#${PING4_CMD}#g' \ diff --git a/tests/data/metadata/fence_heuristics_resource.xml b/tests/data/metadata/fence_heuristics_resource.xml new file mode 100644 index 000000000..381049397 --- /dev/null +++ b/tests/data/metadata/fence_heuristics_resource.xml @@ -0,0 +1,105 @@ + + +fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level. + +This is not a fence agent by itself! Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource. + + + + + + Fencing action + + + + Path to crm_resource + + + + + + Method to fence + + + + + Resource ID + + + + + Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits. + + + + + Disable logging to stderr. Does not affect --verbose or --debug-file or logging to syslog. + + + + + Verbose mode + + + + + Write debug information to given file + + + + + Write debug information to given file + + + + + Display version information and exit + + + + + Display help and exit + + + + + Wait X seconds before fencing is started + + + + + Wait X seconds for cmd prompt after login + + + + + Test X seconds for status change after ON/OFF + + + + + Wait X seconds after issuing ON/OFF + + + + + Wait X seconds for cmd prompt after issuing command + + + + + Count of attempts to retry power on + + + + + + + + + + + + From 6e156c0106a618883faa82288afbd10935df9f86 Mon Sep 17 00:00:00 2001 From: Kumabuchi Kenji Date: Fri, 8 Nov 2019 17:19:37 +0900 Subject: [PATCH 2/4] fixed to always escalate to the next fencing level Signed-off-by: Kumabuchi Kenji --- .../fence_heuristics_resource.py | 113 ++++++++++++------ configure.ac | 1 + make/fencebuild.mk | 1 + .../metadata/fence_heuristics_resource.xml | 26 +++- 4 files changed, 96 insertions(+), 45 deletions(-) diff --git a/agents/heuristics_resource/fence_heuristics_resource.py b/agents/heuristics_resource/fence_heuristics_resource.py index 84f98649a..b1ceec7ec 100755 --- a/agents/heuristics_resource/fence_heuristics_resource.py +++ b/agents/heuristics_resource/fence_heuristics_resource.py @@ -6,7 +6,6 @@ import shlex import sys, stat import logging -import os import atexit import time sys.path.append("/usr/share/fence") @@ -15,66 +14,92 @@ from fencing import run_delay def heuristics_resource(con, options): + # Search the node where the resource is running and determine + # the ACT node or not. For SBY node, a delay is generated. + # Note that this method always returns FALSE. - if options["--action"] == "on": - return True + if not "--nodename" in options or options["--nodename"] == "": + logging.error("nodename parameter required") + return False if not "--resource" in options or options["--resource"] == "": logging.error("resource parameter required") return False - crm_resource_path = options["--crm-resource-path"] + target = options["--nodename"] resource = options["--resource"] + promotable = options["--promotable"] in ["", "1"] standby_wait = int(options["--standby-wait"]) - p = None - cmd = "%s -r %s -W" % (crm_resource_path, resource) - search_str = re.compile(r"\s%s$" % os.uname()[1]) - - logging.info("Running command: %s", cmd) - try: - p = subprocess.Popen(shlex.split(cmd), - stdout=subprocess.PIPE); - except OSError: - logging.error("Command failed on OS level"); + crm_resource_path = options["--crm-resource-path"] + crm_node_path = options["--crm-node-path"] + + (rc, out, err) = run_command(options, "%s --name" % crm_node_path) + if rc != 0 or out == None: + logging.error("Can not get my nodename. rc=%s, stderr=%s" % (rc, err)) + return False + + mynodename = out.strip() + + if mynodename == target: + logging.info("Skip standby wait due to self-fencing.") + return False + + (rc, out, err) = run_command(options, "%s -r %s -W" % (crm_resource_path, resource)) + if rc != 0 or out == None: + logging.error("Command failed. rc=%s, stderr=%s" % (rc, err)) return False - if p != None: - p.wait() - if p.returncode == 0: - for line in p.stdout: - searchres = search_str.search(line.decode().strip()) - if searchres: - # This node is ACT! Continue fencing. - return True - logging.info("Resource %s NOT found on this node" % resource); - else: - logging.error("Command failed. rc=%s" % p.returncode); + search_str = re.compile(r"\s%s%s$" % (mynodename, '\sMaster' if promotable else '')) + for line in out.splitlines(): + searchres = search_str.search(line.strip()) + if searchres: + logging.info("This node is ACT! Skip standby wait.") + return False + + logging.info("Resource %s NOT found on this node" % resource) if standby_wait > 0: - # The SBY node waits for fencing from the ACT node, and - # tries to fencing to the ACT node when waking up from sleep. - logging.info("Standby wait %s sec" % standby_wait); + # The SBY node waits for fencing from the ACT node, and tries to fence + # the ACT node on next fencing level waking up from sleep. + logging.info("Standby wait %s sec" % standby_wait) time.sleep(standby_wait) - return True return False def define_new_opts(): + all_opt["nodename"] = { + "getopt" : "n:", + "longopt" : "nodename", + "required" : "1", + "help" : "-n, --nodename=[nodename] Name of node to be fenced", + "shortdesc" : "Name of node to be fenced", + "default" : "", + "order" : 1 + } all_opt["resource"] = { - "getopt" : ":", + "getopt" : "r:", "longopt" : "resource", "required" : "1", - "help" : "--resource=[resource-id] ID of the resource that should be running in the ACT node", + "help" : "-r, --resource=[resource-id] ID of the resource that should be running in the ACT node", "shortdesc" : "Resource ID", "default" : "", "order" : 1 } + all_opt["promotable"] = { + "getopt" : "p", + "longopt" : "promotable", + "required" : "0", + "help" : "-p, --promotable Specify if resource parameter is promotable (master/slave) resource", + "shortdesc" : "Handle the promotable resource. The node on which the master resource is running is considered as ACT.", + "default" : "False", + "order" : 1 + } all_opt["standby_wait"] = { - "getopt" : ":", + "getopt" : "w:", "longopt" : "standby-wait", "required" : "0", - "help" : "--standby-wait=[seconds] Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", + "help" : "-w, --standby-wait=[seconds] Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", "shortdesc" : "Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", "default" : "0", "order" : 1 @@ -83,15 +108,24 @@ def define_new_opts(): "getopt" : ":", "longopt" : "crm-resource-path", "required" : "0", - "help" : "--crm-resource-path=[path] Path to crm_resource", - "shortdesc" : "Path to crm_resource", + "help" : "--crm-resource-path=[path] Path to crm_resource", + "shortdesc" : "Path to crm_resource command", "default" : "@CRM_RESOURCE_PATH@", "order" : 1 } + all_opt["crm_node_path"] = { + "getopt" : ":", + "longopt" : "crm-node-path", + "required" : "0", + "help" : "--crm-node-path=[path] Path to crm_node", + "shortdesc" : "Path to crm_node command", + "default" : "@CRM_NODE_PATH@", + "order" : 1 + } def main(): - device_opt = ["no_status", "no_password", "resource", "standby_wait", "crm_resource_path", "method"] + device_opt = ["no_status", "no_password", "nodename", "resource", "promotable", "standby_wait", "crm_resource_path", "crm_node_path", "method"] define_new_opts() atexit.register(atexit_handler) @@ -101,11 +135,12 @@ def main(): options = check_input(device_opt, process_input(device_opt)) docs = {} - docs["shortdesc"] = "Fence agent for resource-heuristic based fencing" - docs["longdesc"] = "fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level.\ + docs["shortdesc"] = "Fence agent for resource-heuristic based fencing delay" + docs["longdesc"] = "fence_heuristics_resource uses resource-heuristics to delay execution of fence agent running on next level.\ \n.P\n\ This is not a fence agent by itself! \ -Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource." +Its only purpose is to delay execution of another fence agent that lives on next fencing level. \ +Note that this agent always returns FALSE. Therefore, subsequent agents on the same fencing level will not run" docs["vendorurl"] = "" show_docs(options, docs) diff --git a/configure.ac b/configure.ac index 830a05dee..d7d8ccfe7 100644 --- a/configure.ac +++ b/configure.ac @@ -280,6 +280,7 @@ AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget]) AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova]) AC_PATH_PROG([POWERMAN_PATH], [powerman], [/usr/bin/powerman]) AC_PATH_PROG([CRM_RESOURCE_PATH], [crm_resource], [/usr/sbin/crm_resource]) +AC_PATH_PROG([CRM_NODE_PATH], [crm_node], [/usr/sbin/crm_node]) AC_PATH_PROG([PING_CMD], [ping]) AC_PATH_PROG([PING6_CMD], [ping6]) diff --git a/make/fencebuild.mk b/make/fencebuild.mk index bf754e03f..7f057ea61 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -29,6 +29,7 @@ define gen_agent_from_py -e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \ -e 's#@''POWERMAN_PATH@#${POWERMAN_PATH}#g' \ -e 's#@''CRM_RESOURCE_PATH@#${CRM_RESOURCE_PATH}#g' \ + -e 's#@''CRM_NODE_PATH@#${CRM_NODE_PATH}#g' \ -e 's#@''PING_CMD@#${PING_CMD}#g' \ -e 's#@''PING6_CMD@#${PING6_CMD}#g' \ -e 's#@''PING4_CMD@#${PING4_CMD}#g' \ diff --git a/tests/data/metadata/fence_heuristics_resource.xml b/tests/data/metadata/fence_heuristics_resource.xml index 381049397..157b98fc5 100644 --- a/tests/data/metadata/fence_heuristics_resource.xml +++ b/tests/data/metadata/fence_heuristics_resource.xml @@ -1,8 +1,8 @@ - -fence_heuristics_resource uses resource-heuristics to control execution of another fence agent on the same fencing level. + +fence_heuristics_resource uses resource-heuristics to delay execution of fence agent running on next level. -This is not a fence agent by itself! Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristic_resource. +This is not a fence agent by itself! Its only purpose is to delay execution of another fence agent that lives on next fencing level. Note that this agent always returns FALSE. Therefore, subsequent agents on the same fencing level will not run @@ -10,9 +10,13 @@ This is not a fence agent by itself! Its only purpose is to enable/disable anoth Fencing action + + + Path to crm_node command + - Path to crm_resource + Path to crm_resource command @@ -22,13 +26,23 @@ This is not a fence agent by itself! Its only purpose is to enable/disable anoth Method to fence + + + + Name of node to be fenced + + + + + Handle the promotable resource. The node on which the master resource is running is considered as ACT. + - + Resource ID - + Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits. From 03ff8291918db0776cee2320b98d7056aeff4b3f Mon Sep 17 00:00:00 2001 From: Kumabuchi Kenji Date: Fri, 15 Nov 2019 17:16:27 +0900 Subject: [PATCH 3/4] change to use crm_mon xml output --- .../fence_heuristics_resource.py | 125 +++++++++++------- configure.ac | 2 +- make/fencebuild.mk | 2 +- .../metadata/fence_heuristics_resource.xml | 19 +-- 4 files changed, 88 insertions(+), 60 deletions(-) diff --git a/agents/heuristics_resource/fence_heuristics_resource.py b/agents/heuristics_resource/fence_heuristics_resource.py index b1ceec7ec..bcd7c89b1 100755 --- a/agents/heuristics_resource/fence_heuristics_resource.py +++ b/agents/heuristics_resource/fence_heuristics_resource.py @@ -1,4 +1,4 @@ -#!/usr/libexec/platform-python -tt +#!@PYTHON@ -tt import io import re @@ -8,6 +8,8 @@ import logging import atexit import time +import xml.etree.ElementTree as ET +import distutils.util as dist sys.path.append("/usr/share/fence") from fencing import fail_usage, run_command, fence_action, all_opt from fencing import atexit_handler, check_input, process_input, show_docs @@ -27,45 +29,85 @@ def heuristics_resource(con, options): return False target = options["--nodename"] - resource = options["--resource"] - promotable = options["--promotable"] in ["", "1"] - standby_wait = int(options["--standby-wait"]) - crm_resource_path = options["--crm-resource-path"] + resource_id = options["--resource"] + wait_time = int(options["--standby-wait"]) crm_node_path = options["--crm-node-path"] + crm_mon_path = options["--crm-mon-path"] (rc, out, err) = run_command(options, "%s --name" % crm_node_path) - if rc != 0 or out == None: + if not rc == 0 or out is None: logging.error("Can not get my nodename. rc=%s, stderr=%s" % (rc, err)) return False - mynodename = out.strip() + node = out.strip() - if mynodename == target: + if node == target: logging.info("Skip standby wait due to self-fencing.") return False - (rc, out, err) = run_command(options, "%s -r %s -W" % (crm_resource_path, resource)) - if rc != 0 or out == None: - logging.error("Command failed. rc=%s, stderr=%s" % (rc, err)) + (rc, out, err) = run_command(options, "%s --as-xml" % crm_mon_path) + if not rc == 0 or out is None: + logging.error("crm_mon command failed. rc=%s, stderr=%s" % (rc, err)) return False - search_str = re.compile(r"\s%s%s$" % (mynodename, '\sMaster' if promotable else '')) - for line in out.splitlines(): - searchres = search_str.search(line.strip()) - if searchres: - logging.info("This node is ACT! Skip standby wait.") - return False - - logging.info("Resource %s NOT found on this node" % resource) - - if standby_wait > 0: - # The SBY node waits for fencing from the ACT node, and tries to fence - # the ACT node on next fencing level waking up from sleep. - logging.info("Standby wait %s sec" % standby_wait) - time.sleep(standby_wait) - + tree = ET.fromstring(out) + resources = tree.findall('./resources//*[@id="%s"]' % resource_id) + if len(resources) == 0: + logging.error("Resource '%s' not found." % resource_id) + elif len(resources) == 1: + resource = resources[0] + type = resource.tag + if type == "resource": + # primitive resource + standby_node = check_standby_node(resource, node) + failed = check_failed_attrib(resource) + if standby_node and not failed: + return standby_wait(wait_time) + elif type == "group": + # resource group + standby_node = True + failed = False + for child in resource: + failed |= check_failed_attrib(child) + standby_node &= check_standby_node(child, node) + if standby_node and not failed: + return standby_wait(wait_time) + elif type == "clone" and dist.strtobool(resource.get("multi_state")): + # promotable resource + master_nodes = 0 + standby_node = True + failed = False + for native in resource: + failed |= check_failed_attrib(native) + if native.get("role") in ["Master"]: + master_nodes += 1 + standby_node &= check_standby_node(native, node) + if master_nodes == 1 and standby_node and not failed: + return standby_wait(wait_time) + else: + # clone or bundle resource + logging.error("Unsupported resource type: '%s'" % type) + else: + logging.error("Multiple active resources found.") + + logging.info("Skip standby wait.") return False +def standby_wait(wait_time): + logging.info("Standby wait %s sec" % wait_time) + time.sleep(wait_time) + return False + +def check_failed_attrib(resource): + failed = dist.strtobool(resource.get("failed")) + ignored = dist.strtobool(resource.get("failure_ignored")) + return failed and not ignored + +def check_standby_node(resource, nodename): + running_nodes = [] + for node in resource: + running_nodes.append(node.get("name")) + return len(set(running_nodes)) == 1 and not running_nodes[0] == nodename def define_new_opts(): all_opt["nodename"] = { @@ -81,36 +123,27 @@ def define_new_opts(): "getopt" : "r:", "longopt" : "resource", "required" : "1", - "help" : "-r, --resource=[resource-id] ID of the resource that should be running in the ACT node", - "shortdesc" : "Resource ID", + "help" : "-r, --resource=[resource-id] ID of the resource that should be running on the ACT node. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance.", + "shortdesc" : "Resource ID. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance.", "default" : "", "order" : 1 } - all_opt["promotable"] = { - "getopt" : "p", - "longopt" : "promotable", - "required" : "0", - "help" : "-p, --promotable Specify if resource parameter is promotable (master/slave) resource", - "shortdesc" : "Handle the promotable resource. The node on which the master resource is running is considered as ACT.", - "default" : "False", - "order" : 1 - } all_opt["standby_wait"] = { "getopt" : "w:", "longopt" : "standby-wait", "required" : "0", - "help" : "-w, --standby-wait=[seconds] Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", - "shortdesc" : "Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits.", - "default" : "0", + "help" : "-w, --standby-wait=[seconds] Wait X seconds on SBY node. The agent will delay but not succeed.", + "shortdesc" : "Wait X seconds on SBY node. The agent will delay but not succeed.", + "default" : "5", "order" : 1 } - all_opt["crm_resource_path"] = { + all_opt["crm_mon_path"] = { "getopt" : ":", - "longopt" : "crm-resource-path", + "longopt" : "crm-mon-path", "required" : "0", - "help" : "--crm-resource-path=[path] Path to crm_resource", - "shortdesc" : "Path to crm_resource command", - "default" : "@CRM_RESOURCE_PATH@", + "help" : "--crm-mon-path=[path] Path to crm_mon", + "shortdesc" : "Path to crm_mon command", + "default" : "@CRM_MON_PATH@", "order" : 1 } all_opt["crm_node_path"] = { @@ -125,7 +158,7 @@ def define_new_opts(): def main(): - device_opt = ["no_status", "no_password", "nodename", "resource", "promotable", "standby_wait", "crm_resource_path", "crm_node_path", "method"] + device_opt = ["no_status", "no_password", "nodename", "resource", "standby_wait", "crm_mon_path", "crm_node_path", "method"] define_new_opts() atexit.register(atexit_handler) diff --git a/configure.ac b/configure.ac index d7d8ccfe7..b40f9a93f 100644 --- a/configure.ac +++ b/configure.ac @@ -279,7 +279,7 @@ AC_PATH_PROG([SNMPSET_PATH], [snmpset], [/usr/bin/snmpset]) AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget]) AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova]) AC_PATH_PROG([POWERMAN_PATH], [powerman], [/usr/bin/powerman]) -AC_PATH_PROG([CRM_RESOURCE_PATH], [crm_resource], [/usr/sbin/crm_resource]) +AC_PATH_PROG([CRM_MON_PATH], [crm_mon], [/usr/sbin/crm_mon]) AC_PATH_PROG([CRM_NODE_PATH], [crm_node], [/usr/sbin/crm_node]) AC_PATH_PROG([PING_CMD], [ping]) diff --git a/make/fencebuild.mk b/make/fencebuild.mk index 7f057ea61..a552d74f1 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -28,7 +28,7 @@ define gen_agent_from_py -e 's#@''SNMPGET_PATH@#${SNMPGET_PATH}#g' \ -e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \ -e 's#@''POWERMAN_PATH@#${POWERMAN_PATH}#g' \ - -e 's#@''CRM_RESOURCE_PATH@#${CRM_RESOURCE_PATH}#g' \ + -e 's#@''CRM_MON_PATH@#${CRM_MON_PATH}#g' \ -e 's#@''CRM_NODE_PATH@#${CRM_NODE_PATH}#g' \ -e 's#@''PING_CMD@#${PING_CMD}#g' \ -e 's#@''PING6_CMD@#${PING6_CMD}#g' \ diff --git a/tests/data/metadata/fence_heuristics_resource.xml b/tests/data/metadata/fence_heuristics_resource.xml index 157b98fc5..4ac693cfb 100644 --- a/tests/data/metadata/fence_heuristics_resource.xml +++ b/tests/data/metadata/fence_heuristics_resource.xml @@ -10,14 +10,14 @@ This is not a fence agent by itself! Its only purpose is to delay execution of a Fencing action + + + Path to crm_mon command + Path to crm_node command - - - Path to crm_resource command - @@ -31,20 +31,15 @@ This is not a fence agent by itself! Its only purpose is to delay execution of a Name of node to be fenced - - - - Handle the promotable resource. The node on which the master resource is running is considered as ACT. - - Resource ID + Resource ID. It does not make sense to specify a cloned or bundled resource unless it is promotable and has only a single master instance. - - Wait X seconds on SBY node. If a positive number is specified, fencing action of this agent will always succeed after waits. + + Wait X seconds on SBY node. The agent will delay but not succeed. From b71797fe635edc5c7e01fe47fe3258e9cdf9a117 Mon Sep 17 00:00:00 2001 From: Kumabuchi Kenji Date: Fri, 29 Nov 2019 15:52:10 +0900 Subject: [PATCH 4/4] Ensure that the resource is running on another cluster member, not a remote host --- .../fence_heuristics_resource.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/agents/heuristics_resource/fence_heuristics_resource.py b/agents/heuristics_resource/fence_heuristics_resource.py index bcd7c89b1..13adfdd49 100755 --- a/agents/heuristics_resource/fence_heuristics_resource.py +++ b/agents/heuristics_resource/fence_heuristics_resource.py @@ -51,6 +51,11 @@ def heuristics_resource(con, options): return False tree = ET.fromstring(out) + nodes = tree.findall('./nodes//*[@type="member"]') + nodelist = [] + for member in nodes: + nodelist.append(member.get("name")) + resources = tree.findall('./resources//*[@id="%s"]' % resource_id) if len(resources) == 0: logging.error("Resource '%s' not found." % resource_id) @@ -59,7 +64,7 @@ def heuristics_resource(con, options): type = resource.tag if type == "resource": # primitive resource - standby_node = check_standby_node(resource, node) + standby_node = check_standby_node(resource, node, nodelist) failed = check_failed_attrib(resource) if standby_node and not failed: return standby_wait(wait_time) @@ -69,7 +74,7 @@ def heuristics_resource(con, options): failed = False for child in resource: failed |= check_failed_attrib(child) - standby_node &= check_standby_node(child, node) + standby_node &= check_standby_node(child, node, nodelist) if standby_node and not failed: return standby_wait(wait_time) elif type == "clone" and dist.strtobool(resource.get("multi_state")): @@ -81,7 +86,7 @@ def heuristics_resource(con, options): failed |= check_failed_attrib(native) if native.get("role") in ["Master"]: master_nodes += 1 - standby_node &= check_standby_node(native, node) + standby_node &= check_standby_node(native, node, nodelist) if master_nodes == 1 and standby_node and not failed: return standby_wait(wait_time) else: @@ -103,11 +108,11 @@ def check_failed_attrib(resource): ignored = dist.strtobool(resource.get("failure_ignored")) return failed and not ignored -def check_standby_node(resource, nodename): +def check_standby_node(resource, nodename, nodelist): running_nodes = [] for node in resource: running_nodes.append(node.get("name")) - return len(set(running_nodes)) == 1 and not running_nodes[0] == nodename + return len(set(running_nodes)) == 1 and running_nodes[0] in nodelist and not running_nodes[0] == nodename def define_new_opts(): all_opt["nodename"] = {