23 changes: 5 additions & 18 deletions crmsh/sbd.py
@@ -594,22 +594,6 @@ def enable_sbd_service(self):
logger.info("Enable %s on node %s", constants.SBD_SERVICE, node)
service_manager.enable_service(constants.SBD_SERVICE, node)

@staticmethod
def restart_cluster_if_possible(with_maintenance_mode=False):
if not ServiceManager().service_is_active(constants.PCMK_SERVICE):
return
if not xmlutil.CrmMonXmlParser().is_non_stonith_resource_running():
bootstrap.restart_cluster()
elif with_maintenance_mode:
if not utils.is_dlm_running():
bootstrap.restart_cluster()
else:
logger.warning("Resource is running, need to restart cluster service manually on each node")
else:
logger.warning("Resource is running, need to restart cluster service manually on each node")
logger.warning("Or, run with `crm -F` or `--force` option, the `sbd` subcommand will leverage maintenance mode for any changes that require restarting sbd.service")
logger.warning("Understand risks that running RA has no cluster protection while the cluster is in maintenance mode and restarting")

def configure_sbd(self):
'''
Configure fence_sbd resource and related properties
@@ -747,6 +731,9 @@ def init_and_deploy_sbd(self, restart_first=False):
self._load_attributes_from_bootstrap()

with utils.leverage_maintenance_mode() as enabled:
if not utils.able_to_restart_cluster(enabled):
return

self.initialize_sbd()
self.update_configuration()
self.enable_sbd_service()
@@ -761,7 +748,7 @@
restart_cluster_first = restart_first or \
(self.diskless_sbd and not ServiceManager().service_is_active(constants.SBD_SERVICE))
if restart_cluster_first:
SBDManager.restart_cluster_if_possible(with_maintenance_mode=enabled)
bootstrap.restart_cluster()

self.configure_sbd()
bootstrap.adjust_properties(with_sbd=True)
@@ -771,7 +758,7 @@
# This helps prevent unexpected issues, such as nodes being fenced
# due to large SBD_WATCHDOG_TIMEOUT values combined with smaller timeouts.
if not restart_cluster_first:
SBDManager.restart_cluster_if_possible(with_maintenance_mode=enabled)
bootstrap.restart_cluster()

def join_sbd(self, remote_user, peer_host):
'''
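Taken together, the crmsh/sbd.py hunks above replace the removed restart_cluster_if_possible() helper with an explicit safety check followed by a direct restart. A condensed, non-verbatim sketch of the resulting init_and_deploy_sbd flow (names resolve as in crmsh/sbd.py; the exact nesting of the later steps inside the with-block is inferred from the context lines, not shown verbatim in the diff):

def init_and_deploy_sbd(self, restart_first=False):
    # Condensed reading of the hunks above; not the full method body.
    self._load_attributes_from_bootstrap()

    with utils.leverage_maintenance_mode() as enabled:
        # Bail out early if restarting is unsafe; the helper logs guidance.
        if not utils.able_to_restart_cluster(enabled):
            return

        self.initialize_sbd()
        self.update_configuration()
        self.enable_sbd_service()

        restart_cluster_first = restart_first or \
            (self.diskless_sbd and not ServiceManager().service_is_active(constants.SBD_SERVICE))
        if restart_cluster_first:
            bootstrap.restart_cluster()

        self.configure_sbd()
        bootstrap.adjust_properties(with_sbd=True)

        # Restart afterwards if we did not already, so the new SBD timeouts
        # take effect before a node could be fenced with stale values.
        if not restart_cluster_first:
            bootstrap.restart_cluster()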
43 changes: 29 additions & 14 deletions crmsh/ui_sbd.py
@@ -517,8 +517,11 @@ def _device_remove(self, devices_to_remove: typing.List[str]):

logger.info("Remove devices: %s", ';'.join(devices_to_remove))
update_dict = {"SBD_DEVICE": ";".join(left_device_list)}
sbd.SBDManager.update_sbd_configuration(update_dict)
sbd.SBDManager.restart_cluster_if_possible()
with utils.leverage_maintenance_mode() as enabled:
if not utils.able_to_restart_cluster(enabled):
return
sbd.SBDManager.update_sbd_configuration(update_dict)
bootstrap.restart_cluster()

@command.completers_repeating(sbd_device_completer)
def do_device(self, context, *args) -> bool:
@@ -601,22 +604,34 @@ def do_purge(self, context, *args) -> bool:
if not self._service_is_active(constants.SBD_SERVICE):
return False

purge_crashdump = False
if args:
if args[0] == "crashdump":
if not self._is_crashdump_configured():
logger.error("SBD crashdump is not configured")
return False
purge_crashdump = True
else:
logger.error("Invalid argument: %s", ' '.join(args))
logger.info("Usage: crm sbd purge [crashdump]")
return False

utils.check_all_nodes_reachable("purging SBD")

if args and args[0] == "crashdump":
if not self._is_crashdump_configured():
logger.error("SBD crashdump is not configured")
with utils.leverage_maintenance_mode() as enabled:
if not utils.able_to_restart_cluster(enabled):
return False
self._set_crashdump_option(delete=True)
update_dict = self._set_crashdump_in_sysconfig(restore=True)
if update_dict:
sbd.SBDManager.update_sbd_configuration(update_dict)
sbd.SBDManager.restart_cluster_if_possible()
return True

sbd.purge_sbd_from_cluster()
sbd.SBDManager.restart_cluster_if_possible()
return True
if purge_crashdump:
self._set_crashdump_option(delete=True)
update_dict = self._set_crashdump_in_sysconfig(restore=True)
if update_dict:
sbd.SBDManager.update_sbd_configuration(update_dict)
else:
sbd.purge_sbd_from_cluster()

bootstrap.restart_cluster()
return True

def _print_sbd_type(self):
if not self.service_manager.service_is_active(constants.SBD_SERVICE):
27 changes: 27 additions & 0 deletions crmsh/utils.py
@@ -3306,4 +3306,31 @@ def validate_and_get_reachable_nodes(
member_list.remove(node)

return member_list + remote_list


def able_to_restart_cluster(in_maintenance_mode: bool = False) -> bool:
"""
Check whether the cluster can be restarted now:
1. If pacemaker is not running, return True
2. If no non-stonith resource is running, return True
3. If in maintenance mode and DLM is not running, return True
4. Otherwise, return False and log warnings to guide the user
"""
if not ServiceManager().service_is_active(constants.PCMK_SERVICE):
return True
crm_mon_parser = xmlutil.CrmMonXmlParser()
if not crm_mon_parser.is_non_stonith_resource_running():
return True
elif in_maintenance_mode:
if is_dlm_running():
dlm_related_ids = crm_mon_parser.get_resource_top_parent_id_set_via_type(constants.DLM_CONTROLD_RA)
logger.warning("Please stop DLM related resources (%s) and try again", ', '.join(dlm_related_ids))
return False
else:
return True
else:
logger.warning("Please stop all running resources and try again")
logger.warning("Or run this command with -F/--force option to leverage maintenance mode")
logger.warning("Understand risks that running RA has no cluster protection while the cluster is in maintenance mode and restarting")
return False
# vim:ts=4:sw=4:et:
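This PR removes the unit tests for restart_cluster_if_possible() but adds no visible tests for the new helper in this diff. A hedged sketch of what such a test could look like, patterned on the removed tests further below; the patch targets (e.g. crmsh.utils.ServiceManager) are assumptions based on how able_to_restart_cluster() references those names:

from unittest import TestCase
from unittest.mock import patch

from crmsh import constants, utils


class TestAbleToRestartCluster(TestCase):

    @patch('crmsh.utils.ServiceManager')
    def test_pacemaker_not_active(self, mock_ServiceManager):
        # Branch 1: pacemaker is down, so restarting is always allowed.
        mock_ServiceManager.return_value.service_is_active.return_value = False
        self.assertTrue(utils.able_to_restart_cluster())
        mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE)

    @patch('logging.Logger.warning')
    @patch('crmsh.xmlutil.CrmMonXmlParser')
    @patch('crmsh.utils.ServiceManager')
    def test_resources_running_without_maintenance(self, mock_ServiceManager, mock_parser, mock_warning):
        # Branch 4: non-stonith resources are running and maintenance mode is off.
        mock_ServiceManager.return_value.service_is_active.return_value = True
        mock_parser.return_value.is_non_stonith_resource_running.return_value = True
        self.assertFalse(utils.able_to_restart_cluster(in_maintenance_mode=False))
        self.assertEqual(mock_warning.call_count, 3)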
7 changes: 7 additions & 0 deletions crmsh/xmlutil.py
@@ -1627,6 +1627,13 @@ def is_resource_started(self, ra):
xpath = f'//resource[(@id="{ra}" or @resource_agent="{ra}") and @active="true" and @role="Started"]'
return bool(self.xml_elem.xpath(xpath))

def get_resource_top_parent_id_set_via_type(self, ra_type):
"""
Given a configured RA type, return the set of topmost parent RA ids
"""
xpath = f'//resource[@resource_agent="{ra_type}"]'
return {get_topmost_rsc(elem).get('id') for elem in self.xml_elem.xpath(xpath)}

def get_resource_id_list_via_type(self, ra_type):
"""
Given configured ra type, get the ra id list
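The parser helper above is what able_to_restart_cluster() uses to name DLM-related resources in its warning. A minimal usage sketch, assuming get_topmost_rsc() resolves a controld primitive to its outermost clone or group and that constants.DLM_CONTROLD_RA holds the controld agent name:

# Sketch only: list the top-level ids of DLM-related resources.
parser = xmlutil.CrmMonXmlParser()
dlm_ids = parser.get_resource_top_parent_id_set_via_type(constants.DLM_CONTROLD_RA)
if dlm_ids:
    # e.g. {'dlm-clone'} when the controld primitive is wrapped in a clone
    logger.warning("Please stop DLM related resources (%s) and try again", ', '.join(dlm_ids))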
55 changes: 2 additions & 53 deletions test/unittests/test_sbd.py
@@ -406,57 +406,6 @@ def test_enable_sbd_service(self, mock_list_cluster_nodes, mock_ServiceManager,
call("Enable %s on node %s", constants.SBD_SERVICE, 'node2')
])

@patch('crmsh.xmlutil.CrmMonXmlParser')
@patch('crmsh.sbd.ServiceManager')
def test_restart_cluster_if_possible_return(self, mock_ServiceManager, mock_CrmMonXmlParser):
mock_ServiceManager.return_value.service_is_active.return_value = False
SBDManager.restart_cluster_if_possible()
mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE)
mock_CrmMonXmlParser.assert_not_called()

@patch('logging.Logger.warning')
@patch('crmsh.utils.is_dlm_running')
@patch('crmsh.xmlutil.CrmMonXmlParser')
@patch('crmsh.sbd.ServiceManager')
def test_restart_cluster_if_possible_manually(
self, mock_ServiceManager, mock_CrmMonXmlParser, mock_is_dlm_running, mock_logger_warning,
):
mock_ServiceManager.return_value.service_is_active.return_value = True
mock_CrmMonXmlParser.return_value.is_non_stonith_resource_running.return_value = True
mock_is_dlm_running.return_value = False
SBDManager.restart_cluster_if_possible()
mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE)
mock_logger_warning.assert_has_calls([
call("Resource is running, need to restart cluster service manually on each node"),
call("Or, run with `crm -F` or `--force` option, the `sbd` subcommand will leverage maintenance mode for any changes that require restarting sbd.service"),
call("Understand risks that running RA has no cluster protection while the cluster is in maintenance mode and restarting")
])

@patch('logging.Logger.warning')
@patch('crmsh.utils.is_dlm_running')
@patch('crmsh.xmlutil.CrmMonXmlParser')
@patch('crmsh.sbd.ServiceManager')
def test_restart_cluster_if_possible_dlm_running(
self, mock_ServiceManager, mock_CrmMonXmlParser, mock_is_dlm_running, mock_logger_warning,
):
mock_ServiceManager.return_value.service_is_active.return_value = True
mock_CrmMonXmlParser.return_value.is_non_stonith_resource_running.return_value = True
mock_is_dlm_running.return_value = True
SBDManager.restart_cluster_if_possible(with_maintenance_mode=True)
mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE)
mock_logger_warning.assert_called_once_with("Resource is running, need to restart cluster service manually on each node")

@patch('crmsh.bootstrap.restart_cluster')
@patch('logging.Logger.warning')
@patch('crmsh.xmlutil.CrmMonXmlParser')
@patch('crmsh.sbd.ServiceManager')
def test_restart_cluster_if_possible(self, mock_ServiceManager, mock_CrmMonXmlParser, mock_logger_warning, mock_restart_cluster):
mock_ServiceManager.return_value.service_is_active.return_value = True
mock_CrmMonXmlParser.return_value.is_non_stonith_resource_running.return_value = False
SBDManager.restart_cluster_if_possible()
mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE)
mock_restart_cluster.assert_called_once()

@patch('crmsh.bootstrap.prompt_for_string')
def test_prompt_for_sbd_device_diskless(self, mock_prompt_for_string):
mock_prompt_for_string.return_value = "none"
@@ -644,10 +593,10 @@ def test_init_and_deploy_sbd_not_config_sbd(self, mock_ServiceManager):
sbdmanager_instance._load_attributes_from_bootstrap.assert_not_called()

@patch('crmsh.bootstrap.adjust_properties')
@patch('crmsh.sbd.SBDManager.restart_cluster_if_possible')
@patch('crmsh.bootstrap.restart_cluster')
@patch('crmsh.sbd.SBDManager.enable_sbd_service')
@patch('crmsh.sbd.ServiceManager')
def test_init_and_deploy_sbd(self, mock_ServiceManager, mock_enable_sbd_service, mock_restart_cluster_if_possible, mock_adjust_properties):
def test_init_and_deploy_sbd(self, mock_ServiceManager, mock_enable_sbd_service, mock_restart_cluster, mock_adjust_properties):
mock_bootstrap_ctx = Mock(cluster_is_running=True)
sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx)
sbdmanager_instance.get_sbd_device_from_bootstrap = Mock()
9 changes: 5 additions & 4 deletions test/unittests/test_ui_sbd.py
@@ -469,14 +469,14 @@ def test_device_remove_last_dev(self):
self.sbd_instance_diskbased._device_remove(["/dev/sda1"])
self.assertEqual(str(e.exception), "Not allowed to remove all devices")

@mock.patch('crmsh.sbd.SBDManager.restart_cluster_if_possible')
@mock.patch('crmsh.bootstrap.restart_cluster')
@mock.patch('crmsh.sbd.SBDManager.update_sbd_configuration')
@mock.patch('logging.Logger.info')
def test_device_remove(self, mock_logger_info, mock_update_sbd_configuration, mock_restart_cluster_if_possible):
def test_device_remove(self, mock_logger_info, mock_update_sbd_configuration, mock_restart_cluster):
self.sbd_instance_diskbased.device_list_from_config = ["/dev/sda1", "/dev/sda2"]
self.sbd_instance_diskbased._device_remove(["/dev/sda1"])
mock_update_sbd_configuration.assert_called_once_with({"SBD_DEVICE": "/dev/sda2"})
mock_restart_cluster_if_possible.assert_called_once()
mock_restart_cluster.assert_called_once()
mock_logger_info.assert_called_once_with("Remove devices: %s", "/dev/sda1")

def test_do_device_no_service(self):
@@ -571,9 +571,10 @@ def test_do_purge_no_service(self, mock_purge_sbd_from_cluster):
self.assertFalse(res)
mock_purge_sbd_from_cluster.assert_not_called()

@mock.patch('crmsh.bootstrap.restart_cluster')
@mock.patch('crmsh.utils.check_all_nodes_reachable')
@mock.patch('crmsh.sbd.purge_sbd_from_cluster')
def test_do_purge(self, mock_purge_sbd_from_cluster, mock_check_all_nodes_reachable):
def test_do_purge(self, mock_purge_sbd_from_cluster, mock_check_all_nodes_reachable, mock_restart_cluster):
self.sbd_instance_diskbased._load_attributes = mock.Mock()
self.sbd_instance_diskbased._service_is_active = mock.Mock(return_value=True)
res = self.sbd_instance_diskbased.do_purge(mock.Mock())