diff --git a/deps/rabbit/src/rabbit_db.erl b/deps/rabbit/src/rabbit_db.erl index 4abdd7efed7..df10f29ebf5 100644 --- a/deps/rabbit/src/rabbit_db.erl +++ b/deps/rabbit/src/rabbit_db.erl @@ -129,6 +129,10 @@ clear_init_finished() -> %% @doc Resets the database and the node. reset() -> + case rabbit:is_running() of + true -> ok = rabbit:stop(); + false -> ok + end, ok = case rabbit_khepri:is_enabled() of true -> reset_using_khepri(); false -> reset_using_mnesia() @@ -196,6 +200,7 @@ force_load_on_next_boot_using_mnesia() -> rabbit_mnesia:force_load_next_boot(). post_reset() -> + wipe_data_dir(), rabbit_feature_flags:reset(), %% The cluster status files that RabbitMQ uses when Mnesia is the database @@ -210,6 +215,18 @@ post_reset() -> ok. +wipe_data_dir() -> + DataDir = dir(), + Glob = filename:join(DataDir, "*"), + FilesToRemove = filelib:wildcard(Glob), + ?LOG_DEBUG( + "DB: wipe files in data directory `~ts`:~p", + [DataDir, FilesToRemove], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok = rabbit_file:recursive_delete(FilesToRemove), + _ = file:delete(rabbit_guid:filename()), + ok. + %% ------------------------------------------------------------------- %% is_virgin_node(). %% ------------------------------------------------------------------- diff --git a/deps/rabbit/src/rabbit_db_cluster.erl b/deps/rabbit/src/rabbit_db_cluster.erl index 1fd720e527f..cb732367f4e 100644 --- a/deps/rabbit/src/rabbit_db_cluster.erl +++ b/deps/rabbit/src/rabbit_db_cluster.erl @@ -6,15 +6,114 @@ %% refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. %% +%% TODO +%% +%% Opérations sur le cluster : +%% +%% En commun : +%% 1. On stoppe "rabbit" sur le nœud à ajouter/retirer +%% 2. On lock le registre des feature flags +%% +%% Join : +%% * reset du nœud (cf ci-dessous) +%% * copie des feature flags (+ reset registre) +%% * Mnesia : +%% * init avec checks +%% Khepri : +%% * setup +%% * préparation +%% * join +%% * reset registre feature flags (utile ?) +%% * notify_joined_cluster +%% +%% Reset/forget member, depuis un autre nœud que celui retiré : +%% * liste des membres +%% * Mnesia : +%% * online : +%% * offline : +%% Khepri : +%% * online : +%% * offline : +%% * amqqueue:forget_all() +%% * quorum queue shrink_all +%% * stream queue delete_all_replicas +%% * stream coordinator forget_node +%% * notify_left_cluster +%% * suppression des fichiers +%% * reset registre feature flags +%% * cluster status +%% * déconnexion du nœud (?) +%% +%% En commun : +%% 1. On unlock le registre des feature flags +%% 2. On redémarre "rabbit" +%% +%% À exécuter depuis quel nœud ? +%% [ ] depuis le nœud qu’ajoute/retire +%% [x] depuis le cluster qui est modifié +%% (doutes : risqué si le nœud à ajouter fait partie d’un autre cluster avant +%% son reset) +%% +%% JOIN (depuis le nœud qu’on ajoute) +%% +%% * est-ce que le nœud fait partie du cluster cible ? +%% * sinon : +%% +%% * stoppe rabbit +%% +%% * reset (cf ci-dessous) +%% +%% * lock registre feature flags +%% * can join ? +%% * copy feature flags + reset registre +%% * (Mnesia/Khepri specific) +%% * notify_joined_cluster +%% * unlock registre feature flags +%% +%% * start rabbit +%% +%% RESET +%% +%% * stoppe rabbit +%% +%% * lock registre feature flags +%% * en cluster ? oui -> forget member locked (cf ci-dessous) +%% * suppression des fichiers +%% * reset registre feature flags +%% * cluster status +%% * unlock registre feature flags +%% * déco des anciens collègues de cluster +%% +%% * start rabbit +%% +%% FORGET MEMBER +%% +%% * stoppe rabbit sur nœud distant +%% * lock registre feature flags +%% +%% * (Mnesia/Khepri specific) +%% * amqqueue:forget_all() +%% * quorum queue shrink_all +%% * stream queue delete_all_replicas +%% * stream coordinator forget_node +%% * notify_left_cluster +%% +%% * unlock registre feature flags +%% * start rabbit sur nœud distant +%% * déco de l’ancien membre +%% +%% (on ne redémarre pas rabbit ; est-ce qu’on arrête la VM ?) + -module(rabbit_db_cluster). -include_lib("kernel/include/logger.hrl"). +-include_lib("stdlib/include/assert.hrl"). -include_lib("rabbit_common/include/logging.hrl"). -export([ensure_feature_flags_are_in_sync/2, join/2, - forget_member/2]). + forget_member/2, forget_member_locked/2]). -export([change_node_type/1]). -export([is_clustered/0, members/0, @@ -97,34 +196,24 @@ join(RemoteNode, NodeType) %% RabbitMQ and Mnesia must be stopped to modify the cluster. In %% particular, we stop Mnesia regardless of the remotely selected %% database because we might change it during the join. - RestartMnesia = rabbit_mnesia:is_running(), - RestartFFCtl = rabbit_ff_controller:is_running(), - RestartRaSystems = rabbit_ra_systems:are_running(), + % RestartMnesia = rabbit_mnesia:is_running(), + % RestartFFCtl = rabbit_ff_controller:is_running(), RestartRabbit = rabbit:is_running(), case RestartRabbit of true -> rabbit:stop(); false -> - %% The Ra systems were started before we initialize the - %% database (because Khepri depends on one of them). - %% Therefore, there are files in the data directory. They - %% will go away with the reset and we will need to restart - %% Ra systems afterwards. - case RestartRaSystems of - true -> ok = rabbit_ra_systems:ensure_stopped(); - false -> ok - end, - - case RestartFFCtl of - true -> - ok = rabbit_ff_controller:wait_for_task_and_stop(); - false -> - ok - end, - case RestartMnesia of - true -> rabbit_mnesia:stop_mnesia(); - false -> ok - end + % case RestartFFCtl of + % true -> + % ok = rabbit_ff_controller:wait_for_task_and_stop(); + % false -> + % ok + % end, + % case RestartMnesia of + % true -> rabbit_mnesia:stop_mnesia(); + % false -> ok + % end, + ok end, %% We acquire the feature flags registry reload lock because @@ -141,6 +230,13 @@ join(RemoteNode, NodeType) ok = rabbit_db:reset(), rabbit_feature_flags:copy_feature_states_after_reset( RemoteNode) + catch + Class:Reason:Stacktrace -> + ?LOG_ERROR( + "DB: failed to reset node before adding it to a " + "cluster: ~p:~p:~p", [Class, Reason, Stacktrace], + #{domain => ?RMQLOG_DOMAIN_DB}), + erlang:raise(Class, Reason, Stacktrace) after rabbit_ff_registry_factory:release_state_change_lock() end, @@ -149,27 +245,16 @@ join(RemoteNode, NodeType) %% it is meant to be used. That's because we may switch back from %% Khepri to Mnesia. To be safe, remove possibly stale files from %% a previous instance where Mnesia was used. - case rabbit_khepri:is_enabled(RemoteNode) of - true -> ok; - false -> ok = rabbit_mnesia:reset_gracefully() - end, + % case rabbit_khepri:is_enabled(RemoteNode) of + % true -> ok; + % false -> ok = rabbit_mnesia:reset_gracefully() + % end, - ok = rabbit_node_monitor:notify_left_cluster(node()), + % ok = rabbit_node_monitor:notify_left_cluster(node()), %% Now that the files are all gone after the reset above, restart %% the Ra systems. They will recreate their folder in the process. - case RestartRabbit of - true -> - ok; - false -> - case RestartRaSystems of - true -> - ok = rabbit_ra_systems:ensure_started(), - ok = rabbit_khepri:setup(); - false -> - ok - end - end, + % ok = rabbit_khepri:setup(), ?LOG_INFO( "DB: joining cluster using remote nodes:~n~tp", [ClusterNodes], @@ -195,17 +280,18 @@ join(RemoteNode, NodeType) true -> rabbit:start(); false -> - case RestartFFCtl of - true -> - ok = rabbit_sup:start_child(rabbit_ff_controller); - false -> - ok - end, - NeedMnesia = not rabbit_khepri:is_enabled(), - case RestartMnesia andalso NeedMnesia of - true -> rabbit_mnesia:start_mnesia(false); - false -> ok - end + % case RestartFFCtl of + % true -> + % ok = rabbit_sup:start_child(rabbit_ff_controller); + % false -> + % ok + % end, + % NeedMnesia = not rabbit_khepri:is_enabled(), + % case RestartMnesia andalso NeedMnesia of + % true -> rabbit_mnesia:start_mnesia(false); + % false -> ok + % end, + ok end, case Ret of @@ -253,28 +339,71 @@ join_using_khepri(_ClusterNodes, ram = NodeType) -> RemoveWhenOffline :: boolean(). %% @doc Removes `Node' from the cluster. -forget_member(Node, RemoveWhenOffline) -> - case forget_member0(Node, RemoveWhenOffline) of - ok -> - rabbit_node_monitor:notify_left_cluster(Node); - Error -> - Error - end. - -forget_member0(Node, RemoveWhenOffline) -> +forget_member(Node, RemoveWhenOffline) when is_atom(Node) -> case rabbit:is_running(Node) of false -> - ?LOG_DEBUG( - "DB: removing cluster member `~ts`", [Node], - #{domain => ?RMQLOG_DOMAIN_DB}), - case rabbit_khepri:is_enabled() of - true -> forget_member_using_khepri(Node, RemoveWhenOffline); - false -> forget_member_using_mnesia(Node, RemoveWhenOffline) + {ok, InitialState} = lock_cluster_changes(Node), + try + forget_member_locked(Node, RemoveWhenOffline) + after + unlock_cluster_changes(InitialState) end; true -> {error, {failed_to_remove_node, Node, rabbit_still_running}} end. +forget_member_locked(Node, RemoveWhenOffline) + when is_atom(Node) andalso Node =/= node() -> + ?LOG_DEBUG( + "DB: removing cluster member `~ts`", [Node], + #{domain => ?RMQLOG_DOMAIN_DB}), + ?assertNot(rabbit:is_running(Node)), + Ret = case rabbit_khepri:is_enabled() of + true -> forget_member_using_khepri(Node, RemoveWhenOffline); + false -> forget_member_using_mnesia(Node, RemoveWhenOffline) + end, + case Ret of + ok -> post_forget_member_locked(Node, RemoveWhenOffline); + _ -> ok + end, + Ret; +forget_member_locked(Node, RemoveWhenOffline) + when is_atom(Node) andalso Node =:= node() -> + OtherNodes = members() -- [Node], + forget_member_locked_remotely(OtherNodes, Node, RemoveWhenOffline). + +forget_member_locked_remotely([OtherNode | Rest], Node, RemoveWhenOffline) -> + try + ?LOG_DEBUG( + "DB: removing cluster member `~ts` (this node); doing it from " + "remote node `~s`", + [Node, OtherNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + Ret = erpc:call( + OtherNode, + ?MODULE, forget_member_locked, [Node, RemoveWhenOffline]), + case Ret of + ok -> + ok; + Error -> + ?LOG_DEBUG( + "DB: failed to remove cluster member `~ts` from node " + "`~s`: ~0p", + [Node, OtherNode, Error], + #{domain => ?RMQLOG_DOMAIN_DB}), + forget_member_locked_remotely(Rest, Node, RemoveWhenOffline) + end + catch + _:Reason -> + ?LOG_DEBUG( + "DB: failed to remove cluster member `~ts` from node `~s`: ~0p", + [Node, OtherNode, Reason], + #{domain => ?RMQLOG_DOMAIN_DB}), + forget_member_locked_remotely(Rest, Node, RemoveWhenOffline) + end; +forget_member_locked_remotely([], _Node, _RemoveWhenOffline) -> + ok. + forget_member_using_mnesia(Node, RemoveWhenOffline) -> rabbit_mnesia:forget_cluster_node(Node, RemoveWhenOffline). @@ -287,6 +416,117 @@ forget_member_using_khepri(_Node, true) -> forget_member_using_khepri(Node, false = _RemoveWhenOffline) -> rabbit_khepri:remove_member(Node). +post_forget_member_locked(Node, false = _RemoveWhenOffline) -> + ?LOG_DEBUG( + "DB: removing node `~s` from various Ra clusters", [Node], + #{domain => ?RMQLOG_DOMAIN_DB}), + _ = rabbit_amqqueue:forget_all(Node), + _ = rabbit_quorum_queue:shrink_all(Node), + _ = rabbit_stream_queue:delete_all_replicas(Node), + _ = rabbit_stream_coordinator:forget_node(Node), + rabbit_node_monitor:notify_left_cluster(Node), + ok; +post_forget_member_locked(_Node, true = _RemoveWhenOffline) -> + ok. + +lock_cluster_changes(ChangingNode) -> + RabbitWasRunning = stop_rabbit_if_running(ChangingNode), + InitialState = #{changing_node => ChangingNode, + rabbit_was_running => RabbitWasRunning}, + + %% We acquire the feature flags registry reload lock because between + %% the time we reset the registry (as part of `rabbit_db:reset/0' and + %% the states copy from the remote node, there could be a concurrent + %% reload of the registry (for instance because of peer discovery on + %% another node) with the default/empty states. + %% + %% To make this work, the lock is also acquired from + %% `rabbit_ff_registry_wrapper'. + ?LOG_DEBUG( + "DB: lock feature flags registry to avoid concurrent changes to the " + "cluster from a feature flag callback", + #{domain => ?RMQLOG_DOMAIN_DB}), + rabbit_ff_registry_factory:acquire_state_change_lock(), + {ok, InitialState}. + +stop_rabbit_if_running(ThisNode) when ThisNode =:= node() -> + RabbitWasRunning = rabbit:is_running(), + case RabbitWasRunning of + true -> + ?LOG_DEBUG( + "DB: stop \"rabbit\" on this node (~ts) before making changes " + "to the cluster", + [ThisNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok = rabbit:stop(); + false -> + ?LOG_DEBUG( + "DB: \"rabbit\" already stopped on this node (~ts), ready for " + "changes to the cluster", + [ThisNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok + end, + RabbitWasRunning; +stop_rabbit_if_running(RemoteNode) when is_atom(RemoteNode) -> + try + RabbitWasRunning = erpc:call(RemoteNode, rabbit, is_running, []), + case RabbitWasRunning of + true -> + ?LOG_DEBUG( + "DB: stop \"rabbit\" on node `~ts` before making changes " + "to the cluster", + [RemoteNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok = erpc:call(RemoteNode, rabbit, stop, []); + false -> + ?LOG_DEBUG( + "DB: \"rabbit\" already stopped on node `~ts`, ready for " + "changes to the cluster", + [RemoteNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok + end, + RabbitWasRunning + catch + error:{erpc, noconnection} -> + ?LOG_DEBUG( + "DB: node `~ts` unreachable, considering that \"rabbit\" is " + "stopped on it, ready for changes to the cluster", + [RemoteNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + false + end. + +unlock_cluster_changes( + #{changing_node := ChangingNode, + rabbit_was_running := RabbitWasRunning}) -> + rabbit_ff_registry_factory:release_state_change_lock(), + start_rabbit_if_was_running(ChangingNode, RabbitWasRunning), + ok. + +start_rabbit_if_was_running(ChangingNode, false = _RabbitWasRunning) -> + ?LOG_DEBUG( + "DB: leaving \"rabbit\" stopped on node `~ts` after changes to the " + "cluster", + [ChangingNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + ok; +start_rabbit_if_was_running(ThisNode, true = _RabbitWasRunning) + when ThisNode =:= node() -> + ?LOG_DEBUG( + "DB: restart \"rabbit\" on this node (~ts) after changes to the " + "cluster", + [ThisNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + rabbit:start(); +start_rabbit_if_was_running(RemoteNode, true = _RabbitWasRunning) -> + ?LOG_DEBUG( + "DB: restart \"rabbit\" on node `~ts` after changes to the cluster", + [RemoteNode], + #{domain => ?RMQLOG_DOMAIN_DB}), + erpc:call(RemoteNode, rabbit, start, []). + %% ------------------------------------------------------------------- %% Cluster update. %% ------------------------------------------------------------------- diff --git a/deps/rabbit/src/rabbit_khepri.erl b/deps/rabbit/src/rabbit_khepri.erl index 6b7f0a6a898..31c01234343 100644 --- a/deps/rabbit/src/rabbit_khepri.erl +++ b/deps/rabbit/src/rabbit_khepri.erl @@ -302,6 +302,9 @@ ensure_ra_system_started() -> {ok, _} = application:ensure_all_started(khepri), ok = rabbit_ra_systems:ensure_ra_system_started(?RA_SYSTEM). +ensure_ra_system_stopped() -> + ok = rabbit_ra_systems:ensure_ra_system_stopped(?RA_SYSTEM). + retry_timeout() -> case application:get_env(rabbit, khepri_leader_wait_retry_timeout) of {ok, T} when is_integer(T) andalso T >= 0 -> T; @@ -372,19 +375,14 @@ await_replication() -> %% @private reset() -> - case rabbit:is_running() of - false -> - %% Rabbit should be stopped, but Khepri needs to be running. - %% Restart it. - ok = setup(), - ok = khepri_cluster:reset(?RA_CLUSTER_NAME), - ok = khepri:stop(?RA_CLUSTER_NAME), + ?assertNot(rabbit:is_running()), - _ = file:delete(rabbit_guid:filename()), - ok; - true -> - throw({error, rabbitmq_unexpectedly_running}) - end. + ok = setup(), + ThisNode = node(), + rabbit_db_cluster:forget_member(ThisNode, false), + ok = khepri:stop(?RA_CLUSTER_NAME), + ok = ensure_ra_system_stopped(), + ok. -spec dir() -> Dir when Dir :: file:filename_all().