Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/mds/FSMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -856,8 +856,7 @@ const MDSMap::mds_info_t* FSMap::find_replacement_for(mds_role_t role) const
for (const auto& [gid, info] : fs.mds_map.mds_info) {
if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) {
if (info.is_frozen()) {
/* the standby-replay is frozen, do nothing! */
return nullptr;
continue;
} else {
ceph_assert(info.compat.writeable(fs.mds_map.compat));
return &info;
Expand Down
21 changes: 15 additions & 6 deletions src/mds/MDSMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1263,16 +1263,25 @@ mds_gid_t MDSMap::get_standby_replay(mds_rank_t r) const {
return MDS_GID_NONE;
}

bool MDSMap::is_followable(mds_rank_t r) const {
int MDSMap::is_followable(mds_rank_t r) const {
bool found = true;
bool degraded = false;
int standby_replay_count = 0;
if (auto it1 = up.find(r); it1 != up.end()) {
if (auto it2 = mds_info.find(it1->second); it2 != mds_info.end()) {
auto& info = it2->second;
if (!info.is_degraded() && !has_standby_replay(r)) {
return true;
for (auto& [gid,info] : mds_info) {
if (gid == it1->second) {
found = true;
if (info.is_degraded()) {
degraded = true;
break;
}
}
if (info.rank == r && info.state == STATE_STANDBY_REPLAY) {
++standby_replay_count;
}
}
}
return false;
return found && !degraded ? standby_replay_count : 0;
}

bool MDSMap::is_laggy_gid(mds_gid_t gid) const {
Expand Down
2 changes: 1 addition & 1 deletion src/mds/MDSMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ class MDSMap {
return get_standby_replay(r) != MDS_GID_NONE;
}

bool is_followable(mds_rank_t r) const;
int is_followable(mds_rank_t r) const;
bool is_laggy_gid(mds_gid_t gid) const;

// degraded = some recovery in process. fixes active membership and
Expand Down
11 changes: 6 additions & 5 deletions src/mds/MDSRank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,8 @@ void MDSRankDispatcher::tick()
set_mdsmap_multimds_snaps_allowed();
}
}

}
if (is_active() || is_standby_replay()) {
if (whoami == 0) {
scrubstack->advance_scrub_status();
scrubstack->purge_old_scrub_counters();
Expand Down Expand Up @@ -2874,7 +2875,7 @@ void MDSRankDispatcher::handle_asok_command(
r = config_client(client_id, !got_value, option, value, *css);
} else if (command == "scrub start" ||
command == "scrub_start") {
if (!is_active()) {
if (!is_active() && ! is_standby_replay()) {
*css << "MDS is not active";
r = -EINVAL;
goto out;
Expand Down Expand Up @@ -2905,7 +2906,7 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub abort") {
if (!is_active()) {
if (!is_active() && !is_standby_replay()) {
*css << "MDS is not active";
r = -EINVAL;
goto out;
Expand All @@ -2925,7 +2926,7 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub pause") {
if (!is_active()) {
if (!is_active() && !is_standby_replay()) {
*css << "MDS is not active";
r = -EINVAL;
goto out;
Expand All @@ -2945,7 +2946,7 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub resume") {
if (!is_active()) {
if (!is_active() && !is_standby_replay()) {
*css << "MDS is not active";
r = -EINVAL;
goto out;
Expand Down
5 changes: 3 additions & 2 deletions src/mon/MDSMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2445,13 +2445,14 @@ bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, const Filesystem& fs)
bool changed = false;
for (const auto& rank : mds_map.in) {
dout(20) << "examining " << rank << dendl;
if (mds_map.is_followable(rank)) {
int followables = mds_map.is_followable(rank);
if (followables < 2 ) {
dout(1) << " setting mds." << info->global_id
<< " to follow mds rank " << rank << dendl;
fsmap.assign_standby_replay(info->global_id, fs.get_fscid(), rank);
do_propose = true;
changed = true;
break;
//break;
Comment on lines +2448 to +2455

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change about allowing at least 2 standby-replay MDS daemons per rank?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this limit should be configurable, but for now this allows up to 2 MDSes in standby-replay mode per rank.

}
}
if (!changed) break;
Expand Down
Loading