Merge pull request #15308 from jcsp/wip-19706

mon: don't kill MDSs unless some beacons are getting through

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
John Spray 2017-06-15 10:50:44 -04:00 committed by GitHub
commit 18fbf24c7a
5 changed files with 37 additions and 2 deletions

View File

@ -1,4 +1,7 @@
overrides:
ceph:
log-whitelist:
- not responding, replacing
ceph-fuse:
disabled: true
tasks:

View File

@ -1,2 +1,7 @@
tasks:
- mds_thrash:
overrides:
ceph:
log-whitelist:
- not responding, replacing

View File

@ -2,3 +2,8 @@ tasks:
- install:
- ceph:
- mds_thrash:
overrides:
ceph:
log-whitelist:
- not responding, replacing

View File

@ -102,6 +102,7 @@ class CephFSTestCase(CephTestCase):
# To avoid any issues with e.g. unlink bugs, we destroy and recreate
# the filesystem rather than just doing a rm -rf of files
self.mds_cluster.mds_stop()
self.mds_cluster.mds_fail()
self.mds_cluster.delete_all_filesystems()
self.fs = None # is now invalid!

View File

@ -503,6 +503,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return false;
}
mon->clog->info() << "MDS daemon '" << m->get_name() << "' restarted";
fail_mds_gid(existing);
failed_mds = true;
}
@ -1920,12 +1921,24 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid,
<< " " << ceph_mds_state_name(info.state)
<< " since " << beacon.stamp << dendl;
// We will only take decisive action (replacing/removing a daemon)
// if we have some indication that other daemon(s) have successfully
// gotten beacons through recently.
utime_t latest_beacon;
for (const auto & i : last_beacon) {
latest_beacon = MAX(i.second.stamp, latest_beacon);
}
const bool may_replace = latest_beacon >
(ceph_clock_now() -
MAX(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5));
// are we in?
// and is there a non-laggy standby that can take over for us?
mds_gid_t sgid;
if (info.rank >= 0 &&
info.state != MDSMap::STATE_STANDBY &&
info.state != MDSMap::STATE_STANDBY_REPLAY &&
may_replace &&
!pending_fsmap.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
(sgid = pending_fsmap.find_replacement_for({fscid, info.rank}, info.name,
g_conf->mon_force_standby_active)) != MDS_GID_NONE)
@ -1937,6 +1950,11 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid,
<< " " << ceph_mds_state_name(info.state)
<< " with " << sgid << "/" << si.name << " " << si.addr << dendl;
mon->clog->warn() << "MDS daemon '" << info.name << "'"
<< " is not responding, replacing it "
<< "as rank " << info.rank
<< " with standby '" << si.name << "'";
// Remember what NS the old one was in
const fs_cluster_id_t fscid = pending_fsmap.mds_roles.at(gid);
@ -1948,11 +1966,14 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid,
pending_fsmap.promote(sgid, fs, info.rank);
*mds_propose = true;
} else if (info.state == MDSMap::STATE_STANDBY_REPLAY ||
info.state == MDSMap::STATE_STANDBY) {
} else if ((info.state == MDSMap::STATE_STANDBY_REPLAY ||
info.state == MDSMap::STATE_STANDBY) && may_replace) {
dout(10) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank
<< "." << info.inc << " " << ceph_mds_state_name(info.state)
<< dendl;
mon->clog->info() << "MDS standby '" << info.name
<< "' is not responding, removing it from the set of "
<< "standbys";
fail_mds_gid(gid);
*mds_propose = true;
} else if (!info.laggy()) {