From 42f46b70d0daeed3b255b64dd3597e3ceda153e3 Mon Sep 17 00:00:00 2001 From: John Spray Date: Sun, 15 Mar 2015 23:18:52 +0000 Subject: [PATCH] mon/MDSMonitor: handle DAMAGED in beacon Signed-off-by: John Spray --- src/mon/MDSMonitor.cc | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index a9044ae2422..eaab48da6b2 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -547,6 +547,38 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) m->put(); return false; } + } else if (state == MDSMap::STATE_DAMAGED) { + if (!mon->osdmon()->is_writeable()) { + dout(4) << __func__ << ": DAMAGED from rank " << info.rank + << " waiting for osdmon writeable to blacklist it" << dendl; + mon->osdmon()->wait_for_writeable(new C_RetryMessage(this, m)); + return false; + } + + // Record this MDS rank as damaged, so that other daemons + // won't try to run it. + dout(4) << __func__ << ": marking rank " + << info.rank << " damaged" << dendl; + + + // Blacklist this MDS daemon + const utime_t until = ceph_clock_now(g_ceph_context); + pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist( + info.addr, until); + request_proposal(mon->osdmon()); + + // Clear out daemon state and add rank to damaged list + pending_mdsmap.up.erase(info.rank); + pending_mdsmap.damaged.insert(info.rank); + last_beacon.erase(gid); + + // Call erase() last because the `info` reference becomes invalid + // after we remove the instance from the map. + pending_mdsmap.mds_info.erase(gid); + + // Respond to MDS, so that it knows it can continue to shut down + mon->send_reply(m, new MMDSBeacon(mon->monmap->fsid, m->get_global_id(), + m->get_name(), mdsmap.get_epoch(), state, seq)); } else { info.state = state; info.state_seq = seq;