Merge remote-tracking branch 'origin/wip-mds-names'

This commit is contained in:
Greg Farnum 2013-02-14 11:31:30 -08:00
commit c8f090aed9
5 changed files with 60 additions and 13 deletions

View File

@ -219,6 +219,7 @@ OPTION(mds_dir_max_commit_size, OPT_INT, 90) // MB
OPTION(mds_decay_halflife, OPT_FLOAT, 5)
OPTION(mds_beacon_interval, OPT_FLOAT, 4)
OPTION(mds_beacon_grace, OPT_FLOAT, 15)
// when true, the monitor fails any existing mds that has the same name as a
// booting one, and an mds that finds itself missing from the map calls
// suicide() instead of respawn() if a same-named instance with a larger
// global id is present
OPTION(mds_enforce_unique_name, OPT_BOOL, true)
OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session

View File

@ -906,6 +906,20 @@ void MDS::handle_mds_map(MMDSMap *m)
if (want_state == MDSMap::STATE_BOOT) {
dout(10) << "not in map yet" << dendl;
} else {
// did i get kicked by someone else?
if (g_conf->mds_enforce_unique_name) {
if (uint64_t existing = mdsmap->find_mds_gid_by_name(name)) {
MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
if (i.global_id > monc->get_global_id()) {
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, new instance has larger gid " << i.global_id
<< ", suicide" << dendl;
suicide();
goto out;
}
}
}
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, respawning myself" << dendl;
respawn();

View File

@ -236,6 +236,16 @@ public:
assert(up.count(m) && mds_info.count(up[m]));
return mds_info[up[m]];
}
uint64_t find_mds_gid_by_name(const string& s) {
for (map<uint64_t,mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
++p) {
if (p->second.name == s) {
return p->first;
}
}
return 0;
}
// counts
unsigned get_num_in_mds() {

View File

@ -354,6 +354,13 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
// boot?
if (state == MDSMap::STATE_BOOT) {
// zap previous instance of this name?
if (g_conf->mds_enforce_unique_name) {
while (uint64_t existing = pending_mdsmap.find_mds_gid_by_name(m->get_name())) {
fail_mds_gid(existing);
}
}
// add
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid];
info.global_id = gid;
@ -376,7 +383,6 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
}
}
// initialize the beacon timer
last_beacon[gid].stamp = ceph_clock_now(g_ceph_context);
last_beacon[gid].seq = seq;
@ -672,10 +678,30 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
return false;
}
void MDSMonitor::fail_mds_gid(uint64_t gid)
{
assert(pending_mdsmap.mds_info.count(gid));
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid];
dout(10) << "fail_mds_gid " << gid << " mds." << info.name << " rank " << info.rank << dendl;
utime_t until = ceph_clock_now(g_ceph_context);
until += g_conf->mds_blacklist_interval;
pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until);
mon->osdmon()->propose_pending();
if (info.rank >= 0) {
pending_mdsmap.up.erase(info.rank);
pending_mdsmap.failed.insert(info.rank);
}
pending_mdsmap.mds_info.erase(gid);
}
int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
{
std::string err;
int w = strict_strtol(arg.c_str(), 10, &err);
int w = strict_strtoll(arg.c_str(), 10, &err);
if (!err.empty()) {
// Try to interpret the arg as an MDS name
const MDSMap::mds_info_t *mds_info = mdsmap.find_by_name(arg);
@ -688,18 +714,12 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
if (pending_mdsmap.up.count(w)) {
uint64_t gid = pending_mdsmap.up[w];
if (pending_mdsmap.mds_info.count(gid)) {
utime_t until = ceph_clock_now(g_ceph_context);
until += g_conf->mds_blacklist_interval;
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[pending_mdsmap.up[w]];
pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until);
mon->osdmon()->propose_pending();
pending_mdsmap.mds_info.erase(gid);
}
pending_mdsmap.up.erase(w);
pending_mdsmap.failed.insert(w);
if (pending_mdsmap.mds_info.count(gid))
fail_mds_gid(gid);
ss << "failed mds." << w;
} else if (pending_mdsmap.mds_info.count(w)) {
fail_mds_gid(w);
ss << "failed mds gid " << w;
}
return 0;
}

View File

@ -93,6 +93,8 @@ class MDSMonitor : public PaxosService {
void get_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail) const;
int fail_mds(std::ostream &ss, const std::string &arg);
void fail_mds_gid(uint64_t gid);
int cluster_fail(std::ostream &ss);
bool preprocess_command(MMonCommand *m);