mds: set resolve/rejoin gather MDS set in advance

For active MDS, it may receive resolve/rejoin message before receiving
the mdsmap message that claims the MDS cluster is in resolving/rejoning
state. So instead of set the gather MDS set when receiving the mdsmap.
set them in advance when detecting MDS' failure.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
Yan, Zheng 2013-03-15 00:08:39 +08:00 committed by Greg Farnum
parent ed85dd61a5
commit e381bb3930
2 changed files with 21 additions and 25 deletions

View File

@ -2436,18 +2436,17 @@ void MDCache::resolve_start()
if (rootdir)
adjust_subtree_auth(rootdir, CDIR_AUTH_UNKNOWN);
}
resolve_gather = recovery_set;
resolve_gather.erase(mds->get_nodeid());
rejoin_gather = resolve_gather;
}
void MDCache::send_resolves()
{
// reset resolve state
got_resolve.clear();
other_ambiguous_imports.clear();
send_slave_resolves();
if (!resolve_ack_gather.empty()) {
dout(10) << "send_resolves still waiting for resolve ack from ("
<< need_resolve_ack << ")" << dendl;
<< resolve_ack_gather << ")" << dendl;
return;
}
if (!need_resolve_rollback.empty()) {
@ -2499,7 +2498,7 @@ void MDCache::send_slave_resolves()
++p) {
dout(10) << "sending slave resolve to mds." << p->first << dendl;
mds->send_message_mds(p->second, p->first);
need_resolve_ack.insert(p->first);
resolve_ack_gather.insert(p->first);
}
}
@ -2602,16 +2601,15 @@ void MDCache::handle_mds_failure(int who)
recovery_set.erase(mds->get_nodeid());
dout(1) << "handle_mds_failure mds." << who << " : recovery peers are " << recovery_set << dendl;
// adjust my recovery lists
wants_resolve.erase(who); // MDS will ask again
got_resolve.erase(who); // i'll get another.
resolve_gather.insert(who);
discard_delayed_resolve(who);
rejoin_gather.insert(who);
rejoin_sent.erase(who); // i need to send another
rejoin_ack_gather.erase(who); // i'll need/get another.
dout(10) << " wants_resolve " << wants_resolve << dendl;
dout(10) << " got_resolve " << got_resolve << dendl;
dout(10) << " resolve_gather " << resolve_gather << dendl;
dout(10) << " resolve_ack_gather " << resolve_ack_gather << dendl;
dout(10) << " rejoin_sent " << rejoin_sent << dendl;
dout(10) << " rejoin_gather " << rejoin_gather << dendl;
dout(10) << " rejoin_ack_gather " << rejoin_ack_gather << dendl;
@ -2792,7 +2790,7 @@ void MDCache::handle_resolve(MMDSResolve *m)
return;
}
if (!need_resolve_ack.empty() || !need_resolve_rollback.empty()) {
if (!resolve_ack_gather.empty() || !need_resolve_rollback.empty()) {
dout(10) << "delay processing subtree resolve" << dendl;
discard_delayed_resolve(from);
delayed_resolve[from] = m;
@ -2879,7 +2877,7 @@ void MDCache::handle_resolve(MMDSResolve *m)
}
// did i get them all?
got_resolve.insert(from);
resolve_gather.erase(from);
maybe_resolve_finish();
@ -2905,12 +2903,12 @@ void MDCache::discard_delayed_resolve(int who)
void MDCache::maybe_resolve_finish()
{
assert(need_resolve_ack.empty());
assert(resolve_ack_gather.empty());
assert(need_resolve_rollback.empty());
if (got_resolve != recovery_set) {
dout(10) << "maybe_resolve_finish still waiting for more resolves, got ("
<< got_resolve << "), need (" << recovery_set << ")" << dendl;
if (!resolve_gather.empty()) {
dout(10) << "maybe_resolve_finish still waiting for resolves ("
<< resolve_gather << ")" << dendl;
return;
} else {
dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl;
@ -2930,7 +2928,7 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack)
dout(10) << "handle_resolve_ack " << *ack << " from " << ack->get_source() << dendl;
int from = ack->get_source().num();
if (!need_resolve_ack.count(from)) {
if (!resolve_ack_gather.count(from)) {
ack->put();
return;
}
@ -3005,8 +3003,8 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack)
assert(p->second->slave_to_mds != from);
}
need_resolve_ack.erase(from);
if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
resolve_ack_gather.erase(from);
if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) {
send_subtree_resolves();
process_delayed_resolve();
}
@ -3073,7 +3071,7 @@ void MDCache::finish_rollback(metareqid_t reqid) {
if (mds->is_resolve())
finish_uncommitted_slave_update(reqid, need_resolve_rollback[reqid]);
need_resolve_rollback.erase(reqid);
if (need_resolve_ack.empty() && need_resolve_rollback.empty()) {
if (resolve_ack_gather.empty() && need_resolve_rollback.empty()) {
send_subtree_resolves();
process_delayed_resolve();
}
@ -3421,7 +3419,6 @@ void MDCache::rejoin_send_rejoins()
if (*p == mds->get_nodeid()) continue; // nothing to myself!
if (rejoin_sent.count(*p)) continue; // already sent a rejoin to this node!
if (mds->is_rejoin()) {
rejoin_gather.insert(*p);
rejoins[*p] = new MMDSCacheRejoin(MMDSCacheRejoin::OP_WEAK);
rejoins[*p]->copy_cap_exports(cap_export_bl);
} else if (mds->mdsmap->is_rejoin(*p))

View File

@ -329,9 +329,8 @@ protected:
friend class ECommitted;
bool resolves_pending;
set<int> wants_resolve; // nodes i need to send my resolve to
set<int> got_resolve; // nodes i got resolves from
set<int> need_resolve_ack; // nodes i need a resolve_ack from
set<int> resolve_gather; // nodes i need resolves from
set<int> resolve_ack_gather; // nodes i need a resolve_ack from
map<metareqid_t, int> need_resolve_rollback; // rollbacks i'm writing to the journal
map<int, MMDSResolve*> delayed_resolve;