Merge pull request #13807 from jcsp/wip-19118

mds: try to avoid false positive heartbeat timeouts
This commit is contained in:
John Spray 2017-03-15 21:15:43 +00:00 committed by GitHub
commit 5f6fa0e2f7
3 changed files with 10 additions and 1 deletion

View File

@@ -5610,6 +5610,8 @@ void MDCache::export_remaining_imported_caps()
mds->send_message_client_counted(stale, q->first);
}
}
mds->heartbeat_reset();
}
for (map<inodeno_t, list<MDSInternalContextBase*> >::iterator p = cap_reconnect_waiters.begin();

View File

@@ -487,6 +487,8 @@ bool MDSRank::_dispatch(Message *m, bool new_msg)
dout(0) << "unrecognized message " << *m << dendl;
return false;
}
heartbeat_reset();
}
if (dispatch_depth > 1)

View File

@@ -237,7 +237,6 @@ class MDSRank {
bool _dispatch(Message *m, bool new_msg);
ceph::heartbeat_handle_d *hb; // Heartbeat for threads using mds_lock
void heartbeat_reset();
bool is_stale_message(Message *m) const;
@@ -308,6 +307,12 @@ class MDSRank {
void respawn();
// <<<
/**
* Call this periodically if inside a potentially long running piece
* of code while holding the mds_lock
*/
void heartbeat_reset();
/**
* Report state DAMAGED to the mon, and then pass on to respawn(). Call
* this when an unrecoverable error is encountered while attempting