mds: keep per-client revoking caps list

...to avoid doing an O(caps) scan to find out
which clients are responsible for any late-revoking
caps during health checks.

Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2014-09-10 13:37:37 +01:00
parent a6a0fd814b
commit bf590f8a5d
5 changed files with 60 additions and 30 deletions

View File

@ -266,15 +266,10 @@ void Beacon::notify_health(MDS const *mds)
// Detect clients failing to respond to modifications to capabilities in
// CLIENT_CAPS messages.
std::list<const Capability*> late_caps;
mds->locker->get_late_cap_releases(&late_caps);
std::set<client_t> late_clients;
for (std::list<const Capability*>::iterator i =late_caps.begin(); i != late_caps.end(); ++i) {
const Capability *cap = *i;
late_clients.insert(cap->get_client());
}
for (std::set<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) {
std::list<client_t> late_clients;
mds->locker->get_late_revoking_clients(&late_clients);
for (std::list<client_t>::iterator i = late_clients.begin();
i != late_clients.end(); ++i) {
std::ostringstream oss;
oss << "client." << *i << " failing to respond to capability release";
MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str());

View File

@ -2573,6 +2573,7 @@ void CInode::remove_client_cap(client_t client)
cap->item_session_caps.remove_myself();
cap->item_revoking_caps.remove_myself();
cap->item_client_revoking_caps.remove_myself();
containing_realm->remove_cap(client, cap);
if (client == loner_cap)

View File

@ -195,8 +195,10 @@ public:
}
}
if (_issued == _pending)
if (_issued == _pending) {
item_revoking_caps.remove_myself();
item_client_revoking_caps.remove_myself();
}
//check_rdcaps_list();
}
// we may get a release racing with revocations, which means our revokes will be ignored
@ -231,6 +233,7 @@ public:
xlist<Capability*>::item item_session_caps;
xlist<Capability*>::item item_snaprealm_caps;
xlist<Capability*>::item item_revoking_caps;
xlist<Capability*>::item item_client_revoking_caps;
Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) :
inode(i), client(c),
@ -243,7 +246,8 @@ public:
suppress(0), state(0),
client_follows(0), client_xattr_version(0),
client_inline_version(0),
item_session_caps(this), item_snaprealm_caps(this), item_revoking_caps(this) {
item_session_caps(this), item_snaprealm_caps(this),
item_revoking_caps(this), item_client_revoking_caps(this) {
g_num_cap++;
g_num_capa++;
}

View File

@ -1941,6 +1941,7 @@ bool Locker::issue_caps(CInode *in, Capability *only_cap)
int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT;
if (op == CEPH_CAP_OP_REVOKE) {
revoking_caps.push_back(&cap->item_revoking_caps);
revoking_caps_by_client[cap->get_client()].push_back(&cap->item_client_revoking_caps);
cap->set_last_revoke_stamp(ceph_clock_now(g_ceph_context));
cap->reset_num_revoke_warnings();
}
@ -3211,6 +3212,49 @@ void Locker::remove_client_cap(CInode *in, client_t client)
try_eval(in, CEPH_CAP_LOCKS);
}
/**
* Return true if any currently revoking caps exceed the
* mds_revoke_cap_timeout threshold.
*/
bool Locker::any_late_revoking_caps(xlist<Capability*> const &revoking) const
{
xlist<Capability*>::const_iterator p = revoking.begin();
if (p.end()) {
// No revoking caps at the moment
return false;
} else {
utime_t now = ceph_clock_now(g_ceph_context);
utime_t age = now - (*p)->get_last_revoke_stamp();
if (age <= g_conf->mds_revoke_cap_timeout) {
return false;
} else {
return true;
}
}
}
void Locker::get_late_revoking_clients(std::list<client_t> *result) const
{
if (!any_late_revoking_caps(revoking_caps)) {
// Fast path: no misbehaving clients, execute in O(1)
return;
}
// Slow path: execute in O(N_clients)
std::map<client_t, xlist<Capability*> >::const_iterator client_rc_iter;
for (client_rc_iter = revoking_caps_by_client.begin();
client_rc_iter != revoking_caps_by_client.end(); ++client_rc_iter) {
xlist<Capability*> const &client_rc = client_rc_iter->second;
bool any_late = any_late_revoking_caps(client_rc);
if (any_late) {
result->push_back(client_rc_iter->first);
}
}
}
void Locker::caps_tick()
{
utime_t now = ceph_clock_now(g_ceph_context);
@ -3220,7 +3264,6 @@ void Locker::caps_tick()
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
Capability *cap = *p;
utime_t age = now - cap->get_last_revoke_stamp();
dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
if (age <= g_conf->mds_revoke_cap_timeout) {
@ -3242,23 +3285,6 @@ void Locker::caps_tick()
}
}
void Locker::get_late_cap_releases(std::list<const Capability*> *late_caps) const
{
assert(late_caps != NULL);
utime_t now = ceph_clock_now(g_ceph_context);
for (xlist<Capability*>::const_iterator p = revoking_caps.begin(); !p.end(); ++p) {
Capability *cap = *p;
utime_t age = now - cap->get_last_revoke_stamp();
if (age <= g_conf->mds_revoke_cap_timeout) {
break;
} else {
late_caps->push_back(cap);
}
}
}
void Locker::handle_client_lease(MClientLease *m)
{

View File

@ -196,7 +196,8 @@ public:
void remove_client_cap(CInode *in, client_t client);
void get_late_cap_releases(std::list<const Capability*> *late_caps) const;
void get_late_revoking_clients(std::list<client_t> *result) const;
bool any_late_revoking_caps(xlist<Capability*> const &revoking) const;
protected:
void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq);
@ -210,7 +211,10 @@ public:
void _do_cap_release(client_t client, inodeno_t ino, uint64_t cap_id, ceph_seq_t mseq, ceph_seq_t seq);
void caps_tick();
// Maintain a global list to quickly find if any caps are late revoking
xlist<Capability*> revoking_caps;
// Maintain a per-client list to find clients responsible for late ones quickly
std::map<client_t, xlist<Capability*> > revoking_caps_by_client;
// local
public: