From bf590f8a5df4f8199d524e3448853f0f6f8338c9 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 10 Sep 2014 13:37:37 +0100 Subject: [PATCH] mds: keep per-client revoking caps list ...to avoid doing an O(caps) scan to find out which clients are responsible for any late-revoking caps during health checks. Signed-off-by: John Spray --- src/mds/Beacon.cc | 13 +++------- src/mds/CInode.cc | 1 + src/mds/Capability.h | 8 ++++-- src/mds/Locker.cc | 62 +++++++++++++++++++++++++++++++------------- src/mds/Locker.h | 6 ++++- 5 files changed, 60 insertions(+), 30 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 5140f98398b..2256f320d1a 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -266,15 +266,10 @@ void Beacon::notify_health(MDS const *mds) // Detect clients failing to respond to modifications to capabilities in // CLIENT_CAPS messages. - std::list late_caps; - mds->locker->get_late_cap_releases(&late_caps); - std::set late_clients; - for (std::list::iterator i =late_caps.begin(); i != late_caps.end(); ++i) { - const Capability *cap = *i; - late_clients.insert(cap->get_client()); - } - - for (std::set::iterator i = late_clients.begin(); i != late_clients.end(); ++i) { + std::list late_clients; + mds->locker->get_late_revoking_clients(&late_clients); + for (std::list::iterator i = late_clients.begin(); + i != late_clients.end(); ++i) { std::ostringstream oss; oss << "client." << *i << " failing to respond to capability release"; MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str()); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 8624b66896e..175cfc3926e 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -2573,6 +2573,7 @@ void CInode::remove_client_cap(client_t client) cap->item_session_caps.remove_myself(); cap->item_revoking_caps.remove_myself(); + cap->item_client_revoking_caps.remove_myself(); containing_realm->remove_cap(client, cap); if (client == loner_cap) diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 53ac91da2cf..113a010089a 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -195,8 +195,10 @@ public: } } - if (_issued == _pending) + if (_issued == _pending) { item_revoking_caps.remove_myself(); + item_client_revoking_caps.remove_myself(); + } //check_rdcaps_list(); } // we may get a release racing with revocations, which means our revokes will be ignored @@ -231,6 +233,7 @@ public: xlist::item item_session_caps; xlist::item item_snaprealm_caps; xlist::item item_revoking_caps; + xlist::item item_client_revoking_caps; Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) : inode(i), client(c), @@ -243,7 +246,8 @@ public: suppress(0), state(0), client_follows(0), client_xattr_version(0), client_inline_version(0), - item_session_caps(this), item_snaprealm_caps(this), item_revoking_caps(this) { + item_session_caps(this), item_snaprealm_caps(this), + item_revoking_caps(this), item_client_revoking_caps(this) { g_num_cap++; g_num_capa++; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 944925e762d..d1d69f359e6 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1941,6 +1941,7 @@ bool Locker::issue_caps(CInode *in, Capability *only_cap) int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT; if (op == CEPH_CAP_OP_REVOKE) { revoking_caps.push_back(&cap->item_revoking_caps); + revoking_caps_by_client[cap->get_client()].push_back(&cap->item_client_revoking_caps); cap->set_last_revoke_stamp(ceph_clock_now(g_ceph_context)); cap->reset_num_revoke_warnings(); } @@ -3211,6 +3212,49 @@ void Locker::remove_client_cap(CInode *in, client_t client) try_eval(in, CEPH_CAP_LOCKS); } + +/** + * Return true if any currently revoking caps exceed the + * mds_revoke_cap_timeout threshold. + */ +bool Locker::any_late_revoking_caps(xlist const &revoking) const +{ + xlist::const_iterator p = revoking.begin(); + if (p.end()) { + // No revoking caps at the moment + return false; + } else { + utime_t now = ceph_clock_now(g_ceph_context); + utime_t age = now - (*p)->get_last_revoke_stamp(); + if (age <= g_conf->mds_revoke_cap_timeout) { + return false; + } else { + return true; + } + } +} + + +void Locker::get_late_revoking_clients(std::list *result) const +{ + if (!any_late_revoking_caps(revoking_caps)) { + // Fast path: no misbehaving clients, execute in O(1) + return; + } + + // Slow path: execute in O(N_clients) + std::map >::const_iterator client_rc_iter; + for (client_rc_iter = revoking_caps_by_client.begin(); + client_rc_iter != revoking_caps_by_client.end(); ++client_rc_iter) { + xlist const &client_rc = client_rc_iter->second; + bool any_late = any_late_revoking_caps(client_rc); + if (any_late) { + result->push_back(client_rc_iter->first); + } + } +} + + void Locker::caps_tick() { utime_t now = ceph_clock_now(g_ceph_context); @@ -3220,7 +3264,6 @@ void Locker::caps_tick() for (xlist::iterator p = revoking_caps.begin(); !p.end(); ++p) { Capability *cap = *p; - utime_t age = now - cap->get_last_revoke_stamp(); dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl; if (age <= g_conf->mds_revoke_cap_timeout) { @@ -3242,23 +3285,6 @@ void Locker::caps_tick() } } -void Locker::get_late_cap_releases(std::list *late_caps) const -{ - assert(late_caps != NULL); - - utime_t now = ceph_clock_now(g_ceph_context); - - for (xlist::const_iterator p = revoking_caps.begin(); !p.end(); ++p) { - Capability *cap = *p; - - utime_t age = now - cap->get_last_revoke_stamp(); - if (age <= g_conf->mds_revoke_cap_timeout) { - break; - } else { - late_caps->push_back(cap); - } - } -} void Locker::handle_client_lease(MClientLease *m) { diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 77060cb2c98..6df00c4a75d 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -196,7 +196,8 @@ public: void remove_client_cap(CInode *in, client_t client); - void get_late_cap_releases(std::list *late_caps) const; + void get_late_revoking_clients(std::list *result) const; + bool any_late_revoking_caps(xlist const &revoking) const; protected: void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq); @@ -210,7 +211,10 @@ public: void _do_cap_release(client_t client, inodeno_t ino, uint64_t cap_id, ceph_seq_t mseq, ceph_seq_t seq); void caps_tick(); + // Maintain a global list to quickly find if any caps are late revoking xlist revoking_caps; + // Maintain a per-client list to find clients responsible for late ones quickly + std::map > revoking_caps_by_client; // local public: