From fd04d5e6622ec8c4912b9d146f368d091a088e4c Mon Sep 17 00:00:00 2001 From: John Spray Date: Sun, 7 Sep 2014 16:22:37 +0100 Subject: [PATCH] mds: health metric for late releasing caps Follow up on Yan Zheng's "mds: warn clients which aren't revoking cap" to include a health metric for this condition as well as the clog messages. Signed-off-by: John Spray --- src/mds/Beacon.cc | 18 ++++++++++++++++++ src/mds/Capability.h | 2 +- src/mds/Locker.cc | 31 +++++++++++++++++++++++++++++-- src/mds/Locker.h | 3 +++ src/messages/MMDSBeacon.h | 3 ++- 5 files changed, 53 insertions(+), 4 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 7f6af82d830..5140f98398b 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -264,6 +264,24 @@ void Beacon::notify_health(MDS const *mds) health.metrics.push_back(m); } + // Detect clients failing to respond to modifications to capabilities in + // CLIENT_CAPS messages. + std::list<const Capability*> late_caps; + mds->locker->get_late_cap_releases(&late_caps); + std::set<client_t> late_clients; + for (std::list<const Capability*>::iterator i =late_caps.begin(); i != late_caps.end(); ++i) { + const Capability *cap = *i; + late_clients.insert(cap->get_client()); + } + + for (std::set<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) { + std::ostringstream oss; + oss << "client." << *i << " failing to respond to capability release"; + MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str()); + m.metadata["client_id"] = stringify(i->v); + health.metrics.push_back(m); + } + // Detect clients failing to generate cap releases from SESSION_RECALL messages // May be due to buggy client or resource-hogging application. 
set sessions; diff --git a/src/mds/Capability.h b/src/mds/Capability.h index 37adab19fd5..53ac91da2cf 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -286,7 +286,7 @@ public: void clear_new() { state &= ~STATE_NEW; } CInode *get_inode() { return inode; } - client_t get_client() { return client; } + client_t get_client() const { return client; } // caps this client wants to hold int wanted() { return _wanted; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index d42f452a56e..944925e762d 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -3215,20 +3215,47 @@ void Locker::caps_tick() { utime_t now = ceph_clock_now(g_ceph_context); + dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl; + for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) { Capability *cap = *p; + utime_t age = now - cap->get_last_revoke_stamp(); - if (age <= g_conf->mds_revoke_cap_timeout) + dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl; + if (age <= g_conf->mds_revoke_cap_timeout) { + dout(20) << __func__ << " age below timeout " << g_conf->mds_revoke_cap_timeout << dendl; break; + } // exponential backoff of warning intervals if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) { cap->inc_num_revoke_warnings(); stringstream ss; - ss << "client." << cap->get_client() << " isn't responding to MClientCaps(revoke), ino " + ss << "client." << cap->get_client() << " isn't responding to mclientcaps(revoke), ino " << cap->get_inode()->ino() << " pending " << ccap_string(cap->pending()) << " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n"; mds->clog->warn() << ss.str(); + dout(20) << __func__ << " " << ss.str() << dendl; + } else { + dout(20) << __func__ << " silencing log message (backoff) for " << cap->get_client() << "." 
<< cap->get_inode()->ino() << dendl; + } + } +} + +void Locker::get_late_cap_releases(std::list<const Capability*> *late_caps) const +{ + assert(late_caps != NULL); + + utime_t now = ceph_clock_now(g_ceph_context); + + for (xlist<Capability*>::const_iterator p = revoking_caps.begin(); !p.end(); ++p) { + Capability *cap = *p; + + utime_t age = now - cap->get_last_revoke_stamp(); + if (age <= g_conf->mds_revoke_cap_timeout) { + break; + } else { + late_caps->push_back(cap); } } } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index dbcaacc5919..77060cb2c98 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -49,6 +49,7 @@ class LogSegment; class SimpleLock; class ScatterLock; class LocalLock; + class MDCache; typedef ceph::shared_ptr MDRequestRef; @@ -195,6 +196,8 @@ public: void remove_client_cap(CInode *in, client_t client); + void get_late_cap_releases(std::list<const Capability*> *late_caps) const; + protected: void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq); void handle_client_caps(class MClientCaps *m); diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index dd79493a573..6fc41392b84 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -33,7 +33,8 @@ enum mds_metric_t { MDS_HEALTH_NULL = 0, MDS_HEALTH_TRIM = 1, - MDS_HEALTH_CLIENT_RECALL = 2 + MDS_HEALTH_CLIENT_RECALL = 2, + MDS_HEALTH_CLIENT_LATE_RELEASE = 3 }; /**