mirror of
https://github.com/ceph/ceph
synced 2025-03-20 01:07:42 +00:00
mds: health metric for late releasing caps
Follow up on Yan Zheng's "mds: warn clients which aren't revoking cap" to expose a health metric for this condition, in addition to the existing clog messages.
Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
parent
05d69580b0
commit
fd04d5e662
@ -264,6 +264,24 @@ void Beacon::notify_health(MDS const *mds)
|
||||
health.metrics.push_back(m);
|
||||
}
|
||||
|
||||
// Detect clients failing to respond to modifications to capabilities in
|
||||
// CLIENT_CAPS messages.
|
||||
std::list<const Capability*> late_caps;
|
||||
mds->locker->get_late_cap_releases(&late_caps);
|
||||
std::set<client_t> late_clients;
|
||||
for (std::list<const Capability*>::iterator i =late_caps.begin(); i != late_caps.end(); ++i) {
|
||||
const Capability *cap = *i;
|
||||
late_clients.insert(cap->get_client());
|
||||
}
|
||||
|
||||
for (std::set<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) {
|
||||
std::ostringstream oss;
|
||||
oss << "client." << *i << " failing to respond to capability release";
|
||||
MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str());
|
||||
m.metadata["client_id"] = stringify(i->v);
|
||||
health.metrics.push_back(m);
|
||||
}
|
||||
|
||||
// Detect clients failing to generate cap releases from SESSION_RECALL messages
|
||||
// May be due to buggy client or resource-hogging application.
|
||||
set<Session*> sessions;
|
||||
|
@ -286,7 +286,7 @@ public:
|
||||
void clear_new() { state &= ~STATE_NEW; }
|
||||
|
||||
CInode *get_inode() { return inode; }
|
||||
client_t get_client() { return client; }
|
||||
client_t get_client() const { return client; }
|
||||
|
||||
// caps this client wants to hold
|
||||
int wanted() { return _wanted; }
|
||||
|
@ -3215,20 +3215,47 @@ void Locker::caps_tick()
|
||||
{
|
||||
utime_t now = ceph_clock_now(g_ceph_context);
|
||||
|
||||
dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl;
|
||||
|
||||
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
|
||||
Capability *cap = *p;
|
||||
|
||||
|
||||
utime_t age = now - cap->get_last_revoke_stamp();
|
||||
if (age <= g_conf->mds_revoke_cap_timeout)
|
||||
dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
|
||||
if (age <= g_conf->mds_revoke_cap_timeout) {
|
||||
dout(20) << __func__ << " age below timeout " << g_conf->mds_revoke_cap_timeout << dendl;
|
||||
break;
|
||||
}
|
||||
// exponential backoff of warning intervals
|
||||
if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) {
|
||||
cap->inc_num_revoke_warnings();
|
||||
stringstream ss;
|
||||
ss << "client." << cap->get_client() << " isn't responding to MClientCaps(revoke), ino "
|
||||
ss << "client." << cap->get_client() << " isn't responding to mclientcaps(revoke), ino "
|
||||
<< cap->get_inode()->ino() << " pending " << ccap_string(cap->pending())
|
||||
<< " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n";
|
||||
mds->clog->warn() << ss.str();
|
||||
dout(20) << __func__ << " " << ss.str() << dendl;
|
||||
} else {
|
||||
dout(20) << __func__ << " silencing log message (backoff) for " << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Collect the capabilities whose revocation has been outstanding for longer
// than g_conf->mds_revoke_cap_timeout.
//
// late_caps: output list; must not be NULL (asserted). Matching capabilities
//            are appended with push_back(); the list is not cleared first.
//
// The age of a capability is the current time minus its
// get_last_revoke_stamp(). The scan walks revoking_caps from the front and
// stops at the first capability whose age is within the timeout.
// NOTE(review): that early exit presumably relies on revoking_caps being
// ordered oldest-revoke-first -- confirm against where the list is filled.
void Locker::get_late_cap_releases(std::list<const Capability*> *late_caps) const
|
||||
{
|
||||
assert(late_caps != NULL);
|
||||
|
||||
utime_t now = ceph_clock_now(g_ceph_context);
|
||||
|
||||
for (xlist<Capability*>::const_iterator p = revoking_caps.begin(); !p.end(); ++p) {
|
||||
Capability *cap = *p;
|
||||
|
||||
utime_t age = now - cap->get_last_revoke_stamp();
|
||||
// Compared against the same mds_revoke_cap_timeout that caps_tick() uses.
if (age <= g_conf->mds_revoke_cap_timeout) {
|
||||
// First capability still within the timeout: stop scanning here.
break;
|
||||
} else {
|
||||
late_caps->push_back(cap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ class LogSegment;
|
||||
class SimpleLock;
|
||||
class ScatterLock;
|
||||
class LocalLock;
|
||||
|
||||
class MDCache;
|
||||
typedef ceph::shared_ptr<MDRequestImpl> MDRequestRef;
|
||||
|
||||
@ -195,6 +196,8 @@ public:
|
||||
|
||||
void remove_client_cap(CInode *in, client_t client);
|
||||
|
||||
void get_late_cap_releases(std::list<const Capability*> *late_caps) const;
|
||||
|
||||
protected:
|
||||
void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq);
|
||||
void handle_client_caps(class MClientCaps *m);
|
||||
|
@ -33,7 +33,8 @@
|
||||
enum mds_metric_t {
|
||||
MDS_HEALTH_NULL = 0,
|
||||
MDS_HEALTH_TRIM = 1,
|
||||
MDS_HEALTH_CLIENT_RECALL = 2
|
||||
MDS_HEALTH_CLIENT_RECALL = 2,
|
||||
MDS_HEALTH_CLIENT_LATE_RELEASE = 3
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user