mirror of
https://github.com/ceph/ceph
synced 2025-03-22 18:27:10 +00:00
mds: health metric for late releasing caps
Follow up on Yan Zheng's "mds: warn clients which aren't revoking cap" to include a health metric for this condition as well as the clog messages.
Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
parent
05d69580b0
commit
fd04d5e662
@@ -264,6 +264,24 @@ void Beacon::notify_health(MDS const *mds)
|
|||||||
health.metrics.push_back(m);
|
health.metrics.push_back(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect clients failing to respond to modifications to capabilities in
|
||||||
|
// CLIENT_CAPS messages.
|
||||||
|
std::list<const Capability*> late_caps;
|
||||||
|
mds->locker->get_late_cap_releases(&late_caps);
|
||||||
|
std::set<client_t> late_clients;
|
||||||
|
for (std::list<const Capability*>::iterator i =late_caps.begin(); i != late_caps.end(); ++i) {
|
||||||
|
const Capability *cap = *i;
|
||||||
|
late_clients.insert(cap->get_client());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (std::set<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "client." << *i << " failing to respond to capability release";
|
||||||
|
MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str());
|
||||||
|
m.metadata["client_id"] = stringify(i->v);
|
||||||
|
health.metrics.push_back(m);
|
||||||
|
}
|
||||||
|
|
||||||
// Detect clients failing to generate cap releases from SESSION_RECALL messages
|
// Detect clients failing to generate cap releases from SESSION_RECALL messages
|
||||||
// May be due to buggy client or resource-hogging application.
|
// May be due to buggy client or resource-hogging application.
|
||||||
set<Session*> sessions;
|
set<Session*> sessions;
|
||||||
|
@@ -286,7 +286,7 @@ public:
|
|||||||
void clear_new() { state &= ~STATE_NEW; }
|
void clear_new() { state &= ~STATE_NEW; }
|
||||||
|
|
||||||
CInode *get_inode() { return inode; }
|
CInode *get_inode() { return inode; }
|
||||||
client_t get_client() { return client; }
|
client_t get_client() const { return client; }
|
||||||
|
|
||||||
// caps this client wants to hold
|
// caps this client wants to hold
|
||||||
int wanted() { return _wanted; }
|
int wanted() { return _wanted; }
|
||||||
|
@@ -3215,20 +3215,47 @@ void Locker::caps_tick()
|
|||||||
{
|
{
|
||||||
utime_t now = ceph_clock_now(g_ceph_context);
|
utime_t now = ceph_clock_now(g_ceph_context);
|
||||||
|
|
||||||
|
dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl;
|
||||||
|
|
||||||
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
|
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
|
||||||
Capability *cap = *p;
|
Capability *cap = *p;
|
||||||
|
|
||||||
|
|
||||||
utime_t age = now - cap->get_last_revoke_stamp();
|
utime_t age = now - cap->get_last_revoke_stamp();
|
||||||
if (age <= g_conf->mds_revoke_cap_timeout)
|
dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
|
||||||
|
if (age <= g_conf->mds_revoke_cap_timeout) {
|
||||||
|
dout(20) << __func__ << " age below timeout " << g_conf->mds_revoke_cap_timeout << dendl;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
// exponential backoff of warning intervals
|
// exponential backoff of warning intervals
|
||||||
if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) {
|
if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) {
|
||||||
cap->inc_num_revoke_warnings();
|
cap->inc_num_revoke_warnings();
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << "client." << cap->get_client() << " isn't responding to MClientCaps(revoke), ino "
|
ss << "client." << cap->get_client() << " isn't responding to mclientcaps(revoke), ino "
|
||||||
<< cap->get_inode()->ino() << " pending " << ccap_string(cap->pending())
|
<< cap->get_inode()->ino() << " pending " << ccap_string(cap->pending())
|
||||||
<< " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n";
|
<< " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n";
|
||||||
mds->clog->warn() << ss.str();
|
mds->clog->warn() << ss.str();
|
||||||
|
dout(20) << __func__ << " " << ss.str() << dendl;
|
||||||
|
} else {
|
||||||
|
dout(20) << __func__ << " silencing log message (backoff) for " << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Locker::get_late_cap_releases(std::list<const Capability*> *late_caps) const
|
||||||
|
{
|
||||||
|
assert(late_caps != NULL);
|
||||||
|
|
||||||
|
utime_t now = ceph_clock_now(g_ceph_context);
|
||||||
|
|
||||||
|
for (xlist<Capability*>::const_iterator p = revoking_caps.begin(); !p.end(); ++p) {
|
||||||
|
Capability *cap = *p;
|
||||||
|
|
||||||
|
utime_t age = now - cap->get_last_revoke_stamp();
|
||||||
|
if (age <= g_conf->mds_revoke_cap_timeout) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
late_caps->push_back(cap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -49,6 +49,7 @@ class LogSegment;
|
|||||||
class SimpleLock;
|
class SimpleLock;
|
||||||
class ScatterLock;
|
class ScatterLock;
|
||||||
class LocalLock;
|
class LocalLock;
|
||||||
|
|
||||||
class MDCache;
|
class MDCache;
|
||||||
typedef ceph::shared_ptr<MDRequestImpl> MDRequestRef;
|
typedef ceph::shared_ptr<MDRequestImpl> MDRequestRef;
|
||||||
|
|
||||||
@@ -195,6 +196,8 @@ public:
|
|||||||
|
|
||||||
void remove_client_cap(CInode *in, client_t client);
|
void remove_client_cap(CInode *in, client_t client);
|
||||||
|
|
||||||
|
void get_late_cap_releases(std::list<const Capability*> *late_caps) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq);
|
void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq);
|
||||||
void handle_client_caps(class MClientCaps *m);
|
void handle_client_caps(class MClientCaps *m);
|
||||||
|
@@ -33,7 +33,8 @@
|
|||||||
enum mds_metric_t {
|
enum mds_metric_t {
|
||||||
MDS_HEALTH_NULL = 0,
|
MDS_HEALTH_NULL = 0,
|
||||||
MDS_HEALTH_TRIM = 1,
|
MDS_HEALTH_TRIM = 1,
|
||||||
MDS_HEALTH_CLIENT_RECALL = 2
|
MDS_HEALTH_CLIENT_RECALL = 2,
|
||||||
|
MDS_HEALTH_CLIENT_LATE_RELEASE = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user