mds: add health warning for oversized cache

Various issues can lead to an oversized cache, which
is a precursor to an OOM condition: let's give users
a clearer message with some useful counts that might
hint at what is wrong.

Fixes: http://tracker.ceph.com/issues/16570
Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2016-07-07 14:00:58 +01:00
parent b3bea59b7e
commit 3b5a6a4fa0
4 changed files with 17 additions and 1 deletions

View File

@ -462,6 +462,18 @@ void Beacon::notify_health(MDSRank const *mds)
"MDS in read-only mode");
health.metrics.push_back(m);
}
// Report if we have significantly exceeded our cache size limit
if (mds->mdcache->get_num_inodes() > g_conf->mds_cache_size * 1.5) {
std::ostringstream oss;
oss << "Too many inodes in cache (" << mds->mdcache->get_num_inodes()
<< "/" << g_conf->mds_cache_size << "), "
<< mds->mdcache->num_inodes_with_caps << " inodes in use by clients, "
<< mds->mdcache->get_num_strays() << " stray files";
MDSHealthMetric m(MDS_HEALTH_CACHE_OVERSIZED, HEALTH_WARN, oss.str());
health.metrics.push_back(m);
}
}
MDSMap::DaemonState Beacon::get_want_state() const

View File

@ -963,6 +963,7 @@ public:
public:
void eval_remote(CDentry *dn);
void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
/** Forward the stray count reported by stray_manager. */
uint64_t get_num_strays() const
{
  return stray_manager.get_num_strays();
}
protected:
void scan_stray_dir(dirfrag_t next=dirfrag_t());

View File

@ -170,6 +170,8 @@ class StrayManager
bool eval_stray(CDentry *dn, bool delay=false);
/** Read-only accessor for the current value of num_strays. */
uint64_t get_num_strays() const
{
  return num_strays;
}
/**
* Where eval_stray was previously invoked with delay=true, call
* eval_stray again for any dentries that were put on the

View File

@ -39,7 +39,8 @@ enum mds_metric_t {
MDS_HEALTH_CLIENT_OLDEST_TID_MANY,
MDS_HEALTH_DAMAGE,
MDS_HEALTH_READ_ONLY,
MDS_HEALTH_SLOW_REQUEST
MDS_HEALTH_SLOW_REQUEST,
MDS_HEALTH_CACHE_OVERSIZED
};
/**