mirror of
https://github.com/ceph/ceph
synced 2024-12-17 00:46:05 +00:00
mon: report pgs stuck inactive/unclean/stale in health check
Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
Reviewed-by: Sage Weil <sage.weil@dreamhost.com>
This commit is contained in:
parent
d10e1f46df
commit
b9a675a293
@ -100,6 +100,7 @@ OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between
|
||||
OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings
|
||||
OPTION(mon_accept_timeout, OPT_FLOAT, 10.0) // on leader, if paxos update isn't accepted
|
||||
OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s
|
||||
OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info)
|
||||
OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full"
|
||||
OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full
|
||||
OPTION(mon_globalid_prealloc, OPT_INT, 100) // how many globalids to prealloc
|
||||
|
@ -1151,7 +1151,35 @@ enum health_status_t PGMonitor::get_health(std::ostream &ss) const
|
||||
note["repair"] += p->second;
|
||||
if (p->first & PG_STATE_SPLITTING)
|
||||
note["splitting"] += p->second;
|
||||
if (p->first & PG_STATE_RECOVERING)
|
||||
note["recovering"] += p->second;
|
||||
if (p->first & PG_STATE_INCOMPLETE)
|
||||
note["incomplete"] += p->second;
|
||||
if (p->first & PG_STATE_BACKFILL)
|
||||
note["backfill"] += p->second;
|
||||
}
|
||||
|
||||
hash_map<pg_t, pg_stat_t> stuck_pgs;
|
||||
utime_t now(ceph_clock_now(g_ceph_context));
|
||||
utime_t cutoff = now - utime_t(g_conf->mon_pg_stuck_threshold, 0);
|
||||
|
||||
pg_map.get_stuck_stats(PGMap::STUCK_INACTIVE, cutoff, stuck_pgs);
|
||||
if (!stuck_pgs.empty()) {
|
||||
note["stuck inactive"] = stuck_pgs.size();
|
||||
}
|
||||
stuck_pgs.clear();
|
||||
|
||||
pg_map.get_stuck_stats(PGMap::STUCK_UNCLEAN, cutoff, stuck_pgs);
|
||||
if (!stuck_pgs.empty()) {
|
||||
note["stuck unclean"] = stuck_pgs.size();
|
||||
}
|
||||
stuck_pgs.clear();
|
||||
|
||||
pg_map.get_stuck_stats(PGMap::STUCK_STALE, cutoff, stuck_pgs);
|
||||
if (!stuck_pgs.empty()) {
|
||||
note["stuck stale"] = stuck_pgs.size();
|
||||
}
|
||||
|
||||
if (!note.empty()) {
|
||||
ret = HEALTH_WARN;
|
||||
for (map<string,int>::iterator p = note.begin(); p != note.end(); p++) {
|
||||
@ -1192,7 +1220,7 @@ int PGMonitor::dump_stuck_pg_stats(ostream& ss,
|
||||
{
|
||||
string format = "plain";
|
||||
string val;
|
||||
int threshold = 300;
|
||||
int threshold = g_conf->mon_pg_stuck_threshold;
|
||||
int seconds;
|
||||
ostringstream err;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user