mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
Merge pull request #1692 from ceph/wip-7784
mon: OSDMonitor: HEALTH_WARN on 'mon osd down out interval == 0' Reviewed-by: Sage Weil <sage@inktank.com>
This commit is contained in:
commit
d07ce84148
@ -5,3 +5,8 @@ v0.80
|
||||
* OSDMap's json-formatted dump changed for keys 'full' and 'nearfull'.
|
||||
What was previously output as the strings 'true' or 'false' is
|
||||
now output as the booleans true and false, according to JSON syntax.
|
||||
|
||||
* HEALTH_WARN on 'mon osd down out interval == 0'. Having this option set
|
||||
to zero on the leader acts much like having the 'noout' flag set. This
|
||||
warning will only be reported if the monitor getting the 'health' or
|
||||
'status' request has this option set to zero.
|
||||
|
@ -176,6 +176,7 @@ OPTION(mon_osd_report_timeout, OPT_INT, 900) // grace period before declaring
|
||||
OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-replay mds to be active
|
||||
OPTION(mon_warn_on_old_mons, OPT_BOOL, true) // should mons set health to WARN if part of quorum is old?
|
||||
OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are not optimal
|
||||
OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
|
||||
OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
|
||||
OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
|
||||
OPTION(mon_max_log_epochs, OPT_INT, 500)
|
||||
|
@ -2062,6 +2062,29 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
|
||||
}
|
||||
}
|
||||
|
||||
// Warn if 'mon_osd_down_out_interval' is set to zero.
|
||||
// Having this option set to zero on the leader acts much like the
|
||||
// 'noout' flag. It's hard to figure out what's going wrong with clusters
|
||||
// without the 'noout' flag set but acting like that just the same, so
|
||||
// we report a HEALTH_WARN in case this option is set to zero.
|
||||
// This is an ugly hack to get the warning out, but until we find a way
|
||||
// to spread global options throughout the mon cluster and have all mons
|
||||
// using a base set of the same options, we need to work around this sort
|
||||
// of thing.
|
||||
// There's also the obvious drawback that if this is set on a single
|
||||
// monitor on a 3-monitor cluster, this warning will only be shown every
|
||||
// third monitor connection.
|
||||
if (g_conf->mon_warn_on_osd_down_out_interval_zero &&
|
||||
g_conf->mon_osd_down_out_interval == 0) {
|
||||
ostringstream ss;
|
||||
ss << "mon." << mon->name << " has mon_osd_down_out_interval set to 0";
|
||||
summary.push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
if (detail) {
|
||||
ss << "; this has the same effect as the 'noout' flag";
|
||||
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
|
||||
}
|
||||
}
|
||||
|
||||
get_pools_health(summary, detail);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user