1
0
mirror of https://github.com/ceph/ceph synced 2025-04-01 14:51:13 +00:00

mon: Log "ceph health detail" periodically in cluster log

Change mon_health_to_clog_interval from 1_hr to 10_min so that the
health summary or detail is logged more frequently.

Fixes: https://tracker.ceph.com/issues/48042

Signed-off-by: Prashant Dhange <pdhange@redhat.com>
This commit is contained in:
Prashant D 2020-10-30 06:40:43 -04:00
parent d2769ff180
commit f45712c190
5 changed files with 21 additions and 2 deletions

View File

@ -26,6 +26,10 @@
>=15.0.0
--------
* MON: The cluster log now logs health detail every ``mon_health_to_clog_interval``,
which has been changed from 1hr to 10min. Logging of health detail will be
skipped if the health summary has not changed since it was last logged.
* The ``ceph df`` command now lists the number of pgs in each pool.
* Monitors now have config option ``mon_allow_pool_size_one``, which is disabled

View File

@ -40,6 +40,7 @@
mon cluster log file level = debug
debug asserts on shutdown = true
mon health detail to clog = false
[osd]
osd journal size = 100

View File

@ -263,6 +263,7 @@ OPTION(mon_reweight_max_change, OPT_DOUBLE)
OPTION(mon_health_to_clog, OPT_BOOL)
OPTION(mon_health_to_clog_interval, OPT_INT)
OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE)
OPTION(mon_health_detail_to_clog, OPT_BOOL)
OPTION(mon_data_avail_crit, OPT_INT)
OPTION(mon_data_avail_warn, OPT_INT)
OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes)

View File

@ -1941,7 +1941,7 @@ std::vector<Option> get_global_options() {
.set_description("log monitor health to cluster log"),
Option("mon_health_to_clog_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(1_hr)
.set_default(10_min)
.add_service("mon")
.set_description("frequency to log monitor health to cluster log")
.add_see_also("mon_health_to_clog"),
@ -1951,6 +1951,10 @@ std::vector<Option> get_global_options() {
.add_service("mon")
.set_description(""),
Option("mon_health_detail_to_clog", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(true)
.set_description("log health detail to cluster log"),
Option("mon_health_max_detail", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(50)
.add_service("mon")

View File

@ -2835,7 +2835,16 @@ void Monitor::do_health_to_clog(bool force)
summary == health_status_cache.summary &&
level == health_status_cache.overall)
return;
clog->health(level) << "overall " << summary;
if (g_conf()->mon_health_detail_to_clog &&
summary != health_status_cache.summary &&
level != HEALTH_OK) {
string details;
level = healthmon()->get_health_status(true, nullptr, &details);
clog->health(level) << "Health detail: " << details;
} else {
clog->health(level) << "overall " << summary;
}
health_status_cache.summary = summary;
health_status_cache.overall = level;
}