mirror of
https://github.com/ceph/ceph
synced 2025-04-01 14:51:13 +00:00
mon: Log "ceph health detail" periodically in cluster log
Change mon_health_to_clog_interval from 1_hr to 10_min so that the health summary or health detail is logged to the cluster log more frequently. Fixes: https://tracker.ceph.com/issues/48042 Signed-off-by: Prashant Dhange <pdhange@redhat.com>
This commit is contained in:
parent
d2769ff180
commit
f45712c190
@ -26,6 +26,10 @@
|
||||
>=15.0.0
|
||||
--------
|
||||
|
||||
* MON: The cluster log now logs health detail every ``mon_health_to_clog_interval``,
|
||||
which has been changed from 1hr to 10min. Logging of health detail will be
|
||||
skipped if the health summary has not changed since it was last logged.
|
||||
|
||||
* The ``ceph df`` command now lists the number of pgs in each pool.
|
||||
|
||||
* Monitors now have config option ``mon_allow_pool_size_one``, which is disabled
|
||||
|
@ -40,6 +40,7 @@
|
||||
|
||||
mon cluster log file level = debug
|
||||
debug asserts on shutdown = true
|
||||
mon health detail to clog = false
|
||||
|
||||
[osd]
|
||||
osd journal size = 100
|
||||
|
@ -263,6 +263,7 @@ OPTION(mon_reweight_max_change, OPT_DOUBLE)
|
||||
OPTION(mon_health_to_clog, OPT_BOOL)
|
||||
OPTION(mon_health_to_clog_interval, OPT_INT)
|
||||
OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE)
|
||||
OPTION(mon_health_detail_to_clog, OPT_BOOL)
|
||||
OPTION(mon_data_avail_crit, OPT_INT)
|
||||
OPTION(mon_data_avail_warn, OPT_INT)
|
||||
OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes)
|
||||
|
@ -1941,7 +1941,7 @@ std::vector<Option> get_global_options() {
|
||||
.set_description("log monitor health to cluster log"),
|
||||
|
||||
Option("mon_health_to_clog_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
|
||||
.set_default(1_hr)
|
||||
.set_default(10_min)
|
||||
.add_service("mon")
|
||||
.set_description("frequency to log monitor health to cluster log")
|
||||
.add_see_also("mon_health_to_clog"),
|
||||
@ -1951,6 +1951,10 @@ std::vector<Option> get_global_options() {
|
||||
.add_service("mon")
|
||||
.set_description(""),
|
||||
|
||||
Option("mon_health_detail_to_clog", Option::TYPE_BOOL, Option::LEVEL_DEV)
|
||||
.set_default(true)
|
||||
.set_description("log health detail to cluster log"),
|
||||
|
||||
Option("mon_health_max_detail", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
|
||||
.set_default(50)
|
||||
.add_service("mon")
|
||||
|
@ -2835,7 +2835,16 @@ void Monitor::do_health_to_clog(bool force)
|
||||
summary == health_status_cache.summary &&
|
||||
level == health_status_cache.overall)
|
||||
return;
|
||||
clog->health(level) << "overall " << summary;
|
||||
|
||||
if (g_conf()->mon_health_detail_to_clog &&
|
||||
summary != health_status_cache.summary &&
|
||||
level != HEALTH_OK) {
|
||||
string details;
|
||||
level = healthmon()->get_health_status(true, nullptr, &details);
|
||||
clog->health(level) << "Health detail: " << details;
|
||||
} else {
|
||||
clog->health(level) << "overall " << summary;
|
||||
}
|
||||
health_status_cache.summary = summary;
|
||||
health_status_cache.overall = level;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user