Merge pull request #25112 from dzafman/wip-scrub-warning

scrub warning check incorrectly uses mon scrub interval

Reviewed-by: Gregory Farnum <gfarnum@redhat.com>
Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
David Zafman 2019-01-28 10:46:18 -08:00 committed by GitHub
commit 3e6ff119e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 218 additions and 77 deletions

View File

@ -160,6 +160,11 @@
* The 'cephfs-data-scan scan_links' command now automatically repairs the
  inotables and the snaptable.
* Configuration values mon_warn_not_scrubbed/mon_warn_not_deep_scrubbed have been
renamed. They are now mon_warn_pg_not_scrubbed_ratio/mon_warn_pg_not_deep_scrubbed_ratio
respectively. This is to clarify that these warnings are related to pg scrubbing
and are a ratio of the related interval. These options are now enabled by default.
>=13.1.0
--------

View File

@ -741,8 +741,8 @@ _______________
One or more PGs have not been scrubbed recently. PGs are normally
scrubbed every ``osd_scrub_max_interval`` seconds, and this warning
triggers when ``mon_warn_not_scrubbed`` such intervals have elapsed
without a scrub.
triggers when ``mon_warn_pg_not_scrubbed_ratio`` percentage of interval has elapsed
without a scrub since it was due.
PGs will not scrub if they are not flagged as *clean*, which may
happen if they are misplaced or degraded (see *PG_AVAILABILITY* and
@ -757,8 +757,8 @@ ____________________
One or more PGs have not been deep scrubbed recently. PGs are normally
deep scrubbed every ``osd_deep_scrub_interval`` seconds, and this warning
triggers when ``mon_warn_not_deep_scrubbed`` such intervals have elapsed
without a scrub.
triggers when ``mon_warn_pg_not_deep_scrubbed_ratio`` percentage of interval has elapsed
without a scrub since it was due.
PGs will not (deep) scrub if they are not flagged as *clean*, which may
happen if they are misplaced or degraded (see *PG_AVAILABILITY* and

View File

@ -5242,7 +5242,7 @@ function TEST_periodic_scrub_replicated() {
# Can't upgrade with this set
ceph osd set nodeep-scrub
# Let map change propagate to OSDs
flush pg_stats
flush_pg_stats
sleep 5
# Fake a schedule scrub
@ -5271,6 +5271,91 @@ function TEST_periodic_scrub_replicated() {
rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
}
# Verify the PG_NOT_SCRUBBED / PG_NOT_DEEP_SCRUBBED health warnings.
# Creates (scrubs + deep_scrubs) one-PG pools, fakes overdue scrub and
# deep-scrub stamps through the trigger_scrub/trigger_deep_scrub
# admin-socket commands, then checks that "ceph health" and
# "ceph health detail" report the expected pg counts.  Pool 1 and pool
# (scrubs + 1) override the interval per-pool to exercise the
# pool-option code path in the warning check.
function TEST_scrub_warning() {
local dir=$1
local poolname=psr_pool
local objname=POBJ
# Number of pools whose scrub (resp. deep-scrub) stamps are made overdue.
local scrubs=5
local deep_scrubs=5
local i1_day=86400
local i7_days=$(calc $i1_day \* 7)
local i14_days=$(calc $i1_day \* 14)
# Warning ratio: warn once a stamp is this fraction of the interval past due.
local overdue=0.5
# interval + 1 day of slack + ratio*interval: safely past the warning cutoff.
local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) )
local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) )
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 || return 1
run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1
run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1
for i in $(seq 1 $(expr $scrubs + $deep_scrubs))
do
create_pool $poolname-$i 1 1 || return 1
wait_for_clean || return 1
if [ $i = "1" ];
then
# Pool 1: per-pool scrub_max_interval overrides the global 7-day value.
ceph osd pool set $poolname-$i scrub_max_interval $i14_days
fi
if [ $i = $(expr $scrubs + 1) ];
then
# First deep-scrub pool: per-pool deep_scrub_interval override.
ceph osd pool set $poolname-$i deep_scrub_interval $i14_days
fi
done
# Only 1 osd
local primary=0
ceph osd set noscrub || return 1
ceph osd set nodeep-scrub || return 1
# Disable randomization so the scheduling is deterministic for the test.
ceph config set global osd_scrub_interval_randomize_ratio 0
ceph config set global osd_deep_scrub_randomize_ratio 0
ceph config set global osd_scrub_max_interval ${i7_days}
ceph config set global osd_deep_scrub_interval ${i7_days}
# Fake schedule scrubs
for i in $(seq 1 $scrubs)
do
if [ $i = "1" ];
then
# Pool 1 uses the 14-day per-pool interval set above.
overdue_seconds=$pool_overdue_seconds
else
overdue_seconds=$conf_overdue_seconds
fi
# Push the last-scrub stamp back past the cutoff; the extra ${i}00
# seconds keep each pool's stamp distinct.
CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \
trigger_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1
done
# Fake schedule deep scrubs
for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs))
do
if [ $i = "$(expr $scrubs + 1)" ];
then
# This pool uses the 14-day per-pool deep_scrub_interval set above.
overdue_seconds=$pool_overdue_seconds
else
overdue_seconds=$conf_overdue_seconds
fi
CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \
trigger_deep_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1
done
flush_pg_stats
ceph health
ceph health detail
# Summary must report the expected number of overdue pgs of each kind.
ceph health | grep -q "$deep_scrubs pgs not deep-scrubbed in time" || return 1
ceph health | grep -q "$scrubs pgs not scrubbed in time" || return 1
# Detail must list each overdue pg exactly once; dump it on mismatch.
COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l)
if [ "$COUNT" != $scrubs ]; then
ceph health detail | grep "not scrubbed since"
return 1
fi
COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l)
if [ "$COUNT" != $deep_scrubs ]; then
ceph health detail | grep "not deep-scrubbed since"
return 1
fi
return 0
}
#
# Corrupt snapset in replicated pool
#

View File

@ -267,8 +267,8 @@ OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE)
OPTION(mon_data_avail_crit, OPT_INT)
OPTION(mon_data_avail_warn, OPT_INT)
OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes)
OPTION(mon_warn_not_scrubbed, OPT_INT)
OPTION(mon_warn_not_deep_scrubbed, OPT_INT)
OPTION(mon_warn_pg_not_scrubbed_ratio, OPT_FLOAT)
OPTION(mon_warn_pg_not_deep_scrubbed_ratio, OPT_FLOAT)
OPTION(mon_scrub_interval, OPT_INT) // once a day
OPTION(mon_scrub_timeout, OPT_INT) // let's give it 5 minutes; why not.
OPTION(mon_scrub_max_keys, OPT_INT) // max number of keys to scrub each time

View File

@ -1716,16 +1716,18 @@ std::vector<Option> get_global_options() {
.add_service("mon")
.set_description("issue MON_DISK_BIG health warning when mon database is above this size"),
Option("mon_warn_not_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(0)
.add_service("mon")
.set_description("if non-zero, issue PG_NOT_SCRUBBED when PG(s) have not been scrubbed for more than this long beyond the configured mon_scrub_interval (seconds)")
.add_see_also("osd_scrub_min_interval"),
Option("mon_warn_pg_not_scrubbed_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.5)
.set_min(0)
.set_description("Percentage of the scrub max interval past the scrub max interval to warn")
.set_long_description("")
.add_see_also("osd_scrub_max_interval"),
Option("mon_warn_not_deep_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(0)
.add_service("mon")
.set_description("if non-zero, issue PG_NOT_DEEP_SCRUBBED when PG(s) have not been scrubbed for more than this long beyond the configured mon_scrub_interval (seconds)")
Option("mon_warn_pg_not_deep_scrubbed_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.75)
.set_min(0)
.set_description("Percentage of the deep scrub interval past the deep scrub interval to warn")
.set_long_description("")
.add_see_also("osd_deep_scrub_interval"),
Option("mon_scrub_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
@ -3311,7 +3313,8 @@ std::vector<Option> get_global_options() {
Option("osd_scrub_backoff_ratio", Option::TYPE_FLOAT, Option::LEVEL_DEV)
.set_default(.66)
.set_description("Backoff ratio after a failed scrub scheduling attempt"),
.set_long_description("This is the precentage of ticks that do NOT schedule scrubs, 66% means that 1 out of 3 ticks will schedule scrubs")
.set_description("Backoff ratio for scheduling scrubs"),
Option("osd_scrub_chunk_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(5)
@ -3346,9 +3349,8 @@ std::vector<Option> get_global_options() {
Option("osd_deep_scrub_randomize_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0.15)
.set_description("Ratio of deep scrub interval to randomly vary")
.set_long_description("This prevents a deep scrub 'stampede' by randomly varying the scrub intervals so that they are soon uniformly distributed over the week")
.add_see_also("osd_deep_scrub_interval"),
.set_description("Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep)")
.set_long_description("This prevents a deep scrub 'stampede' by spreading deep scrubs so they are uniformly distributed over the week"),
Option("osd_deep_scrub_stride", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
.set_default(512_K)

View File

@ -2941,24 +2941,28 @@ void PGMap::get_health_checks(
// PG_NOT_SCRUBBED
// PG_NOT_DEEP_SCRUBBED
{
if (cct->_conf->mon_warn_not_scrubbed ||
cct->_conf->mon_warn_not_deep_scrubbed) {
list<string> detail, deep_detail;
int detail_max = max, deep_detail_max = max;
int detail_more = 0, deep_detail_more = 0;
int detail_total = 0, deep_detail_total = 0;
const double age = cct->_conf->mon_warn_not_scrubbed +
cct->_conf->mon_scrub_interval;
utime_t cutoff = now;
cutoff -= age;
const double deep_age = cct->_conf->mon_warn_not_deep_scrubbed +
cct->_conf->osd_deep_scrub_interval;
utime_t deep_cutoff = now;
deep_cutoff -= deep_age;
for (auto& p : pg_stat) {
if (cct->_conf->mon_warn_not_scrubbed &&
p.second.last_scrub_stamp < cutoff) {
if (cct->_conf->mon_warn_pg_not_scrubbed_ratio ||
cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) {
list<string> detail, deep_detail;
int detail_max = max, deep_detail_max = max;
int detail_more = 0, deep_detail_more = 0;
int detail_total = 0, deep_detail_total = 0;
for (auto& p : pg_stat) {
int64_t pnum = p.first.pool();
auto pool = osdmap.get_pg_pool(pnum);
if (!pool)
continue;
if (cct->_conf->mon_warn_pg_not_scrubbed_ratio) {
double scrub_max_interval = 0;
pool->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &scrub_max_interval);
if (scrub_max_interval <= 0) {
scrub_max_interval = cct->_conf->osd_scrub_max_interval;
}
const double age = (cct->_conf->mon_warn_pg_not_scrubbed_ratio * scrub_max_interval) +
scrub_max_interval;
utime_t cutoff = now;
cutoff -= age;
if (p.second.last_scrub_stamp < cutoff) {
if (detail_max > 0) {
ostringstream ss;
ss << "pg " << p.first << " not scrubbed since "
@ -2970,8 +2974,18 @@ void PGMap::get_health_checks(
}
++detail_total;
}
if (cct->_conf->mon_warn_not_deep_scrubbed &&
p.second.last_deep_scrub_stamp < deep_cutoff) {
}
if (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio) {
double deep_scrub_interval = 0;
pool->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval);
if (deep_scrub_interval <= 0) {
deep_scrub_interval = cct->_conf->osd_deep_scrub_interval;
}
double deep_age = (cct->_conf->mon_warn_pg_not_deep_scrubbed_ratio * deep_scrub_interval) +
deep_scrub_interval;
utime_t deep_cutoff = now;
deep_cutoff -= deep_age;
if (p.second.last_deep_scrub_stamp < deep_cutoff) {
if (deep_detail_max > 0) {
ostringstream ss;
ss << "pg " << p.first << " not deep-scrubbed since "
@ -2982,36 +2996,36 @@ void PGMap::get_health_checks(
++deep_detail_more;
}
++deep_detail_total;
}
}
if (detail_total) {
ostringstream ss;
ss << detail_total << " pgs not scrubbed for " << age;
auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str());
if (!detail.empty()) {
d.detail.swap(detail);
if (detail_more) {
ostringstream ss;
ss << detail_more << " more pgs... ";
d.detail.push_back(ss.str());
}
}
}
if (deep_detail_total) {
ostringstream ss;
ss << deep_detail_total << " pgs not deep-scrubbed for " << deep_age;
auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str());
}
if (detail_total) {
ostringstream ss;
ss << detail_total << " pgs not scrubbed in time";
auto& d = checks->add("PG_NOT_SCRUBBED", HEALTH_WARN, ss.str());
if (!deep_detail.empty()) {
d.detail.swap(deep_detail);
if (!detail.empty()) {
d.detail.swap(detail);
if (deep_detail_more) {
ostringstream ss;
ss << deep_detail_more << " more pgs... ";
d.detail.push_back(ss.str());
}
if (detail_more) {
ostringstream ss;
ss << detail_more << " more pgs... ";
d.detail.push_back(ss.str());
}
}
}
if (deep_detail_total) {
ostringstream ss;
ss << deep_detail_total << " pgs not deep-scrubbed in time";
auto& d = checks->add("PG_NOT_DEEP_SCRUBBED", HEALTH_WARN, ss.str());
if (!deep_detail.empty()) {
d.detail.swap(deep_detail);
if (deep_detail_more) {
ostringstream ss;
ss << deep_detail_more << " more pgs... ";
d.detail.push_back(ss.str());
}
}
}

View File

@ -3371,10 +3371,19 @@ void OSD::final_init()
r = admin_socket->register_command(
"trigger_scrub",
"trigger_scrub " \
"name=pgid,type=CephString ",
"name=pgid,type=CephString " \
"name=time,type=CephInt,req=false",
test_ops_hook,
"Trigger a scheduled scrub ");
ceph_assert(r == 0);
r = admin_socket->register_command(
"trigger_deep_scrub",
"trigger_deep_scrub " \
"name=pgid,type=CephString " \
"name=time,type=CephInt,req=false",
test_ops_hook,
"Trigger a scheduled deep scrub ");
ceph_assert(r == 0);
r = admin_socket->register_command(
"injectfull",
"injectfull " \
@ -5508,8 +5517,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
<< "to " << service->cct->_conf->osd_recovery_delay_start;
return;
}
if (command == "trigger_scrub") {
if (command == "trigger_scrub" || command == "trigger_deep_scrub") {
spg_t pgid;
bool deep = (command == "trigger_deep_scrub");
OSDMapRef curmap = service->get_osdmap();
string pgidstr;
@ -5520,6 +5530,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
return;
}
int64_t time;
cmd_getval(service->cct, cmdmap, "time", time, (int64_t)0);
PGRef pg = service->osd->_lookup_lock_pg(pgid);
if (pg == nullptr) {
ss << "Can't find pg " << pgid;
@ -5530,16 +5543,31 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
pg->unreg_next_scrub();
const pg_pool_t *p = curmap->get_pg_pool(pgid.pool());
double pool_scrub_max_interval = 0;
p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval);
double scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf()->osd_scrub_max_interval;
double scrub_max_interval;
if (deep) {
p->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &pool_scrub_max_interval);
scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf()->osd_deep_scrub_interval;
} else {
p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval);
scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf()->osd_scrub_max_interval;
}
// Instead of marking must_scrub force a schedule scrub
utime_t stamp = ceph_clock_now();
stamp -= scrub_max_interval;
stamp -= 100.0; // push back last scrub more for good measure
pg->set_last_scrub_stamp(stamp);
if (time == 0)
stamp -= scrub_max_interval;
else
stamp -= (float)time;
stamp -= 100.0; // push back last scrub more for good measure
if (deep) {
pg->set_last_deep_scrub_stamp(stamp);
} else {
pg->set_last_scrub_stamp(stamp);
}
pg->reg_next_scrub();
ss << "ok";
pg->publish_stats_to_osd();
ss << "ok - set" << (deep ? " deep" : "" ) << " stamp " << stamp;
} else {
ss << "Not primary";
}

View File

@ -3204,14 +3204,14 @@ void PG::_update_calc_stats()
for (auto& ml: sml.second) {
int missing_shards;
if (sml.first == shard_id_t::NO_SHARD) {
dout(0) << __func__ << " ml " << ml.second << " upset size " << upset.size() << " up " << ml.first.up << dendl;
dout(20) << __func__ << " ml " << ml.second << " upset size " << upset.size() << " up " << ml.first.up << dendl;
missing_shards = (int)upset.size() - ml.first.up;
} else {
// Handle shards not even in upset below
if (!find_shard(upset, sml.first))
continue;
missing_shards = std::max(0, 1 - ml.first.up);
dout(0) << __func__ << " shard " << sml.first << " ml " << ml.second << " missing shards " << missing_shards << dendl;
dout(20) << __func__ << " shard " << sml.first << " ml " << ml.second << " missing shards " << missing_shards << dendl;
}
int odegraded = ml.second * missing_shards;
// Copies on other osds but limited to the possible degraded

View File

@ -315,9 +315,15 @@ public:
}
// Overwrite the PG's last-scrub timestamp in both the stats copy and
// the history copy so the two stay in sync.
void set_last_scrub_stamp(utime_t stamp) {
  info.history.last_scrub_stamp = stamp;
  info.stats.last_scrub_stamp = stamp;
}
// Overwrite the PG's last-deep-scrub timestamp in both the stats copy
// and the history copy so the two stay in sync.
void set_last_deep_scrub_stamp(utime_t stamp) {
  info.history.last_deep_scrub_stamp = stamp;
  info.stats.last_deep_scrub_stamp = stamp;
}
bool is_deleting() const {
return deleting;
}
@ -1389,6 +1395,7 @@ protected:
void _update_calc_stats();
void _update_blocked_by();
friend class TestOpsSocketHook;
void publish_stats_to_osd();
void clear_publish_stats();