osd: change scrub min/max thresholds

The previous 'osd scrub min interval' was mostly meaningless and useless.
Meanwhile, the 'osd scrub max interval' would only trigger a scrub if the
load was sufficiently low; if it was high, the PG might *never* scrub.

Instead, make the 'min' what the max used to be.  If it has been more than
'osd scrub min interval' seconds since a PG was last scrubbed, and the load
is low, scrub.  And add an additional condition that if it has been more
than the max threshold, scrub the PG no matter what--regardless of the load.

Note that this does not change the default scrub interval for less-loaded
clusters, but it *does* change the meaning of existing config options.

Fixes: #3786
Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2013-01-13 22:04:58 -08:00
parent 16d67c798b
commit 299548024a
3 changed files with 34 additions and 11 deletions

View File

@ -0,0 +1,11 @@
* The meaning of 'osd scrub max interval' has been changed: it is now
the amount of time after which a PG will be scrubbed regardless of
the system load. The option 'osd scrub min interval' now has the
meaning that 'osd scrub max interval' previously had: the amount of
time before a PG is scrubbed if the load is low. The defaults have
been adjusted such that the only
change in behavior is that PGs will now be scrubbed after a week
even if the system load remains high. However, if either of these
options has been changed in ceph.conf, those settings should be
reviewed in light of their adjusted meanings.

View File

@ -344,8 +344,8 @@ OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
OPTION(osd_max_scrubs, OPT_INT, 1)
OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
OPTION(osd_scrub_min_interval, OPT_FLOAT, 300)
OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day
OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
OPTION(osd_auto_weight, OPT_BOOL, false)

View File

@ -3563,11 +3563,13 @@ void OSD::sched_scrub()
{
assert(osd_lock.is_locked());
bool should = scrub_should_schedule();
bool load_is_low = scrub_should_schedule();
dout(20) << "sched_scrub should=" << (int)should << dendl;
dout(20) << "sched_scrub load_is_low=" << (int)load_is_low << dendl;
utime_t max = ceph_clock_now(g_ceph_context);
utime_t min = max;
min -= g_conf->osd_scrub_min_interval;
max -= g_conf->osd_scrub_max_interval;
//dout(20) << " " << last_scrub_pg << dendl;
@ -3577,20 +3579,30 @@ void OSD::sched_scrub()
utime_t t = pos.first;
pg_t pgid = pos.second;
if (t > max) {
if (t > min) {
dout(10) << " " << pgid << " at " << t
<< " > " << max << " (" << g_conf->osd_scrub_max_interval << " seconds ago)" << dendl;
<< " > min " << min << " (" << g_conf->osd_scrub_min_interval << " seconds ago)" << dendl;
break;
}
if (t > max && !load_is_low) {
// save ourselves some effort
break;
}
dout(10) << " on " << t << " " << pgid << dendl;
PG *pg = _lookup_lock_pg(pgid);
if (pg) {
if (pg->is_active() &&
(should || pg->scrubber.must_scrub) &&
pg->sched_scrub()) {
pg->unlock();
break;
(load_is_low ||
t < max ||
pg->scrubber.must_scrub)) {
dout(10) << " " << pgid << " at " << t
<< (pg->scrubber.must_scrub ? ", explicitly requested" : "")
<< (t < max ? ", last_scrub > max" : "")
<< dendl;
if (pg->sched_scrub()) {
pg->unlock();
break;
}
}
pg->unlock();
}