mirror of
https://github.com/ceph/ceph
synced 2024-12-12 22:46:21 +00:00
osd: change scrub min/max thresholds
The previous 'osd scrub min interval' was mostly meaningless and useless. Meanwhile, the 'osd scrub max interval' would only trigger a scrub if the load was sufficiently low; if it was high, the PG might *never* scrub. Instead, make the 'min' what the max used to be. If it has been more than this many seconds, and the load is low, scrub. And add an additional condition that if it has been more than the max threshold, scrub the PG no matter what--regardless of the load. Note that this does not change the default scrub interval for less-loaded clusters, but it *does* change the meaning of existing config options. Fixes: #3786 Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
parent
16d67c798b
commit
299548024a
@ -0,0 +1,11 @@
|
||||
|
||||
|
||||
* The meaning of 'osd scrub max interval' has been changed: it is now
|
||||
the amount of time after which a PG will be scrubbed regardless of
|
||||
the system load. The option 'osd scrub min interval' now has the
|
||||
previous meaning: the amount of time before a PG is scrubbed if the
|
||||
load is low. The defaults have been adjusted such that the only
|
||||
change in behavior is that PGs will now be scrubbed after a week
|
||||
even if the system load remains high. However, if either of these
|
||||
options has been changed in ceph.conf, those settings should be
|
||||
reviewed in light of their adjusted meanings.
|
@ -344,8 +344,8 @@ OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk
|
||||
OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now
|
||||
OPTION(osd_max_scrubs, OPT_INT, 1)
|
||||
OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
|
||||
OPTION(osd_scrub_min_interval, OPT_FLOAT, 300)
|
||||
OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day
|
||||
OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
|
||||
OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
|
||||
OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week
|
||||
OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
|
||||
OPTION(osd_auto_weight, OPT_BOOL, false)
|
||||
|
@ -3563,11 +3563,13 @@ void OSD::sched_scrub()
|
||||
{
|
||||
assert(osd_lock.is_locked());
|
||||
|
||||
bool should = scrub_should_schedule();
|
||||
bool load_is_low = scrub_should_schedule();
|
||||
|
||||
dout(20) << "sched_scrub should=" << (int)should << dendl;
|
||||
dout(20) << "sched_scrub load_is_low=" << (int)load_is_low << dendl;
|
||||
|
||||
utime_t max = ceph_clock_now(g_ceph_context);
|
||||
utime_t min = max;
|
||||
min -= g_conf->osd_scrub_min_interval;
|
||||
max -= g_conf->osd_scrub_max_interval;
|
||||
|
||||
//dout(20) << " " << last_scrub_pg << dendl;
|
||||
@ -3577,20 +3579,30 @@ void OSD::sched_scrub()
|
||||
utime_t t = pos.first;
|
||||
pg_t pgid = pos.second;
|
||||
|
||||
if (t > max) {
|
||||
if (t > min) {
|
||||
dout(10) << " " << pgid << " at " << t
|
||||
<< " > " << max << " (" << g_conf->osd_scrub_max_interval << " seconds ago)" << dendl;
|
||||
<< " > min " << min << " (" << g_conf->osd_scrub_min_interval << " seconds ago)" << dendl;
|
||||
break;
|
||||
}
|
||||
if (t > max && !load_is_low) {
|
||||
// save ourselves some effort
|
||||
break;
|
||||
}
|
||||
|
||||
dout(10) << " on " << t << " " << pgid << dendl;
|
||||
PG *pg = _lookup_lock_pg(pgid);
|
||||
if (pg) {
|
||||
if (pg->is_active() &&
|
||||
(should || pg->scrubber.must_scrub) &&
|
||||
pg->sched_scrub()) {
|
||||
pg->unlock();
|
||||
break;
|
||||
(load_is_low ||
|
||||
t < max ||
|
||||
pg->scrubber.must_scrub)) {
|
||||
dout(10) << " " << pgid << " at " << t
|
||||
<< (pg->scrubber.must_scrub ? ", explicitly requested" : "")
|
||||
<< (t < max ? ", last_scrub > max" : "")
|
||||
<< dendl;
|
||||
if (pg->sched_scrub()) {
|
||||
pg->unlock();
|
||||
break;
|
||||
}
|
||||
}
|
||||
pg->unlock();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user