Merge pull request #29342 from Jeegn-Chen/wip-scrub-extended-sleep

osd: support osd_scrub_extended_sleep

Reviewed-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
Kefu Chai 2019-08-13 09:09:52 +08:00 committed by GitHub
commit f13c7c83d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 72 additions and 3 deletions

View File

@ -187,6 +187,49 @@ function TEST_interval_changes() {
teardown $dir || return 1
}
# Check that a scrub started outside the configured scrub hours is slowed
# down by osd_scrub_extended_sleep.
#
# Three OSDs are started with osd_scrub_sleep=0, osd_scrub_extended_sleep=10
# and a scrub window running from two hours ago to one hour ago. A scrub is
# then triggered through the primary's admin socket, and the test fails if
# the PG's last scrub stamp moves forward during the following ~3 seconds.
#
# $1 - test directory passed to setup / run_* / teardown
# Returns 1 as soon as any step fails or the scrub finishes too early.
function TEST_scrub_extented_sleep() {
    local dir=$1
    local poolname=test
    local OSDS=3

    TESTDATA="testdata.$$"

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1

    # Hours for a window that lies entirely in the past; sed drops a leading
    # zero so that e.g. "07" is passed as "7".
    local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
    local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')

    # Loop counters are kept local so they do not leak into the caller.
    local osd
    for ((osd = 0; osd < OSDS; osd++)); do
        run_osd $dir $osd --osd_scrub_sleep=0 \
                --osd_scrub_extended_sleep=10 \
                --bluestore_cache_autotune=false \
                --osd_scrub_begin_hour=$scrub_begin_hour \
                --osd_scrub_end_hour=$scrub_end_hour || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    # Trigger a scrub on a PG
    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub=$(get_last_scrub_stamp $pgid)
    # CEPH_ARGS is emptied for this one admin-socket command only.
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid || return 1

    # Due to the long delay, the scrub should not be done within 3 seconds
    local i
    for ((i=0; i < 3; i++)); do
        # String comparison of the stamps: a later stamp means the scrub
        # already completed, i.e. the extended sleep was not applied.
        if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
            return 1
        fi
        sleep 1
    done

    teardown $dir || return 1
}
main osd-scrub-test "$@"
# Local Variables:

View File

@ -720,6 +720,7 @@ OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE) // the probability to back off the
OPTION(osd_scrub_chunk_min, OPT_INT)
OPTION(osd_scrub_chunk_max, OPT_INT)
OPTION(osd_scrub_sleep, OPT_FLOAT) // sleep between [deep]scrub ops
OPTION(osd_scrub_extended_sleep, OPT_FLOAT) // sleep between [deep]scrub ops when scrubbing outside the configured scrub hours
OPTION(osd_scrub_auto_repair, OPT_BOOL) // whether auto-repair inconsistencies upon deep-scrubbing
OPTION(osd_scrub_auto_repair_num_errors, OPT_U32) // only auto-repair when number of errors is below this threshold
OPTION(osd_deep_scrub_interval, OPT_FLOAT) // once a week

View File

@ -3523,6 +3523,14 @@ std::vector<Option> get_global_options() {
.set_default(0)
.set_description("Duration to inject a delay during scrubbing"),
Option("osd_scrub_extended_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(0)
.set_description("Duration to inject a delay during scrubbing out of scrubbing hours")
.add_see_also("osd_scrub_begin_hour")
.add_see_also("osd_scrub_end_hour")
.add_see_also("osd_scrub_begin_week_day")
.add_see_also("osd_scrub_end_week_day"),
Option("osd_scrub_auto_repair", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description("Automatically repair damaged objects detected during scrub"),

View File

@ -7246,6 +7246,20 @@ bool OSDService::ScrubJob::ScrubJob::operator<(const OSDService::ScrubJob& rhs)
return pgid < rhs.pgid;
}
double OSD::scrub_sleep_time(bool must_scrub)
{
if (must_scrub) {
return cct->_conf->osd_scrub_sleep;
}
utime_t now = ceph_clock_now();
if (scrub_time_permit(now)) {
return cct->_conf->osd_scrub_sleep;
}
double normal_sleep = cct->_conf->osd_scrub_sleep;
double extended_sleep = cct->_conf->osd_scrub_extended_sleep;
return std::max(extended_sleep, normal_sleep);
}
bool OSD::scrub_time_permit(utime_t now)
{
struct tm bdt;

View File

@ -1890,6 +1890,8 @@ protected:
return service.get_tid();
}
// Sleep duration to use for a scrub: osd_scrub_sleep when must_scrub is set
// or scrub_time_permit() accepts the current time, otherwise the larger of
// osd_scrub_extended_sleep and osd_scrub_sleep.
double scrub_sleep_time(bool must_scrub);
// -- generic pg peering --
PeeringCtx create_context();
void dispatch_context(PeeringCtx &ctx, PG *pg, OSDMapRef curmap,

View File

@ -2441,7 +2441,9 @@ void PG::replica_scrub(
*/
void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
{
if (cct->_conf->osd_scrub_sleep > 0 &&
OSDService *osds = osd;
double scrub_sleep = osds->osd->scrub_sleep_time(scrubber.must_scrub);
if (scrub_sleep > 0 &&
(scrubber.state == PG::Scrubber::NEW_CHUNK ||
scrubber.state == PG::Scrubber::INACTIVE) &&
scrubber.needs_sleep) {
@ -2449,7 +2451,6 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
// Do an async sleep so we don't block the op queue
OSDService *osds = osd;
spg_t pgid = get_pgid();
int state = scrubber.state;
auto scrub_requeue_callback =
@ -2474,7 +2475,7 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
pg->unlock();
});
std::lock_guard l(osd->sleep_lock);
osd->sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
osd->sleep_timer.add_event_after(scrub_sleep,
scrub_requeue_callback);
scrubber.sleeping = true;
scrubber.sleep_start = ceph_clock_now();