mirror of
https://github.com/ceph/ceph
synced 2025-01-29 22:43:40 +00:00
Merge pull request #29342 from Jeegn-Chen/wip-scrub-extended-sleep
osd: support osd_scrub_extended_sleep
Reviewed-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
commit
f13c7c83d9
@ -187,6 +187,49 @@ function TEST_interval_changes() {
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
||||
#
# Check that a scrub started outside the configured scrub hours is slowed
# down by osd_scrub_extended_sleep.
#
# Every OSD is started with osd_scrub_sleep=0 and osd_scrub_extended_sleep=10,
# and with a scrub window running from the hour two hours ago to the hour one
# hour ago, so the current time is meant to fall outside that window. A scrub
# is then triggered on the single PG of the pool, and the test fails if the
# PG's last scrub stamp advances during the following 3 seconds.
#
# @param dir  test directory handed to setup, run_mon, run_mgr, run_osd
#             and teardown
# @return 0 on success, 1 on error
#
function TEST_scrub_extented_sleep() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local osd
    local i

    # NOTE(review): TESTDATA is not used anywhere in this function; it is kept
    # because it is a global and code outside this function may read it.
    TESTDATA="testdata.$$"

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=3 || return 1
    run_mgr $dir x || return 1

    # date prints zero-padded hours ("07"); the leading zero is stripped
    # before the value is passed on as an option.
    local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
    local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//')
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $osd --osd_scrub_sleep=0 \
                          --osd_scrub_extended_sleep=10 \
                          --bluestore_cache_autotune=false \
                          --osd_scrub_begin_hour=$scrub_begin_hour \
                          --osd_scrub_end_hour=$scrub_end_hour || return 1
    done

    # Create a pool with a single pg
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    # Trigger a scrub on a PG
    local pgid=$(get_pg $poolname SOMETHING)
    local primary=$(get_primary $poolname SOMETHING)
    local last_scrub=$(get_last_scrub_stamp $pgid)
    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid || return 1

    # Due to the long delay, the scrub should not be done within 3 seconds
    for ((i=0; i < 3; i++)); do
        # The stamps are compared as strings; a greater stamp means the scrub
        # has already completed.
        if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then
            echo "scrub of pg $pgid finished despite osd_scrub_extended_sleep" >&2
            return 1
        fi
        sleep 1
    done

    teardown $dir || return 1
}
|
||||
|
||||
main osd-scrub-test "$@"
|
||||
|
||||
# Local Variables:
|
||||
|
@ -720,6 +720,7 @@ OPTION(osd_scrub_backoff_ratio, OPT_DOUBLE) // the probability to back off the
|
||||
OPTION(osd_scrub_chunk_min, OPT_INT)
|
||||
OPTION(osd_scrub_chunk_max, OPT_INT)
|
||||
OPTION(osd_scrub_sleep, OPT_FLOAT) // sleep between [deep]scrub ops
|
||||
OPTION(osd_scrub_extended_sleep, OPT_FLOAT) // sleep between [deep]scrub ops when the scrub is not a must_scrub and the current time is not permitted for scrubbing; the larger of this and osd_scrub_sleep is used
|
||||
OPTION(osd_scrub_auto_repair, OPT_BOOL) // whether auto-repair inconsistencies upon deep-scrubbing
|
||||
OPTION(osd_scrub_auto_repair_num_errors, OPT_U32) // only auto-repair when number of errors is below this threshold
|
||||
OPTION(osd_deep_scrub_interval, OPT_FLOAT) // once a week
|
||||
|
@ -3523,6 +3523,14 @@ std::vector<Option> get_global_options() {
|
||||
.set_default(0)
|
||||
.set_description("Duration to inject a delay during scrubbing"),
|
||||
|
||||
Option("osd_scrub_extended_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
|
||||
.set_default(0)
|
||||
.set_description("Duration to inject a delay during scrubbing out of scrubbing hours")
|
||||
.add_see_also("osd_scrub_begin_hour")
|
||||
.add_see_also("osd_scrub_end_hour")
|
||||
.add_see_also("osd_scrub_begin_week_day")
|
||||
.add_see_also("osd_scrub_end_week_day"),
|
||||
|
||||
Option("osd_scrub_auto_repair", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
|
||||
.set_default(false)
|
||||
.set_description("Automatically repair damaged objects detected during scrub"),
|
||||
|
@ -7246,6 +7246,20 @@ bool OSDService::ScrubJob::ScrubJob::operator<(const OSDService::ScrubJob& rhs)
|
||||
return pgid < rhs.pgid;
|
||||
}
|
||||
|
||||
double OSD::scrub_sleep_time(bool must_scrub)
|
||||
{
|
||||
if (must_scrub) {
|
||||
return cct->_conf->osd_scrub_sleep;
|
||||
}
|
||||
utime_t now = ceph_clock_now();
|
||||
if (scrub_time_permit(now)) {
|
||||
return cct->_conf->osd_scrub_sleep;
|
||||
}
|
||||
double normal_sleep = cct->_conf->osd_scrub_sleep;
|
||||
double extended_sleep = cct->_conf->osd_scrub_extended_sleep;
|
||||
return std::max(extended_sleep, normal_sleep);
|
||||
}
|
||||
|
||||
bool OSD::scrub_time_permit(utime_t now)
|
||||
{
|
||||
struct tm bdt;
|
||||
|
@ -1890,6 +1890,8 @@ protected:
|
||||
return service.get_tid();
|
||||
}
|
||||
|
||||
double scrub_sleep_time(bool must_scrub);
|
||||
|
||||
// -- generic pg peering --
|
||||
PeeringCtx create_context();
|
||||
void dispatch_context(PeeringCtx &ctx, PG *pg, OSDMapRef curmap,
|
||||
|
@ -2441,7 +2441,9 @@ void PG::replica_scrub(
|
||||
*/
|
||||
void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
|
||||
{
|
||||
if (cct->_conf->osd_scrub_sleep > 0 &&
|
||||
OSDService *osds = osd;
|
||||
double scrub_sleep = osds->osd->scrub_sleep_time(scrubber.must_scrub);
|
||||
if (scrub_sleep > 0 &&
|
||||
(scrubber.state == PG::Scrubber::NEW_CHUNK ||
|
||||
scrubber.state == PG::Scrubber::INACTIVE) &&
|
||||
scrubber.needs_sleep) {
|
||||
@ -2449,7 +2451,6 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
|
||||
dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
|
||||
|
||||
// Do an async sleep so we don't block the op queue
|
||||
OSDService *osds = osd;
|
||||
spg_t pgid = get_pgid();
|
||||
int state = scrubber.state;
|
||||
auto scrub_requeue_callback =
|
||||
@ -2474,7 +2475,7 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
|
||||
pg->unlock();
|
||||
});
|
||||
std::lock_guard l(osd->sleep_lock);
|
||||
osd->sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
|
||||
osd->sleep_timer.add_event_after(scrub_sleep,
|
||||
scrub_requeue_callback);
|
||||
scrubber.sleeping = true;
|
||||
scrubber.sleep_start = ceph_clock_now();
|
||||
|
Loading…
Reference in New Issue
Block a user