diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 0582fa1417b..74cf1485a38 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -107,6 +107,86 @@ function TEST_corrupt_and_repair_replicated() { teardown $dir || return 1 } +# +# Allow repair to be scheduled while recovery is still in progress on the same OSD +# +function TEST_allow_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + + teardown $dir || return 1 +} + +# +# Skip non-repair scrub correctly during recovery +# +function TEST_skip_non_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + + teardown $dir || return 1 +} + +function scrub_and_not_schedule() { + local dir=$1 + local poolname=$2 + local osd=$3 + 
+ # + # 1) Start a non-repair scrub + # + local pg=$(get_pg $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + # + # 2) Ensure the scrub is not scheduled + # + for ((i=0; i < 3; i++)); do + if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + # + # 3) Access to the file must be OK + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + function corrupt_and_repair_two() { local dir=$1 local poolname=$2 diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index bad93ac316e..5d315341ecf 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -699,6 +699,7 @@ OPTION(osd_max_push_cost, OPT_U64) // max size of push message OPTION(osd_max_push_objects, OPT_U64) // max objects in single push op OPTION(osd_max_scrubs, OPT_INT) OPTION(osd_scrub_during_recovery, OPT_BOOL) // Allow new scrubs to start while recovery is active on the OSD +OPTION(osd_repair_during_recovery, OPT_BOOL) // Allow newly requested repairs to start while recovery is active on the OSD OPTION(osd_scrub_begin_hour, OPT_INT) OPTION(osd_scrub_end_hour, OPT_INT) OPTION(osd_scrub_begin_week_day, OPT_INT) @@ -767,6 +768,7 @@ OPTION(osd_debug_random_push_read_error, OPT_DOUBLE) OPTION(osd_debug_verify_cached_snaps, OPT_BOOL) OPTION(osd_debug_deep_scrub_sleep, OPT_FLOAT) OPTION(osd_debug_no_acting_change, OPT_BOOL) +OPTION(osd_debug_pretend_recovery_active, OPT_BOOL) OPTION(osd_enable_op_tracker, OPT_BOOL) // enable/disable OSD op tracking OPTION(osd_num_op_tracker_shard, OPT_U32) // The number of shards for holding the ops OPTION(osd_op_history_size, OPT_U32) // Max number of completed ops to track diff --git a/src/common/options.cc b/src/common/options.cc index 47cf024462d..a5d1f59fb70 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ 
-3406,6 +3406,10 @@ std::vector