test: recovery_scrub: do not display 'repair' status on auto-repair deep-scrub
A new test, auto_repair_bluestore_tag, based on auto_repair_bluestore_basic: it enables auto-repair, starts a periodic deep-scrub, then verifies that the PG state while scrubbing is 'scrubbing+deep' and not 'scrubbing+deep+repair'.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
parent 80c756b009
commit d6eb3e3a3c
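The essence of the new check, as a minimal standalone sketch (not part of the diff; it assumes a running test cluster, jq on the PATH, and PGID set to the PG under test - PGID here is only a placeholder):

    #!/usr/bin/env bash
    # Sketch only: poll a PG's state and fail if an auto-repair deep scrub
    # ever reports the 'repair' token while scrubbing.
    PGID=${PGID:-1.0}        # placeholder PG id - adjust to the PG under test

    for ((i = 0; i < 40; i++)); do
        st=$(ceph pg "$PGID" query --format json | jq -r '.state')
        case "$st" in
            *scrubbing*repair*) echo "unexpected 'repair' in state: $st"; exit 1 ;;
            *scrubbing*)        echo "scrubbing as expected: $st";        exit 0 ;;
            *)                  sleep 0.3 ;;
        esac
    done
    echo "timed out waiting for the deep scrub to start"
    exit 1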
@@ -285,11 +285,11 @@ function auto_repair_erasure_coded() {
             --osd-scrub-min-interval=5 \
             --osd-scrub-interval-randomize-ratio=0"
     for id in $(seq 0 2) ; do
-        if [ "$allow_overwrites" = "true" ]; then
+        if [ "$allow_overwrites" = "true" ]; then
             run_osd $dir $id $ceph_osd_args || return 1
-        else
+        else
             run_osd_filestore $dir $id $ceph_osd_args || return 1
-        fi
+        fi
     done
     create_rbd_pool || return 1
     wait_for_clean || return 1
@@ -329,6 +329,127 @@ function TEST_auto_repair_erasure_coded_overwrites() {
     fi
 }
 
+# initiate a scrub, then check for the (expected) 'scrubbing' and the
+# (not expected until an error was identified) 'repair'
+# Arguments: osd#, pg, sleep time
+function initiate_and_fetch_state() {
+    local the_osd="osd.$1"
+    local pgid=$2
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+
+    set_config "osd" "$1" "osd_scrub_sleep" "$3"
+    set_config "osd" "$1" "osd_scrub_auto_repair" "true"
+
+    flush_pg_stats
+    date --rfc-3339=ns
+
+    # note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
+    env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
+    env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
+
+    # wait for 'scrubbing' to appear
+    for ((i=0; i < 40; i++)); do
+
+        st=`ceph pg $pgid query --format json | jq '.state' `
+        echo $i ") state now: " $st
+
+        case "$st" in
+            *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
+            *scrubbing* ) echo "found scrub"; return 0;;
+            *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
+            * ) echo $st;;
+        esac
+
+        if [ $((i % 5)) == 4 ] ; then
+            echo "loop --------> " $i
+            flush_pg_stats
+        else
+            sleep 0.3
+        fi
+    done
+
+    echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
+    return 1
+}
+
+function wait_end_of_scrub() { # osd# pg
+    local the_osd="osd.$1"
+    local pgid=$2
+
+    for ((i=0; i < 40; i++)); do
+        st=`ceph pg $pgid query --format json | jq '.state' `
+        echo "wait-scrub-end state now: " $st
+        [[ $st =~ (.*scrubbing.*) ]] || break
+        if [ $((i % 5)) == 4 ] ; then
+            flush_pg_stats
+        fi
+        sleep 0.3
+    done
+
+    if [[ $st =~ (.*scrubbing.*) ]]
+    then
+        # a timeout
+        return 1
+    fi
+    return 0
+}
+
+
+function TEST_auto_repair_bluestore_tag() {
+    local dir=$1
+    local poolname=testpool
+
+    # Launch a cluster with 3 seconds scrub interval
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-auto-repair=true \
+            --osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 2) ; do
+        run_osd $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put an object
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+    # Remove the object from one shard physically
+    # Restarted osd get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    echo "Affected PG " $pgid " w/ primary " $primary
+    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+    initiate_and_fetch_state $primary $pgid "3.0"
+    r=$?
+    echo "initiate_and_fetch_state ret: " $r
+    set_config "osd" "$1" "osd_scrub_sleep" "0"
+    if [ $r -ne 0 ]; then
+        return 1
+    fi
+
+    wait_end_of_scrub "$primary" "$pgid" || return 1
+    ceph pg dump pgs
+
+    # Verify - the file should be back
+    # Restarted osd get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+    grep scrub_finish $dir/osd.${primary}.log
+
+    # Tear down
+    teardown $dir || return 1
+}
+
+
 function TEST_auto_repair_bluestore_basic() {
     local dir=$1
     local poolname=testpool
@@ -338,7 +459,7 @@ function TEST_auto_repair_bluestore_basic() {
     run_mon $dir a || return 1
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd-scrub-auto-repair=true \
-            --osd_deep_scrub_randomize_ratio=0 \
+            --osd_deep_scrub_randomize_ratio=0 \
             --osd-scrub-interval-randomize-ratio=0"
     for id in $(seq 0 2) ; do
         run_osd $dir $id $ceph_osd_args || return 1
@@ -387,7 +508,7 @@ function TEST_auto_repair_bluestore_scrub() {
     run_mon $dir a || return 1
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd-scrub-auto-repair=true \
-            --osd_deep_scrub_randomize_ratio=0 \
+            --osd_deep_scrub_randomize_ratio=0 \
             --osd-scrub-interval-randomize-ratio=0"
     for id in $(seq 0 2) ; do
         run_osd $dir $id $ceph_osd_args || return 1
@@ -442,7 +563,7 @@ function TEST_auto_repair_bluestore_failed() {
     run_mon $dir a || return 1
     run_mgr $dir x || return 1
    local ceph_osd_args="--osd-scrub-auto-repair=true \
-            --osd_deep_scrub_randomize_ratio=0 \
+            --osd_deep_scrub_randomize_ratio=0 \
             --osd-scrub-interval-randomize-ratio=0"
     for id in $(seq 0 2) ; do
         run_osd $dir $id $ceph_osd_args || return 1
@@ -512,7 +633,7 @@ function TEST_auto_repair_bluestore_failed_norecov() {
     run_mon $dir a || return 1
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd-scrub-auto-repair=true \
-            --osd_deep_scrub_randomize_ratio=0 \
+            --osd_deep_scrub_randomize_ratio=0 \
             --osd-scrub-interval-randomize-ratio=0"
     for id in $(seq 0 2) ; do
        run_osd $dir $id $ceph_osd_args || return 1
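For reference, the "regular" (periodic-style) deep scrub that initiate_and_fetch_state relies on can also be kicked off by hand through the primary OSD's admin socket, outside the test harness. A hedged sketch only; OSD_ID, PGID, and the asok path are placeholders (in standalone test runs the asok lives under the test directory):

    # Sketch only: trigger the periodic-style deep scrub on a PG via the primary
    # OSD's admin socket, then read back the PG state. All values are placeholders.
    OSD_ID=0
    PGID=1.0
    ASOK=/var/run/ceph/ceph-osd.${OSD_ID}.asok   # adjust to the test dir for standalone runs

    env CEPH_ARGS= ceph --format json daemon "$ASOK" deep_scrub "$PGID"
    env CEPH_ARGS= ceph --format json daemon "$ASOK" scrub "$PGID"
    ceph pg "$PGID" query --format json | jq -r '.state'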