tests/standalone: fix scrub-related tests following command changes
Use ceph tell $pgid [deep-]scrub to initiate an 'operator initiated' scrub, and ceph tell $pgid schedule-[deep-]scrub to cause a 'periodic' scrub to be scheduled.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
parent 653ae9ebc1
commit fbb7d73e6f
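As a quick reference, the two command families the tests are moved to (a minimal sketch; the PG id 1.0 is only illustrative):

    # operator-initiated scrubs (not subject to the no-scrub flags etc.):
    ceph tell 1.0 scrub
    ceph tell 1.0 deep-scrub

    # ask for a 'periodic' scrub to be scheduled instead:
    ceph tell 1.0 schedule-scrub
    ceph tell 1.0 schedule-deep-scrub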
@@ -1865,6 +1865,9 @@ function test_repair() {
# **get_last_scrub_stamp** function reports a timestamp different from
# the one stored before starting the scrub.
#
# The scrub is initiated using the "operator initiated" method, and
# the scrub triggered is not subject to no-scrub flags etc.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
#
@@ -1899,6 +1902,48 @@ function test_pg_scrub() {

#######################################################################

##
# Trigger a "scheduled" scrub on **pgid** (by manually modifying the relevant
# last-scrub stamp) and wait until it completes. The pg_scrub
# function will fail if scrubbing does not complete within $TIMEOUT
# seconds. The pg_scrub is complete whenever the
# **get_last_scrub_stamp** function reports a timestamp different from
# the one stored before starting the scrub.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
#
function pg_schedule_scrub() {
    local pgid=$1
    local last_scrub=$(get_last_scrub_stamp $pgid)
    ceph pg scrub $pgid
    wait_for_scrub $pgid "$last_scrub"
}

function pg_schedule_deep_scrub() {
    local pgid=$1
    local last_scrub=$(get_last_scrub_stamp $pgid last_deep_scrub_stamp)
    ceph pg deep-scrub $pgid
    wait_for_scrub $pgid "$last_scrub" last_deep_scrub_stamp
}

function test_pg_schedule_scrub() {
    local dir=$1

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    create_rbd_pool || return 1
    wait_for_clean || return 1
    pg_schedule_scrub 1.0 || return 1
    kill_daemons $dir KILL osd || return 1
    ! TIMEOUT=1 pg_scrub 1.0 || return 1
    teardown $dir || return 1
}

#######################################################################

##
# Run the *command* and expect it to fail (i.e. return a non zero status).
# The output (stderr and stdout) is stored in a temporary file in *dir*
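The new helpers are meant to be used like the existing pg_scrub wrapper; a hypothetical call sequence (pool and PG id illustrative, following test_pg_schedule_scrub above):

    wait_for_clean || return 1
    pg_schedule_scrub 1.0 || return 1         # schedule a shallow scrub and wait for it
    pg_schedule_deep_scrub 1.0 || return 1    # same for a deep scrub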
@@ -77,7 +77,7 @@ function TEST_truncated_sna_record() {
(( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname

# scrub the PG
ceph pg $pgid deep_scrub || return 1
ceph pg $pgid deep-scrub || return 1

# we aren't just waiting for the scrub to terminate, but also for the
# logs to be published
@@ -149,7 +149,7 @@ function TEST_truncated_sna_record() {
local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'`
ceph pg dump pgs
sleep 2
ceph pg $pgid deep_scrub || return 1
ceph pg $pgid deep-scrub || return 1
sleep 5
ceph pg dump pgs
(( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
@@ -161,7 +161,7 @@ function TEST_truncated_sna_record() {
echo "prev count: $prev_err_cnt"

# scrub again. No errors expected this time
ceph pg $pgid deep_scrub || return 1
ceph pg $pgid deep-scrub || return 1
sleep 5
ceph pg dump pgs
(( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log
@ -16,6 +16,7 @@
|
||||
#
|
||||
set -x
|
||||
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
|
||||
source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
|
||||
|
||||
if [ `uname` = FreeBSD ]; then
|
||||
# erasure coding overwrites are only tested on Bluestore
|
||||
@ -160,7 +161,7 @@ function scrub_and_not_schedule() {
|
||||
#
|
||||
local pg=$(get_pg $poolname SOMETHING)
|
||||
local last_scrub=$(get_last_scrub_stamp $pg)
|
||||
ceph pg scrub $pg
|
||||
ceph tell $pg schedule-scrub
|
||||
|
||||
#
|
||||
# 2) Assure the scrub is not scheduled
|
||||
@ -329,8 +330,7 @@ function initiate_and_fetch_state() {
|
||||
date --rfc-3339=ns
|
||||
|
||||
# note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
|
||||
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
|
||||
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
|
||||
env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) schedule-deep-scrub "$pgid"
|
||||
|
||||
# wait for 'scrubbing' to appear
|
||||
for ((i=0; i < 80; i++)); do
|
||||
@ -436,19 +436,17 @@ function TEST_auto_repair_bluestore_tag() {
|
||||
|
||||
function TEST_auto_repair_bluestore_basic() {
|
||||
local dir=$1
|
||||
local poolname=testpool
|
||||
local -A cluster_conf=(
|
||||
['osds_num']="3"
|
||||
['pgs_in_pool']="1"
|
||||
['pool_name']="testpool"
|
||||
['extras']=" --osd_scrub_auto_repair=true"
|
||||
)
|
||||
local extr_dbg=3
|
||||
standard_scrub_cluster $dir cluster_conf
|
||||
local poolid=${cluster_conf['pool_id']}
|
||||
local poolname=${cluster_conf['pool_name']}
|
||||
|
||||
# Launch a cluster with 5 seconds scrub interval
|
||||
run_mon $dir a || return 1
|
||||
run_mgr $dir x || return 1
|
||||
local ceph_osd_args="--osd-scrub-auto-repair=true \
|
||||
--osd_deep_scrub_randomize_ratio=0 \
|
||||
--osd-scrub-interval-randomize-ratio=0"
|
||||
for id in $(seq 0 2) ; do
|
||||
run_osd $dir $id $ceph_osd_args || return 1
|
||||
done
|
||||
|
||||
create_pool $poolname 1 1 || return 1
|
||||
ceph osd pool set $poolname size 2
|
||||
wait_for_clean || return 1
|
||||
|
||||
@ -460,12 +458,14 @@ function TEST_auto_repair_bluestore_basic() {
|
||||
# Remove the object from one shard physically
|
||||
# Restarted osd get $ceph_osd_args passed
|
||||
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
|
||||
ceph tell osd.* config set osd_scrub_auto_repair true
|
||||
|
||||
local pgid=$(get_pg $poolname SOMETHING)
|
||||
local primary=$(get_primary $poolname SOMETHING)
|
||||
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
|
||||
ceph tell $pgid deep_scrub
|
||||
ceph tell $pgid scrub
|
||||
# note: the scrub initiated must be a "regular" (periodic) deep scrub - not an
|
||||
# operator-initiated one (as there's no 'auto-repair' for the latter)
|
||||
ceph tell $pgid schedule-deep-scrub
|
||||
|
||||
# Wait for auto repair
|
||||
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
|
||||
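Pulling the auto-repair steps above into one place, a minimal sketch (PG id illustrative, and assuming a replica shard was damaged beforehand as in the test):

    ceph tell osd.* config set osd_scrub_auto_repair true
    last_scrub_stamp=$(get_last_scrub_stamp 1.0)
    # must be a periodic deep scrub - operator-initiated scrubs do not auto-repair
    ceph tell 1.0 schedule-deep-scrub
    wait_for_scrub 1.0 "$last_scrub_stamp" || return 1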
@@ -510,12 +510,16 @@ function TEST_auto_repair_bluestore_scrub() {
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
ceph tell $pgid scrub
ceph tell $pgid schedule-scrub

# Wait for scrub -> auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
ceph pg dump pgs
# Actually this causes 2 scrubs, so we better wait a little longer
sleep 2
ceph pg dump pgs
sleep 2
ceph pg dump pgs
sleep 5
wait_for_clean || return 1
ceph pg dump pgs
@@ -567,8 +571,7 @@ function TEST_auto_repair_bluestore_failed() {
local pgid=$(get_pg $poolname obj1)
local primary=$(get_primary $poolname obj1)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
ceph tell $pgid deep_scrub
ceph tell $pgid scrub
ceph tell $pgid schedule-deep-scrub

# Wait for auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
@@ -631,12 +634,12 @@ function TEST_auto_repair_bluestore_failed_norecov() {
# obj2 can't be repaired
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
ceph tell osd.* config set osd_scrub_auto_repair true

local pgid=$(get_pg $poolname obj1)
local primary=$(get_primary $poolname obj1)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
ceph tell $pgid deep_scrub
ceph tell $pgid scrub
ceph tell $pgid schedule-deep-scrub

# Wait for auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
@@ -5793,7 +5796,7 @@ function TEST_periodic_scrub_replicated() {
flush_pg_stats
local last_scrub=$(get_last_scrub_stamp $pg)
# Fake a schedule scrub
ceph tell $pg scrub || return 1
ceph tell $pg schedule-scrub || return 1
# Wait for schedule regular scrub
wait_for_scrub $pg "$last_scrub"

@@ -5811,7 +5814,7 @@ function TEST_periodic_scrub_replicated() {
sleep 5

# Fake a schedule scrub
ceph tell $pg scrub || return 1
ceph tell $pg schedule-scrub || return 1
# Wait for schedule regular scrub
# to notice scrub and skip it
local found=false
@@ -5828,7 +5831,7 @@ function TEST_periodic_scrub_replicated() {

flush_pg_stats
# Request a regular scrub and it will be done
pg_scrub $pg
pg_schedule_scrub $pg
grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1

# deep-scrub error is no longer present
@@ -5885,7 +5888,7 @@ function TEST_scrub_warning() {
else
overdue_seconds=$conf_overdue_seconds
fi
ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1
ceph tell ${i}.0 schedule-scrub $(expr ${overdue_seconds} + ${i}00) || return 1
done
# Fake schedule deep scrubs
for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs))
@@ -5896,7 +5899,7 @@ function TEST_scrub_warning() {
else
overdue_seconds=$conf_overdue_seconds
fi
ceph tell ${i}.0 deep_scrub $(expr ${overdue_seconds} + ${i}00) || return 1
ceph tell ${i}.0 schedule-deep-scrub $(expr ${overdue_seconds} + ${i}00) || return 1
done
flush_pg_stats

@@ -5905,7 +5908,7 @@ function TEST_scrub_warning() {
ceph health | grep -q " pgs not deep-scrubbed in time" || return 1
ceph health | grep -q " pgs not scrubbed in time" || return 1

# note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub
# note that the 'ceph tell pg deep-scrub' command now also sets the regular scrub
# time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for
# regular scrubbing. For now, we'll allow both responses.
COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l)
@@ -6222,15 +6225,15 @@ function TEST_request_scrub_priority() {
otherpgs="${otherpgs}${opg} "
local other_last_scrub=$(get_last_scrub_stamp $pg)
# Fake a schedule scrub
ceph tell $opg scrub $opg || return 1
ceph tell $opg schedule-scrub $opg || return 1
done

sleep 15
flush_pg_stats

# Request a regular scrub and it will be done
# Force a shallow scrub and it will be done
local last_scrub=$(get_last_scrub_stamp $pg)
ceph pg scrub $pg
ceph tell $pg scrub || return 1

ceph osd unset noscrub || return 1
ceph osd unset nodeep-scrub || return 1
@@ -231,11 +231,11 @@ function TEST_scrub_extended_sleep() {
create_pool $poolname 1 1
wait_for_clean || return 1

# Trigger a scrub on a PG
# Trigger a periodic scrub on a PG (no 'extended sleep' for h.p. scrubs)
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
local last_scrub=$(get_last_scrub_stamp $pgid)
ceph tell $pgid scrub || return 1
ceph tell $pgid schedule-scrub || return 1

# Allow scrub to start extended sleep
PASSED="false"
@@ -330,12 +330,7 @@ function _scrub_abort() {
local primary=$(get_primary $poolname obj1)
local pgid="${poolid}.0"

ceph tell $pgid $type || return 1
# deep-scrub won't start without scrub noticing
if [ "$type" = "deep_scrub" ];
then
ceph tell $pgid scrub || return 1
fi
ceph tell $pgid schedule-$type || return 1

# Wait for scrubbing to start
set -o pipefail
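Since _scrub_abort now composes the command from its $type argument, the single schedule-$type line above covers both variants; a sketch of the resulting commands (the shallow value 'scrub' is an assumption, only the deep-scrub call site appears in this diff):

    _scrub_abort $dir scrub        # runs: ceph tell ${poolid}.0 schedule-scrub
    _scrub_abort $dir deep-scrub   # runs: ceph tell ${poolid}.0 schedule-deep-scrub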
@@ -359,7 +354,7 @@ function _scrub_abort() {
fi

ceph osd set $stopscrub
if [ "$type" = "deep_scrub" ];
if [ "$type" = "deep-scrub" ];
then
ceph osd set noscrub
fi
@@ -390,7 +385,7 @@ function _scrub_abort() {
ceph config set osd "osd_scrub_sleep" "0.1"

ceph osd unset $stopscrub
if [ "$type" = "deep_scrub" ];
if [ "$type" = "deep-scrub" ];
then
ceph osd unset noscrub
fi
@@ -405,7 +400,7 @@ function TEST_scrub_abort() {

function TEST_deep_scrub_abort() {
local dir=$1
_scrub_abort $dir deep_scrub
_scrub_abort $dir deep-scrub
}

function TEST_scrub_permit_time() {
@@ -441,7 +436,7 @@ function TEST_scrub_permit_time() {
# current time to set last_scrub_stamp, it sets the deadline
# back by osd_max_interval which would cause the time permit checking
# to be skipped. Set back 1 day, the default scrub_min_interval.
ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1
ceph tell $pgid schedule-scrub $(( 24 * 60 * 60 )) || return 1

# Scrub should not run
for ((i=0; i < 30; i++)); do
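The numeric argument used above appears to set the last-scrub stamp back by that many seconds: 24 * 60 * 60 = 86400, i.e. one day, matching the default scrub_min_interval mentioned in the comment, so the PG looks due for a periodic scrub while the time-permit check stays in force:

    ceph tell 1.0 schedule-scrub $(( 24 * 60 * 60 ))    # PG id illustrative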
@@ -495,7 +490,7 @@ function TEST_just_deep_scrubs() {
local dbg_counter_at_start=${sched_data['query_scrub_seq']}
echo "test counter @ start: $dbg_counter_at_start"

ceph pg $pgid deep_scrub
ceph tell $pgid schedule-deep-scrub

sleep 5 # 5s is the 'pg dump' interval
declare -A sc_data_2
@@ -574,8 +569,7 @@ function TEST_dump_scrub_schedule() {

saved_last_stamp=${sched_data['query_last_stamp']}
ceph tell osd.* config set osd_scrub_sleep "0"
ceph pg deep-scrub $pgid
ceph pg scrub $pgid
ceph tell $pgid deep-scrub

# wait for the 'last duration' entries to change. Note that the 'dump' one will need
# up to 5 seconds to sync
@@ -602,7 +596,7 @@ function TEST_dump_scrub_schedule() {
sleep 2
saved_last_stamp=${sched_data['query_last_stamp']}

ceph pg $pgid scrub
ceph tell $pgid schedule-scrub
sleep 1
sched_data=()
declare -A expct_scrub_peri_sched=( ['query_is_future']="false" )
@@ -1310,7 +1310,7 @@ def main():
if final_e:
raise final_e

# Block until command completion (currently scrub and deep_scrub only)
# Block until command completion (currently scrub and deep scrub only)
if block:
wait(childargs, waitdata)