The following tests in the test files listed below use the "osd_scrub_sleep" option to introduce delays during scrubbing, which helps determine scrubbing states, validate reservations during scrubbing, and so on. This works with the "wpq" scheduler. But when the "mclock_scheduler" is enabled, "osd_scrub_sleep" is disabled and overridden to 0, in order to delegate the scheduling of background scrubs to the "mclock_scheduler" based on the configured QoS parameters. As a result, the checks that verify scrub states, reservations, etc. fail, because scrubs complete so quickly that the window in which to observe them is very short. This affects a small subset of scrub tests:

1. osd-scrub-dump.sh -> TEST_recover_unexpected()
2. osd-scrub-repair.sh -> TEST_auto_repair_bluestore_tag()
3. osd-scrub-test.sh -> TEST_scrub_abort(), TEST_deep_scrub_abort()

For these tests only, until there's a reliable way to query scrub states with "--osd-scrub-sleep" set to 0, the "osd_op_queue" config option is set to "wpq".

Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
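For reference, the override that these tests inject through CEPH_ARGS can also be applied to a running cluster with the standard "ceph config" commands. A minimal sketch (osd.0 is just an example daemon id, and a change to "osd_op_queue" only takes effect after an OSD restart):

    ceph config set osd osd_op_queue wpq    # pin the scheduler so osd_scrub_sleep stays effective
    ceph config get osd.0 osd_op_queue      # the configured scheduler
    ceph config show osd.0 | grep -E 'osd_op_queue|osd_scrub_sleep'    # values the daemon is actually running with
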
#!/usr/bin/env bash
#
# Copyright (C) 2019 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
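
# Test tunables:
#   MAX_SCRUBS  - osd_max_scrubs, the per-OSD cap on concurrent scrub reservations
#   SCRUB_SLEEP - osd_scrub_sleep, the delay injected between scrub chunks; it
#                 keeps scrubs running long enough for their reservations to be
#                 observed by the checks below
#   POOL_SIZE   - replica count of the test pools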
MAX_SCRUBS=4
SCRUB_SLEEP=2
POOL_SIZE=3

function run() {
    local dir=$1
    shift
    local CHUNK_MAX=5

    export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS "
    CEPH_ARGS+="--osd_scrub_chunk_max=$CHUNK_MAX "
    CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP "
    CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE "
    # Set the scheduler to "wpq" until there's a reliable way to query scrub
    # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides
    # the scrub sleep to 0, and as a result the checks in this test fail.
    CEPH_ARGS+="--osd_op_queue=wpq "

    export -n CEPH_CLI_TEST_DUP_COMMAND
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}
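
# TEST_recover_unexpected:
# Create several pools of small objects, queue a manual scrub of every PG
# while the noscrub flag is set, then clear the flag and repeatedly sample
# each OSD's dump_scrub_reservations output, asserting that the local plus
# remote scrub reservations never exceed osd_max_scrubs.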
function TEST_recover_unexpected() {
    local dir=$1
    shift
    local OSDS=6
    local PGS=16
    local POOLS=3
    local OBJS=1000

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for o in $(seq 0 $(expr $OSDS - 1))
    do
        run_osd $dir $o
    done

    for i in $(seq 1 $POOLS)
    do
        create_pool test$i $PGS $PGS
    done

    wait_for_clean || return 1
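
    # Write OBJS small objects into every pool so that each PG has data to scrub.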
    dd if=/dev/urandom of=datafile bs=4k count=2
    for i in $(seq 1 $POOLS)
    do
        for j in $(seq 1 $OBJS)
        do
            rados -p test$i put obj$j datafile
        done
    done
    rm datafile
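
    # Disable scrubbing so that the per-PG scrub requests issued below only
    # queue up instead of running immediately.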
    ceph osd set noscrub
    ceph osd set nodeep-scrub
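
    # Request a scrub of every PG; with the noscrub flag set these just queue.
    # ("primary" is looked up per PG but not otherwise used.)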
    for qpg in $(ceph pg dump pgs --format=json-pretty | jq '.pg_stats[].pgid')
    do
        primary=$(ceph pg dump pgs --format=json | jq ".pg_stats[] | select(.pgid == $qpg) | .acting_primary")
        eval pg=$qpg # strip quotes around qpg
        ceph tell $pg scrub
    done

    ceph pg dump pgs

    max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs')
    if [ $max != $MAX_SCRUBS ]; then
        echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations"
        return 1
    fi
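
    # Clearing noscrub releases all of the queued scrubs at once.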
    ceph osd unset noscrub
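
    # Wait up to ~300 seconds for at least one PG to report a scrubbing state.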
    ok=false
    for i in $(seq 0 300)
    do
        ceph pg dump pgs
        if ceph pg dump pgs | grep scrubbing; then
            ok=true
            break
        fi
        sleep 1
    done
    if test $ok = "false"; then
        echo "ERROR: Test set-up failed: no scrubbing seen"
        return 1
    fi
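
    # Repeatedly sample dump_scrub_reservations on every OSD, summing the local
    # and remote reservations seen on each pass and asserting the per-OSD cap.
    # Stop once the sum has been zero for more than maxzerocount consecutive
    # passes, i.e. once all scrubs have drained.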
    local total=0
    local zerocount=0
    local maxzerocount=3
    while true
    do
        pass=0
        for o in $(seq 0 $(expr $OSDS - 1))
        do
            CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations
            scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote')
            if [ $scrubs -gt $MAX_SCRUBS ]; then
                echo "ERROR: More than $MAX_SCRUBS currently reserved"
                return 1
            fi
            pass=$(expr $pass + $scrubs)
        done
        if [ $pass = "0" ]; then
            zerocount=$(expr $zerocount + 1)
        fi
        if [ $zerocount -gt $maxzerocount ]; then
            break
        fi
        total=$(expr $total + $pass)
        sleep $(expr $SCRUB_SLEEP \* 2)
    done

    # Check that there are no more scrubs.
    for i in $(seq 0 5)
    do
        if ceph pg dump pgs | grep scrubbing; then
            echo "ERROR: Extra scrubs after test completion...not expected"
            return 1
        fi
        sleep $SCRUB_SLEEP
    done

    echo $total total reservations seen

    # Somewhat arbitrary threshold based on PGS * POOLS * POOL_SIZE as the number
    # of total scrub reservations that must occur. However, the loop above might
    # see the same reservation more than once.
    actual_reservations=$(expr $PGS \* $POOLS \* $POOL_SIZE)
    if [ $total -lt $actual_reservations ]; then
        echo "ERROR: Unexpectedly low number of scrub reservations seen during test"
        return 1
    fi

    return 0
}

main osd-scrub-dump "$@"

# Local Variables:
# compile-command: "cd build ; make check && \
#    ../qa/run-standalone.sh osd-scrub-dump.sh"
# End: