mirror of
https://github.com/ceph/ceph
synced 2025-02-24 19:47:44 +00:00
test/osd/osd-fast-mark-down.sh: introduce large timeout
One second might not be enough for a loaded system to fully process the fast mark down cycle, so introduce a loop that checks for the OSD to be marked as down within 30 seconds; this timeout can later be extended (or shortened) as necessary. Fixes: http://tracker.ceph.com/issues/17918 Signed-off-by: Piotr Dałek <git@predictor.org.pl>
This commit is contained in:
parent
8b3bc583c0
commit
a269bb7188
@ -18,6 +18,7 @@
|
||||
|
||||
source $(dirname $0)/../detect-build-env-vars.sh
|
||||
source $CEPH_ROOT/qa/workunits/ceph-helpers.sh
|
||||
MAX_PROPAGATION_TIME=30
|
||||
|
||||
function run() {
|
||||
local dir=$1
|
||||
@ -62,7 +63,7 @@ function test_fast_kill() {
|
||||
killid=0
|
||||
previd=0
|
||||
|
||||
# kill random osd and see if 1 sec after, the osd count decreased.
|
||||
# kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased.
|
||||
for i in {1..2}; do
|
||||
while [ $killid -eq $previd ]; do
|
||||
killid=${pids[$RANDOM%${#pids[@]}]}
|
||||
@ -70,20 +71,37 @@ function test_fast_kill() {
|
||||
previd=$killid
|
||||
|
||||
kill -9 $killid
|
||||
sleep 1
|
||||
time_left=$MAX_PROPAGATION_TIME
|
||||
down_osds=0
|
||||
|
||||
while [ $time_left -gt 0 ]; do
|
||||
sleep 1
|
||||
time_left=$[$time_left - 1];
|
||||
|
||||
grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
down_osds=$(ceph osd tree | grep -c down)
|
||||
if [ $down_osds -lt $i ]; then
|
||||
# osds not marked down yet, try again in a second
|
||||
continue
|
||||
elif [ $down_osds -gt $i ]; then
|
||||
echo Too many \($down_osds\) osds died!
|
||||
teardown $dir
|
||||
return 1
|
||||
else
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
down_osds=$(ceph osd tree | grep -c down)
|
||||
if [ $down_osds -lt $i ]; then
|
||||
echo Killed the OSD, yet it is not marked down
|
||||
ceph osd tree
|
||||
teardown $dir
|
||||
return 1
|
||||
elif [ $down_osds -gt $i ]; then
|
||||
echo Too many \($down_osds\) osds died!
|
||||
teardown $dir
|
||||
teardown $dir
|
||||
return 1
|
||||
fi
|
||||
|
||||
done
|
||||
pkill -SIGTERM rados
|
||||
teardown $dir || return 1
|
||||
|
Loading…
Reference in New Issue
Block a user