#! /usr/bin/env bash # # Copyright (C) 2017 Red Hat # # Author: David Zafman # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Library Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library Public License for more details. # source $CEPH_ROOT/qa/standalone/ceph-helpers.sh function run() { local dir=$1 shift export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one export CEPH_ARGS CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " export -n CEPH_CLI_TEST_DUP_COMMAND local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} for func in $funcs ; do $func $dir || return 1 done } # Simple test for "not scheduling scrubs due to active recovery" # OSD::sched_scrub() called on all OSDs during ticks function TEST_recovery_scrub_1() { local dir=$1 local poolname=test TESTDATA="testdata.$$" OSDS=4 PGS=1 OBJECTS=100 setup $dir || return 1 run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ --osd_scrub_interval_randomize_ratio=0.0 || return 1 run_mgr $dir x || return 1 for osd in $(seq 0 $(expr $OSDS - 1)) do run_osd $dir $osd --osd_scrub_during_recovery=false || return 1 done # Create a pool with $PGS pgs create_pool $poolname $PGS $PGS wait_for_clean || return 1 poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') ceph pg dump pgs dd if=/dev/urandom of=$TESTDATA bs=1M count=50 for i in $(seq 1 $OBJECTS) do rados -p $poolname put obj${i} $TESTDATA done rm -f $TESTDATA ceph osd pool set $poolname size 4 # Wait for recovery to start set -o pipefail count=0 while(true) do if ceph --format json pg dump pgs | jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true then break fi sleep 2 if test "$count" -eq "10" then echo "Recovery never started" return 1 fi count=$(expr $count + 1) done set +o pipefail ceph pg dump pgs sleep 10 # Work around for http://tracker.ceph.com/issues/38195 kill_daemons $dir #|| return 1 declare -a err_strings err_strings[0]="not scheduling scrubs due to active recovery" for osd in $(seq 0 $(expr $OSDS - 1)) do grep "not scheduling scrubs" $dir/osd.${osd}.log done for err_string in "${err_strings[@]}" do found=false count=0 for osd in $(seq 0 $(expr $OSDS - 1)) do if grep -q "$err_string" $dir/osd.${osd}.log then found=true count=$(expr $count + 1) fi done if [ "$found" = "false" ]; then echo "Missing log message '$err_string'" ERRORS=$(expr $ERRORS + 1) fi [ $count -eq $OSDS ] || return 1 done teardown $dir || return 1 if [ $ERRORS != "0" ]; then echo "TEST FAILED WITH $ERRORS ERRORS" return 1 fi echo "TEST PASSED" return 0 } # osd_scrub_during_recovery=true make sure scrub happens function TEST_recovery_scrub_2() { local dir=$1 local poolname=test TESTDATA="testdata.$$" OSDS=8 PGS=32 OBJECTS=4 setup $dir || return 1 run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ --osd_scrub_interval_randomize_ratio=0.0 || return 1 run_mgr $dir x || return 1 for osd in $(seq 0 $(expr $OSDS - 1)) do run_osd $dir $osd --osd_scrub_during_recovery=true || return 1 done # Create a pool with $PGS pgs create_pool $poolname $PGS $PGS wait_for_clean || return 1 poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') dd if=/dev/urandom of=$TESTDATA bs=1M count=50 for i in $(seq 1 $OBJECTS) do rados -p $poolname put obj${i} $TESTDATA done rm -f $TESTDATA ceph osd pool set $poolname size 3 ceph pg dump pgs # Wait for recovery to start set -o pipefail count=0 while(true) do if ceph --format json pg dump pgs | jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true then break fi sleep 2 if test "$count" -eq "10" then echo "Recovery never started" return 1 fi count=$(expr $count + 1) done set +o pipefail ceph pg dump pgs pids="" for pg in $(seq 0 $(expr $PGS - 1)) do run_in_background pids pg_scrub $poolid.$(printf "%x" $pg) done ceph pg dump pgs wait_background pids return_code=$? if [ $return_code -ne 0 ]; then return $return_code; fi ERRORS=0 pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') pid=$(cat $pidfile) if ! kill -0 $pid then echo "OSD crash occurred" #tail -100 $dir/osd.0.log ERRORS=$(expr $ERRORS + 1) fi # Work around for http://tracker.ceph.com/issues/38195 kill_daemons $dir #|| return 1 declare -a err_strings err_strings[0]="not scheduling scrubs due to active recovery" for osd in $(seq 0 $(expr $OSDS - 1)) do grep "not scheduling scrubs" $dir/osd.${osd}.log done for err_string in "${err_strings[@]}" do found=false for osd in $(seq 0 $(expr $OSDS - 1)) do if grep "$err_string" $dir/osd.${osd}.log > /dev/null; then found=true fi done if [ "$found" = "true" ]; then echo "Found log message not expected '$err_string'" ERRORS=$(expr $ERRORS + 1) fi done teardown $dir || return 1 if [ $ERRORS != "0" ]; then echo "TEST FAILED WITH $ERRORS ERRORS" return 1 fi echo "TEST PASSED" return 0 } main osd-recovery-scrub "$@" # Local Variables: # compile-command: "cd build ; make -j4 && \ # ../qa/run-standalone.sh osd-recovery-scrub.sh" # End: