mirror of
https://github.com/ceph/ceph
synced 2025-01-04 02:02:36 +00:00
e6e10246c6
If the cluster dies during the rados bench, the maximum running time is no longer taken into account and all emitted aios remain pending. rados bench never quits, and the global testing timeout (3600 sec: 1 hour) has to be reached before a failure is reported. This situation is dramatic for a background test or a CI run, as it locks the whole job for far too long waiting for an event that will never occur. The ideal solution would be to have 'rados bench' report a failure once the timeout is reached while aios are still pending. A possible workaround is to use the system command 'timeout' when calling rados bench and to fail if rados did not complete in time. To avoid side effects, this patch doubles the rados timeout: if rados has not completed after twice the expected time, it has to fail to avoid locking the whole testing job. Please find below how it worked on a real test case. We can see there is no IO after t>2, yet despite timeout=4 the bench continues. Thanks to this patch, the bench is stopped at t=8 and returns 1. 5: /home/erwan/ceph/src/test/smoke.sh:55: TEST_multimon: timeout 8 rados -p foo bench 4 write -b 4096 --no-cleanup 5: hints = 1 5: Maintaining 16 concurrent writes of 4096 bytes to objects of size 4096 for up to 4 seconds or 0 objects 5: Object prefix: benchmark_data_mr-meeseeks_184960 5: sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 5: 0 0 0 0 0 0 - 0 5: 1 16 1144 1128 4.40538 4.40625 0.00412965 0.0141116 5: 2 16 2147 2131 4.16134 3.91797 0.00985654 0.0109079 5: 3 16 2147 2131 2.77424 0 - 0.0109079 5: 4 16 2147 2131 2.0807 0 - 0.0109079 5: 5 16 2147 2131 1.66456 0 - 0.0109079 5: 6 16 2147 2131 1.38714 0 - 0.0109079 5: 7 16 2147 2131 1.18897 0 - 0.0109079 5: /home/erwan/ceph/src/test/smoke.sh:55: TEST_multimon: return 1 5: /home/erwan/ceph/src/test/smoke.sh:18: run: return 1 Signed-off-by: Erwan Velu <erwan@redhat.com>
84 lines
2.2 KiB
Bash
Executable File
84 lines
2.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Standalone OSD test script.
# Requires the CEPH_ROOT environment variable: the helper library is loaded
# from $CEPH_ROOT/qa/standalone/ceph-helpers.sh.

# Quoted so that a CEPH_ROOT containing whitespace is still a single path.
source "$CEPH_ROOT/qa/standalone/ceph-helpers.sh"

# This test is skipped (reported as success) on FreeBSD.
[ "$(uname)" = FreeBSD ] && exit 0

#######################################
# Runs the selected TEST_* functions, each one between a setup and a
# teardown of the test directory.
# Globals:   CEPH_MON (exported), CEPH_ARGS (exported, appended to)
# Arguments: $1   - test directory, passed to setup, the test and teardown
#            $2.. - names of the functions to run; when none are given,
#                   every TEST_* function listed by `set` is run
# Returns:   1 as soon as setup, a test function or teardown fails
#######################################
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # avoid running out of fds in rados bench
    CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 "
    CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 "

    # Explicit function names win; otherwise discover the TEST_* functions
    # from the shell's own listing of defined functions.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is deliberately unquoted: it is a whitespace-separated list.
    for func in $funcs ; do
        setup "$dir" || return 1
        $func "$dir" || return 1
        teardown "$dir" || return 1
    done
}

#######################################
# Brings up a mon, a mgr and three OSDs, writes objects with rados bench,
# stops osd.0, copies its data into a freshly created bluestore store with
# `ceph-objectstore-tool --op dup`, restarts osd.0 on that store and waits
# for the cluster to become clean.
# Globals:   CEPH_ARGS (temporarily extended for ceph-objectstore-tool,
#            restored afterwards)
# Arguments: $1 - test directory
# Returns:   1 when a checked step fails, otherwise the status of
#            wait_for_clean
#######################################
function TEST_filestore_to_bluestore() {
    local dir=$1
    local flimit osd_pid ofsid saved_ceph_args

    flimit=$(ulimit -n)
    # `ulimit -n` can print "unlimited"; only compare real numbers, otherwise
    # `[ ... -lt ... ]` fails with "integer expression expected".
    if [[ "$flimit" =~ ^[0-9]+$ ]] && [ "$flimit" -lt 1536 ]; then
        echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens."
    fi

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    run_osd "$dir" 0 || return 1
    # Without the pid osd.0 could never be stopped below, so fail early.
    osd_pid=$(cat "$dir/osd.0.pid") || return 1
    run_osd "$dir" 1 || return 1
    run_osd "$dir" 2 || return 1

    sleep 5

    create_pool foo 16

    # write some objects
    # `timeout 20` is twice the 10 second bench duration, so a bench whose
    # I/O is stuck makes the test fail instead of hanging.
    timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1

    # kill
    # Signal the OSD once per second; kill starts failing (ending the loop)
    # once the process is gone.
    # NOTE(review): this wait is unbounded if the OSD never exits.
    while kill "$osd_pid"; do sleep 1 ; done
    ceph osd down 0

    # Keep the old store aside and create an empty directory for the new one.
    mv "$dir/0" "$dir/0.old" || return 1
    mkdir "$dir/0" || return 1
    ofsid=$(cat "$dir/0.old/fsid") || return 1
    echo "osd fsid $ofsid"

    # The extra logging options apply only to the two tool invocations.
    saved_ceph_args=$CEPH_ARGS
    CEPH_ARGS+="--log-file $dir/cot.log --log-max-recent 0 "
    ceph-objectstore-tool --type bluestore --data-path "$dir/0" --fsid "$ofsid" \
        --op mkfs --no-mon-config || return 1
    ceph-objectstore-tool --data-path "$dir/0.old" --target-data-path "$dir/0" \
        --op dup || return 1
    CEPH_ARGS=$saved_ceph_args

    run_osd_bluestore "$dir" 0 || return 1

    # NOTE(review): this wait is unbounded if the third OSD never comes up.
    while ! ceph osd stat | grep '3 up' ; do sleep 1 ; done
    ceph osd metadata 0 | grep bluestore || return 1

    ceph osd scrub 0

    # give it some time
    sleep 15
    # and make sure mon is sync'ed
    flush_pg_stats

    wait_for_clean || return 1
}

# Hand control to `main`, passing the test name and the script's arguments.
# NOTE(review): `main` is not defined in this file; presumably it comes from
# the sourced ceph-helpers.sh - confirm.
main osd-dup "$@"

# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh"
# End: