qa/suites/krbd: stress test for recovering from watch errors for -o exclusive

This is based on a test added in commit 237aa221eb ("qa/suites/krbd:
stress test for recovering from watch errors") for regular mappings.

Fixes: https://tracker.ceph.com/issues/67097
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Ilya Dryomov 2024-07-24 08:44:46 +02:00
parent 363cc218e9
commit 8fee41da8b
2 changed files with 50 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
# Suite fragment that runs the krbd "watch errors with -o exclusive" stress
# workunit on a minimal single-node cluster.
overrides:
  ceph:
    conf:
      global:
        # the roles below provide a single OSD (osd.0), so pools are
        # created with one replica
        osd pool default size: 1
      osd:
        osd shutdown pgref assert: true
roles:
- [mon.a, mgr.x, osd.0, client.0]
tasks:
- install:
    # fio is invoked by the workunit script to generate the I/O workload
    extra_system_packages:
    - fio
- ceph:
- workunit:
    clients:
      all:
      - rbd/krbd_watch_errors_exclusive.sh

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Stress test for recovering from watch errors on a krbd mapping created
# with "-o exclusive".
#
# Steps:
#   1. create a 1G image with the exclusive-lock and object-map features
#   2. map it with "-o exclusive,osdkeepalive=60"
#   3. clear the kernel log, then run a 1 hour fio random-write workload
#      against the mapped device (any fio failure aborts the script)
#   4. count the "encountered watch error" kernel messages logged for the
#      device, unmap it, and fail if fewer than 60 were recorded
#
# Exit status: 0 on success, non-zero if any command fails or too few watch
# errors were observed.

set -ex
set -o pipefail

readonly IMAGE_NAME="watch-errors-exclusive-test"

rbd create -s 1G --image-feature exclusive-lock,object-map "${IMAGE_NAME}"

# induce a watch error every 30 seconds
dev="$(sudo rbd device map -o exclusive,osdkeepalive=60 "${IMAGE_NAME}")"
# numeric device id, i.e. the mapped path with the "/dev/rbd" prefix removed
dev_id="${dev#/dev/rbd}"

# start from an empty kernel log so that only messages produced during the
# workload are counted below
sudo dmesg -C

# test that a workload doesn't encounter EIO errors
fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \
    --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \
    --time_based --runtime=1h --eta=never

# grep -c prints "0" but exits with status 1 when nothing matches.  With
# errexit and pipefail in effect that would abort the script right here:
# the count would never be reported, the device would stay mapped and the
# "Too few watch errors" diagnostic would never be printed.  Tolerate
# status 1 only, so that genuine grep errors (status 2) and dmesg failures
# still fail the test.
num_errors="$(dmesg \
    | { grep -c "rbd${dev_id}: encountered watch error" || [[ $? -eq 1 ]]; })"
echo "Recorded ${num_errors} watch errors"

# unmap before evaluating the result so the device is released on both the
# pass and the "too few errors" paths
sudo rbd device unmap "${dev}"

if ((num_errors < 60)); then
    echo "Too few watch errors"
    exit 1
fi

echo OK