mirror of
https://github.com/ceph/ceph
synced 2025-01-02 00:52:22 +00:00
qa: Add tests to validate syncing of images using rbd-mirror
Introduce functional tests to validate that images under workload are correctly mirrored between two clusters using snapshot-based mirroring.

Run a workload on a primary image using a krbd or nbd client. Take mirror snapshots of the image while the workload is running. Unmount the mapped image and calculate its MD5 checksum before demoting it. After demotion, wait for the mirror status of the image to become 'up+unknown' in both clusters. This makes sure that the non-primary image in the other cluster is ready to be promoted. Then promote the non-primary image in the other cluster. Map the promoted image and calculate its MD5 checksum. Verify that the checksums of the demoted and promoted images in the two clusters are the same.

The above test is run as part of two different workunits:
- a workunit that validates the syncing of multiple mirrored images with workloads running on them
- another workunit that validates the syncing of a single mirrored image with a workload running on it, where the image is set as primary alternately between the two clusters, as happens during failover and failback scenarios.

Fixes: https://tracker.ceph.com/issues/61617
Signed-off-by: Ramana Raja <rraja@redhat.com>
Co-authored-by: Ilya Dryomov <idryomov@redhat.com>
Co-authored-by: Christopher Hoffman <choffman@redhat.com>
This commit is contained in:
parent
ea3a567f7f
commit
b7aae5c3c5
@ -0,0 +1,13 @@
|
||||
# Run the alternate-primary mirror comparison workunit with the image mapped
# through krbd.
# NOTE(review): nesting reconstructed from the flattened diff view -- confirm
# against the original file.
overrides:
  install:
    ceph:
      # pv is used by the workunit to rate limit its untar workload
      extra_system_packages:
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_image_alternate_primary.sh
    env:
      RBD_DEVICE_TYPE: 'krbd'
    timeout: 3h
|
@ -0,0 +1,15 @@
|
||||
# Run the alternate-primary mirror comparison workunit with the image mapped
# through rbd-nbd.
# NOTE(review): nesting reconstructed from the flattened diff view -- confirm
# against the original file.
overrides:
  install:
    ceph:
      extra_packages:
        - rbd-nbd
      # pv is used by the workunit to rate limit its untar workload
      extra_system_packages:
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_image_alternate_primary.sh
    env:
      RBD_DEVICE_TYPE: 'nbd'
    timeout: 3h
|
@ -0,0 +1,13 @@
|
||||
# Run the multi-image mirror comparison workunit with the images mapped
# through krbd.
# NOTE(review): nesting reconstructed from the flattened diff view -- confirm
# against the original file.
overrides:
  install:
    ceph:
      # pv is used by the workunit to rate limit its untar workload
      extra_system_packages:
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_images.sh
    env:
      RBD_DEVICE_TYPE: 'krbd'
    timeout: 3h
|
@ -0,0 +1,15 @@
|
||||
# Run the multi-image mirror comparison workunit with the images mapped
# through rbd-nbd.
# NOTE(review): nesting reconstructed from the flattened diff view -- confirm
# against the original file.
overrides:
  install:
    ceph:
      extra_packages:
        - rbd-nbd
      # pv is used by the workunit to rate limit its untar workload
      extra_system_packages:
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_images.sh
    env:
      RBD_DEVICE_TYPE: 'nbd'
    timeout: 3h
|
106
qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
Executable file
106
qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
Executable file
@ -0,0 +1,106 @@
|
||||
#!/bin/bash
#
# Validate snapshot-based rbd-mirror syncing of a single image whose primary
# role alternates between two clusters: run a workload on the primary image
# while taking mirror snapshots, then compare the MD5 checksum of the demoted
# image with that of the newly promoted image in the other cluster.
#
# Required environment:
#   RBD_DEVICE_TYPE - how the image is mapped, 'krbd' or 'nbd'

set -ex

IMAGE=image-alternate-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MOUNT=test-alternate-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# NOTE(review): the settings above that this script never references itself
# (MIRROR_*, RBD_IMAGE_FEATURES, RBD_MIRROR_INSTANCES,
# RBD_MIRROR_USE_EXISTING_CLUSTER) are presumably consumed by
# rbd_mirror_helpers.sh -- confirm there.

# quote the path so that a script location containing spaces still resolves
. "$(dirname "$0")/rbd_mirror_helpers.sh"
|
||||
|
||||
# Create 30 mirror snapshots of an image, sleeping 3 seconds after each one.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Relies on mirror_image_snapshot, which is not defined in this file
# (presumably provided by rbd_mirror_helpers.sh).
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # keep the counter local: the main loop of this script also iterates with
    # "i", and a foreground call would otherwise overwrite it
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
|
||||
|
||||
# Run a rate-limited untar of the Linux 5.4 tarball inside a mounted
# filesystem for 5 minutes.
# Arguments:
#   $1 - mount point to copy the tarball into and extract it under
# Expects linux-5.4.tar.gz in the current working directory.
# Returns:
#   0 if the workload was still running when the 5 minute limit hit,
#   1 if it ended for any other reason (finished early or failed).
slow_untar_workload() {
    local mountpt=$1

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    local ret=0
    # the mount point is handed to the inner shell as "$1" rather than being
    # spliced into the command text, so spaces or shell metacharacters in the
    # path cannot change the command that runs
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        bash "$mountpt" || ret=$?
    # timeout exits with 124 when it had to stop the command at the limit,
    # which is the only outcome accepted here
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
|
||||
|
||||
# Main flow: create one mirrored image, then repeatedly write to it under
# mirror snapshots, fail it over to the other cluster and check that the
# promoted copy has the same MD5 checksum as the demoted one.
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

# initial setup
create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMAGE}" \
    "${RBD_MIRROR_MODE}" 10G

if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
    DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t nbd \
        -o try-netlink "${POOL}/${IMAGE}")
elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
    DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t krbd \
        "${POOL}/${IMAGE}")
else
    echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
    exit 1
fi
sudo mkfs.ext4 "${DEV}"
mkdir "${MOUNT}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

for i in {1..25}; do
    # create mirror snapshots every few seconds under I/O
    sudo mount "${DEV}" "${MOUNT}"
    sudo chown "$(whoami)" "${MOUNT}"
    # :? aborts instead of expanding to "/*" should MOUNT ever be empty
    rm -rf "${MOUNT:?}"/*
    take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMAGE}" &
    SNAP_PID=$!
    slow_untar_workload "${MOUNT}"
    wait "${SNAP_PID}"
    sudo umount "${MOUNT}"

    # calculate hash before demotion of primary image
    # md5sum is run on its own (not piped into awk) so that "set -e" sees a
    # failure; otherwise two failed runs would yield two empty, "equal" hashes
    DEMOTE_MD5=$(sudo md5sum "${DEV}")
    DEMOTE_MD5=${DEMOTE_MD5%% *}
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${DEV}"

    demote_image "${CLUSTER1}" "${POOL}" "${IMAGE}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMAGE}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${IMAGE}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${IMAGE}"

    # calculate hash after promotion of secondary image
    # the device mapped here stays mapped: it is the one mounted at the top of
    # the next iteration
    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
        DEV=$(sudo rbd --cluster "${CLUSTER2}" device map -t nbd \
            -o try-netlink "${POOL}/${IMAGE}")
    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
        DEV=$(sudo rbd --cluster "${CLUSTER2}" device map -t krbd \
            "${POOL}/${IMAGE}")
    fi
    PROMOTE_MD5=$(sudo md5sum "${DEV}")
    PROMOTE_MD5=${PROMOTE_MD5%% *}

    if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
        echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
        exit 1
    fi

    # swap the roles so that the next iteration fails back the other way
    TEMP=${CLUSTER1}
    CLUSTER1=${CLUSTER2}
    CLUSTER2=${TEMP}
done

echo OK
|
170
qa/workunits/rbd/compare_mirror_images.sh
Executable file
170
qa/workunits/rbd/compare_mirror_images.sh
Executable file
@ -0,0 +1,170 @@
|
||||
#!/bin/bash
#
# Validate snapshot-based rbd-mirror syncing of multiple images: run a
# workload on each primary image while taking mirror snapshots, then compare
# the MD5 checksum of every demoted image with that of its newly promoted
# counterpart in the other cluster.
#
# Required environment:
#   RBD_DEVICE_TYPE - how the images are mapped, 'krbd' or 'nbd'

set -ex

IMG_PREFIX=image-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MNTPT_PREFIX=test-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# NOTE(review): the settings above that this script never references itself
# (MIRROR_*, RBD_IMAGE_FEATURES, RBD_MIRROR_INSTANCES,
# RBD_MIRROR_USE_EXISTING_CLUSTER) are presumably consumed by
# rbd_mirror_helpers.sh -- confirm there.

# quote the path so that a script location containing spaces still resolves
. "$(dirname "$0")/rbd_mirror_helpers.sh"
|
||||
|
||||
# Create 30 mirror snapshots of an image, sleeping 3 seconds after each one.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Relies on mirror_image_snapshot, which is not defined in this file
# (presumably provided by rbd_mirror_helpers.sh).
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # keep the counter local: the main loop of this script also iterates with
    # "i", and a foreground call would otherwise overwrite it
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
|
||||
|
||||
# Run a rate-limited untar of the Linux 5.4 tarball inside a mounted
# filesystem for 5 minutes.
# Arguments:
#   $1 - mount point to copy the tarball into and extract it under
# Expects linux-5.4.tar.gz in the current working directory.
# Returns:
#   0 if the workload was still running when the 5 minute limit hit,
#   1 if it ended for any other reason (finished early or failed).
slow_untar_workload() {
    local mountpt=$1

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    local ret=0
    # the mount point is handed to the inner shell as "$1" rather than being
    # spliced into the command text, so spaces or shell metacharacters in the
    # path cannot change the command that runs
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        bash "$mountpt" || ret=$?
    # timeout exits with 124 when it had to stop the command at the limit,
    # which is the only outcome accepted here
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
|
||||
|
||||
# Poll until an image no longer appears in a pool's image listing.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Returns:
#   0 as soon as a successful listing does not contain the image,
#   1 if the image is still listed (or the listing keeps failing) after all
#     retries, sleeping 1, 2, 4, 8 (x8), 16, 16 seconds between attempts.
wait_for_image_removal() {
    local cluster=$1
    local pool=$2
    local image=$3
    local s images

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        # Only a listing that succeeded counts as evidence of removal; a
        # failed "rbd ls" prints nothing and must not be read as "gone".
        # -F -x compares whole lines literally, so an image whose name merely
        # starts with "$image" (e.g. "$image-clone") is not mistaken for it.
        if images=$(rbd --cluster "$cluster" ls "$pool") &&
           ! grep -Fxq -- "$image" <<<"$images"; then
            return 0
        fi
        sleep $s
    done

    echo "image ${pool}/${image} not removed from cluster ${cluster}"
    return 1
}
|
||||
|
||||
# Fail one image over from CLUSTER1 to CLUSTER2 and verify that the promoted
# copy has the same MD5 checksum as the demoted one.
# Globals (read): DEVS, IMG_PREFIX, MNTPT_PREFIX, POOL, CLUSTER1, CLUSTER2,
#                 RBD_DEVICE_TYPE
# Arguments:
#   $1 - 1-based image index; selects DEVS[$1-1], ${IMG_PREFIX}$1 and
#        ${MNTPT_PREFIX}$1
# Returns:
#   0 if the checksums match, 1 on mismatch, checksum failure or an unknown
#   RBD_DEVICE_TYPE.
compare_demoted_promoted_image() {
    local dev=${DEVS[$1-1]}
    local img=${IMG_PREFIX}$1
    local mntpt=${MNTPT_PREFIX}$1
    local demote_md5 promote_md5

    sudo umount "${mntpt}"

    # calculate hash before demotion of primary image
    # md5sum is checked on its own instead of being piped into awk: a failed
    # md5sum would otherwise yield an empty hash on both sides and the
    # comparison below would pass on two empty strings
    demote_md5=$(sudo md5sum "${dev}") || return 1
    demote_md5=${demote_md5%% *}
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${POOL}/${img}"

    demote_image "${CLUSTER1}" "${POOL}" "${img}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${img}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${img}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${img}"

    # calculate hash after promotion of secondary image
    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
        dev=$(sudo rbd --cluster "${CLUSTER2}" device map -t nbd \
            -o try-netlink "${POOL}/${img}")
    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
        dev=$(sudo rbd --cluster "${CLUSTER2}" device map -t krbd \
            "${POOL}/${img}")
    else
        # without this branch the stale, already unmapped device path would
        # be checksummed below
        echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
        return 1
    fi
    promote_md5=$(sudo md5sum "${dev}") || return 1
    promote_md5=${promote_md5%% *}
    sudo rbd --cluster "${CLUSTER2}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${dev}"

    if [[ "${demote_md5}" != "${promote_md5}" ]]; then
        echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
        return 1
    fi
}
|
||||
|
||||
# Main flow: in each of 10 rounds, create 10 mirrored images, run a workload
# on every one of them while mirror snapshots are taken, fail all of them
# over to the other cluster in parallel and compare checksums, then remove
# the images again.
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

for i in {1..10}; do
    DEVS=()
    SNAP_PIDS=()
    COMPARE_PIDS=()
    WORKLOAD_PIDS=()
    RET=0
    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        MNTPT=${MNTPT_PREFIX}${j}
        create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMG}" \
            "${RBD_MIRROR_MODE}" 10G
        if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
            DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t nbd \
                -o try-netlink "${POOL}/${IMG}")
        elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
            DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t krbd \
                "${POOL}/${IMG}")
        else
            echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
            exit 1
        fi
        # DEVS[j-1] is looked up later by compare_demoted_promoted_image
        DEVS+=("${DEV}")
        sudo mkfs.ext4 "${DEV}"
        mkdir "${MNTPT}"
        sudo mount "${DEV}" "${MNTPT}"
        sudo chown "$(whoami)" "${MNTPT}"
        # create mirror snapshots under I/O every few seconds
        take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMG}" &
        SNAP_PIDS+=($!)
        slow_untar_workload "${MNTPT}" &
        WORKLOAD_PIDS+=($!)
    done
    # "set -e" does not cover background jobs, so every job's exit status is
    # collected explicitly through wait
    for pid in "${SNAP_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "take_mirror_snapshots failed"
        exit 1
    fi
    for pid in "${WORKLOAD_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "slow_untar_workload failed"
        exit 1
    fi

    for j in {1..10}; do
        compare_demoted_promoted_image "${j}" &
        COMPARE_PIDS+=($!)
    done
    for pid in "${COMPARE_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "compare_demoted_promoted_image failed"
        exit 1
    fi

    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        # Allow for removal of non-primary image by checking that mirroring
        # image status is "up+replaying"
        wait_for_replaying_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMG}"
        remove_image "${CLUSTER2}" "${POOL}" "${IMG}"
        wait_for_image_removal "${CLUSTER1}" "${POOL}" "${IMG}"
        # :? aborts rather than removing just "${j}" should the prefix be empty
        rm -rf "${MNTPT_PREFIX:?}${j}"
    done
done

echo OK
|
Loading…
Reference in New Issue
Block a user