diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml
new file mode 100644
index 00000000000..771400d01ee
--- /dev/null
+++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml
@@ -0,0 +1,13 @@
+overrides:
+  install:
+    ceph:
+      extra_system_packages:
+        - pv
+tasks:
+- workunit:
+    clients:
+      cluster1.client.mirror:
+        - rbd/compare_mirror_image_alternate_primary.sh
+    env:
+      RBD_DEVICE_TYPE: 'krbd'
+    timeout: 3h
diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml
new file mode 100644
index 00000000000..e87d0e8cecc
--- /dev/null
+++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml
@@ -0,0 +1,15 @@
+overrides:
+  install:
+    ceph:
+      extra_packages:
+        - rbd-nbd
+      extra_system_packages:
+        - pv
+tasks:
+- workunit:
+    clients:
+      cluster1.client.mirror:
+        - rbd/compare_mirror_image_alternate_primary.sh
+    env:
+      RBD_DEVICE_TYPE: 'nbd'
+    timeout: 3h
diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml
new file mode 100644
index 00000000000..fc161987f7b
--- /dev/null
+++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml
@@ -0,0 +1,13 @@
+overrides:
+  install:
+    ceph:
+      extra_system_packages:
+        - pv
+tasks:
+- workunit:
+    clients:
+      cluster1.client.mirror:
+        - rbd/compare_mirror_images.sh
+    env:
+      RBD_DEVICE_TYPE: 'krbd'
+    timeout: 3h
diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml
new file mode 100644
index 00000000000..ed02ed25702
--- /dev/null
+++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml
@@ -0,0 +1,15 @@
+overrides:
+  install:
+    ceph:
+      extra_packages:
+        - rbd-nbd
+      extra_system_packages:
+        - pv
+tasks:
+- workunit:
+    clients:
+      cluster1.client.mirror:
+        - rbd/compare_mirror_images.sh
+    env:
+      RBD_DEVICE_TYPE: 'nbd'
+    timeout: 3h
diff --git a/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
new file mode 100755
index 00000000000..338f43f1e53
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+#
+# Repeatedly fail a mirrored image over between two clusters and verify that
+# the promoted image has the same md5sum as the image that was demoted. Before
+# every failover a rate-limited untar workload modifies a filesystem on the
+# image while mirror snapshots are taken in the background.
+#
+# Environment:
+#   RBD_DEVICE_TYPE - "nbd" or "krbd"; how the image is mapped on the client.
+
+set -ex
+
+IMAGE=image-alternate-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MOUNT=test-alternate-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+# NOTE(review): the settings above are presumably consumed by the helpers
+# sourced below, which are also expected to provide CLUSTER1, CLUSTER2 and
+# POOL -- confirm against rbd_mirror_helpers.sh.
+. "$(dirname "$0")"/rbd_mirror_helpers.sh
+
+# Take 30 mirror snapshots of ${pool}/${image} on ${cluster}, sleeping for 3
+# seconds after each one.
+take_mirror_snapshots() {
+  local cluster=$1
+  local pool=$2
+  local image=$3
+  local i
+
+  for i in {1..30}; do
+    mirror_image_snapshot "${cluster}" "${pool}" "${image}"
+    sleep 3
+  done
+}
+
+# Copy the kernel tarball to ${mountpt} and untar it there at a throttled
+# rate. The workload is expected to still be running when the 5 minute
+# timeout fires; any other outcome makes the function return 1.
+slow_untar_workload() {
+  local mountpt=$1
+
+  cp linux-5.4.tar.gz "${mountpt}"
+  # run workload that updates the data and metadata of multiple files on disk.
+  # rate limit the workload such that the mirror snapshots can be taken as the
+  # contents of the image are progressively changed by the workload.
+  local ret=0
+  timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+    | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+  # timeout(1) exits with 124 when the command had to be timed out
+  if ((ret != 124)); then
+    echo "Workload completed prematurely"
+    return 1
+  fi
+}
+
+# Map ${pool}/${image} from ${cluster} as a device of type RBD_DEVICE_TYPE
+# and print whatever "rbd device map" prints (the device path). Returns
+# non-zero if the device type is unknown or the map command fails.
+map_image_device() {
+  local cluster=$1
+  local pool=$2
+  local image=$3
+
+  case "${RBD_DEVICE_TYPE}" in
+    nbd)
+      sudo rbd --cluster "${cluster}" device map -t nbd \
+        -o try-netlink "${pool}/${image}"
+      ;;
+    krbd)
+      sudo rbd --cluster "${cluster}" device map -t krbd "${pool}/${image}"
+      ;;
+    *)
+      # stdout is captured by the callers, so report on stderr
+      echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" >&2
+      return 1
+      ;;
+  esac
+}
+
+# Print the md5sum of block device ${dev}. Unlike "md5sum | awk", a failing
+# md5sum is propagated instead of producing an empty hash, which would
+# compare equal to another empty hash.
+device_md5() {
+  local dev=$1
+  local sum
+
+  sum=$(sudo md5sum "${dev}") || return 1
+  echo "${sum%% *}"
+}
+
+setup
+
+start_mirrors "${CLUSTER1}"
+start_mirrors "${CLUSTER2}"
+
+# initial setup
+create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMAGE}" \
+  "${RBD_MIRROR_MODE}" 10G
+
+DEV=$(map_image_device "${CLUSTER1}" "${POOL}" "${IMAGE}")
+sudo mkfs.ext4 "${DEV}"
+mkdir "${MOUNT}"
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..25}; do
+  # create mirror snapshots every few seconds under I/O
+  sudo mount "${DEV}" "${MOUNT}"
+  sudo chown "$(whoami)" "${MOUNT}"
+  rm -rf -- "${MOUNT:?}"/*
+  take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMAGE}" &
+  SNAP_PID=$!
+  slow_untar_workload "${MOUNT}"
+  wait "${SNAP_PID}"
+  sudo umount "${MOUNT}"
+
+  # calculate hash before demotion of primary image
+  DEMOTE_MD5=$(device_md5 "${DEV}")
+  sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
+    "${DEV}"
+
+  demote_image "${CLUSTER1}" "${POOL}" "${IMAGE}"
+  wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMAGE}" 'up+unknown'
+  wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${IMAGE}" 'up+unknown'
+  promote_image "${CLUSTER2}" "${POOL}" "${IMAGE}"
+
+  # calculate hash after promotion of secondary image
+  DEV=$(map_image_device "${CLUSTER2}" "${POOL}" "${IMAGE}")
+  PROMOTE_MD5=$(device_md5 "${DEV}")
+
+  if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
+    echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
+    exit 1
+  fi
+
+  # swap roles: the device mapped above from the newly promoted cluster is
+  # the one the next iteration mounts and writes to
+  TEMP=${CLUSTER1}
+  CLUSTER1=${CLUSTER2}
+  CLUSTER2=${TEMP}
+done
+
+echo OK
diff --git a/qa/workunits/rbd/compare_mirror_images.sh b/qa/workunits/rbd/compare_mirror_images.sh
new file mode 100755
index 00000000000..1b3cc300de9
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_images.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+#
+# Create ten mirrored images, run a rate-limited untar workload on a
+# filesystem on each of them while mirror snapshots are taken in the
+# background, then fail every image over from CLUSTER1 to CLUSTER2 and verify
+# that the promoted image has the same md5sum as the image that was demoted.
+# The images are removed afterwards and the whole cycle is run ten times.
+#
+# Environment:
+#   RBD_DEVICE_TYPE - "nbd" or "krbd"; how the images are mapped on the client.
+
+set -ex
+
+IMG_PREFIX=image-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MNTPT_PREFIX=test-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+# NOTE(review): the settings above are presumably consumed by the helpers
+# sourced below, which are also expected to provide CLUSTER1, CLUSTER2 and
+# POOL -- confirm against rbd_mirror_helpers.sh.
+. "$(dirname "$0")"/rbd_mirror_helpers.sh
+
+# Take 30 mirror snapshots of ${pool}/${image} on ${cluster}, sleeping for 3
+# seconds after each one.
+take_mirror_snapshots() {
+  local cluster=$1
+  local pool=$2
+  local image=$3
+  local i
+
+  for i in {1..30}; do
+    mirror_image_snapshot "${cluster}" "${pool}" "${image}"
+    sleep 3
+  done
+}
+
+# Copy the kernel tarball to ${mountpt} and untar it there at a throttled
+# rate. The workload is expected to still be running when the 5 minute
+# timeout fires; any other outcome makes the function return 1.
+slow_untar_workload() {
+  local mountpt=$1
+
+  cp linux-5.4.tar.gz "${mountpt}"
+  # run workload that updates the data and metadata of multiple files on disk.
+  # rate limit the workload such that the mirror snapshots can be taken as the
+  # contents of the image are progressively changed by the workload.
+  local ret=0
+  timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+    | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+  # timeout(1) exits with 124 when the command had to be timed out
+  if ((ret != 124)); then
+    echo "Workload completed prematurely"
+    return 1
+  fi
+}
+
+# Map ${pool}/${image} from ${cluster} as a device of type RBD_DEVICE_TYPE
+# and print whatever "rbd device map" prints (the device path). Returns
+# non-zero if the device type is unknown or the map command fails.
+map_image_device() {
+  local cluster=$1
+  local pool=$2
+  local image=$3
+
+  case "${RBD_DEVICE_TYPE}" in
+    nbd)
+      sudo rbd --cluster "${cluster}" device map -t nbd \
+        -o try-netlink "${pool}/${image}"
+      ;;
+    krbd)
+      sudo rbd --cluster "${cluster}" device map -t krbd "${pool}/${image}"
+      ;;
+    *)
+      # stdout is captured by the callers, so report on stderr
+      echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" >&2
+      return 1
+      ;;
+  esac
+}
+
+# Print the md5sum of block device ${dev}. Unlike "md5sum | awk", a failing
+# md5sum is propagated instead of producing an empty hash, which would
+# compare equal to another empty hash.
+device_md5() {
+  local dev=$1
+  local sum
+
+  sum=$(sudo md5sum "${dev}") || return 1
+  echo "${sum%% *}"
+}
+
+# Wait for ${pool}/${image} to disappear from the image listing of
+# ${cluster}, polling with increasing delays. Returns 1 if the image is
+# still listed after the last delay or if the listing itself fails.
+wait_for_image_removal() {
+  local cluster=$1
+  local pool=$2
+  local image=$3
+  local s images
+
+  for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+    # check the listing separately from the match so that a failed "rbd ls"
+    # is not mistaken for the image being gone
+    if ! images=$(rbd --cluster "${cluster}" ls "${pool}"); then
+      echo "failed to list images in ${pool} on cluster ${cluster}"
+      return 1
+    fi
+    # -F -x: the name has to match a whole line literally
+    if ! grep -Fxq -- "${image}" <<< "${images}"; then
+      return 0
+    fi
+    sleep "${s}"
+  done
+
+  echo "image ${pool}/${image} not removed from cluster ${cluster}"
+  return 1
+}
+
+# Fail image number $1 over from CLUSTER1 to CLUSTER2 and check that the
+# promoted image has the same md5sum as the demoted one. Expects the image
+# to be mapped as ${DEVS[$1-1]} and mounted at ${MNTPT_PREFIX}$1; the image
+# is unmounted and left unmapped.
+compare_demoted_promoted_image() {
+  local dev=${DEVS[$1-1]}
+  local img=${IMG_PREFIX}$1
+  local mntpt=${MNTPT_PREFIX}$1
+  local demote_md5 promote_md5
+
+  sudo umount "${mntpt}"
+
+  # calculate hash before demotion of primary image
+  demote_md5=$(device_md5 "${dev}")
+  sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
+    "${POOL}/${img}"
+
+  demote_image "${CLUSTER1}" "${POOL}" "${img}"
+  wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${img}" 'up+unknown'
+  wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${img}" 'up+unknown'
+  promote_image "${CLUSTER2}" "${POOL}" "${img}"
+
+  # calculate hash after promotion of secondary image
+  dev=$(map_image_device "${CLUSTER2}" "${POOL}" "${img}")
+  promote_md5=$(device_md5 "${dev}")
+  sudo rbd --cluster "${CLUSTER2}" device unmap -t "${RBD_DEVICE_TYPE}" \
+    "${dev}"
+
+  if [[ "${demote_md5}" != "${promote_md5}" ]]; then
+    echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
+    return 1
+  fi
+}
+
+setup
+
+start_mirrors "${CLUSTER1}"
+start_mirrors "${CLUSTER2}"
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..10}; do
+  DEVS=()
+  SNAP_PIDS=()
+  COMPARE_PIDS=()
+  WORKLOAD_PIDS=()
+  RET=0
+  for j in {1..10}; do
+    IMG=${IMG_PREFIX}${j}
+    MNTPT=${MNTPT_PREFIX}${j}
+    create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMG}" \
+      "${RBD_MIRROR_MODE}" 10G
+    DEV=$(map_image_device "${CLUSTER1}" "${POOL}" "${IMG}")
+    DEVS+=("${DEV}")
+    sudo mkfs.ext4 "${DEV}"
+    mkdir "${MNTPT}"
+    sudo mount "${DEV}" "${MNTPT}"
+    sudo chown "$(whoami)" "${MNTPT}"
+    # create mirror snapshots under I/O every few seconds
+    take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMG}" &
+    SNAP_PIDS+=($!)
+    slow_untar_workload "${MNTPT}" &
+    WORKLOAD_PIDS+=($!)
+  done
+  for pid in "${SNAP_PIDS[@]}"; do
+    wait "${pid}" || RET=$?
+  done
+  if ((RET != 0)); then
+    echo "take_mirror_snapshots failed"
+    exit 1
+  fi
+  for pid in "${WORKLOAD_PIDS[@]}"; do
+    wait "${pid}" || RET=$?
+  done
+  if ((RET != 0)); then
+    echo "slow_untar_workload failed"
+    exit 1
+  fi
+
+  for j in {1..10}; do
+    compare_demoted_promoted_image "${j}" &
+    COMPARE_PIDS+=($!)
+  done
+  for pid in "${COMPARE_PIDS[@]}"; do
+    wait "${pid}" || RET=$?
+  done
+  if ((RET != 0)); then
+    echo "compare_demoted_promoted_image failed"
+    exit 1
+  fi
+
+  for j in {1..10}; do
+    IMG=${IMG_PREFIX}${j}
+    # Allow for removal of non-primary image by checking that mirroring
+    # image status is "up+replaying"
+    wait_for_replaying_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMG}"
+    remove_image "${CLUSTER2}" "${POOL}" "${IMG}"
+    wait_for_image_removal "${CLUSTER1}" "${POOL}" "${IMG}"
+    rm -rf -- "${MNTPT_PREFIX:?}${j}"
+  done
+done
+
+echo OK