mirror of
https://github.com/ceph/ceph
synced 2024-12-21 19:02:10 +00:00
21790484dc
Initially the asok commands were registered only after the image replayer start (and unregistered on stop) because their names were built using remote pool and image names, which became known only after start. Now, the asok commands are registered on the image replayer construction using the temporary name "remote_pool_name/global_image_id". They are re-registered using "remote_pool_name/remote_image_name" when the image replayer is started. Also the commands are not unregistered on the image replayer stop. Signed-off-by: Mykola Golub <mgolub@mirantis.com>
536 lines
14 KiB
Bash
Executable File
536 lines
14 KiB
Bash
Executable File
#!/bin/sh
|
|
#
|
|
# rbd_mirror.sh - test rbd-mirror daemon
|
|
#
|
|
# The script starts two ("local" and "remote") clusters using mstart.sh script,
|
|
# creates a temporary directory, used for cluster configs, daemon logs, admin
|
|
# socket, temporary files, and launches rbd-mirror daemon.
|
|
#
|
|
# There are several env variables useful when troubleshooting a test failure:
|
|
#
|
|
# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes,
|
|
# destroy the clusters and remove the temp directory)
|
|
# on exit, so it is possible to check the test state
|
|
# after failure.
|
|
# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory
|
|
# (should not exist) instead of running mktemp(1).
|
|
#
|
|
# The cleanup can be done as a separate step, running the script with
|
|
# `cleanup ${RBD_MIRROR_TEMDIR}' arguments.
|
|
#
|
|
# Note, as other workunits tests, rbd_mirror.sh expects to find ceph binaries
|
|
# in PATH.
|
|
#
|
|
# Thus a typical troubleshooting session:
|
|
#
|
|
# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with
|
|
# TEMPDIR pointing to a known location:
|
|
#
|
|
# cd $CEPH_SRC_PATH
|
|
# PATH=$CEPH_SRC_PATH:$PATH
|
|
# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
|
|
# ../qa/workunits/rbd/rbd_mirror.sh
|
|
#
|
|
# After the test failure cd to TEMPDIR and check the current state:
|
|
#
|
|
# cd /tmp/tmp.rbd_mirror
|
|
# ls
|
|
# less rbd-mirror.cluster1_daemon.$pid.log
|
|
# ceph --cluster cluster1 -s
|
|
# ceph --cluster cluster2 -s
|
|
# rbd --cluster cluster2 -p mirror ls
|
|
# rbd --cluster cluster2 -p mirror journal status --image test
|
|
# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.asok help
|
|
# ...
|
|
#
|
|
# Also you can execute commands (functions) from the script:
|
|
#
|
|
# cd $CEPH_SRC_PATH
|
|
# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror
|
|
# ../qa/workunits/rbd/rbd_mirror.sh status
|
|
# ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1
|
|
# ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2
|
|
# ../qa/workunits/rbd/rbd_mirror.sh flush cluster2
|
|
# ...
|
|
#
|
|
# Eventually, run the cleanup:
|
|
#
|
|
# cd $CEPH_SRC_PATH
|
|
# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
|
|
# ../qa/workunits/rbd/rbd_mirror.sh cleanup
|
|
#
|
|
|
|
# Names of the two clusters used by the test and of the mirrored pool.
CLUSTER1=cluster1
CLUSTER2=cluster2
POOL=mirror
# Ceph "src" directory, resolved relative to the location of this script.
# Quoted so that a script path containing whitespace still resolves.
SRC_DIR=$(readlink -f "$(dirname "$0")/../../../src")
# Working directory; assigned in setup() or taken from RBD_MIRROR_TEMDIR.
TEMPDIR=
|
|
|
|
# These vars facilitate running this script in an environment with
|
|
# ceph installed from packages, like teuthology. These are not defined
|
|
# by default.
|
|
#
|
|
# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters
|
|
# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror
|
|
# running as ceph client $CEPH_ID. If empty,
|
|
# this script will start and stop rbd-mirror
|
|
|
|
#
|
|
# Functions
|
|
#
|
|
|
|
# Print the admin socket path of an rbd-mirror daemon.
#   $1 - cluster the daemon was started for (the "local" cluster)
#   $2 - cluster name that is substituted into the socket file name
# With RBD_MIRROR_USE_RBD_MIRROR set, the path is read from the ceph
# configuration of client.${CEPH_ID}; otherwise it lives in ${TEMPDIR}.
daemon_asok_file()
{
    local local_cluster=$1
    local cluster=$2

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        # Emit ceph-conf's output directly instead of re-splitting it
        # through an unquoted `echo $(...)`.
        ceph-conf --cluster "${local_cluster}" --name "client.${CEPH_ID}" \
            'admin socket'
    else
        echo "${TEMPDIR}/rbd-mirror.${local_cluster}_daemon.${cluster}.asok"
    fi
}
|
|
|
|
# Print the pid file path of the rbd-mirror daemon of a cluster.
#   $1 - cluster the daemon was started for
# With RBD_MIRROR_USE_RBD_MIRROR set, the path is read from the ceph
# configuration of client.${CEPH_ID}; otherwise it lives in ${TEMPDIR}.
daemon_pid_file()
{
    local cluster=$1

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        # Emit ceph-conf's output directly instead of re-splitting it
        # through an unquoted `echo $(...)`.
        ceph-conf --cluster "${cluster}" --name "client.${CEPH_ID}" 'pid file'
    else
        echo "${TEMPDIR}/rbd-mirror.${cluster}_daemon.pid"
    fi
}
|
|
|
|
# Prepare the test environment: install the cleanup trap, create the
# working directory, start both clusters (unless
# RBD_MIRROR_USE_EXISTING_CLUSTER is set), then create the pool on each
# cluster, enable pool mirroring and register each cluster as the other's
# peer.
# Returns non-zero if the working directory cannot be created or entered.
setup()
{
    # Run cleanup however the script terminates.
    trap cleanup INT TERM EXIT

    if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
        # mkdir (without -p) fails if the directory already exists.
        mkdir "${RBD_MIRROR_TEMDIR}" || return 1
        TEMPDIR="${RBD_MIRROR_TEMDIR}"
    else
        TEMPDIR=$(mktemp -d) || return 1
    fi

    if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
        cd "${SRC_DIR}" || return 1
        ./mstart.sh "${CLUSTER1}" -n
        ./mstart.sh "${CLUSTER2}" -n

        # Expose each cluster's config as <cluster>.conf in the working
        # directory.
        ln -s "$(readlink -f "run/${CLUSTER1}/ceph.conf")" \
            "${TEMPDIR}/${CLUSTER1}.conf"
        ln -s "$(readlink -f "run/${CLUSTER2}/ceph.conf")" \
            "${TEMPDIR}/${CLUSTER2}.conf"

        cd "${TEMPDIR}" || return 1
    fi

    ceph --cluster "${CLUSTER1}" osd pool create "${POOL}" 64 64
    ceph --cluster "${CLUSTER2}" osd pool create "${POOL}" 64 64

    rbd --cluster "${CLUSTER1}" mirror pool enable "${POOL}" pool
    rbd --cluster "${CLUSTER2}" mirror pool enable "${POOL}" pool

    rbd --cluster "${CLUSTER1}" mirror pool peer add "${POOL}" "${CLUSTER2}"
    rbd --cluster "${CLUSTER2}" mirror pool peer add "${POOL}" "${CLUSTER1}"
}
|
|
|
|
# Tear down the test environment: stop both rbd-mirror daemons, stop the
# clusters (or, with RBD_MIRROR_USE_EXISTING_CLUSTER set, only remove the
# pool from each) and delete the working directory.
# Does nothing when RBD_MIRROR_NOCLEANUP is non-empty.
cleanup()
{
    if [ -n "${RBD_MIRROR_NOCLEANUP}" ]; then
        return 0
    fi

    # From here on keep going even if a step fails.
    set +e

    stop_mirror "${CLUSTER1}"
    stop_mirror "${CLUSTER2}"

    if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
        # Only run mstop.sh when we really are in the source directory.
        if cd "${SRC_DIR}"; then
            ./mstop.sh "${CLUSTER1}"
            ./mstop.sh "${CLUSTER2}"
        fi
    else
        ceph --cluster "${CLUSTER1}" osd pool rm "${POOL}" "${POOL}" --yes-i-really-really-mean-it
        ceph --cluster "${CLUSTER2}" osd pool rm "${POOL}" "${POOL}" --yes-i-really-really-mean-it
    fi

    # Quoted, and skipped when empty, so that only the working directory
    # itself can ever be removed.
    if [ -n "${TEMPDIR}" ]; then
        rm -Rf "${TEMPDIR}"
    fi
}
|
|
|
|
# Start a daemonized rbd-mirror for the given cluster, with its pid file,
# log file and admin socket placed under ${TEMPDIR}.
#   $1 - cluster the daemon is started for
# Does nothing when RBD_MIRROR_USE_RBD_MIRROR is set.
start_mirror()
{
    local cluster=$1
    local pid_file

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        return 0
    fi

    pid_file=$(daemon_pid_file "${cluster}")

    # \$cluster and \$pid are passed through literally; presumably
    # rbd-mirror expands these metavariables itself -- TODO confirm.
    rbd-mirror \
        --cluster "${cluster}" \
        --pid-file="${pid_file}" \
        "--log-file=${TEMPDIR}/rbd-mirror.\$cluster_daemon.\$pid.log" \
        "--admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.asok" \
        --debug-rbd=30 --debug-journaler=30 \
        --debug-rbd_mirror=30 \
        --daemonize=true
}
|
|
|
|
# Stop the rbd-mirror daemon of the given cluster and remove its pid file
# and admin sockets.
#   $1 - cluster the daemon was started for
# Does nothing when RBD_MIRROR_USE_RBD_MIRROR is set.
stop_mirror()
{
    local cluster=$1
    local pid pid_file s

    if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
        return 0
    fi

    pid_file=$(daemon_pid_file "${cluster}")
    # A missing or unreadable pid file simply leaves pid empty.
    pid=$(cat "${pid_file}" 2>/dev/null) || :
    if [ -n "${pid}" ]; then
        kill "${pid}"
        # Poll with increasing delays until the pid is gone from ps; awk
        # exits 1 while the process is still listed.
        for s in 1 2 4 8 16 32; do
            sleep "${s}"
            ps auxww | awk -v pid="${pid}" '$2 == pid {print; exit 1}' && break
        done
        # Final check: non-zero status if the process is still there.
        ps auxww | awk -v pid="${pid}" '$2 == pid {print; exit 1}'
    fi
    rm -f "$(daemon_asok_file "${cluster}" "${CLUSTER1}")"
    rm -f "$(daemon_asok_file "${cluster}" "${CLUSTER2}")"
    rm -f "${pid_file}"
}
|
|
|
|
# Print a diagnostic report: for each cluster the ceph status, the images
# in ${POOL}, the mirror pool status and, per image, its info and journal
# status; then for each cluster the state of its rbd-mirror daemon (pid
# file, ps entry, admin socket and "rbd mirror status").
# Returns 1 if a daemon's pid cannot be read, its process is not found or
# its admin socket is missing; 0 otherwise. A missing pid file alone is
# reported but does not change the return value.
status()
{
    local cluster image pid pid_file asok_file
    local ret=0

    for cluster in ${CLUSTER1} ${CLUSTER2}; do
        echo "${cluster} status"
        ceph --cluster "${cluster}" -s
        echo

        echo "${cluster} ${POOL} images"
        rbd --cluster "${cluster}" -p "${POOL}" ls
        echo

        echo "${cluster} ${POOL} mirror pool status"
        rbd --cluster "${cluster}" -p "${POOL}" mirror pool status --verbose
        echo

        # Image names are split on whitespace on purpose.
        for image in $(rbd --cluster "${cluster}" -p "${POOL}" ls 2>/dev/null); do
            echo "image ${image} info"
            rbd --cluster "${cluster}" -p "${POOL}" info "${image}"
            echo
            echo "image ${image} journal status"
            rbd --cluster "${cluster}" -p "${POOL}" journal status --image "${image}"
            echo
        done
    done

    for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
        pid_file=$(daemon_pid_file "${cluster}")
        if [ ! -e "${pid_file}" ]; then
            echo "${cluster} rbd-mirror not running or unknown" \
                "(${pid_file} not exist)"
            continue
        fi

        pid=$(cat "${pid_file}" 2>/dev/null) || :
        if [ -z "${pid}" ]; then
            echo "${cluster} rbd-mirror not running or unknown" \
                "(can't find pid using ${pid_file})"
            ret=1
            continue
        fi

        echo "${cluster} rbd-mirror process in ps output:"
        # awk prints the ps header plus the matching line and exits 1 when
        # the pid is found, so a zero status means "not running".
        if ps auxww |
            awk -v pid="${pid}" 'NR == 1 {print} $2 == pid {print; exit 1}'
        then
            echo
            echo "${cluster} rbd-mirror not running" \
                "(can't find pid $pid in ps output)"
            ret=1
            continue
        fi
        echo

        asok_file=$(daemon_asok_file "${cluster}" "${cluster}")
        if [ ! -S "${asok_file}" ]; then
            echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)"
            ret=1
            continue
        fi

        echo "${cluster} rbd-mirror status"
        ceph --admin-daemon "${asok_file}" rbd mirror status
        echo
    done

    return ${ret}
}
|
|
|
|
# Send "rbd mirror flush" to the rbd-mirror daemon of a cluster through
# its admin socket.
#   $1 - cluster the daemon was started for
#   $2 - optional image name; when given, "${POOL}/<image>" is appended to
#        the command
# Returns 1 if the admin socket does not exist, otherwise the status of
# the ceph command.
flush()
{
    local cluster=$1
    local image=$2
    local cmd="rbd mirror flush"
    local asok_file

    if [ -n "${image}" ]; then
        cmd="${cmd} ${POOL}/${image}"
    fi

    asok_file=$(daemon_asok_file "${cluster}" "${cluster}")
    # Fail explicitly rather than relying on errexit being enabled.
    if [ ! -S "${asok_file}" ]; then
        echo "flush: admin socket '${asok_file}' not found" >&2
        return 1
    fi

    # ${cmd} is left unquoted on purpose: it must split into words.
    ceph --admin-daemon "${asok_file}" ${cmd}
}
|
|
|
|
# Check whether the replay state of an image matches the expected one.
#   $1 - cluster whose rbd-mirror daemon is queried
#   $2 - image name
#   $3 - expected state: "started" or "stopped"
# The state counts as "started" only if the daemon's help output lists an
# "rbd mirror status ${POOL}/<image>" command and that command reports a
# state matching "Replaying"; in every other case it is "stopped".
# Returns 0 when the expected and the observed state are equal.
test_image_replay_state()
{
    local cluster=$1
    local image=$2
    local test_state=$3
    local current_state=stopped
    local asok_file

    asok_file=$(daemon_asok_file "${cluster}" "${cluster}")
    # Result deliberately not acted upon here: it only aborts the script
    # when errexit applies to this call.
    test -S "${asok_file}"

    ceph --admin-daemon "${asok_file}" help |
        grep -F "\"rbd mirror status ${POOL}/${image}\"" &&
    ceph --admin-daemon "${asok_file}" rbd mirror status "${POOL}/${image}" |
        grep -i 'state.*Replaying' &&
    current_state=started

    test "${test_state}" = "${current_state}"
}
|
|
|
|
# Poll until the replay state of an image equals the requested state.
#   $1 - cluster whose rbd-mirror daemon is queried
#   $2 - image name
#   $3 - state to wait for
# Returns 0 as soon as the state matches, 1 once all delays are used up.
wait_for_image_replay_state()
{
    local cluster=$1
    local image=$2
    local state=$3
    local s

    # TODO: add a way to force rbd-mirror to update replayers
    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        sleep ${s}
        if test_image_replay_state "${cluster}" "${image}" "${state}"; then
            return 0
        fi
    done
    return 1
}
|
|
|
|
# Wait until image $2 reaches the "started" replay state on cluster $1.
wait_for_image_replay_started()
{
    wait_for_image_replay_state "$1" "$2" started
}
|
|
|
|
# Wait until image $2 reaches the "stopped" replay state on cluster $1.
wait_for_image_replay_stopped()
{
    wait_for_image_replay_state "$1" "$2" stopped
}
|
|
|
|
# Print the first commit position recorded in an image's journal status
# for the client whose id matches a regular expression.
#   $1 - cluster to query
#   $2 - image name
#   $3 - regular expression (sed -E syntax) for the client id
# The journal status is saved under ${TEMPDIR}, in a file named after the
# queried cluster, and is also copied to stderr.
get_position()
{
    local cluster=$1
    local image=$2
    local id_regexp=$3
    local status_log="${TEMPDIR}/${cluster}-${POOL}-${image}.status"

    # Parse line like below, looking for the first position
    # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]]

    rbd --cluster "${cluster}" -p "${POOL}" journal status --image "${image}" |
        tee "${status_log}" >&2
    sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*$/\1/p' \
        "${status_log}"
}
|
|
|
|
# Print the first commit position of the journal client with an empty id
# for image $2 on cluster $1.
get_master_position()
{
    get_position "$1" "$2" ''
}
|
|
|
|
# Print the first commit position of the journal client with a non-empty
# id for image $2 on cluster $1.
get_mirror_position()
{
    get_position "$1" "$2" '..*'
}
|
|
|
|
# Wait until the mirror has replayed everything written to an image.
#   $1 - cluster whose rbd-mirror daemon does the replay (it is flushed)
#   $2 - cluster whose journal positions are inspected
#   $3 - image name
# Returns 0 once the master position is non-empty and equal to the mirror
# position, 1 if that does not happen before the delays are used up.
wait_for_replay_complete()
{
    local local_cluster=$1
    local cluster=$2
    local image=$3
    local s master_pos mirror_pos

    for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16; do
        sleep "${s}"
        flush "${local_cluster}" "${image}"
        master_pos=$(get_master_position "${cluster}" "${image}")
        mirror_pos=$(get_mirror_position "${cluster}" "${image}")
        # Two separate tests instead of the obsolescent, ambiguous `-a`.
        if [ -n "${master_pos}" ] && [ "${master_pos}" = "${mirror_pos}" ]; then
            return 0
        fi
    done
    return 1
}
|
|
|
|
# Check the mirror status of an image as reported by
# "rbd mirror image status".
#   $1 - cluster to query
#   $2 - image name
#   $3 - pattern the "state:" line must match
#   $4 - optional pattern the "description:" line must match
# The status output is printed and saved under ${TEMPDIR}.
# Returns 0 only if both the state and the description line match.
test_status_in_pool_dir()
{
    local cluster=$1
    local image=$2
    local state_pattern=$3
    local description_pattern=$4
    local status_log="${TEMPDIR}/${cluster}-${image}.mirror_status"

    rbd --cluster "${cluster}" -p "${POOL}" mirror image status "${image}" |
        tee "${status_log}"
    # Chained so that a state mismatch is reported through the return
    # status even when errexit is not enabled.
    grep "state: .*${state_pattern}" "${status_log}" &&
        grep "description: .*${description_pattern}" "${status_log}"
}
|
|
|
|
# Create a 128-unit image (rbd's "--size 128") in ${POOL} with the
# exclusive-lock and journaling features enabled.
#   $1 - cluster to create the image in
#   $2 - image name
create_image()
{
    local cluster_name=$1
    local image_name=$2

    rbd --cluster ${cluster_name} -p ${POOL} create --size 128 \
        --image-feature exclusive-lock --image-feature journaling \
        ${image_name}
}
|
|
|
|
# Write random 4096-byte IOs to an image using "rbd bench-write".
#   $1 - cluster holding the image
#   $2 - image name
#   $3 - number of IOs; the total written is 4096 times this value
write_image()
{
    local cluster_name=$1
    local image_name=$2
    local num_ios=$3

    rbd --cluster ${cluster_name} -p ${POOL} bench-write ${image_name} \
        --io-size 4096 --io-threads 1 --io-total $((4096 * num_ios)) \
        --io-pattern rand
}
|
|
|
|
# Export an image from both clusters into ${TEMPDIR} and compare the two
# exports byte by byte.
#   $1 - image name
# Returns the status of cmp: 0 when the exports are identical.
compare_images()
{
    local image=$1
    local rmt_export="${TEMPDIR}/${CLUSTER2}-${POOL}-${image}.export"
    local loc_export="${TEMPDIR}/${CLUSTER1}-${POOL}-${image}.export"

    # Remove stale exports first so that the fresh ones start from scratch.
    rm -f "${rmt_export}" "${loc_export}"
    rbd --cluster "${CLUSTER2}" -p "${POOL}" export "${image}" "${rmt_export}"
    rbd --cluster "${CLUSTER1}" -p "${POOL}" export "${image}" "${loc_export}"
    cmp "${rmt_export}" "${loc_export}"
}
|
|
|
|
# Demote image $2 of ${POOL} on cluster $1 via "rbd mirror image demote".
demote_image()
{
    local cluster_name=$1
    local image_name=$2

    rbd --cluster=${cluster_name} mirror image demote ${POOL}/${image_name}
}
|
|
|
|
# Promote image $2 of ${POOL} on cluster $1 via "rbd mirror image promote".
promote_image()
{
    local cluster_name=$1
    local image_name=$2

    rbd --cluster=${cluster_name} mirror image promote ${POOL}/${image_name}
}
|
|
|
|
#
|
|
# Main
|
|
#
|
|
|
|
# Command mode: when arguments are given, run them as a command (normally
# one of the functions defined in this script) inside the existing
# working directory named by RBD_MIRROR_TEMDIR, then exit with its status.
if [ "$#" -gt 0 ]; then
    if [ -z "${RBD_MIRROR_TEMDIR}" ]; then
        echo "RBD_MIRROR_TEMDIR is not set" >&2
        exit 1
    fi

    TEMPDIR="${RBD_MIRROR_TEMDIR}"
    cd "${TEMPDIR}" || exit 1
    # Quoted so that each argument reaches the command unchanged.
    "$@"
    exit $?
fi
|
|
|
|
# Trace every command and abort on the first failing one.
set -xe

setup

# A new image on cluster2 must be picked up and replayed on cluster1.
echo "TEST: add image and test replay"
start_mirror ${CLUSTER1}
image=test
create_image ${CLUSTER2} ${image}
wait_for_image_replay_started ${CLUSTER1} ${image}
write_image ${CLUSTER2} ${image} 100
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${image}
test_status_in_pool_dir ${CLUSTER1} ${image} 'up+replaying' 'master_position'
test_status_in_pool_dir ${CLUSTER2} ${image} 'down+unknown'
compare_images ${image}

# An image created and written while the daemon is down must be replayed
# once the daemon is started again.
echo "TEST: stop mirror, add image, start mirror and test replay"
stop_mirror ${CLUSTER1}
image1=test1
create_image ${CLUSTER2} ${image1}
write_image ${CLUSTER2} ${image1} 100
start_mirror ${CLUSTER1}
wait_for_image_replay_started ${CLUSTER1} ${image1}
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${image1}
test_status_in_pool_dir ${CLUSTER1} ${image1} 'up+replaying' 'master_position'
test_status_in_pool_dir ${CLUSTER2} ${image1} 'down+unknown'
compare_images ${image1}

# The image from the first test must still replay after the restart.
echo "TEST: test the first image is replaying after restart"
write_image ${CLUSTER2} ${image} 100
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${image}
test_status_in_pool_dir ${CLUSTER1} ${image} 'up+replaying' 'master_position'
compare_images ${image}

echo "TEST: failover and failback"
start_mirror ${CLUSTER2}

# failover
demote_image ${CLUSTER2} ${image}
wait_for_image_replay_stopped ${CLUSTER1} ${image}
test_status_in_pool_dir ${CLUSTER1} ${image} 'up+stopped'
test_status_in_pool_dir ${CLUSTER2} ${image} 'up+stopped'
promote_image ${CLUSTER1} ${image}
wait_for_image_replay_started ${CLUSTER2} ${image}
write_image ${CLUSTER1} ${image} 100
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${image}
test_status_in_pool_dir ${CLUSTER1} ${image} 'up+stopped'
test_status_in_pool_dir ${CLUSTER2} ${image} 'up+replaying' 'master_position'
compare_images ${image}

# failback
demote_image ${CLUSTER1} ${image}
wait_for_image_replay_stopped ${CLUSTER2} ${image}
test_status_in_pool_dir ${CLUSTER2} ${image} 'up+stopped'
promote_image ${CLUSTER2} ${image}
wait_for_image_replay_started ${CLUSTER1} ${image}
write_image ${CLUSTER2} ${image} 100
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${image}
test_status_in_pool_dir ${CLUSTER1} ${image} 'up+replaying' 'master_position'
test_status_in_pool_dir ${CLUSTER2} ${image} 'up+stopped'
compare_images ${image}

echo OK
|