osd/PeeringState: restart peering on any previous down acting member coming back
One of our customers wants to verify the data safety of Ceph while scaling
the cluster up, and the test case looks like:
- keep checking the status of a specified pg, whose up set is [1, 2, 3]
- add more osds: up [1, 2, 3] -> up [1, 4, 5], acting = [1, 2, 3], backfill_targets = [4, 5],
  pg is remapped
- stop osd.2: up [1, 4, 5], acting = [1, 3], backfill_targets = [4, 5], pg is undersized
- restart osd.2: acting stays unchanged because osd.2 belongs to neither the current up nor the
  current acting set, leaving the pg stuck undersized for a long time until all backfill
  targets complete
This does not pose any critical problem -- the pg eventually gets back to active + clean --
but the long-lived DEGRADED warnings keep bothering our customer, who cares about data
safety more than anything else.
The right fix is for:
boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
to check whether the newly booted OSD could be validly chosen for the acting set and, if so,
request a new temp mapping. The new temp mapping then triggers a real interval change
that gets rid of the DEGRADED warning.
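To illustrate the shape of that check, here is a small self-contained sketch; it models the
decision with plain std types and an invented helper name and is not the actual PeeringState
code -- only the scenario above and the "requesting pg_temp change" log text (which the test
below greps for) come from this change:

    // Toy model: react to a notify from an OSD that belonged to the previous
    // acting set but currently sits in neither the up nor the acting set; in
    // that case the primary should ask the monitors for a pg_temp mapping so
    // that a new interval starts and the pg re-peers.
    #include <iostream>
    #include <set>

    bool should_request_pg_temp(int notifier,
                                const std::set<int>& up,
                                const std::set<int>& acting,
                                const std::set<int>& prior_acting)
    {
        return prior_acting.count(notifier) &&
               !up.count(notifier) &&
               !acting.count(notifier);
    }

    int main()
    {
        // The scenario above: up = [1, 4, 5], acting = [1, 3], and osd.2 (a
        // member of the previous acting set [1, 2, 3]) comes back.
        std::set<int> up{1, 4, 5}, acting{1, 3}, prior_acting{1, 2, 3};
        if (should_request_pg_temp(2, up, acting, prior_acting))
            std::cout << "Active: got notify from previous acting member 2,"
                         " requesting pg_temp change" << std::endl;
        return 0;
    }

The real handler additionally has to confirm that the returning OSD would in fact be chosen
for the acting set before requesting the pg_temp change; the standalone test below asserts
the behaviour by grepping the OSD logs for that message.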
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
Signed-off-by: Yan Jun <yan.jun8@zte.com.cn>
#!/usr/bin/env bash
#
# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn>
#
# Author: xie xingguo <xie.xingguo@zte.com.cn>
# Author: Yan Jun <yan.jun8@zte.com.cn>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export poolname=test
    export testobjects=100
    export loglen=12
    export trim=$(expr $loglen / 2)
    export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    # so we will not force auth_log_shard to be acting_primary
    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
    # use small pg_log settings, so we always do backfill instead of recovery
    CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim "
    CEPH_ARGS+="--osd_mclock_profile=high_recovery_ops "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}

function TEST_repeer_on_down_acting_member_coming_back() {
    local dir=$1
    local dummyfile='/etc/fstab'

    local num_osds=6
    local osds="$(seq 0 $(expr $num_osds - 1))"
    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    for i in $osds
    do
        run_osd $dir $i || return 1
    done

    create_pool $poolname 1 1
    ceph osd pool set $poolname size 3
    ceph osd pool set $poolname min_size 2
    local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid')
    local pgid=$poolid.0

    # enable required feature-bits for upmap
    ceph osd set-require-min-compat-client luminous
    # reset up to [1,2,3]
    ceph osd pg-upmap $pgid 1 2 3 || return 1

    flush_pg_stats || return 1
    wait_for_clean || return 1

echo "writing initial objects"
|
|
|
|
# write a bunch of objects
|
|
|
|
for i in $(seq 1 $testobjects)
|
|
|
|
do
|
|
|
|
rados -p $poolname put existing_$i $dummyfile
|
|
|
|
done
|
|
|
|
|
|
|
|
WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean
|
|
|
|
|
|
|
|
    # reset up to [1,4,5]
    ceph osd pg-upmap $pgid 1 4 5 || return 1

    # wait for peering to complete
    sleep 2

    # make sure osd.2 belongs to current acting set
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1

    # kill osd.2
    kill_daemons $dir KILL osd.2 || return 1
    ceph osd down osd.2

    # again, wait for peering to complete
    sleep 2

    # osd.2 should have been moved out from acting set
    ceph pg $pgid query | jq '.acting' | grep 2 && return 1

    # bring up osd.2
    activate_osd $dir 2 || return 1
    wait_for_osd up 2

    # again, wait for peering to complete
    sleep 2

    # primary should be able to re-add osd.2 into acting
    ceph pg $pgid query | jq '.acting' | grep 2 || return 1
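    # Aside (illustration only, not part of the original test): the re-peer is
    # driven by a pg_temp mapping that the primary requests from the monitors;
    # while it is in effect it shows up in the osdmap, e.g.:
    #
    #     ceph osd dump | grep pg_temp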

    WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean

    if ! grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log')
    then
        echo failure
        return 1
    fi
    echo "success"

    delete_pool $poolname
    kill_daemons $dir || return 1
}

main repeer-on-acting-back "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh"
# End: