ceph/qa/standalone/osd/osd-recovery-stats.sh
xie xingguo 22786cffa8 osd/PG: force auth_log_shard to be primary when appropriate
So if there are a lot fo missing objects on primary, we can
make use of auth_log_shard to restore client I/O quickly.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
2018-08-31 16:29:25 +08:00

513 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Copyright (C) 2017 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
local dir=$1
shift
# Fix port????
export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
# so we will not force auth_log_shard to be acting_primary
CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
export margin=10
export objects=200
export poolname=test
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
function below_margin() {
local -i check=$1
shift
local -i target=$1
return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 ))
}
function above_margin() {
local -i check=$1
shift
local -i target=$1
return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 ))
}
FIND_UPACT='grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"'
FIND_FIRST='grep "pg[[]${PG}.*recovering.*_update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"'
FIND_LAST='grep "pg[[]${PG}.*recovering.*_update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"'
function check() {
local dir=$1
local PG=$2
local primary=$3
local type=$4
local degraded_start=$5
local degraded_end=$6
local misplaced_start=$7
local misplaced_end=$8
local primary_start=${9:-}
local primary_end=${10:-}
local log=$dir/osd.${primary}.log
local addp=" "
if [ "$type" = "erasure" ];
then
addp="p"
fi
UPACT=$(eval $FIND_UPACT)
# Check 3rd line at start because of false recovery starts
local which="degraded"
FIRST=$(eval $FIND_FIRST)
below_margin $FIRST $degraded_start || return 1
LAST=$(eval $FIND_LAST)
above_margin $LAST $degraded_end || return 1
# Check 3rd line at start because of false recovery starts
which="misplaced"
FIRST=$(eval $FIND_FIRST)
below_margin $FIRST $misplaced_start || return 1
LAST=$(eval $FIND_LAST)
above_margin $LAST $misplaced_end || return 1
# This is the value of set into MISSING_ON_PRIMARY
if [ -n "$primary_start" ];
then
which="shard $primary"
FIRST=$(eval $FIND_FIRST)
below_margin $FIRST $primary_start || return 1
LAST=$(eval $FIND_LAST)
above_margin $LAST $primary_end || return 1
fi
}
# [1,0,?] -> [1,2,4]
# degraded 500 -> 0
# active+recovering+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467
function do_recovery_out1() {
local dir=$1
shift
local type=$1
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
run_osd $dir 3 || return 1
run_osd $dir 4 || return 1
run_osd $dir 5 || return 1
if [ $type = "erasure" ];
then
ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
create_pool $poolname 1 1 $type myprofile
else
create_pool $poolname 1 1 $type
fi
wait_for_clean || return 1
for i in $(seq 1 $objects)
do
rados -p $poolname put obj$i /dev/null
done
local primary=$(get_primary $poolname obj1)
local PG=$(get_pg $poolname obj1)
# Only 2 OSDs so only 1 not primary
local otherosd=$(get_not_primary $poolname obj1)
ceph osd set norecover
kill $(cat $dir/osd.${otherosd}.pid)
ceph osd down osd.${otherosd}
ceph osd out osd.${otherosd}
ceph osd unset norecover
ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
sleep 2
wait_for_clean || return 1
check $dir $PG $primary $type $objects 0 0 0 || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
function TEST_recovery_replicated_out1() {
local dir=$1
do_recovery_out1 $dir replicated || return 1
}
function TEST_recovery_erasure_out1() {
local dir=$1
do_recovery_out1 $dir erasure || return 1
}
# [0, 1] -> [2,3,4,5]
# degraded 1000 -> 0
# misplaced 1000 -> 0
# missing on primary 500 -> 0
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748
function TEST_recovery_sizeup() {
local dir=$1
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
run_osd $dir 3 || return 1
run_osd $dir 4 || return 1
run_osd $dir 5 || return 1
create_pool $poolname 1 1
ceph osd pool set $poolname size 2
wait_for_clean || return 1
for i in $(seq 1 $objects)
do
rados -p $poolname put obj$i /dev/null
done
local primary=$(get_primary $poolname obj1)
local PG=$(get_pg $poolname obj1)
# Only 2 OSDs so only 1 not primary
local otherosd=$(get_not_primary $poolname obj1)
ceph osd set norecover
ceph osd out osd.$primary osd.$otherosd
ceph osd pool set test size 4
ceph osd unset norecover
# Get new primary
primary=$(get_primary $poolname obj1)
ceph tell osd.${primary} debug kick_recovery_wq 0
sleep 2
wait_for_clean || return 1
local degraded=$(expr $objects \* 2)
local misplaced=$(expr $objects \* 2)
local log=$dir/osd.${primary}.log
check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
# [0, 1, 2, 4] -> [3, 5]
# misplaced 1000 -> 0
# missing on primary 500 -> 0
# active+recovering+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248
function TEST_recovery_sizedown() {
local dir=$1
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
run_osd $dir 3 || return 1
run_osd $dir 4 || return 1
run_osd $dir 5 || return 1
create_pool $poolname 1 1
ceph osd pool set $poolname size 4
wait_for_clean || return 1
for i in $(seq 1 $objects)
do
rados -p $poolname put obj$i /dev/null
done
local primary=$(get_primary $poolname obj1)
local PG=$(get_pg $poolname obj1)
# Only 2 OSDs so only 1 not primary
local allosds=$(get_osds $poolname obj1)
ceph osd set norecover
for osd in $allosds
do
ceph osd out osd.$osd
done
ceph osd pool set test size 2
ceph osd unset norecover
ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
sleep 2
wait_for_clean || return 1
# Get new primary
primary=$(get_primary $poolname obj1)
local misplaced=$(expr $objects \* 2)
local log=$dir/osd.${primary}.log
check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1
UPACT=$(grep "pg[[]${PG}.*recovering.*_update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/")
# This is the value of set into MISSING_ON_PRIMARY
FIRST=$(grep "pg[[]${PG}.*recovering.*_update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/")
below_margin $FIRST $objects || return 1
LAST=$(grep "pg[[]${PG}.*recovering.*_update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/")
above_margin $LAST 0 || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
# [1] -> [1,2]
# degraded 300 -> 200
# active+recovering+undersized+degraded
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563
function TEST_recovery_undersized() {
local dir=$1
local osds=3
run_mon $dir a || return 1
run_mgr $dir x || return 1
for i in $(seq 0 $(expr $osds - 1))
do
run_osd $dir $i || return 1
done
create_pool $poolname 1 1
ceph osd pool set $poolname size 1
wait_for_clean || return 1
for i in $(seq 1 $objects)
do
rados -p $poolname put obj$i /dev/null
done
local primary=$(get_primary $poolname obj1)
local PG=$(get_pg $poolname obj1)
ceph osd set norecover
# Mark any osd not the primary (only 1 replica so also has no replica)
for i in $(seq 0 $(expr $osds - 1))
do
if [ $i = $primary ];
then
continue
fi
ceph osd out osd.$i
break
done
ceph osd pool set test size 4
ceph osd unset norecover
ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
# Give extra sleep time because code below doesn't have the sophistication of wait_for_clean()
sleep 10
flush_pg_stats || return 1
# Wait for recovery to finish
# Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded
# to active+undersized+degraded
for i in $(seq 1 60)
do
if ceph pg dump pgs | grep ^$PG | grep -qv recovering
then
break
fi
if [ $i = "60" ];
then
echo "Timeout waiting for recovery to finish"
return 1
fi
sleep 1
done
# Get new primary
primary=$(get_primary $poolname obj1)
local log=$dir/osd.${primary}.log
local first_degraded=$(expr $objects \* 3)
local last_degraded=$(expr $objects \* 2)
check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
# [1,0,2] -> [1,3,NONE]/[1,3,2]
# degraded 100 -> 0
# misplaced 100 -> 100
# active+recovering+degraded+remapped
# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242
function TEST_recovery_erasure_remapped() {
local dir=$1
run_mon $dir a || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
run_osd $dir 3 || return 1
ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd
create_pool $poolname 1 1 erasure myprofile
ceph osd pool set $poolname min_size 2
wait_for_clean || return 1
for i in $(seq 1 $objects)
do
rados -p $poolname put obj$i /dev/null
done
local primary=$(get_primary $poolname obj1)
local PG=$(get_pg $poolname obj1)
local otherosd=$(get_not_primary $poolname obj1)
ceph osd set norecover
kill $(cat $dir/osd.${otherosd}.pid)
ceph osd down osd.${otherosd}
ceph osd out osd.${otherosd}
# Mark osd not the primary and not down/out osd as just out
for i in 0 1 2 3
do
if [ $i = $primary ];
then
continue
fi
if [ $i = $otherosd ];
then
continue
fi
ceph osd out osd.$i
break
done
ceph osd unset norecover
ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
sleep 2
wait_for_clean || return 1
local log=$dir/osd.${primary}.log
check $dir $PG $primary erasure $objects 0 $objects $objects || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
function TEST_recovery_multi() {
local dir=$1
local osds=6
run_mon $dir a || return 1
run_mgr $dir x || return 1
for i in $(seq 0 $(expr $osds - 1))
do
run_osd $dir $i || return 1
done
create_pool $poolname 1 1
ceph osd pool set $poolname size 3
ceph osd pool set $poolname min_size 1
wait_for_clean || return 1
rados -p $poolname put obj1 /dev/null
local primary=$(get_primary $poolname obj1)
local otherosd=$(get_not_primary $poolname obj1)
ceph osd set noout
ceph osd set norecover
kill $(cat $dir/osd.${otherosd}.pid)
ceph osd down osd.${otherosd}
local half=$(expr $objects / 2)
for i in $(seq 2 $half)
do
rados -p $poolname put obj$i /dev/null
done
kill $(cat $dir/osd.${primary}.pid)
ceph osd down osd.${primary}
run_osd $dir ${otherosd}
sleep 3
for i in $(seq $(expr $half + 1) $objects)
do
rados -p $poolname put obj$i /dev/null
done
local PG=$(get_pg $poolname obj1)
local otherosd=$(get_not_primary $poolname obj$objects)
ceph osd unset noout
ceph osd out osd.$primary osd.$otherosd
run_osd $dir ${primary}
sleep 3
ceph osd pool set test size 4
ceph osd unset norecover
ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0
sleep 2
wait_for_clean || return 1
# Get new primary
primary=$(get_primary $poolname obj1)
local log=$dir/osd.${primary}.log
check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1
delete_pool $poolname
kill_daemons $dir || return 1
}
main osd-recovery-stats "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
# End: