mirror of
https://github.com/ceph/ceph
synced 2025-01-31 07:22:56 +00:00
Merge pull request #18145 from dzafman/wip-18162
osd: object added to missing set for backfill, but is not in recovering, error! Reviewed-by: Josh Durgin <jdurgin@redhat.com>
This commit is contained in:
commit
a5e917fb9b
@ -56,6 +56,13 @@ function setup_osds() {
|
||||
grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1
|
||||
}
|
||||
|
||||
# Print the "state" field of one placement group.
#
# $1 - PG id (e.g. 2.0)
#
# Dumps all PGs as JSON with `ceph pg dump pgs` (stderr discarded) and
# lets jq select the entry whose pgid equals $1.
function get_state() {
    local pgid=$1
    local sname=state
    local filter=".[] | select(.pgid==\"$pgid\") | .$sname"

    ceph --format json pg dump pgs 2>/dev/null | jq -r "$filter"
}
|
||||
|
||||
function create_erasure_coded_pool() {
|
||||
local poolname=$1
|
||||
shift
|
||||
@ -116,39 +123,6 @@ function rados_get() {
|
||||
rm $dir/COPY
|
||||
}
|
||||
|
||||
function rados_put_get() {
|
||||
local dir=$1
|
||||
local poolname=$2
|
||||
local objname=${3:-SOMETHING}
|
||||
local recovery=$4
|
||||
|
||||
#
|
||||
# get and put an object, compare they are equal
|
||||
#
|
||||
rados_put $dir $poolname $objname || return 1
|
||||
# We can read even though caller injected read error on one of the shards
|
||||
rados_get $dir $poolname $objname || return 1
|
||||
|
||||
if [ -n "$recovery" ];
|
||||
then
|
||||
#
|
||||
# take out the last OSD used to store the object,
|
||||
# bring it back, and check for clean PGs which means
|
||||
# recovery didn't crash the primary.
|
||||
#
|
||||
local -a initial_osds=($(get_osds $poolname $objname))
|
||||
local last_osd=${initial_osds[-1]}
|
||||
# Kill OSD
|
||||
kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
|
||||
ceph osd out ${last_osd} || return 1
|
||||
! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
|
||||
ceph osd in ${last_osd} || return 1
|
||||
run_osd $dir ${last_osd} || return 1
|
||||
wait_for_clean || return 1
|
||||
fi
|
||||
|
||||
rm $dir/ORIGINAL
|
||||
}
|
||||
|
||||
function inject_remove() {
|
||||
local pooltype=$1
|
||||
@ -169,33 +143,15 @@ function inject_remove() {
|
||||
objectstore_tool $dir $osd_id $objname remove || return 1
|
||||
}
|
||||
|
||||
function rados_get_data_recovery() {
|
||||
local inject=$1
|
||||
shift
|
||||
local dir=$1
|
||||
shift
|
||||
local shard_id=$1
|
||||
|
||||
# inject eio to specified shard
|
||||
#
|
||||
local poolname=pool-jerasure
|
||||
local objname=obj-$inject-$$-$shard_id
|
||||
inject_$inject ec data $poolname $objname $dir $shard_id || return 1
|
||||
rados_put_get $dir $poolname $objname recovery || return 1
|
||||
|
||||
shard_id=$(expr $shard_id + 1)
|
||||
inject_$inject ec data $poolname $objname $dir $shard_id || return 1
|
||||
# Now 2 out of 3 shards get EIO, so should fail
|
||||
rados_get $dir $poolname $objname fail || return 1
|
||||
}
|
||||
|
||||
# Test with an inject error
|
||||
function rados_get_data() {
|
||||
function rados_put_get_data() {
|
||||
local inject=$1
|
||||
shift
|
||||
local dir=$1
|
||||
shift
|
||||
local shard_id=$1
|
||||
shift
|
||||
local arg=$1
|
||||
|
||||
# inject eio to specified shard
|
||||
#
|
||||
@ -205,10 +161,29 @@ function rados_get_data() {
|
||||
inject_$inject ec data $poolname $objname $dir $shard_id || return 1
|
||||
rados_get $dir $poolname $objname || return 1
|
||||
|
||||
if [ "$arg" = "recovery" ];
|
||||
then
|
||||
#
|
||||
# take out the last OSD used to store the object,
|
||||
# bring it back, and check for clean PGs which means
|
||||
# recovery didn't crash the primary.
|
||||
#
|
||||
local -a initial_osds=($(get_osds $poolname $objname))
|
||||
local last_osd=${initial_osds[-1]}
|
||||
# Kill OSD
|
||||
kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
|
||||
ceph osd out ${last_osd} || return 1
|
||||
! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1
|
||||
ceph osd in ${last_osd} || return 1
|
||||
run_osd $dir ${last_osd} || return 1
|
||||
wait_for_clean || return 1
|
||||
fi
|
||||
|
||||
shard_id=$(expr $shard_id + 1)
|
||||
inject_$inject ec data $poolname $objname $dir $shard_id || return 1
|
||||
# Now 2 out of 3 shards get an error, so should fail
|
||||
rados_get $dir $poolname $objname fail || return 1
|
||||
rm $dir/ORIGINAL
|
||||
}
|
||||
|
||||
# Change the size of specified shard
|
||||
@ -266,6 +241,7 @@ function rados_get_data_bad_size() {
|
||||
shard_id=$(expr $shard_id + 1)
|
||||
set_size $objname $dir $shard_id $bytes $mode || return 1
|
||||
rados_get $dir $poolname $objname fail || return 1
|
||||
rm $dir/ORIGINAL
|
||||
}
|
||||
|
||||
#
|
||||
@ -283,7 +259,7 @@ function TEST_rados_get_subread_eio_shard_0() {
|
||||
create_erasure_coded_pool $poolname 2 1 || return 1
|
||||
# inject eio on primary OSD (0) and replica OSD (1)
|
||||
local shard_id=0
|
||||
rados_get_data eio $dir $shard_id || return 1
|
||||
rados_put_get_data eio $dir $shard_id || return 1
|
||||
delete_pool $poolname
|
||||
}
|
||||
|
||||
@ -295,7 +271,7 @@ function TEST_rados_get_subread_eio_shard_1() {
|
||||
create_erasure_coded_pool $poolname 2 1 || return 1
|
||||
# inject eio into replicas OSD (1) and OSD (2)
|
||||
local shard_id=1
|
||||
rados_get_data eio $dir $shard_id || return 1
|
||||
rados_put_get_data eio $dir $shard_id || return 1
|
||||
delete_pool $poolname
|
||||
}
|
||||
|
||||
@ -310,7 +286,7 @@ function TEST_rados_get_subread_missing() {
|
||||
create_erasure_coded_pool $poolname 2 1 || return 1
|
||||
# inject remove into replicas OSD (1) and OSD (2)
|
||||
local shard_id=1
|
||||
rados_get_data remove $dir $shard_id || return 1
|
||||
rados_put_get_data remove $dir $shard_id || return 1
|
||||
delete_pool $poolname
|
||||
}
|
||||
|
||||
@ -359,23 +335,21 @@ function TEST_rados_get_with_subreadall_eio_shard_0() {
|
||||
local poolname=pool-jerasure
|
||||
create_erasure_coded_pool $poolname 2 1 || return 1
|
||||
# inject eio on primary OSD (0)
|
||||
local shard_id=0
|
||||
rados_get_data_recovery eio $dir $shard_id || return 1
|
||||
rados_put_get_data eio $dir $shard_id recovery || return 1
|
||||
|
||||
delete_pool $poolname
|
||||
}
|
||||
|
||||
function TEST_rados_get_with_subreadall_eio_shard_1() {
|
||||
local dir=$1
|
||||
local shard_id=0
|
||||
local shard_id=1
|
||||
|
||||
setup_osds 4 || return 1
|
||||
|
||||
local poolname=pool-jerasure
|
||||
create_erasure_coded_pool $poolname 2 1 || return 1
|
||||
# inject eio on replica OSD (1)
|
||||
local shard_id=1
|
||||
rados_get_data_recovery eio $dir $shard_id || return 1
|
||||
rados_put_get_data eio $dir $shard_id recovery || return 1
|
||||
|
||||
delete_pool $poolname
|
||||
}
|
||||
@ -403,7 +377,159 @@ function TEST_ec_recovery_errors() {
|
||||
# Cluster should recover this object
|
||||
wait_for_clean || return 1
|
||||
|
||||
#rados_get_data_recovery eio $dir $shard_id || return 1
|
||||
delete_pool $poolname
|
||||
}
|
||||
|
||||
# Test backfill with unfound object
|
||||
function TEST_ec_backfill_unfound() {
    # $1 - test working directory
    #
    # Scenario: take one OSD of an erasure coded (k=3, m=2) pool down, write
    # $lastobj objects, inject EIO on shards 0 and 1 of $testobj, bring the
    # OSD back and expect the PG to report backfill_unfound.  The unfound
    # object is then deleted with mark_unfound_lost and every other object
    # is verified.
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250
    # Loop/scratch variables are kept local so they do not leak into the
    # shell that runs the remaining tests.
    local i tmp state

    # Cap the PG log at 5-10 entries (presumably so the returning OSD cannot
    # catch up from the log and has to be backfilled).  `local -x` keeps the
    # variable exported for the daemons started from this function while
    # restoring the caller's CEPH_ARGS on return, so later tests do not
    # inherit these options.
    local -x CEPH_ARGS="$CEPH_ARGS --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10"
    setup_osds 5 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    # Take out the last OSD that stores $objname.
    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # Write the objects while the OSD is away.
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll (up to 100 times, 1s apart) until the PG reports backfill_unfound.
    # NOTE(review): PG 2.0 assumes the test pool got pool id 2 - confirm.
    # If the state never shows up, the list_missing check below fails the test.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo $state | grep backfill_unfound; then
            break
        fi
        echo $state
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    # (timeout exits with 124 when it had to kill the command)
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_pool $poolname
}
|
||||
|
||||
# Test recovery with unfound object
|
||||
function TEST_ec_recovery_unfound() {
    # $1 - test working directory
    #
    # Scenario: take one OSD of an erasure coded (k=3, m=2) pool down, write
    # $lastobj objects, inject EIO on shards 0 and 1 of $testobj, bring the
    # OSD back and expect the PG to report recovery_unfound.  The unfound
    # object is then deleted with mark_unfound_lost and every other object
    # is verified.
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75
    # Loop/scratch variables are kept local so they do not leak into the
    # shell that runs the remaining tests.
    local i tmp state

    setup_osds 5 || return 1

    local poolname=pool-jerasure
    create_erasure_coded_pool $poolname 3 2 || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    # Take out the last OSD that stores $objname.
    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # Write the objects while the OSD is away.
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio ec data $poolname $testobj $dir 0 || return 1
    inject_eio ec data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll (up to 100 times, 1s apart) until the PG reports recovery_unfound.
    # NOTE(review): PG 2.0 assumes the test pool got pool id 2 - confirm.
    # If the state never shows up, the list_missing check below fails the test.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo $state | grep recovery_unfound; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    # (timeout exits with 124 when it had to kill the command)
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_pool $poolname
}
|
||||
|
305
qa/standalone/osd/osd-rep-recov-eio.sh
Executable file
305
qa/standalone/osd/osd-rep-recov-eio.sh
Executable file
@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Copyright (C) 2017 Red Hat <contact@redhat.com>
|
||||
#
|
||||
#
|
||||
# Author: Kefu Chai <kchai@redhat.com>
|
||||
# Author: David Zafman <dzafman@redhat.com>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Library Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Library Public License for more details.
|
||||
#
|
||||
|
||||
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
|
||||
|
||||
# Test driver.
#
# $1   - working directory handed to setup/teardown and to every test
# $2.. - optional names of the test functions to run; when none are given,
#        every function in the shell whose name matches TEST_[0-9a-z_]*
#        is run
#
# For each test a monitor (a) and a manager (x) are started and the rbd
# pool is created before the test runs; the cluster is torn down afterwards.
# The first failing step aborts the run with status 1.
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON "

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        # Bring up a fresh cluster for this test.
        if ! { setup $dir && run_mon $dir a && run_mgr $dir x && create_rbd_pool ; }; then
            return 1
        fi

        $func $dir || return 1
        teardown $dir || return 1
    done
}
|
||||
|
||||
# Start OSDs 0 .. $1-1 and wait for the cluster to become clean.
#
# $1 - number of OSDs to start
#
# NOTE: the working directory is not an argument; $dir is read from the
# calling function's scope (bash dynamic scoping), so callers must have a
# variable named "dir" set.
#
# Returns 1 as soon as an OSD fails to start or the cluster does not
# become clean.
function setup_osds() {
    local count=$1
    # Keep the loop counter local so it does not leak into the test shell.
    local id

    for (( id = 0; id < count; id++ )); do
        run_osd $dir $id || return 1
    done
    wait_for_clean || return 1
}
|
||||
|
||||
# Print the "state" field of one placement group.
#
# $1 - PG id (e.g. 2.0)
#
# The JSON output of `ceph pg dump pgs` (stderr discarded) is filtered
# with jq down to the entry whose pgid equals $1.
function get_state() {
    local pgid=$1
    local sname=state
    local filter=".[] | select(.pgid==\"$pgid\") | .$sname"

    ceph --format json pg dump pgs 2>/dev/null | jq -r "$filter"
}
|
||||
|
||||
# Delete a pool.
#
# $1 - pool name; it is passed twice to `ceph osd pool delete`, together
#      with the --yes-i-really-really-mean-it confirmation flag.
#
# The exit status is that of the ceph command.
function delete_pool() {
    local pool=$1

    ceph osd pool delete $pool $pool --yes-i-really-really-mean-it
}
|
||||
|
||||
# Create the reference payload and store it as an object.
#
# $1 - working directory; the payload is written to $1/ORIGINAL
# $2 - pool name
# $3 - object name (default: SOMETHING)
#
# The payload is four 1024-character fields, each holding one marker
# (AAA, BBB, CCCC, DDDD) right-aligned by printf.  Returns 1 when the
# rados put fails.
function rados_put() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local payload=$dir/ORIGINAL

    # Start from an empty file, then append one padded field per marker.
    : > $payload
    for marker in AAA BBB CCCC DDDD ; do
        printf "%*s" 1024 $marker >> $payload
    done

    rados --pool $poolname put $objname $payload || return 1
}
|
||||
|
||||
# Read an object back and check the outcome against an expectation.
#
# $1 - working directory; the object is read into $1/COPY
# $2 - pool name
# $3 - object name (default: SOMETHING)
# $4 - expected outcome (default: ok)
#        fail - the read must fail
#        hang - the read must still be blocked after 5 seconds
#               (timeout exit status 124)
#        any other value - the read must succeed and the data must be
#               identical to $1/ORIGINAL; $1/COPY is removed afterwards
function rados_get() {
    local dir=$1
    local poolname=$2
    local objname=${3:-SOMETHING}
    local expect=${4:-ok}
    local copy=$dir/COPY

    case $expect in
        fail)
            # The negated status of the read is the function's result.
            ! rados --pool $poolname get $objname $copy
            return
            ;;
        hang)
            timeout 5 rados --pool $poolname get $objname $copy
            test "$?" = "124"
            return
            ;;
    esac

    # Normal case: fetch the object and compare with the reference payload.
    rados --pool $poolname get $objname $copy || return 1
    diff $dir/ORIGINAL $copy || return 1
    rm $copy
}
|
||||
|
||||
# Read an object from pool-rep while errors are injected on more and more
# replicas.
#
# $1 - injection kind; the helper inject_$1 is invoked
# $2 - working directory
#
# Three rounds are run.  Round N injects on replicas 0..N (the injection is
# repeated for the replicas of earlier rounds) and then reads the object:
# with one or two replicas affected the read must succeed, with all three
# affected the read is expected to hang.
function rados_get_data() {
    local inject=$1
    shift
    local dir=$1

    local poolname=pool-rep
    local objname=obj-$inject-$$
    local upto replica mode

    rados_put $dir $poolname $objname || return 1

    for upto in 0 1 2; do
        for replica in $(seq 0 $upto); do
            inject_$inject rep data $poolname $objname $dir $replica || return 1
        done

        # An empty mode expands to no argument, so rados_get uses its default.
        mode=
        if [ $upto = 2 ]; then
            mode=hang
        fi
        rados_get $dir $poolname $objname $mode || return 1
    done
}
|
||||
|
||||
# Test: reads from a replicated pool with EIO injected on its replicas.
#
# $1 - test working directory
#
# Starts 4 OSDs, creates pool-rep, runs rados_get_data with the "eio"
# injection and finally deletes the pool; the status of that deletion is
# the function's result.
function TEST_rados_get_with_eio() {
    local dir=$1

    if ! setup_osds 4; then
        return 1
    fi

    local poolname=pool-rep
    if ! create_pool $poolname 1 1; then
        return 1
    fi
    if ! wait_for_clean; then
        return 1
    fi
    if ! rados_get_data eio $dir; then
        return 1
    fi

    delete_pool $poolname
}
|
||||
|
||||
# Test backfill with unfound object
|
||||
function TEST_rep_backfill_unfound() {
    # $1 - test working directory
    #
    # Scenario: take one OSD of a replicated pool down, write $lastobj
    # objects, inject EIO on replicas 0 and 1 of $testobj, bring the OSD
    # back and expect the PG to report backfill_unfound.  The unfound object
    # is then deleted with mark_unfound_lost and every other object is
    # verified.
    local dir=$1
    local objname=myobject
    local lastobj=300
    # Must be between 1 and $lastobj
    local testobj=obj250
    # Loop/scratch variables are kept local so they do not leak into the
    # shell that runs the remaining tests.
    local i tmp state

    # Cap the PG log at 5-10 entries (presumably so the returning OSD cannot
    # catch up from the log and has to be backfilled).  `local -x` keeps the
    # variable exported for the daemons started from this function while
    # restoring the caller's CEPH_ARGS on return, so the tests that run()
    # executes afterwards do not inherit these options.
    local -x CEPH_ARGS="$CEPH_ARGS --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10"
    setup_osds 3 || return 1

    local poolname=test-pool
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    # Take out the last OSD that stores $objname.
    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # Write the objects while the OSD is away.
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio rep data $poolname $testobj $dir 0 || return 1
    inject_eio rep data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll (up to 100 times, 1s apart) until the PG reports backfill_unfound.
    # NOTE(review): PG 2.0 assumes the test pool got pool id 2 (run() creates
    # the rbd pool first) - confirm.  If the state never shows up, the
    # list_missing check below fails the test.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo $state | grep backfill_unfound; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    # (timeout exits with 124 when it had to kill the command)
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_pool $poolname
}
|
||||
|
||||
# Test recovery with unfound object
|
||||
function TEST_rep_recovery_unfound() {
    # $1 - test working directory
    #
    # Scenario: take one OSD of a replicated pool down, write $lastobj
    # objects, inject EIO on replicas 0 and 1 of $testobj, bring the OSD
    # back and wait for recovery to stop on the unfound object.  The unfound
    # object is then deleted with mark_unfound_lost and every other object
    # is verified.
    local dir=$1
    local objname=myobject
    local lastobj=100
    # Must be between 1 and $lastobj
    local testobj=obj75
    # Loop/scratch variables are kept local so they do not leak into the
    # shell that runs the remaining tests.
    local i tmp state

    setup_osds 3 || return 1

    local poolname=test-pool
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    ceph pg dump pgs

    rados_put $dir $poolname $objname || return 1

    # Take out the last OSD that stores $objname.
    local -a initial_osds=($(get_osds $poolname $objname))
    local last_osd=${initial_osds[-1]}
    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
    ceph osd down ${last_osd} || return 1
    ceph osd out ${last_osd} || return 1

    ceph pg dump pgs

    # Write the objects while the OSD is away.
    dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4
    for i in $(seq 1 $lastobj)
    do
        rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1
    done

    inject_eio rep data $poolname $testobj $dir 0 || return 1
    inject_eio rep data $poolname $testobj $dir 1 || return 1

    run_osd $dir ${last_osd} || return 1
    ceph osd in ${last_osd} || return 1

    sleep 15

    # Poll (up to 100 times, 1s apart) until the PG state no longer contains
    # "recovering".  Unlike the backfill test this does not wait for an
    # explicit *_unfound state; the list_missing check below is what
    # confirms the object is missing.
    # NOTE(review): PG 2.0 assumes the test pool got pool id 2 (run() creates
    # the rbd pool first) - confirm.
    for tmp in $(seq 1 100); do
        state=$(get_state 2.0)
        if echo $state | grep -v recovering; then
            break
        fi
        echo "$state "
        sleep 1
    done

    ceph pg dump pgs
    ceph pg 2.0 list_missing | grep -q $testobj || return 1

    # Command should hang because object is unfound
    # (timeout exits with 124 when it had to kill the command)
    timeout 5 rados -p $poolname get $testobj $dir/CHECK
    test $? = "124" || return 1

    ceph pg 2.0 mark_unfound_lost delete

    wait_for_clean || return 1

    for i in $(seq 1 $lastobj)
    do
        if [ obj${i} = "$testobj" ]; then
            # Doesn't exist anymore
            ! rados -p $poolname get $testobj $dir/CHECK || return 1
        else
            rados --pool $poolname get obj${i} $dir/CHECK || return 1
            diff -q $dir/ORIGINAL $dir/CHECK || return 1
        fi
    done

    rm -f ${dir}/ORIGINAL ${dir}/CHECK

    delete_pool $poolname
}
|
||||
|
||||
main osd-rep-recov-eio.sh "$@"
|
||||
|
||||
# Local Variables:
|
||||
# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh"
|
||||
# End:
|
@ -2134,6 +2134,8 @@ void PGMap::get_health_checks(
|
||||
{ PG_STATE_INCOMPLETE, {UNAVAILABLE, {}} },
|
||||
{ PG_STATE_REPAIR, {DAMAGED, {}} },
|
||||
{ PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} },
|
||||
{ PG_STATE_RECOVERY_UNFOUND, {DAMAGED, {}} },
|
||||
{ PG_STATE_BACKFILL_UNFOUND, {DAMAGED, {}} },
|
||||
{ PG_STATE_BACKFILL_TOOFULL, {DEGRADED_FULL, {}} },
|
||||
{ PG_STATE_RECOVERY_TOOFULL, {DEGRADED_FULL, {}} },
|
||||
{ PG_STATE_DEGRADED, {DEGRADED, {}} },
|
||||
|
@ -217,6 +217,7 @@ void ECBackend::_failed_push(const hobject_t &hoid,
|
||||
dout(10) << __func__ << ": canceling recovery op for obj " << hoid
|
||||
<< dendl;
|
||||
assert(recovery_ops.count(hoid));
|
||||
eversion_t v = recovery_ops[hoid].v;
|
||||
recovery_ops.erase(hoid);
|
||||
|
||||
list<pg_shard_t> fl;
|
||||
@ -224,6 +225,8 @@ void ECBackend::_failed_push(const hobject_t &hoid,
|
||||
fl.push_back(i.first);
|
||||
}
|
||||
get_parent()->failed_push(fl, hoid);
|
||||
get_parent()->backfill_add_missing(hoid, v);
|
||||
get_parent()->finish_degraded_object(hoid);
|
||||
}
|
||||
|
||||
struct OnRecoveryReadComplete :
|
||||
|
@ -5775,7 +5775,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx)
|
||||
new PG::CephPeeringEvt(
|
||||
queued,
|
||||
queued,
|
||||
PG::DeferBackfill(cct->_conf->osd_recovery_retry_interval)));
|
||||
PG::UnfoundBackfill()));
|
||||
queue_peering_event(evt);
|
||||
action = "in backfill";
|
||||
} else if (state_test(PG_STATE_RECOVERING)) {
|
||||
@ -5783,7 +5783,7 @@ void PG::find_unfound(epoch_t queued, RecoveryCtx *rctx)
|
||||
new PG::CephPeeringEvt(
|
||||
queued,
|
||||
queued,
|
||||
PG::DeferRecovery(cct->_conf->osd_recovery_retry_interval)));
|
||||
PG::UnfoundRecovery()));
|
||||
queue_peering_event(evt);
|
||||
action = "in recovery";
|
||||
} else {
|
||||
@ -6358,6 +6358,37 @@ PG::RecoveryState::Backfilling::react(const DeferBackfill &c)
|
||||
return transit<NotBackfilling>();
|
||||
}
|
||||
|
||||
boost::statechart::result
|
||||
PG::RecoveryState::Backfilling::react(const UnfoundBackfill &c)
|
||||
{
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
ldout(pg->cct, 10) << "backfill has unfound, can't continue" << dendl;
|
||||
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
|
||||
|
||||
pg->state_set(PG_STATE_BACKFILL_UNFOUND);
|
||||
pg->state_clear(PG_STATE_BACKFILLING);
|
||||
|
||||
for (set<pg_shard_t>::iterator it = pg->backfill_targets.begin();
|
||||
it != pg->backfill_targets.end();
|
||||
++it) {
|
||||
assert(*it != pg->pg_whoami);
|
||||
ConnectionRef con = pg->osd->get_con_osd_cluster(
|
||||
it->osd, pg->get_osdmap()->get_epoch());
|
||||
if (con) {
|
||||
pg->osd->send_message_osd_cluster(
|
||||
new MBackfillReserve(
|
||||
MBackfillReserve::CANCEL,
|
||||
spg_t(pg->info.pgid.pgid, it->shard),
|
||||
pg->get_osdmap()->get_epoch()),
|
||||
con.get());
|
||||
}
|
||||
}
|
||||
|
||||
pg->waiting_on_backfill.clear();
|
||||
|
||||
return transit<NotBackfilling>();
|
||||
}
|
||||
|
||||
boost::statechart::result
|
||||
PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
|
||||
{
|
||||
@ -6537,6 +6568,7 @@ void PG::RecoveryState::NotBackfilling::exit()
|
||||
{
|
||||
context< RecoveryMachine >().log_exit(state_name, enter_time);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
pg->state_clear(PG_STATE_BACKFILL_UNFOUND);
|
||||
utime_t dur = ceph_clock_now() - enter_time;
|
||||
pg->osd->recoverystate_perf->tinc(rs_notbackfilling_latency, dur);
|
||||
}
|
||||
@ -6555,6 +6587,7 @@ void PG::RecoveryState::NotRecovering::exit()
|
||||
{
|
||||
context< RecoveryMachine >().log_exit(state_name, enter_time);
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
pg->state_clear(PG_STATE_RECOVERY_UNFOUND);
|
||||
utime_t dur = ceph_clock_now() - enter_time;
|
||||
pg->osd->recoverystate_perf->tinc(rs_notrecovering_latency, dur);
|
||||
}
|
||||
@ -6929,6 +6962,18 @@ PG::RecoveryState::Recovering::react(const DeferRecovery &evt)
|
||||
return transit<NotRecovering>();
|
||||
}
|
||||
|
||||
boost::statechart::result
|
||||
PG::RecoveryState::Recovering::react(const UnfoundRecovery &evt)
|
||||
{
|
||||
PG *pg = context< RecoveryMachine >().pg;
|
||||
ldout(pg->cct, 10) << "recovery has unfound, can't continue" << dendl;
|
||||
pg->state_set(PG_STATE_RECOVERY_UNFOUND);
|
||||
pg->state_clear(PG_STATE_RECOVERING);
|
||||
pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
|
||||
release_reservations(true);
|
||||
return transit<NotRecovering>();
|
||||
}
|
||||
|
||||
void PG::RecoveryState::Recovering::exit()
|
||||
{
|
||||
context< RecoveryMachine >().log_exit(state_name, enter_time);
|
||||
|
45
src/osd/PG.h
45
src/osd/PG.h
@ -1815,6 +1815,18 @@ public:
|
||||
*out << "DeferRecovery: delay " << delay;
|
||||
}
|
||||
};
|
||||
struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> {
|
||||
explicit UnfoundBackfill() {}
|
||||
void print(std::ostream *out) const {
|
||||
*out << "UnfoundBackfill";
|
||||
}
|
||||
};
|
||||
struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> {
|
||||
explicit UnfoundRecovery() {}
|
||||
void print(std::ostream *out) const {
|
||||
*out << "UnfoundRecovery";
|
||||
}
|
||||
};
|
||||
protected:
|
||||
TrivialEvent(Initialize)
|
||||
TrivialEvent(Load)
|
||||
@ -2101,7 +2113,9 @@ protected:
|
||||
boost::statechart::custom_reaction< Backfilled >,
|
||||
boost::statechart::custom_reaction< AllReplicasActivated >,
|
||||
boost::statechart::custom_reaction< DeferRecovery >,
|
||||
boost::statechart::custom_reaction< DeferBackfill >
|
||||
boost::statechart::custom_reaction< DeferBackfill >,
|
||||
boost::statechart::custom_reaction< UnfoundRecovery >,
|
||||
boost::statechart::custom_reaction< UnfoundBackfill >
|
||||
> reactions;
|
||||
boost::statechart::result react(const QueryState& q);
|
||||
boost::statechart::result react(const ActMap&);
|
||||
@ -2119,6 +2133,12 @@ protected:
|
||||
boost::statechart::result react(const DeferBackfill& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
boost::statechart::result react(const UnfoundRecovery& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
boost::statechart::result react(const UnfoundBackfill& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
};
|
||||
|
||||
struct Clean : boost::statechart::state< Clean, Active >, NamedState {
|
||||
@ -2147,11 +2167,13 @@ protected:
|
||||
typedef boost::mpl::list<
|
||||
boost::statechart::transition< Backfilled, Recovered >,
|
||||
boost::statechart::custom_reaction< DeferBackfill >,
|
||||
boost::statechart::custom_reaction< UnfoundBackfill >,
|
||||
boost::statechart::custom_reaction< RemoteReservationRejected >
|
||||
> reactions;
|
||||
explicit Backfilling(my_context ctx);
|
||||
boost::statechart::result react(const RemoteReservationRejected& evt);
|
||||
boost::statechart::result react(const DeferBackfill& evt);
|
||||
boost::statechart::result react(const UnfoundBackfill& evt);
|
||||
void exit();
|
||||
};
|
||||
|
||||
@ -2191,13 +2213,18 @@ protected:
|
||||
struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
|
||||
typedef boost::mpl::list<
|
||||
boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
|
||||
boost::statechart::custom_reaction< DeferRecovery >
|
||||
boost::statechart::custom_reaction< DeferRecovery >,
|
||||
boost::statechart::custom_reaction< UnfoundRecovery >
|
||||
> reactions;
|
||||
explicit NotRecovering(my_context ctx);
|
||||
boost::statechart::result react(const DeferRecovery& evt) {
|
||||
/* no-op */
|
||||
return discard_event();
|
||||
}
|
||||
boost::statechart::result react(const UnfoundRecovery& evt) {
|
||||
/* no-op */
|
||||
return discard_event();
|
||||
}
|
||||
void exit();
|
||||
};
|
||||
|
||||
@ -2214,7 +2241,9 @@ protected:
|
||||
boost::statechart::custom_reaction< MLogRec >,
|
||||
boost::statechart::custom_reaction< Activate >,
|
||||
boost::statechart::custom_reaction< DeferRecovery >,
|
||||
boost::statechart::custom_reaction< DeferBackfill >
|
||||
boost::statechart::custom_reaction< DeferBackfill >,
|
||||
boost::statechart::custom_reaction< UnfoundRecovery >,
|
||||
boost::statechart::custom_reaction< UnfoundBackfill >
|
||||
> reactions;
|
||||
boost::statechart::result react(const QueryState& q);
|
||||
boost::statechart::result react(const MInfoRec& infoevt);
|
||||
@ -2228,6 +2257,12 @@ protected:
|
||||
boost::statechart::result react(const DeferBackfill& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
boost::statechart::result react(const UnfoundRecovery& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
boost::statechart::result react(const UnfoundBackfill& evt) {
|
||||
return discard_event();
|
||||
}
|
||||
};
|
||||
|
||||
struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState {
|
||||
@ -2295,6 +2330,7 @@ protected:
|
||||
typedef boost::mpl::list <
|
||||
boost::statechart::custom_reaction< AllReplicasRecovered >,
|
||||
boost::statechart::custom_reaction< DeferRecovery >,
|
||||
boost::statechart::custom_reaction< UnfoundRecovery >,
|
||||
boost::statechart::custom_reaction< RequestBackfill >
|
||||
> reactions;
|
||||
explicit Recovering(my_context ctx);
|
||||
@ -2302,6 +2338,7 @@ protected:
|
||||
void release_reservations(bool cancel = false);
|
||||
boost::statechart::result react(const AllReplicasRecovered &evt);
|
||||
boost::statechart::result react(const DeferRecovery& evt);
|
||||
boost::statechart::result react(const UnfoundRecovery& evt);
|
||||
boost::statechart::result react(const RequestBackfill &evt);
|
||||
};
|
||||
|
||||
@ -2576,6 +2613,8 @@ protected:
|
||||
bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
|
||||
bool is_peering() const { return state_test(PG_STATE_PEERING); }
|
||||
bool is_down() const { return state_test(PG_STATE_DOWN); }
|
||||
bool is_recovery_unfound() const { return state_test(PG_STATE_RECOVERY_UNFOUND); }
|
||||
bool is_backfill_unfound() const { return state_test(PG_STATE_BACKFILL_UNFOUND); }
|
||||
bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
|
||||
bool is_clean() const { return state_test(PG_STATE_CLEAN); }
|
||||
bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
|
||||
|
@ -106,6 +106,7 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
|
||||
const hobject_t oid) = 0;
|
||||
|
||||
virtual void failed_push(const list<pg_shard_t> &from, const hobject_t &soid) = 0;
|
||||
virtual void finish_degraded_object(const hobject_t& oid) = 0;
|
||||
virtual void primary_failed(const hobject_t &soid) = 0;
|
||||
virtual bool primary_error(const hobject_t& soid, eversion_t v) = 0;
|
||||
virtual void cancel_pull(const hobject_t &soid) = 0;
|
||||
@ -122,6 +123,11 @@ typedef ceph::shared_ptr<const OSDMap> OSDMapRef;
|
||||
eversion_t v
|
||||
) = 0;
|
||||
|
||||
virtual void backfill_add_missing(
|
||||
const hobject_t &oid,
|
||||
eversion_t v
|
||||
) = 0;
|
||||
|
||||
virtual void remove_missing_object(const hobject_t &oid,
|
||||
eversion_t v,
|
||||
Context *on_complete) = 0;
|
||||
|
@ -525,6 +525,14 @@ void PrimaryLogPG::on_primary_error(
|
||||
dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
|
||||
primary_failed(oid);
|
||||
primary_error(oid, v);
|
||||
backfill_add_missing(oid, v);
|
||||
}
|
||||
|
||||
void PrimaryLogPG::backfill_add_missing(
|
||||
const hobject_t &oid,
|
||||
eversion_t v)
|
||||
{
|
||||
dout(0) << __func__ << ": oid " << oid << " version " << v << dendl;
|
||||
backfills_in_flight.erase(oid);
|
||||
missing_loc.add_missing(oid, v, eversion_t());
|
||||
}
|
||||
@ -694,7 +702,7 @@ void PrimaryLogPG::wait_for_blocked_object(const hobject_t& soid, OpRequestRef o
|
||||
|
||||
void PrimaryLogPG::maybe_force_recovery()
|
||||
{
|
||||
// no force if not in degraded/recovery/backfill stats
|
||||
// no force if not in degraded/recovery/backfill states
|
||||
if (!is_degraded() &&
|
||||
!state_test(PG_STATE_RECOVERING |
|
||||
PG_STATE_RECOVERY_WAIT |
|
||||
@ -2307,6 +2315,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
|
||||
// force recovery of the oldest missing object if too many logs
|
||||
maybe_force_recovery();
|
||||
}
|
||||
|
||||
PrimaryLogPG::cache_result_t PrimaryLogPG::maybe_handle_manifest_detail(
|
||||
OpRequestRef op,
|
||||
bool write_ordered,
|
||||
@ -10571,7 +10580,23 @@ void PrimaryLogPG::mark_all_unfound_lost(
|
||||
release_backoffs(p.first);
|
||||
}
|
||||
requeue_object_waiters(waiting_for_unreadable_object);
|
||||
queue_recovery();
|
||||
if (is_recovery_unfound()) {
|
||||
queue_peering_event(
|
||||
CephPeeringEvtRef(
|
||||
std::make_shared<CephPeeringEvt>(
|
||||
get_osdmap()->get_epoch(),
|
||||
get_osdmap()->get_epoch(),
|
||||
DoRecovery())));
|
||||
} else if (is_backfill_unfound()) {
|
||||
queue_peering_event(
|
||||
CephPeeringEvtRef(
|
||||
std::make_shared<CephPeeringEvt>(
|
||||
get_osdmap()->get_epoch(),
|
||||
get_osdmap()->get_epoch(),
|
||||
RequestBackfill())));
|
||||
} else {
|
||||
queue_recovery();
|
||||
}
|
||||
|
||||
stringstream ss;
|
||||
ss << "pg has " << num_unfound
|
||||
|
@ -277,6 +277,7 @@ public:
|
||||
const hobject_t &soid,
|
||||
const object_stat_sum_t &delta_stats) override;
|
||||
void on_primary_error(const hobject_t &oid, eversion_t v) override;
|
||||
void backfill_add_missing(const hobject_t &oid, eversion_t v) override;
|
||||
void remove_missing_object(const hobject_t &oid,
|
||||
eversion_t v,
|
||||
Context *on_complete) override;
|
||||
|
@ -811,6 +811,10 @@ std::string pg_state_string(uint64_t state)
|
||||
oss << "forced_recovery+";
|
||||
if (state & PG_STATE_DOWN)
|
||||
oss << "down+";
|
||||
if (state & PG_STATE_RECOVERY_UNFOUND)
|
||||
oss << "recovery_unfound+";
|
||||
if (state & PG_STATE_BACKFILL_UNFOUND)
|
||||
oss << "backfill_unfound+";
|
||||
if (state & PG_STATE_UNDERSIZED)
|
||||
oss << "undersized+";
|
||||
if (state & PG_STATE_DEGRADED)
|
||||
@ -862,6 +866,10 @@ boost::optional<uint64_t> pg_string_state(const std::string& state)
|
||||
type = PG_STATE_CLEAN;
|
||||
else if (state == "down")
|
||||
type = PG_STATE_DOWN;
|
||||
else if (state == "recovery_unfound")
|
||||
type = PG_STATE_RECOVERY_UNFOUND;
|
||||
else if (state == "backfill_unfound")
|
||||
type = PG_STATE_BACKFILL_UNFOUND;
|
||||
else if (state == "scrubbing")
|
||||
type = PG_STATE_SCRUBBING;
|
||||
else if (state == "degraded")
|
||||
|
@ -978,8 +978,8 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
|
||||
#define PG_STATE_ACTIVE (1ULL << 1) // i am active. (primary: replicas too)
|
||||
#define PG_STATE_CLEAN (1ULL << 2) // peers are complete, clean of stray replicas.
|
||||
#define PG_STATE_DOWN (1ULL << 4) // a needed replica is down, PG offline
|
||||
//#define PG_STATE_REPLAY (1ULL << 5) // crashed, waiting for replay
|
||||
//#define PG_STATE_STRAY (1ULL << 6) // i must notify the primary i exist.
|
||||
#define PG_STATE_RECOVERY_UNFOUND (1ULL << 5) // recovery stopped due to unfound
|
||||
#define PG_STATE_BACKFILL_UNFOUND (1ULL << 6) // backfill stopped due to unfound
|
||||
//#define PG_STATE_SPLITTING (1ULL << 7) // i am splitting
|
||||
#define PG_STATE_SCRUBBING (1ULL << 8) // scrubbing
|
||||
//#define PG_STATE_SCRUBQ (1ULL << 9) // queued for scrub
|
||||
|
Loading…
Reference in New Issue
Block a user