Merge pull request #4174 from trociny/wip-10976.master

osd: fix PG::all_unfound_are_queried_or_lost for non-existent osds

Reviewed-by: Kefu Chai <tchaikov@gmail.com>
Reviewed-by: Samuel Just <sjust@redhat.com>
2025-01-20 10:01:45 +00:00 · 2015-04-09 11:25:03 -07:00 · 2015-04-09 11:25:03 -07:00 · aec2f5de3b
commit aec2f5de3b
parent eeb2cf5cc1 0c396f85ed
3 changed files with 129 additions and 13 deletions
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@ -792,6 +792,8 @@ bool PG::all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const
    if (iter != peer_info.end() &&
        (iter->second.is_empty() || iter->second.dne()))
      continue;
+    if (!osdmap->exists(peer->osd))
+      continue;
    const osd_info_t &osd_info(osdmap->get_info(peer->osd));
    if (osd_info.lost_at <= osd_info.up_from) {
      // If there is even one OSD in might_have_unfound that isn't lost, we
--- a/src/test/test_common.sh
+++ b/src/test/test_common.sh
@ -38,12 +38,33 @@ die() {
        exit 1
 }

+# Return 0 when the first argument equals one of the whitespace-separated
+# words in the remaining arguments, 1 otherwise.
+is_set()
+{
+	local wanted=$1; shift
+	local candidates="$@"
+	local word
+
+	# Bail out successfully on the first matching word.
+	for word in ${candidates}; do
+		[ "${wanted}" != "${word}" ] || return 0
+	done
+	return 1
+}
+
 # Stop an OSD started by vstart
 stop_osd() {
        osd_index=$1
        pidfile="out/osd.$osd_index.pid"
        if [ -e $pidfile ]; then
-                kill `cat $pidfile` && return 0
+                if kill `cat $pidfile` ; then
+                        poll_cmd "eval test -e $pidfile ; echo \$?" "1" 1 30
+                        [ $? -eq 1 ] && return 0
+                        echo "ceph-osd process did not terminate correctly"
+                else
+                        echo "kill `cat $pidfile` failed"
+                fi
        else
                echo "ceph-osd process $osd_index is not running"
        fi
@ -144,7 +165,7 @@ start_recovery() {
        CEPH_NUM_OSD=$1
        osd=0
        while [ $osd -lt $CEPH_NUM_OSD ]; do
-                ./ceph -c ./ceph.conf osd tell $osd debug kick_recovery_wq 0
+                ./ceph -c ./ceph.conf tell osd.$osd debug kick_recovery_wq 0
                osd=$((osd+1))
        done
 }
--- a/src/test/test_lost.sh
+++ b/src/test/test_lost.sh
@ -19,6 +19,26 @@ setup() {

        # set recovery start to a really long time to ensure that we don't start recovery
        ./vstart.sh -d -n -o "$vstart_config" || die "vstart failed"
+
+	# for existing pools set size not greater than number of OSDs,
+	# so recovery from degraded pgs is possible
+	local changed=0
+	for pool in `./ceph osd pool ls`; do
+	    local size=`./ceph osd pool get ${pool} size | awk '{print $2}'`
+	    if [ "${size}" -gt "${CEPH_NUM_OSD}" ]; then
+		./ceph osd pool set ${pool} size ${CEPH_NUM_OSD}
+		changed=1
+	    fi
+	done
+	if [ ${changed} -eq 1 ]; then
+	    # XXX: When a pool has degraded pgs because its size is greater than
+	    # the number of OSDs, recovery can still get stuck after the size is
+	    # decreased and requires an additional kick.
+	    ./ceph osd out 0
+	    ./ceph osd in 0
+	fi
+
+	poll_cmd "./ceph health" HEALTH_OK 1 30
 }

 recovery1_impl() {
@ -65,7 +85,13 @@ recovery1() {
 }

 lost1_impl() {
-	try_to_fetch_unfound=$1
+	local flags="$@"
+	local lost_action=delete
+	local pgs_unfound pg
+
+	if is_set revert_lost $flags; then
+	    lost_action=revert
+	fi

        # Write lots and lots of objects
        write_objects 1 1 20 8000 $TEST_POOL
@ -91,7 +117,20 @@ lost1_impl() {
 	poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
        [ $? -eq 1 ] || die "Failed to see unfound objects."

-	if [ "$try_to_fetch_unfound" -eq 1 ]; then
+	# Collect the ids of the pgs that "ceph health detail" reports as having
+	# unfound objects. The first field must be compared with "==": a single
+	# "=" assigns to $1 instead of testing it, so non-pg lines would not be
+	# filtered out.
+	pgs_unfound=`./ceph health detail |awk '$1 == "pg" && /[0-9] unfound$/ {print $2}'`
+
+	[ -n "$pgs_unfound" ] || die "no pg with unfound objects"
+
+	for pg in $pgs_unfound; do
+	    ./ceph pg $pg mark_unfound_lost revert &&
+	    die "mark_unfound_lost unexpectedly succeeded for pg $pg"
+	done
+
+	if ! is_set mark_osd_lost $flags && ! is_set rm_osd $flags; then
+	    return
+	fi
+
+	if is_set try_to_fetch_unfound $flags; then
 	  # Ask for an object while it's still unfound, and
 	  # verify we get woken to an error when it's declared lost.
 	  echo "trying to get one of the unfound objects"
@ -101,19 +140,43 @@ lost1_impl() {
 	  ) &
 	fi

-        # Lose all objects.
-	./ceph osd lost 0 --yes-i-really-mean-it
+	if is_set mark_osd_lost $flags; then
+	  ./ceph osd lost 0 --yes-i-really-mean-it
+	fi
+
+	if is_set rm_osd $flags; then
+	    ./ceph osd rm 0
+	fi
+
+	if ! is_set auto_mark_unfound_lost $flags; then
+	    for pg in $pgs_unfound; do
+		./ceph pg $pg mark_unfound_lost ${lost_action} ||
+		  die "mark_unfound_lost failed for pg $pg"
+	    done
+	fi
+
+	start_recovery 2

 	# Unfound objects go away and are turned into lost objects.
 	poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
        [ $? -eq 1 ] || die "Unfound objects didn't go away."

+	# Every pg must now report that it has no unfound objects. Use the
+	# same ./ceph binary as the rest of the script: a bare "ceph" depends
+	# on PATH and, when it is missing, yields an empty pg list so the loop
+	# would verify nothing.
+	for pg in `./ceph pg ls | awk '/^[0-9]/ {print $1}'`; do
+	    ./ceph pg $pg mark_unfound_lost revert 2>&1 |
+	      grep 'pg has no unfound objects' ||
+	      die "pg $pg has unfound objects"
+	done
+
 	# Reading from a lost object gives back an error code.
 	# TODO: check error code
-	./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01 &&\
+	# With lost_action=delete the read must fail; with lost_action=revert
+	# it must succeed. Branch on the rados exit status directly: a later
+	# "$?" would hold the status of the preceding test, not of rados.
+	if ./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01; then
+	  [ "$lost_action" != delete ] ||
 	  die "expected radostool error"
+	elif [ "$lost_action" = revert ]; then
+	  die "unexpected radostool error"
+	fi

-	if [ "$try_to_fetch_unfound" -eq 1 ]; then
+	if is_set try_to_fetch_unfound $flags; then
 	  echo "waiting for the try_to_fetch_unfound \
 radostool instance to finish"
 	  wait
@ -122,16 +185,31 @@ radostool instance to finish"

+# lost1: mark osd.0 as lost, then mark the unfound objects lost using the
+# "revert" action.
 lost1() {
        setup 2 'osd recovery delay start = 10000'
-        lost1_impl 0
+        lost1_impl mark_osd_lost revert_lost
 }

+# lost2: like lost1 but with the default "delete" action, and with a
+# background attempt to fetch an unfound object while osd.0 is marked lost.
 lost2() {
        setup 2 'osd recovery delay start = 10000'
-        lost1_impl 1
+        lost1_impl mark_osd_lost try_to_fetch_unfound
+}
+
+# lost3: remove osd.0 with "ceph osd rm" without marking it lost first,
+# then mark the unfound objects lost (default "delete" action).
+lost3() {
+        setup 2 'osd recovery delay start = 10000'
+        lost1_impl rm_osd
+}
+
+# lost4: mark osd.0 as lost and then also remove it with "ceph osd rm"
+# before marking the unfound objects lost (default "delete" action).
+lost4() {
+        setup 2 'osd recovery delay start = 10000'
+        lost1_impl mark_osd_lost rm_osd
+}
+
+# lost5: mark osd.0 as lost and skip the explicit mark_unfound_lost step;
+# the unfound objects are expected to go away without it.
+lost5() {
+        setup 2 'osd recovery delay start = 10000'
+        lost1_impl mark_osd_lost auto_mark_unfound_lost
 }

 all_osds_die_impl() {
-        poll_cmd "./ceph osd stat -o -" '3 up, 3 in' 20 240
+        poll_cmd "./ceph osd stat" '3 up, 3 in' 20 240
        [ $? -eq 1 ] || die "didn't start 3 osds"

        stop_osd 0
@ -139,7 +217,7 @@ all_osds_die_impl() {
        stop_osd 2

 	# wait for the MOSDPGStat timeout
-        poll_cmd "./ceph osd stat -o -" '0 up' 20 240
+        poll_cmd "./ceph osd stat" '0 up' 20 240
        [ $? -eq 1 ] || die "all osds weren't marked as down"
 }

@ -156,9 +234,24 @@ run() {

        lost1 || die "test failed"

-        lost2 || die "test failed"
+	# XXX: try_to_fetch_unfound test currently hangs on "waiting for the
+	# try_to_fetch_unfound radostool instance to finish"
+	#lost2 || die "test failed"
+
+	lost3 || die "test failed"
+
+	lost4 || die "test failed"
+
+	# XXX: automatically marking lost is not implemented
+	#lost5 || die "test failed"

        all_osds_die || die "test failed"
 }

+# With no arguments run the whole suite; otherwise the arguments are
+# executed below as a command.
+# Test the argument count: [ -z "$@" ] expands to several words when more
+# than one argument is given, which makes "[" fail with an error.
+if [ $# -eq 0 ]; then
+	run
+	echo OK
+	exit 0
+fi
+
 $@