mon: Improve health status for backfill_toofull and recovery_toofull

Treat backfill_toofull as a warning condition because it can resolve itself.
Includes test case for PG_BACKFILL_FULL
Includes test case for recovery_toofull / PG_RECOVERY_FULL

Fixes: https://tracker.ceph.com/issues/39555

Signed-off-by: David Zafman <dzafman@redhat.com>
David Zafman 2019-05-21 18:29:30 -07:00
parent c036b9265f
commit fa698e18e1
6 changed files with 230 additions and 14 deletions


@ -489,16 +489,27 @@ The state of specific problematic PGs can be queried with::
ceph tell <pgid> query
-PG_DEGRADED_FULL
+PG_RECOVERY_FULL
________________
Data redundancy may be reduced or at risk for some data due to a lack
of free space in the cluster. Specifically, one or more PGs has the
-*backfill_toofull* or *recovery_toofull* flag set, meaning that the
+*recovery_toofull* flag set, meaning that the
cluster is unable to migrate or recover data because one or more OSDs
is above the *full* threshold.
+See the discussion for *OSD_FULL* above for steps to resolve this condition.
+PG_BACKFILL_FULL
+________________
+Data redundancy may be reduced or at risk for some data due to a lack
+of free space in the cluster. Specifically, one or more PGs has the
+*backfill_toofull* flag set, meaning that the
+cluster is unable to migrate or recover data because one or more OSDs
+is above the *backfillfull* threshold.
-See the discussion for *OSD_BACKFILLFULL* or *OSD_FULL* above for
+See the discussion for *OSD_BACKFILLFULL* above for
steps to resolve this condition.
PG_DAMAGED
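For the PG_RECOVERY_FULL and PG_BACKFILL_FULL checks above, a minimal sketch of the kind of commands the resolution steps point to (standard Ceph CLI; the ratio values are examples only, not part of this change):

    # See which checks are raised and which OSDs are near the thresholds
    ceph health detail
    ceph osd df

    # Example values only: temporarily raise the thresholds while adding capacity
    ceph osd set-backfillfull-ratio 0.92
    ceph osd set-full-ratio 0.96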


@ -385,6 +385,11 @@ and, ``backfill_toofull`` indicates that a backfill operation was requested,
but couldn't be completed due to insufficient storage capacity. When a
placement group cannot be backfilled, it may be considered ``incomplete``.
+The ``backfill_toofull`` state may be transient. It is possible that as PGs
+are moved around, space may become available. The ``backfill_toofull`` state is
+similar to ``backfill_wait`` in that backfill can proceed as soon as
+conditions change.
Ceph provides a number of settings to manage the load spike associated with
reassigning placement groups to an OSD (especially a new OSD). By default,
``osd_max_backfills`` sets the maximum number of concurrent backfills to and from
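As a hedged illustration of this paragraph (standard Ceph CLI commands; the value shown is an example, not a recommendation made by this change):

    # Watch transient backfill_toofull PGs drain as data is moved around
    ceph pg dump pgs | grep backfill_toofull

    # Example value only: limit concurrent backfills to and from each OSD
    ceph config set osd osd_max_backfills 2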


@ -69,8 +69,8 @@ map is ``active + clean``.
The placement group is waiting in line to start backfill.
*backfill_toofull*
-A backfill operation is waiting because the destination OSD is over its
-full ratio.
+A backfill operation is waiting because the destination OSD is over
+the backfillfull ratio.
*backfill_unfound*
Backfill stopped due to unfound objects.
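A small sketch of how PGs in one of these states can be counted from the CLI; it mirrors the jq filter used by the tests below, with the state string as an example:

    ceph --format json pg dump pgs 2>/dev/null | \
        jq '.pg_stats | [.[] | .state | select(contains("backfill_toofull"))] | length'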


@ -247,6 +247,21 @@ function TEST_backfill_test_multi() {
fi
ceph pg dump pgs
ceph status
+ceph status --format=json-pretty > $dir/stat.json
+eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
+if [ "$SEV" != "HEALTH_WARN" ]; then
+echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
+ERRORS="$(expr $ERRORS + 1)"
+fi
+eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
+if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
+echo "PG_BACKFILL_FULL message '$MSG' mismatched"
+ERRORS="$(expr $ERRORS + 1)"
+fi
+rm -f $dir/stat.json
if [ $ERRORS != "0" ];
then
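The jq paths in the new checks above assume a health report shaped roughly as follows (an abridged, illustrative excerpt of the ceph status --format=json-pretty output, not verbatim from a cluster):

    {
      "health": {
        "checks": {
          "PG_BACKFILL_FULL": {
            "severity": "HEALTH_WARN",
            "summary": {
              "message": "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull"
            }
          }
        }
      }
    }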


@ -0,0 +1,179 @@
#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--osd_max_backfills=10 "
export objects=600
export poolprefix=test
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
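# Count the PGs whose state string contains $1 (e.g. "recovery_toofull"),
# filtering the JSON pg dump with jq.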
function get_num_in_state() {
local state=$1
local expression
expression+="select(contains(\"${state}\"))"
ceph --format json pg dump pgs 2>/dev/null | \
jq ".pg_stats | [.[] | .state | $expression] | length"
}
function wait_for_state() {
local state=$1
local num_in_state=-1
local cur_in_state
local -a delays=($(get_timeout_delays $2 5))
local -i loop=0
flush_pg_stats || return 1
while test $(get_num_pgs) == 0 ; do
sleep 1
done
while true ; do
cur_in_state=$(get_num_in_state ${state})
test $cur_in_state = "0" && break
if test $cur_in_state != $num_in_state ; then
loop=0
num_in_state=$cur_in_state
elif (( $loop >= ${#delays[*]} )) ; then
ceph pg dump pgs
return 1
fi
sleep ${delays[$loop]}
loop+=1
done
return 0
}
function wait_for_recovery_toofull() {
local timeout=$1
wait_for_state recovery_toofull $timeout
}
# Create 1 pool with size 1
# Set full-ratio to 50%
# Write 600 objects of 5K each (3000K)
# Inject fake_statfs_for_testing to 3600K (83% full)
# Increase the pool size to 2
# The pool shouldn't have room to recover
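# (Behind the numbers above: 600 objects x 5K = 3000K of data on a single OSD;
#  with statfs forced to 3686400 bytes (3600K) the OSD holding the data is
#  ~83% used, over the 50% full ratio, so recovery of the added replica blocks.)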
function TEST_recovery_test_simple() {
local dir=$1
local pools=1
local OSDS=2
run_mon $dir a || return 1
run_mgr $dir x || return 1
export CEPH_ARGS
for osd in $(seq 0 $(expr $OSDS - 1))
do
run_osd $dir $osd || return 1
done
ceph osd set-nearfull-ratio .40
ceph osd set-backfillfull-ratio .45
ceph osd set-full-ratio .50
for p in $(seq 1 $pools)
do
create_pool "${poolprefix}$p" 1 1
ceph osd pool set "${poolprefix}$p" size 1
done
wait_for_clean || return 1
dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
for o in $(seq 1 $objects)
do
rados -p "${poolprefix}$p" put obj$o $dir/datafile
done
for o in $(seq 0 $(expr $OSDS - 1))
do
ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
done
sleep 5
ceph pg dump pgs
for p in $(seq 1 $pools)
do
ceph osd pool set "${poolprefix}$p" size 2
done
# If this times out, we'll detect errors below
wait_for_recovery_toofull 30
ERRORS=0
if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
then
echo "One pool should have been in recovery_toofull"
ERRORS="$(expr $ERRORS + 1)"
fi
ceph pg dump pgs
ceph status
ceph status --format=json-pretty > $dir/stat.json
eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
if [ "$SEV" != "HEALTH_ERR" ]; then
echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
ERRORS="$(expr $ERRORS + 1)"
fi
eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
echo "PG_RECOVERY_FULL message '$MSG' mismatched"
ERRORS="$(expr $ERRORS + 1)"
fi
rm -f $dir/stat.json
if [ $ERRORS != "0" ];
then
return 1
fi
for i in $(seq 1 $pools)
do
delete_pool "${poolprefix}$i"
done
kill_daemons $dir || return 1
}
main osd-recovery-space "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
# End:
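If useful, the whole test can also be run on its own from a built tree, as the compile-command comment above suggests:

    ../qa/run-standalone.sh osd-recovery-space.sh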


@ -2336,10 +2336,11 @@ void PGMap::get_health_checks(
typedef enum pg_consequence_t {
UNAVAILABLE = 1, // Client IO to the pool may block
DEGRADED = 2, // Fewer than the requested number of replicas are present
-DEGRADED_FULL = 3, // Fewer than the request number of replicas may be present
-// and insufficiet resources are present to fix this
-DAMAGED = 4 // The data may be missing or inconsistent on disk and
+BACKFILL_FULL = 3, // Backfill is blocked for space considerations
+// This may or may not be a deadlock condition.
+DAMAGED = 4, // The data may be missing or inconsistent on disk and
// requires repair
+RECOVERY_FULL = 5 // Recovery is blocked because OSDs are full
} pg_consequence_t;
// For a given PG state, how should it be reported at the pool level?
@ -2382,8 +2383,8 @@ void PGMap::get_health_checks(
{ PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} },
{ PG_STATE_RECOVERY_UNFOUND, {DAMAGED, {}} },
{ PG_STATE_BACKFILL_UNFOUND, {DAMAGED, {}} },
-{ PG_STATE_BACKFILL_TOOFULL, {DEGRADED_FULL, {}} },
-{ PG_STATE_RECOVERY_TOOFULL, {DEGRADED_FULL, {}} },
+{ PG_STATE_BACKFILL_TOOFULL, {BACKFILL_FULL, {}} },
+{ PG_STATE_RECOVERY_TOOFULL, {RECOVERY_FULL, {}} },
{ PG_STATE_DEGRADED, {DEGRADED, {}} },
{ PG_STATE_DOWN, {UNAVAILABLE, {}} },
// Delayed (wait until stuck) reports
@ -2527,16 +2528,21 @@ void PGMap::get_health_checks(
summary = "Degraded data redundancy: ";
sev = HEALTH_WARN;
break;
-case DEGRADED_FULL:
-health_code = "PG_DEGRADED_FULL";
-summary = "Degraded data redundancy (low space): ";
-sev = HEALTH_ERR;
+case BACKFILL_FULL:
+health_code = "PG_BACKFILL_FULL";
+summary = "Low space hindering backfill (add storage if this doesn't resolve itself): ";
+sev = HEALTH_WARN;
break;
case DAMAGED:
health_code = "PG_DAMAGED";
summary = "Possible data damage: ";
sev = HEALTH_ERR;
break;
+case RECOVERY_FULL:
+health_code = "PG_RECOVERY_FULL";
+summary = "Full OSDs blocking recovery: ";
+sev = HEALTH_ERR;
+break;
default:
ceph_abort();
}
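Taken together, the two checks would surface along these lines; the summary wording comes from the strings above and the counts from the test expectations, but the exact layout is an illustrative sketch, not captured output:

    ceph health detail
    # Illustrative output only:
    # HEALTH_ERR Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull; Full OSDs blocking recovery: 1 pg recovery_toofull
    # PG_BACKFILL_FULL Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull
    # PG_RECOVERY_FULL Full OSDs blocking recovery: 1 pg recovery_toofull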