mon: Improve health status for backfill_toofull and recovery_toofull
Treat backfill_toofull as a warning condition because it can resolve itself.

Includes test case for PG_BACKFILL_FULL
Includes test case for recovery_toofull / PG_RECOVERY_FULL

Fixes: https://tracker.ceph.com/issues/39555
Signed-off-by: David Zafman <dzafman@redhat.com>
commit fa698e18e1
parent c036b9265f
@@ -489,16 +489,27 @@ The state of specific problematic PGs can be queried with::

     ceph tell <pgid> query


-PG_DEGRADED_FULL
+PG_RECOVERY_FULL
 ________________

 Data redundancy may be reduced or at risk for some data due to a lack
 of free space in the cluster. Specifically, one or more PGs has the
-*backfill_toofull* or *recovery_toofull* flag set, meaning that the
+*recovery_toofull* flag set, meaning that the
 cluster is unable to migrate or recover data because one or more OSDs
 is above the *full* threshold.

-See the discussion for *OSD_BACKFILLFULL* or *OSD_FULL* above for
+See the discussion for *OSD_FULL* above for steps to resolve this condition.
+
+PG_BACKFILL_FULL
+________________
+
+Data redundancy may be reduced or at risk for some data due to a lack
+of free space in the cluster. Specifically, one or more PGs has the
+*backfill_toofull* flag set, meaning that the
+cluster is unable to migrate or recover data because one or more OSDs
+is above the *backfillfull* threshold.
+
+See the discussion for *OSD_BACKFILLFULL* above for
 steps to resolve this condition.

 PG_DAMAGED
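The remediation path these two checks describe can be sketched from the CLI; a rough, hedged example (the ratio values below are placeholders chosen for illustration, not recommendations from this change):

    # Identify the active check and the OSDs that are short on space.
    ceph health detail
    ceph osd df

    # PG_BACKFILL_FULL (HEALTH_WARN): backfill is gated by the backfillfull threshold.
    # A temporary bump can let backfill drain while capacity is added.
    ceph osd set-backfillfull-ratio 0.92    # placeholder value

    # PG_RECOVERY_FULL (HEALTH_ERR): recovery is gated by the full threshold;
    # adding OSDs or deleting data is the real fix.
    ceph osd set-full-ratio 0.96            # placeholder value; use with care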
@@ -385,6 +385,11 @@ and, ``backfill_toofull`` indicates that a backfill operation was requested,
 but couldn't be completed due to insufficient storage capacity. When a
 placement group cannot be backfilled, it may be considered ``incomplete``.

+The ``backfill_toofull`` state may be transient. It is possible that as PGs
+are moved around, space may become available. The ``backfill_toofull`` state
+is similar to ``backfill_wait`` in that backfill can proceed as soon as
+conditions change.
+
 Ceph provides a number of settings to manage the load spike associated with
 reassigning placement groups to an OSD (especially a new OSD). By default,
 ``osd_max_backfills`` sets the maximum number of concurrent backfills to and from
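A short, hedged illustration of the two knobs this passage mentions (the values are examples only):

    # backfill_toofull clears on its own once space frees up; just watch it.
    ceph pg dump pgs | grep backfill_toofull

    # osd_max_backfills bounds concurrent backfills per OSD; lower it to soften the
    # load spike, raise it to drain the backfill queue faster.
    ceph tell osd.* injectargs '--osd_max_backfills 2'   # example value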
@@ -69,8 +69,8 @@ map is ``active + clean``.
   The placement group is waiting in line to start backfill.

 *backfill_toofull*
-  A backfill operation is waiting because the destination OSD is over its
-  full ratio.
+  A backfill operation is waiting because the destination OSD is over
+  the backfillfull ratio.

 *backfill_unfound*
   Backfill stopped due to unfound objects.
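To see which PGs are currently held up by this state, the same jq filter used by the standalone tests in this commit can be run by hand (assumes a release whose ``pg dump pgs`` JSON carries a ``pg_stats`` array, as the tests do):

    # Count PGs whose state string contains "backfill_toofull".
    ceph --format json pg dump pgs 2>/dev/null | \
        jq '.pg_stats | [.[] | .state | select(contains("backfill_toofull"))] | length'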
@@ -247,6 +247,21 @@ function TEST_backfill_test_multi() {
     fi

     ceph pg dump pgs
+    ceph status
+
+    ceph status --format=json-pretty > $dir/stat.json
+
+    eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
+    if [ "$SEV" != "HEALTH_WARN" ]; then
+      echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
+      ERRORS="$(expr $ERRORS + 1)"
+    fi
+    eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
+    if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
+      echo "PG_BACKFILL_FULL message '$MSG' mismatched"
+      ERRORS="$(expr $ERRORS + 1)"
+    fi
+    rm -f $dir/stat.json

     if [ $ERRORS != "0" ];
     then
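The severity/message check above is repeated almost verbatim for PG_RECOVERY_FULL in the new test file below; a hypothetical helper (not part of this change, the name and shape are illustrative) could factor it out:

    # Hypothetical helper: verify that a named health check reports the expected severity.
    function check_health_severity() {
        local check=$1 expected=$2
        local sev
        sev=$(ceph status --format=json | jq -r ".health.checks.${check}.severity")
        if [ "$sev" != "$expected" ]; then
            echo "$check severity $sev not $expected"
            return 1
        fi
    }

    # Usage: check_health_severity PG_BACKFILL_FULL HEALTH_WARN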
qa/standalone/osd/osd-recovery-space.sh (new executable file, 179 lines)
@@ -0,0 +1,179 @@
#!/usr/bin/env bash
#
# Copyright (C) 2018 Red Hat <contact@redhat.com>
#
# Author: David Zafman <dzafman@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

source $CEPH_ROOT/qa/standalone/ceph-helpers.sh

function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    for func in $funcs ; do
        setup $dir || return 1
        $func $dir || return 1
        teardown $dir || return 1
    done
}


function get_num_in_state() {
    local state=$1
    local expression
    expression+="select(contains(\"${state}\"))"
    ceph --format json pg dump pgs 2>/dev/null | \
        jq ".pg_stats | [.[] | .state | $expression] | length"
}


function wait_for_state() {
    local state=$1
    local num_in_state=-1
    local cur_in_state
    local -a delays=($(get_timeout_delays $2 5))
    local -i loop=0

    flush_pg_stats || return 1
    while test $(get_num_pgs) == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state ${state})
        test $cur_in_state = "0" && break
        if test $cur_in_state != $num_in_state ; then
            loop=0
            num_in_state=$cur_in_state
        elif (( $loop >= ${#delays[*]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep ${delays[$loop]}
        loop+=1
    done
    return 0
}


function wait_for_recovery_toofull() {
    local timeout=$1
    wait_for_state recovery_toofull $timeout
}


# Create 1 pool with size 1
# Set full-ratio to 50%
# Write 600 objects of 5K each (3000K)
# Inject fake_statfs_for_testing of 3600K (83% full)
# Increase the pool size to 2
# The pool shouldn't have room to recover
function TEST_recovery_test_simple() {
    local dir=$1
    local pools=1
    local OSDS=2

    run_mon $dir a || return 1
    run_mgr $dir x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $(expr $OSDS - 1))
    do
      run_osd $dir $osd || return 1
    done

    ceph osd set-nearfull-ratio .40
    ceph osd set-backfillfull-ratio .45
    ceph osd set-full-ratio .50

    for p in $(seq 1 $pools)
    do
      create_pool "${poolprefix}$p" 1 1
      ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
    for o in $(seq 1 $objects)
    do
      rados -p "${poolprefix}$p" put obj$o $dir/datafile
    done

    for o in $(seq 0 $(expr $OSDS - 1))
    do
      ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
    done
    sleep 5

    ceph pg dump pgs

    for p in $(seq 1 $pools)
    do
      ceph osd pool set "${poolprefix}$p" size 2
    done

    # If this times out, we'll detect errors below
    wait_for_recovery_toofull 30

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
    then
      echo "One pool should have been in recovery_toofull"
      ERRORS="$(expr $ERRORS + 1)"
    fi

    ceph pg dump pgs
    ceph status
    ceph status --format=json-pretty > $dir/stat.json

    eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
    if [ "$SEV" != "HEALTH_ERR" ]; then
      echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
      ERRORS="$(expr $ERRORS + 1)"
    fi
    eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
    if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
      echo "PG_RECOVERY_FULL message '$MSG' mismatched"
      ERRORS="$(expr $ERRORS + 1)"
    fi
    rm -f $dir/stat.json

    if [ $ERRORS != "0" ];
    then
      return 1
    fi

    for i in $(seq 1 $pools)
    do
      delete_pool "${poolprefix}$i"
    done
    kill_daemons $dir || return 1
}


main osd-recovery-space "$@"

# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
# End:
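Outside the test harness, the condition this test manufactures can be spotted on a live cluster with a few commands; a hedged sketch (output shapes can vary by release):

    # Is recovery blocked by full OSDs?
    ceph health detail | grep -A 2 PG_RECOVERY_FULL

    # How many PGs are in recovery_toofull (same filter as the test's get_num_in_state)?
    ceph --format json pg dump pgs 2>/dev/null | \
        jq '.pg_stats | [.[] | .state | select(contains("recovery_toofull"))] | length'

    # Which OSDs are over the full threshold?
    ceph osd df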
@@ -2336,10 +2336,11 @@ void PGMap::get_health_checks(
   typedef enum pg_consequence_t {
     UNAVAILABLE = 1,   // Client IO to the pool may block
     DEGRADED = 2,      // Fewer than the requested number of replicas are present
-    DEGRADED_FULL = 3, // Fewer than the request number of replicas may be present
-                       //  and insufficiet resources are present to fix this
-    DAMAGED = 4        // The data may be missing or inconsistent on disk and
+    BACKFILL_FULL = 3, // Backfill is blocked for space considerations
+                       //  This may or may not be a deadlock condition.
+    DAMAGED = 4,       // The data may be missing or inconsistent on disk and
                        //  requires repair
+    RECOVERY_FULL = 5  // Recovery is blocked because OSDs are full
   } pg_consequence_t;

   // For a given PG state, how should it be reported at the pool level?
@@ -2382,8 +2383,8 @@ void PGMap::get_health_checks(
     { PG_STATE_SNAPTRIM_ERROR,    {DAMAGED, {}} },
     { PG_STATE_RECOVERY_UNFOUND,  {DAMAGED, {}} },
     { PG_STATE_BACKFILL_UNFOUND,  {DAMAGED, {}} },
-    { PG_STATE_BACKFILL_TOOFULL,  {DEGRADED_FULL, {}} },
-    { PG_STATE_RECOVERY_TOOFULL,  {DEGRADED_FULL, {}} },
+    { PG_STATE_BACKFILL_TOOFULL,  {BACKFILL_FULL, {}} },
+    { PG_STATE_RECOVERY_TOOFULL,  {RECOVERY_FULL, {}} },
     { PG_STATE_DEGRADED,          {DEGRADED, {}} },
     { PG_STATE_DOWN,              {UNAVAILABLE, {}} },
     // Delayed (wait until stuck) reports
@@ -2527,16 +2528,21 @@ void PGMap::get_health_checks(
         summary = "Degraded data redundancy: ";
         sev = HEALTH_WARN;
         break;
-      case DEGRADED_FULL:
-        health_code = "PG_DEGRADED_FULL";
-        summary = "Degraded data redundancy (low space): ";
-        sev = HEALTH_ERR;
+      case BACKFILL_FULL:
+        health_code = "PG_BACKFILL_FULL";
+        summary = "Low space hindering backfill (add storage if this doesn't resolve itself): ";
+        sev = HEALTH_WARN;
         break;
       case DAMAGED:
         health_code = "PG_DAMAGED";
         summary = "Possible data damage: ";
         sev = HEALTH_ERR;
         break;
+      case RECOVERY_FULL:
+        health_code = "PG_RECOVERY_FULL";
+        summary = "Full OSDs blocking recovery: ";
+        sev = HEALTH_ERR;
+        break;
       default:
         ceph_abort();
     }
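The health codes set here are what the new tests assert on; a quick hedged check of how they surface (field names as used by the tests in this commit):

    # List active health checks; expect PG_BACKFILL_FULL at HEALTH_WARN and/or
    # PG_RECOVERY_FULL at HEALTH_ERR when the corresponding PG states are present.
    ceph status --format=json-pretty | jq '.health.checks | keys'
    ceph status --format=json-pretty | jq '.health.checks.PG_BACKFILL_FULL.severity'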