mirror of
https://github.com/ceph/ceph
synced 2025-01-31 15:32:38 +00:00
tests: Optimizing wait_for_clean()
wait_for_clean() is a very common call when running make check. It waits for the cluster to be stable before continuing. This script was making the same calls twice and could be optimized by making the useful calls only once. The is_clean() function was checking num_pgs & get_num_active_clean(). The main loop itself was also calling get_num_active_clean(). This patch inlines is_clean() inside this loop to benefit from a single get_num_active_clean() call. This avoids a useless call of (ceph + xmlstarlet). This patch also moves all the 'timer reset' conditions into an else branch, which avoids spawning another ceph+xmlstarlet call when we already know we should reset the timer. The last modification is to reduce the sleeping time, as the state of the cluster changes very fast. This whole patch may not look like a big win, but for a test like test/osd/osd-scrub-repair.sh, we drop from 9m56 to 9m30 while reducing the number of system calls. At the scale of make check, that's a lot of savings. Signed-off-by: Erwan Velu <erwan@redhat.com>
This commit is contained in:
parent
b3f7392d9d
commit
84197f1641
@ -1040,32 +1040,36 @@ function test_is_clean() {
|
||||
|
||||
##
|
||||
# Wait until the cluster becomes clean or if it does not make progress
|
||||
# for $TIMEOUT seconds. The function **is_clean** is used to determine
|
||||
# if the cluster is clean. Progress is measured either vian the
|
||||
# **get_is_making_recovery_progress** predicate or if the number of
|
||||
# clean PGs changes.
|
||||
# for $TIMEOUT seconds.
|
||||
# Progress is measured either via the **get_is_making_recovery_progress**
|
||||
# predicate or if the number of clean PGs changes (as returned by get_num_active_clean)
|
||||
#
|
||||
# @return 0 if the cluster is clean, 1 otherwise
|
||||
#
|
||||
function wait_for_clean() {
|
||||
local status=1
|
||||
local num_active_clean=$(get_num_active_clean)
|
||||
local num_active_clean=-1
|
||||
local cur_active_clean
|
||||
local -i timer=0
|
||||
while ! is_clean ; do
|
||||
if get_is_making_recovery_progress ; then
|
||||
timer=0
|
||||
elif (( timer >= $TIMEOUT )) ; then
|
||||
ceph report
|
||||
return 1
|
||||
fi
|
||||
local num_pgs=$(get_num_pgs)
|
||||
test $num_pgs != 0 || return 1
|
||||
|
||||
while true ; do
|
||||
# Comparing get_num_active_clean & get_num_pgs is used to determine
|
||||
# if the cluster is clean. That's almost an inline of is_clean() to
|
||||
# get more performance by avoiding multiple calls of get_num_active_clean.
|
||||
cur_active_clean=$(get_num_active_clean)
|
||||
test $cur_active_clean = $num_pgs && break
|
||||
if test $cur_active_clean != $num_active_clean ; then
|
||||
timer=0
|
||||
num_active_clean=$cur_active_clean
|
||||
elif get_is_making_recovery_progress ; then
|
||||
timer=0
|
||||
elif (( timer >= $(($TIMEOUT * 10)))) ; then
|
||||
ceph report
|
||||
return 1
|
||||
fi
|
||||
sleep 1
|
||||
sleep .1
|
||||
timer=$(expr $timer + 1)
|
||||
done
|
||||
return 0
|
||||
|
Loading…
Reference in New Issue
Block a user