mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
ceph_manager: do_pg_scrub() don't resubmit a request for 2 minutes
Add a 6-minute fatal timeout. Warn when a repair is being resubmitted, because resubmission causes races. Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
parent
7625ebd8a2
commit
60cdb05380
@ -1561,10 +1561,20 @@ class CephManager:
|
||||
Scrub pg and wait for scrubbing to finish
|
||||
"""
|
||||
init = self.get_last_scrub_stamp(pool, pgnum)
|
||||
RESEND_TIMEOUT = 120 # Must be a multiple of SLEEP_TIME
|
||||
FATAL_TIMEOUT = RESEND_TIMEOUT * 3
|
||||
SLEEP_TIME = 10
|
||||
timer = 0
|
||||
while init == self.get_last_scrub_stamp(pool, pgnum):
|
||||
assert timer < FATAL_TIMEOUT, "fatal timeout trying to " + stype
|
||||
self.log("waiting for scrub type %s" % (stype,))
|
||||
self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
|
||||
time.sleep(10)
|
||||
if (timer % RESEND_TIMEOUT) == 0:
|
||||
self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
|
||||
# The first time in this loop is the actual request
|
||||
if timer != 0 and stype == "repair":
|
||||
self.log("WARNING: Resubmitted a non-idempotent repair")
|
||||
time.sleep(SLEEP_TIME)
|
||||
timer += SLEEP_TIME
|
||||
|
||||
def get_single_pg_stats(self, pgid):
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user