From cc902a1f6b5590d6ddab409ac3cc5c8d4bd107ce Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 19 Jun 2017 11:59:10 -0400 Subject: [PATCH] qa/tasks/ceph: osd_scrub_pgs: reissue scrub requests in loop The scrub commands are not reliable: if the OSD doesn't happen to be connected at the time the command is issued, it may not get delivered. Re-request scrubs for each PG that has not yet been scrubbed so that we don't wait forever when the original request is dropped. Signed-off-by: Sage Weil --- qa/tasks/ceph.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index a64d9aae602..fa43530a6f5 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -1078,6 +1078,13 @@ def osd_scrub_pgs(ctx, config): gap_cnt = 0 else: gap_cnt += 1 + if gap_cnt % 6 == 0: + for (pgid, tmval) in timez: + # re-request scrub every so often in case the earlier + # request was missed. do not do it every time because + # the scrub may be in progress or not reported yet and + # we will starve progress. + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) if gap_cnt > retries: raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.') if loop: