From cc902a1f6b5590d6ddab409ac3cc5c8d4bd107ce Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@redhat.com>
Date: Mon, 19 Jun 2017 11:59:10 -0400
Subject: [PATCH] qa/tasks/ceph: osd_scrub_pgs: reissue scrub requests in loop

The scrub commands are not reliable: if the OSD doesn't happen to
be connected at the time the command is issued, the command may
not get delivered.  Re-request scrubs for each PG that has not yet
been scrubbed so that we don't wait forever when the original
request is dropped.

Signed-off-by: Sage Weil <sage@redhat.com>
---
 qa/tasks/ceph.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
index a64d9aae602..fa43530a6f5 100644
--- a/qa/tasks/ceph.py
+++ b/qa/tasks/ceph.py
@@ -1078,6 +1078,13 @@ def osd_scrub_pgs(ctx, config):
             gap_cnt = 0
         else:
             gap_cnt += 1
+            if gap_cnt % 6 == 0:
+                for (pgid, tmval) in timez:
+                    # re-request scrub every so often in case the earlier
+                    # request was missed.  do not do it every time because
+                    # the scrub may be in progress or not reported yet, and
+                    # re-requesting each time would starve progress.
+                    manager.raw_cluster_cmd('pg', 'deep-scrub', pgid)
             if gap_cnt > retries:
                 raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.')
         if loop: