From bf4168245dec531cb75086fc42f5cbfeb80ce365 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 16 Dec 2021 11:14:08 -0500
Subject: [PATCH] qa: test that scrub causes reintegration

Signed-off-by: Patrick Donnelly
---
Reviewer notes (this region is not part of the commit message):

* filesystem.py, Filesystem.rank_tell: the raw output of the
  "tell mds.<id>:<rank>" command is now kept in a local variable. If
  json.loads() raises JSONDecodeError, the undecodable output is logged
  with log.error() and the exception is re-raised, so callers still see
  the same exception type.

* test_strays.py, TestStrays.test_reintegration_via_scrub (new):
    1. Creates 50 files under a/ and hard-links each one into b/.
    2. Remounts the client, flushes the MDS journal, fails the rank and
       waits for the daemons to come back.
    3. Waits until "dump tree /" returns 3 entries, then removes a/*.
    4. Asserts num_strays == 50 and that strays_reintegrated has not
       changed.
    5. Starts "scrub start / recursive,force" and waits (up to 60s) for
       num_strays to reach 0.
    6. Asserts strays_reintegrated grew by at least 50; the in-code
       comment explains why the count may exceed 50.

* NOTE(review): this file had been collapsed onto two physical lines.
  Line breaks were restored to match the hunk headers and the diffstat.
  Leading whitespace inside the hunks was reconstructed from Python
  block structure - verify against the upstream commit before applying.

 qa/tasks/cephfs/filesystem.py  |  7 ++++-
 qa/tasks/cephfs/test_strays.py | 51 ++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 3f9b518a5a1..5dd18825f46 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -1163,7 +1163,12 @@ class Filesystem(MDSCluster):
         return self.json_asok(command, 'mds', info['name'], timeout=timeout)
 
     def rank_tell(self, command, rank=0, status=None):
-        return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command))
+        try:
+            out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command)
+            return json.loads(out)
+        except json.decoder.JSONDecodeError:
+            log.error("could not decode: {}".format(out))
+            raise
 
     def ranks_tell(self, command, status=None):
         if status is None:
diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py
index 6e96ee4d529..2cf80e4de3a 100644
--- a/qa/tasks/cephfs/test_strays.py
+++ b/qa/tasks/cephfs/test_strays.py
@@ -456,6 +456,57 @@ class TestStrays(CephFSTestCase):
         # We purged it at the last
         self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1)
 
+    def test_reintegration_via_scrub(self):
+        """
+        That reintegration is triggered via recursive scrub.
+        """
+
+        self.mount_a.run_shell_payload("""
+        mkdir -p a b
+        for i in `seq 1 50`; do
+            touch a/"$i"
+            ln a/"$i" b/"$i"
+        done
+        sync -f .
+        """)
+
+        self.mount_a.remount() # drop caps/cache
+        self.fs.rank_tell(["flush", "journal"])
+        self.fs.rank_fail()
+        self.fs.wait_for_daemons()
+
+        # only / in cache, reintegration cannot happen
+        self.wait_until_equal(
+            lambda: len(self.fs.rank_tell(["dump", "tree", "/"])),
+            expect_val=3,
+            timeout=60
+        )
+
+        last_reintegrated = self.get_mdc_stat("strays_reintegrated")
+        self.mount_a.run_shell_payload("""
+        rm a/*
+        sync -f .
+        """)
+        self.wait_until_equal(
+            lambda: len(self.fs.rank_tell(["dump", "tree", "/"])),
+            expect_val=3,
+            timeout=60
+        )
+        self.assertEqual(self.get_mdc_stat("num_strays"), 50)
+        curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+        self.assertEqual(last_reintegrated, curr_reintegrated)
+
+        self.fs.rank_tell(["scrub", "start", "/", "recursive,force"])
+
+        self.wait_until_equal(
+            lambda: self.get_mdc_stat("num_strays"),
+            expect_val=0,
+            timeout=60
+        )
+        curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+        # N.B.: reintegrate (rename RPC) may be tried multiple times from different code paths
+        self.assertGreaterEqual(curr_reintegrated, last_reintegrated+50)
+
     def test_mv_hardlink_cleanup(self):
         """
         That when doing a rename from A to B, and B has hardlinks,