diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
index 5bef25ca662..a86640ebef1 100644
--- a/qa/tasks/cephfs/mount.py
+++ b/qa/tasks/cephfs/mount.py
@@ -436,36 +436,6 @@ class CephFSMount(object):
         self._kill_background(p)
         self.background_procs.remove(p)
 
-    def spam_dir_background(self, path):
-        """
-        Create directory `path` and do lots of metadata operations
-        in it until further notice.
-        """
-        assert(self.is_mounted())
-        abs_path = os.path.join(self.mountpoint, path)
-
-        pyscript = dedent("""
-            import sys
-            import time
-            import os
-
-            abs_path = "{abs_path}"
-
-            if not os.path.exists(abs_path):
-                os.makedirs(abs_path)
-
-            n = 0
-            while True:
-                file_path = os.path.join(abs_path, "tmp%d" % n)
-                f = open(file_path, 'w')
-                f.close()
-                n = n + 1
-            """).format(abs_path=abs_path)
-
-        rproc = self._run_python(pyscript)
-        self.background_procs.append(rproc)
-        return rproc
-
     def get_global_id(self):
         raise NotImplementedError()
 
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
index 8496b144e1e..1b03afc0fc4 100644
--- a/qa/tasks/cephfs/test_journal_repair.py
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -173,26 +173,33 @@ class TestJournalRepair(CephFSTestCase):
             self.mds_cluster.mds_stop(unneeded_mds)
             self.mds_cluster.mds_fail(unneeded_mds)
 
-        # Do a bunch of I/O such that at least some will hit the second MDS: create
-        # lots of directories so that the balancer should find it easy to make a decision
-        # to allocate some of them to the second mds.
-        spammers = []
-        for n in range(0, 16):
-            dir_name = "spam_{0}".format(n)
-            spammers.append(self.mount_a.spam_dir_background(dir_name))
+        # Create a dir on each rank
+        self.mount_a.run_shell(["mkdir", "alpha"])
+        self.mount_a.run_shell(["mkdir", "bravo"])
+        self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
+        self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
 
         def subtrees_assigned():
             got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0])
-            rank_1_count = len([s for s in got_subtrees if s['auth_first'] == 1])
 
-            # Greater than 1, because there is typically 1 for ~mds1, and once it
-            # has been assigned something in addition to that it means it has been
-            # assigned a "real" subtree.
-            return rank_1_count > 1
+            for s in got_subtrees:
+                if s['dir']['path'] == '/bravo':
+                    if s['auth_first'] == 1:
+                        return True
+                    else:
+                        # Should not happen
+                        raise RuntimeError("/bravo is subtree but not rank 1!")
 
-        # We are waiting for the MDS to respond to hot directories, which
-        # is not guaranteed to happen at a particular time, so a lengthy timeout here.
-        self.wait_until_true(subtrees_assigned, 600)
+            return False
+
+        # Ensure the pinning has taken effect and the /bravo dir is now
+        # migrated to rank 1.
+        self.wait_until_true(subtrees_assigned, 30)
+
+        # Do some IO (this should be split across ranks according to
+        # the rank-pinned dirs)
+        self.mount_a.create_n_files("alpha/file", 1000)
+        self.mount_a.create_n_files("bravo/file", 1000)
 
         # Flush the journals so that we have some backing store data
         # belonging to one MDS, and some to the other MDS.
@@ -229,16 +236,6 @@ class TestJournalRepair(CephFSTestCase):
                 # killing the mount also means killing the node.
                 pass
 
-        log.info("Terminating spammer processes...")
-        for spammer_proc in spammers:
-            spammer_proc.stdin.close()
-            try:
-                spammer_proc.wait()
-            except (CommandFailedError, ConnectionLostError):
-                # The ConnectionLostError case is for kernel client, where
-                # killing the mount also means killing the node.
-                pass
-
         # See that the second MDS will crash when it starts and tries to
         # acquire rank 1
         damaged_id = active_mds_names[1]
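For context on the new approach: instead of waiting for the balancer to react to "hot" directories, the test now pins /alpha to rank 0 and /bravo to rank 1 via the ceph.dir.pin virtual xattr, then polls the rank 0 MDS's subtree map until the export is visible. A minimal standalone sketch of the same pin-and-verify pattern outside the teuthology harness follows; the mountpoint and MDS daemon name are hypothetical, and it assumes it runs on the host that owns the rank 0 MDS admin socket.

import json
import os
import subprocess
import time

MOUNT = "/mnt/cephfs"   # hypothetical CephFS mountpoint
MDS = "mds.a"           # hypothetical daemon name for the rank 0 MDS

# Create the directories and pin them to ranks 0 and 1 via the
# ceph.dir.pin virtual xattr (the same mechanism mount_a.setfattr uses).
for name, rank in (("alpha", 0), ("bravo", 1)):
    path = os.path.join(MOUNT, name)
    os.makedirs(path, exist_ok=True)
    os.setxattr(path, "ceph.dir.pin", str(rank).encode())

def bravo_pinned_to_rank_1():
    # Ask the rank 0 MDS for its subtree map over the admin socket; the
    # test does the equivalent through self.fs.mds_asok(["get", "subtrees"]).
    out = subprocess.check_output(["ceph", "daemon", MDS, "get", "subtrees"])
    for subtree in json.loads(out):
        if subtree["dir"]["path"] == "/bravo":
            return subtree["auth_first"] == 1
    return False

# Poll until /bravo shows up as a rank 1 subtree, mirroring
# wait_until_true(subtrees_assigned, 30) in the test.
for _ in range(30):
    if bravo_pinned_to_rank_1():
        break
    time.sleep(1)
else:
    raise RuntimeError("/bravo was not migrated to rank 1 within 30s")

Because the pin is explicit, a short 30 second timeout suffices, compared with the 600 second timeout previously needed while waiting for the balancer to make a placement decision.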