From 8728da9c085fea4c34e4247f45d495437f32c5fd Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 5 Mar 2020 06:13:02 -0500 Subject: [PATCH] qa/cephfs/fuse-mount: do not use the 'stat' to check the mount state If the network cannot respond for some reason, the 'stat' cmd will be stuck until the network recovers; in the worst case it will be stuck forever. Fixes: https://tracker.ceph.com/issues/44044 Signed-off-by: Xiubo Li --- qa/tasks/cephfs/fuse_mount.py | 85 +++++++++++++++++++-------------- qa/tasks/cephfs/kernel_mount.py | 5 -- qa/tasks/cephfs/mount.py | 3 +- 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index ac4da5b360d..49cd6198f9f 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -184,7 +184,7 @@ class FuseMount(CephFSMount): if self.inst is None: raise RuntimeError("cannot find client session") - def is_mounted(self): + def check_mounted_state(self): proc = self.client_remote.run( args=[ 'stat', @@ -228,13 +228,15 @@ class FuseMount(CephFSMount): sleep for 5 seconds and check again. """ - while not self.is_mounted(): + while not self.check_mounted_state(): # Even if it's not mounted, it should at least # be running: catch simple failures where it has terminated. assert not self.fuse_daemon.poll() time.sleep(5) + self.mounted = True + # Now that we're mounted, set permissions so that the rest of the test will have # unrestricted access to the filesystem mount. 
try: @@ -253,6 +255,7 @@ class FuseMount(CephFSMount): def umount(self): try: log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) + stderr = BytesIO() self.client_remote.run( args = [ 'sudo', @@ -261,49 +264,57 @@ class FuseMount(CephFSMount): self.mountpoint, ], cwd=self.test_dir, + stderr=stderr, timeout=(30*60), ) except run.CommandFailedError: - log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) + if "mountpoint not found" in stderr.getvalue(): + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.mountpoint) + elif "not mounted" in stderr.getvalue(): + # This happens if the mount directory already unmouted + log.info('mount point not mounted: %s', self.mountpoint) + else: + log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) - self.client_remote.run(args=[ - 'sudo', - run.Raw('PATH=/usr/sbin:$PATH'), - 'lsof', - run.Raw(';'), - 'ps', - 'auxf', - ], timeout=(60*15)) + self.client_remote.run(args=[ + 'sudo', + run.Raw('PATH=/usr/sbin:$PATH'), + 'lsof', + run.Raw(';'), + 'ps', + 'auxf', + ], timeout=(60*15)) - # abort the fuse mount, killing all hung processes - if self._fuse_conn: - self.run_python(dedent(""" - import os - path = "/sys/fs/fuse/connections/{0}/abort" - if os.path.exists(path): - open(path, "w").write("1") - """).format(self._fuse_conn)) - self._fuse_conn = None + # abort the fuse mount, killing all hung processes + if self._fuse_conn: + self.run_python(dedent(""" + import os + path = "/sys/fs/fuse/connections/{0}/abort" + if os.path.exists(path): + open(path, "w").write("1") + """).format(self._fuse_conn)) + self._fuse_conn = None - stderr = BytesIO() - try: + stderr = BytesIO() # make sure its unmounted - self.client_remote.run( - args=[ - 'sudo', - 'umount', - '-l', - '-f', - self.mountpoint, - ], - stderr=stderr, - timeout=(60*15) - ) - except CommandFailedError: - if 
self.is_mounted(): - raise + try: + self.client_remote.run( + args=[ + 'sudo', + 'umount', + '-l', + '-f', + self.mountpoint, + ], + stderr=stderr, + timeout=(60*15) + ) + except CommandFailedError: + if self.is_mounted(): + raise - assert not self.is_mounted() + self.mounted = False self._fuse_conn = None self.id = None self.inst = None diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 2dc07ca973c..cc0a1ae8361 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -20,8 +20,6 @@ class KernelMount(CephFSMount): def __init__(self, ctx, test_dir, client_id, client_remote, brxnet): super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote, brxnet) - self.mounted = False - def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]): if mountpoint is not None: self.mountpoint = mountpoint @@ -122,9 +120,6 @@ class KernelMount(CephFSMount): self.cleanup_netns() self.cleanup() - def is_mounted(self): - return self.mounted - def wait_until_mounted(self): """ Unlike the fuse client, the kernel client is up and running as soon diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index d913dcbcb64..3ebcd6fc2d7 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -33,6 +33,7 @@ class CephFSMount(object): self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id) self._mountpoint = None self.fs = None + self.mounted = False self._netns_name = None self.nsid = -1 if brxnet is None: @@ -86,7 +87,7 @@ class CephFSMount(object): self._netns_name = name def is_mounted(self): - raise NotImplementedError() + return self.mounted def setupfs(self, name=None): if name is None and self.fs is not None: