qa/cephfs/fuse-mount: do not use 'stat' to check the mount state

If the network cannot respond for some reason, the 'stat' command will
hang until the network recovers; in the worst case it will hang
forever.

Fixes: https://tracker.ceph.com/issues/44044
Signed-off-by: Xiubo Li <xiubli@redhat.com>
This commit is contained in:
Xiubo Li 2020-03-05 06:13:02 -05:00
parent f0c67256b9
commit 8728da9c08
3 changed files with 50 additions and 43 deletions

View File

@ -184,7 +184,7 @@ class FuseMount(CephFSMount):
if self.inst is None:
raise RuntimeError("cannot find client session")
def is_mounted(self):
def check_mounted_state(self):
proc = self.client_remote.run(
args=[
'stat',
@ -228,13 +228,15 @@ class FuseMount(CephFSMount):
sleep for 5 seconds and check again.
"""
while not self.is_mounted():
while not self.check_mounted_state():
# Even if it's not mounted, it should at least
# be running: catch simple failures where it has terminated.
assert not self.fuse_daemon.poll()
time.sleep(5)
self.mounted = True
# Now that we're mounted, set permissions so that the rest of the test will have
# unrestricted access to the filesystem mount.
try:
@ -253,6 +255,7 @@ class FuseMount(CephFSMount):
def umount(self):
try:
log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
stderr = BytesIO()
self.client_remote.run(
args = [
'sudo',
@ -261,49 +264,57 @@ class FuseMount(CephFSMount):
self.mountpoint,
],
cwd=self.test_dir,
stderr=stderr,
timeout=(30*60),
)
except run.CommandFailedError:
log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))
if "mountpoint not found" in stderr.getvalue():
# This happens if the mount directory doesn't exist
log.info('mount point does not exist: %s', self.mountpoint)
elif "not mounted" in stderr.getvalue():
# This happens if the mount directory is already unmounted
log.info('mount point not mounted: %s', self.mountpoint)
else:
log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))
self.client_remote.run(args=[
'sudo',
run.Raw('PATH=/usr/sbin:$PATH'),
'lsof',
run.Raw(';'),
'ps',
'auxf',
], timeout=(60*15))
self.client_remote.run(args=[
'sudo',
run.Raw('PATH=/usr/sbin:$PATH'),
'lsof',
run.Raw(';'),
'ps',
'auxf',
], timeout=(60*15))
# abort the fuse mount, killing all hung processes
if self._fuse_conn:
self.run_python(dedent("""
import os
path = "/sys/fs/fuse/connections/{0}/abort"
if os.path.exists(path):
open(path, "w").write("1")
""").format(self._fuse_conn))
self._fuse_conn = None
# abort the fuse mount, killing all hung processes
if self._fuse_conn:
self.run_python(dedent("""
import os
path = "/sys/fs/fuse/connections/{0}/abort"
if os.path.exists(path):
open(path, "w").write("1")
""").format(self._fuse_conn))
self._fuse_conn = None
stderr = BytesIO()
try:
stderr = BytesIO()
# make sure its unmounted
self.client_remote.run(
args=[
'sudo',
'umount',
'-l',
'-f',
self.mountpoint,
],
stderr=stderr,
timeout=(60*15)
)
except CommandFailedError:
if self.is_mounted():
raise
try:
self.client_remote.run(
args=[
'sudo',
'umount',
'-l',
'-f',
self.mountpoint,
],
stderr=stderr,
timeout=(60*15)
)
except CommandFailedError:
if self.is_mounted():
raise
assert not self.is_mounted()
self.mounted = False
self._fuse_conn = None
self.id = None
self.inst = None

View File

@ -20,8 +20,6 @@ class KernelMount(CephFSMount):
def __init__(self, ctx, test_dir, client_id, client_remote, brxnet):
super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote, brxnet)
self.mounted = False
def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]):
if mountpoint is not None:
self.mountpoint = mountpoint
@ -122,9 +120,6 @@ class KernelMount(CephFSMount):
self.cleanup_netns()
self.cleanup()
def is_mounted(self):
return self.mounted
def wait_until_mounted(self):
"""
Unlike the fuse client, the kernel client is up and running as soon

View File

@ -33,6 +33,7 @@ class CephFSMount(object):
self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id)
self._mountpoint = None
self.fs = None
self.mounted = False
self._netns_name = None
self.nsid = -1
if brxnet is None:
@ -86,7 +87,7 @@ class CephFSMount(object):
self._netns_name = name
def is_mounted(self):
raise NotImplementedError()
return self.mounted
def setupfs(self, name=None):
if name is None and self.fs is not None: