tasks/cephfs: enable kclient for mds_* tasks

Signed-off-by: John Spray <john.spray@redhat.com>
John Spray 2014-09-15 23:41:34 +01:00
parent 2aef17d8c5
commit dedcc6483a
6 changed files with 157 additions and 33 deletions

View File

@@ -113,7 +113,7 @@ class FuseMount(CephFSMount):
# Now that we're mounted, set permissions so that the rest of the test will have
# unrestricted access to the filesystem mount.
self.client_remote.run(
args=['sudo', 'chmod', '1777', '{tdir}/mnt.{id}'.format(tdir=self.test_dir, id=self.client_id)], )
args=['sudo', 'chmod', '1777', self.mountpoint])
def _mountpoint_exists(self):
return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0
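For context, the self.mountpoint attribute that replaces the hand-formatted path above is set up once in the common mount code. A minimal sketch, assuming it is derived from test_dir and client_id exactly as the removed format string was (the class name here is illustrative, not from the commit):

import os

class MountpointSketch(object):
    # Hedged sketch: shows the presumed equivalence between self.mountpoint and
    # the removed '{tdir}/mnt.{id}' format string; the real CephFSMount base
    # class may construct it slightly differently.
    def __init__(self, test_dir, client_id):
        self.test_dir = test_dir
        self.client_id = client_id
        self.mountpoint = os.path.join(
            self.test_dir, 'mnt.{id}'.format(id=self.client_id))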

View File

@@ -1,6 +1,9 @@
from StringIO import StringIO
import logging
import os
from teuthology.orchestra.run import CommandFailedError
from teuthology import misc
from teuthology.orchestra import remote as orchestra_remote
from teuthology.orchestra import run
from .mount import CephFSMount
@@ -8,10 +11,16 @@ log = logging.getLogger(__name__)
class KernelMount(CephFSMount):
def __init__(self, mons, test_dir, client_id, client_remote):
def __init__(self, mons, test_dir, client_id, client_remote,
ipmi_user, ipmi_password, ipmi_domain):
super(KernelMount, self).__init__(test_dir, client_id, client_remote)
self.mons = mons
self.mounted = False
self.ipmi_user = ipmi_user
self.ipmi_password = ipmi_password
self.ipmi_domain = ipmi_domain
def write_secret_file(self, remote, role, keyring, filename):
"""
Stash the keyring in the filename specified.
@@ -63,36 +72,122 @@ class KernelMount(CephFSMount):
],
)
self.client_remote.run(
args=['sudo', 'chmod', '1777', self.mountpoint])
self.mounted = True
def umount(self):
log.debug('Unmounting client client.{id}...'.format(id=self.client_id))
mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id))
self.client_remote.run(
args=[
'sudo',
'umount',
mnt,
self.mountpoint,
],
)
self.client_remote.run(
args=[
'rmdir',
'--',
mnt,
self.mountpoint,
],
)
self.mounted = False
def cleanup(self):
pass
def umount_wait(self):
pass
def umount_wait(self, force=False):
"""
Unlike the fuse client, the kernel client's umount is immediate
"""
try:
self.umount()
except CommandFailedError:
if not force:
raise
self.kill()
self.kill_cleanup()
self.mounted = False
def is_mounted(self):
return True
return self.mounted
def wait_until_mounted(self):
pass
"""
Unlike the fuse client, the kernel client is up and running as soon
as the initial mount() function returns.
"""
assert self.mounted
def teardown(self):
super(KernelMount, self).teardown()
self.umount()
if self.mounted:
self.umount()
def kill(self):
"""
The Ceph kernel client doesn't have a mechanism to kill itself (doing
that inside the kernel would be weird anyway), so we reboot the whole node
to get the same effect.
We reboot via IPMI rather than cleanly, because we don't want the client
to send any capability releases as it goes down.
"""
con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
self.ipmi_user,
self.ipmi_password,
self.ipmi_domain)
con.power_off()
self.mounted = False
def kill_cleanup(self):
assert not self.mounted
con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
self.ipmi_user,
self.ipmi_password,
self.ipmi_domain)
con.power_on()
# Wait for node to come back up after reboot
misc.reconnect(None, 300, [self.client_remote])
# Remove mount directory
self.client_remote.run(
args=[
'rmdir',
'--',
self.mountpoint,
],
)
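Taken together, kill() above and kill_cleanup() implement a hard power cycle through the lab's IPMI console. A minimal sketch of the intended round trip, using only the methods defined in this file (the wrapper function name is illustrative, not part of the commit):

def hard_reset_client(kernel_mount):
    # Illustrative wrapper: power the node off so no capability releases
    # escape, then power it back on, wait for SSH to return, and remove the
    # stale mount directory.
    kernel_mount.kill()          # IPMI power_off(); marks the mount as gone
    kernel_mount.kill_cleanup()  # IPMI power_on(), misc.reconnect(), rmdir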
def get_global_id(self):
"""
Look up the CephFS client ID for this mount, using debugfs.
"""
assert self.mounted
pyscript = """
import glob
import os
def get_global_id(client_entity_id):
for dir in glob.glob("/sys/kernel/debug/ceph/*"):
mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
return mds_sessions_lines[0].split()[1]
raise RuntimeError("Client {{0}} debugfs path not found".format(client_entity_id))
print get_global_id("{entity_id}")
""".format(entity_id=self.client_id)
p = self.client_remote.run(args=[
'sudo', 'python', '-c', pyscript
], stdout=StringIO())
return int(p.stdout.getvalue().strip())
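The embedded pyscript reads the first line of a debugfs mds_sessions file. A standalone sketch of that lookup follows; it assumes each mounted kernel client appears as a directory under /sys/kernel/debug/ceph/ and that the first line of mds_sessions has the form 'global_id <number>', which matches what the one-liner parses but is not verified here against every kernel version.

import glob
import os

def find_kclient_global_id():
    # Sketch only: return the global ID of the first kernel client found in
    # debugfs, mirroring mds_sessions_lines[0].split()[1] in the pyscript above.
    for debug_dir in glob.glob("/sys/kernel/debug/ceph/*"):
        with open(os.path.join(debug_dir, "mds_sessions")) as f:
            first_line = f.readline()
        # Expected (assumed) format: 'global_id 4115'
        return int(first_line.split()[1].strip('"'))
    raise RuntimeError("No ceph client debugfs directories found")

Reading debugfs normally requires root, which is why get_global_id() runs the script with 'sudo python'.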

View File

@@ -6,7 +6,7 @@ import time
from textwrap import dedent
import os
from teuthology.orchestra import run
from teuthology.orchestra.run import CommandFailedError
from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
log = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ class CephFSMount(object):
def umount(self):
raise NotImplementedError()
def umount_wait(self):
def umount_wait(self, force=False):
raise NotImplementedError()
def kill_cleanup(self):
@@ -283,5 +283,8 @@ class CephFSMount(object):
p.stdin.close()
try:
p.wait()
except CommandFailedError:
except (CommandFailedError, ConnectionLostError):
pass
def get_global_id(self):
raise NotImplementedError()
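A hedged sketch of how callers are expected to use the widened interface: umount_wait(force=True) may power cycle a kernel client's node, which is why ConnectionLostError now has to be tolerated alongside CommandFailedError wherever a test waits on processes running there. The helper name below is illustrative, not from the commit.

def force_remount(mount):
    # Illustrative only: drop a possibly unresponsive client hard, then bring
    # the mount back so the next test starts from a clean state.
    mount.umount_wait(force=True)  # kernel client: umount, else kill()/kill_cleanup()
    mount.mount()
    mount.wait_until_mounted()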

View File

@@ -57,7 +57,16 @@ def task(ctx, config):
mounts = {}
for id_, remote in clients:
kernel_mount = KernelMount(mons, test_dir, id_, remote)
kernel_mount = KernelMount(
mons,
test_dir,
id_,
remote,
ctx.teuthology_config.get('ipmi_user', None),
ctx.teuthology_config.get('ipmi_password', None),
ctx.teuthology_config.get('ipmi_domain', None)
)
mounts[id_] = kernel_mount
kernel_mount.mount()
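The three IPMI values are read from the teuthology configuration rather than the job YAML. A sketch of the corresponding entries, shown as the dict that ctx.teuthology_config exposes; the keys mirror the get() calls above, while the values are site-specific placeholders:

# Placeholder values only; real credentials come from the lab's
# teuthology configuration (typically ~/.teuthology.yaml).
teuthology_config = {
    'ipmi_user': 'ipmi-user',
    'ipmi_password': 'ipmi-password',
    'ipmi_domain': 'ipmi.example.com',
}

# If any key is absent, KernelMount receives None and the IPMI-based
# kill()/kill_cleanup() cannot be expected to power cycle the node.
ipmi_user = teuthology_config.get('ipmi_user', None)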

View File

@@ -7,6 +7,7 @@ exceed the limits of how many caps/inodes they should hold.
import contextlib
import logging
import time
from unittest import SkipTest
from teuthology.orchestra.run import CommandFailedError
@@ -86,10 +87,15 @@ class TestClientLimits(CephFSTestCase):
def setUp(self):
self.fs.mds_restart()
self.fs.wait_for_daemons()
self.mount_a.mount()
self.mount_a.wait_until_mounted()
self.mount_b.mount()
self.mount_b.wait_until_mounted()
if not self.mount_a.is_mounted():
self.mount_a.mount()
self.mount_a.wait_until_mounted()
if not self.mount_b.is_mounted():
self.mount_b.mount()
self.mount_b.wait_until_mounted()
self.mount_a.run_shell(["rm", "-rf", "*"])
def tearDown(self):
self.fs.clear_firewall()
@@ -182,6 +188,10 @@ class TestClientLimits(CephFSTestCase):
metric to that effect.
"""
# The debug hook to inject the failure only exists in the fuse client
if not isinstance(self.mount_a, FuseMount):
raise SkipTest("Require FUSE client to inject client release failure")
self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
self.mount_a.teardown()
self.mount_a.mount()
@@ -223,9 +233,11 @@ def task(ctx, config):
mount_a = ctx.mounts.values()[0]
mount_b = ctx.mounts.values()[1]
if not isinstance(mount_a, FuseMount):
# TODO: make kclient mount capable of all the same test tricks as ceph_fuse
raise RuntimeError("Require FUSE clients")
if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount):
# kclient kill() power cycles nodes, so requires clients to each be on
# their own node
if mount_a.client_remote.hostname == mount_b.client_remote.hostname:
raise RuntimeError("kclient clients must be on separate nodes")
# Stash references on ctx so that we can easily debug in interactive mode
# =======================================================================
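The setUp() change above makes mounting idempotent, since a kernel client may still be mounted when a test starts. The pattern condenses to a small helper; the function name is illustrative, not from the commit:

def ensure_mounted(mount):
    # Mirrors the guard added to setUp() above: only (re)mount when needed.
    if not mount.is_mounted():
        mount.mount()
        mount.wait_until_mounted()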

View File

@@ -9,7 +9,7 @@ import time
import unittest
from teuthology.orchestra import run
from teuthology.orchestra.run import CommandFailedError
from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from teuthology.task import interactive
from tasks.cephfs.cephfs_test_case import CephFSTestCase, run_tests
@@ -37,10 +37,13 @@ class TestClientRecovery(CephFSTestCase):
self.fs.clear_firewall()
self.fs.mds_restart()
self.fs.wait_for_daemons()
self.mount_a.mount()
self.mount_b.mount()
self.mount_a.wait_until_mounted()
self.mount_b.wait_until_mounted()
if not self.mount_a.is_mounted():
self.mount_a.mount()
self.mount_a.wait_until_mounted()
if not self.mount_b.is_mounted():
self.mount_b.mount()
self.mount_b.wait_until_mounted()
self.mount_a.run_shell(["sudo", "rm", "-rf", run.Raw("*")])
@@ -196,8 +199,8 @@ class TestClientRecovery(CephFSTestCase):
cap_holder.stdin.close()
try:
cap_holder.wait()
except CommandFailedError:
# We killed it, so it raises an error
except (CommandFailedError, ConnectionLostError):
# We killed it (and possibly its node), so it raises an error
pass
finally:
# teardown() doesn't quite handle this case cleanly, so help it out
@@ -247,8 +250,8 @@ class TestClientRecovery(CephFSTestCase):
cap_holder.stdin.close()
try:
cap_holder.wait()
except CommandFailedError:
# We killed it, so it raises an error
except (CommandFailedError, ConnectionLostError):
# We killed it (and possibly its node), so it raises an error
pass
finally:
self.mount_a.kill_cleanup()
@@ -402,8 +405,10 @@ def task(ctx, config):
mount_b = ctx.mounts.values()[1]
if not isinstance(mount_a, FuseMount) or not isinstance(mount_b, FuseMount):
# TODO: make kclient mount capable of all the same test tricks as ceph_fuse
raise RuntimeError("Require FUSE clients")
# kclient kill() power cycles nodes, so requires clients to each be on
# their own node
if mount_a.client_remote.hostname == mount_b.client_remote.hostname:
raise RuntimeError("kclient clients must be on separate nodes")
# Check we have at least one remote client for use with network-dependent tests
# =============================================================================
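Finally, the recurring pattern in the recovery tests of reaping a background process whose node may have been power cycled condenses to the following; proc stands in for any teuthology remote process handle such as cap_holder above:

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError

def reap_killed(proc):
    # The process was killed on purpose (possibly together with its node), so a
    # non-zero exit status or a dropped SSH connection is expected, not fatal.
    try:
        proc.wait()
    except (CommandFailedError, ConnectionLostError):
        pass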