Mirror of https://github.com/ceph/ceph, synced 2025-02-19 17:08:05 +00:00
Commit 2999a4d492
@@ -5,12 +5,16 @@ import logging
 import time
 
 from teuthology import misc
+from teuthology.parallel import parallel
 from teuthology.task import ceph_manager
 
 
 log = logging.getLogger(__name__)
 
 
+DAEMON_WAIT_TIMEOUT = 120
+
+
 class Filesystem(object):
     """
     This object is for driving a CephFS filesystem.
@@ -23,51 +27,112 @@ class Filesystem(object):
         self._ctx = ctx
         self._config = config
 
-        mds_list = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
-        if len(mds_list) != 1:
-            # Require exactly one MDS, the code path for creation failure when
-            # a standby is available is different
-            raise RuntimeError("This task requires exactly one MDS")
-
-        self.mds_id = mds_list[0]
-
-        (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=self.mds_id)).remotes.iterkeys()
-        manager = ceph_manager.CephManager(
-            mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
-        )
-        self.mds_manager = manager
+        self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
+        if len(self.mds_ids) == 0:
+            raise RuntimeError("This task requires at least one MDS")
+
+        first_mon = misc.get_first_mon(ctx, config)
+        (mon_remote,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+        self.mon_manager = ceph_manager.CephManager(mon_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
+        self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
 
         client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
         self.client_id = client_list[0]
         self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
 
-    def mds_stop(self):
+    def are_daemons_healthy(self):
+        """
+        Return true if all daemons are in one of active, standby, standby-replay
+        :return:
+        """
+        status = self.mon_manager.get_mds_status_all()
+        for mds_id, mds_status in status['info'].items():
+            if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
+                log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
+                return False
+
+        return True
+
+    def wait_for_daemons(self, timeout=None):
+        """
+        Wait until all daemons are healthy
+        :return:
+        """
+        if timeout is None:
+            timeout = DAEMON_WAIT_TIMEOUT
+
+        elapsed = 0
+        while True:
+            if self.are_daemons_healthy():
+                return
+            else:
+                time.sleep(1)
+                elapsed += 1
+
+            if elapsed > timeout:
+                raise RuntimeError("Timed out waiting for MDS daemons to become healthy")
+
+    def get_lone_mds_id(self):
+        if len(self.mds_ids) != 1:
+            raise ValueError("Explicit MDS argument required when multiple MDSs in use")
+        else:
+            return self.mds_ids[0]
+
+    def _one_or_all(self, mds_id, cb):
+        """
+        Call a callback for a single named MDS, or for all
+
+        :param mds_id: MDS daemon name, or None
+        :param cb: Callback taking single argument of MDS daemon name
+        """
+        if mds_id is None:
+            with parallel() as p:
+                for mds_id in self.mds_ids:
+                    p.spawn(cb, mds_id)
+        else:
+            cb(mds_id)
+
+    def mds_stop(self, mds_id=None):
         """
-        Stop the MDS daemon process. If it held a rank, that rank
+        Stop the MDS daemon process(se). If it held a rank, that rank
         will eventually go laggy.
         """
-        mds = self._ctx.daemons.get_daemon('mds', self.mds_id)
-        mds.stop()
+        self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop())
 
-    def mds_fail(self):
+    def mds_fail(self, mds_id=None):
         """
-        Inform MDSMonitor that the daemon process is dead. If it held
+        Inform MDSMonitor of the death of the daemon process(es). If it held
         a rank, that rank will be relinquished.
         """
-        self.mds_manager.raw_cluster_cmd("mds", "fail", "0")
+        self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_))
 
-    def mds_restart(self):
-        mds = self._ctx.daemons.get_daemon('mds', self.mds_id)
-        mds.restart()
+    def mds_restart(self, mds_id=None):
+        self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart())
+
+    def mds_fail_restart(self, mds_id=None):
+        """
+        Variation on restart that includes marking MDSs as failed, so that doing this
+        operation followed by waiting for healthy daemon states guarantees that they
+        have gone down and come up, rather than potentially seeing the healthy states
+        that existed before the restart.
+        """
+        def _fail_restart(id_):
+            self.mds_daemons[id_].stop()
+            self.mon_manager.raw_cluster_cmd("mds", "fail", id_)
+            self.mds_daemons[id_].restart()
+
+        self._one_or_all(mds_id, _fail_restart)
 
     def reset(self):
         log.info("Creating new filesystem")
 
-        assert not self._ctx.daemons.get_daemon('mds', self.mds_id).running()
-        self.mds_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "0")
-        self.mds_manager.raw_cluster_cmd_result('mds', 'fail', self.mds_id)
-        self.mds_manager.raw_cluster_cmd_result('fs', 'rm', "default", "--yes-i-really-mean-it")
-        self.mds_manager.raw_cluster_cmd_result('fs', 'new', "default", "metadata", "data")
+        self.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "0")
+        for mds_id in self.mds_ids:
+            assert not self._ctx.daemons.get_daemon('mds', mds_id).running()
+            self.mon_manager.raw_cluster_cmd_result('mds', 'fail', mds_id)
+        self.mon_manager.raw_cluster_cmd_result('fs', 'rm', "default", "--yes-i-really-mean-it")
+        self.mon_manager.raw_cluster_cmd_result('fs', 'new', "default", "metadata", "data")
 
     def get_metadata_object(self, object_type, object_id):
         """
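The `_one_or_all()` helper added above is what lets every daemon-control method (`mds_stop`, `mds_fail`, `mds_restart`, `mds_fail_restart`) take either a single MDS name or `None` meaning all of them. A minimal standalone sketch of the same dispatch shape, using plain `threading` in place of teuthology's `parallel()` helper; the class and daemon names here are illustrative only, not part of the commit:

import threading


class DaemonSet(object):
    """Toy stand-in for the Filesystem daemon-control pattern above."""

    def __init__(self, mds_ids):
        self.mds_ids = list(mds_ids)

    def _one_or_all(self, mds_id, cb):
        # None means "apply cb to every known MDS concurrently",
        # otherwise act on just the named daemon.
        if mds_id is None:
            threads = [threading.Thread(target=cb, args=(i,)) for i in self.mds_ids]
            for t in threads:
                t.start()
            for t in threads:
                t.join()
        else:
            cb(mds_id)

    def stop(self, mds_id=None):
        self._one_or_all(mds_id, lambda id_: print("stop mds.{0}".format(id_)))


if __name__ == '__main__':
    DaemonSet(['a', 'b', 'c']).stop()      # fan out to all three daemons
    DaemonSet(['a', 'b', 'c']).stop('b')   # act on a single daemon only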
@@ -110,8 +175,10 @@ class Filesystem(object):
 
         return version
 
-    def mds_asok(self, command):
-        proc = self.mds_manager.admin_socket('mds', self.mds_id, command)
+    def mds_asok(self, command, mds_id=None):
+        if mds_id is None:
+            mds_id = self.get_lone_mds_id()
+        proc = self.mon_manager.admin_socket('mds', mds_id, command)
         response_data = proc.stdout.getvalue()
         log.info("mds_asok output: {0}".format(response_data))
         if response_data.strip():
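`mds_asok()` now targets a specific daemon by name, falling back to `get_lone_mds_id()` when only one MDS exists, and still returns parsed JSON or None for empty output. A rough standalone equivalent of that call path, going through the `ceph daemon` CLI rather than `CephManager.admin_socket()`; it assumes a local `ceph` binary and a running `mds.<id>`, and the function name is just for illustration:

import json
import subprocess


def mds_asok(mds_id, command):
    # e.g. command = ['status']  ->  `ceph daemon mds.<id> status`
    out = subprocess.check_output(['ceph', 'daemon', 'mds.{0}'.format(mds_id)] + list(command))
    out = out.decode('utf-8')
    # Admin socket commands that print nothing yield None, as in mds_asok() above.
    return json.loads(out) if out.strip() else None


# result = mds_asok('a', ['status'])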
@@ -119,7 +186,7 @@ class Filesystem(object):
         else:
             return None
 
-    def wait_for_state(self, goal_state, reject=None, timeout=None):
+    def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None):
         """
         Block until the MDS reaches a particular state, or a failure condition
         is met.
@@ -130,10 +197,13 @@ class Filesystem(object):
         :return: number of seconds waited, rounded down to integer
         """
 
+        if mds_id is None:
+            mds_id = self.get_lone_mds_id()
+
         elapsed = 0
         while True:
             # mds_info is None if no daemon currently claims this rank
-            mds_info = self.mds_manager.get_mds_status(self.mds_id)
+            mds_info = self.mon_manager.get_mds_status(mds_id)
             current_state = mds_info['state'] if mds_info else None
 
             if current_state == goal_state:
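Both `wait_for_daemons()` and `wait_for_state()` follow the same poll, sleep, give-up-after-a-timeout shape. A generic version of that loop as a hedged sketch; the helper name and the example predicate are not part of the commit:

import time


def wait_for(predicate, timeout, interval=1):
    """Poll `predicate` every `interval` seconds until it returns True,
    raising if more than `timeout` seconds pass first. Returns seconds waited."""
    elapsed = 0
    while not predicate():
        if elapsed > timeout:
            raise RuntimeError("Timed out after {0} seconds".format(elapsed))
        time.sleep(interval)
        elapsed += interval
    return elapsed


# e.g. wait_for(lambda: get_mds_state('a') == 'up:active', timeout=120)
# where get_mds_state() stands in for mon_manager.get_mds_status().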
@@ -80,3 +80,19 @@ class KernelMount(CephFSMount):
                 mnt,
             ],
         )
+
+    def cleanup(self):
+        pass
+
+    def umount_wait(self):
+        pass
+
+    def is_mounted(self):
+        return True
+
+    def wait_until_mounted(self):
+        pass
+
+    def teardown(self):
+        super(KernelMount, self).teardown()
+        self.umount()
@@ -1,4 +1,4 @@
+from contextlib import contextmanager
 import logging
 import datetime
 from textwrap import dedent
@@ -47,6 +47,22 @@ class CephFSMount(object):
     def cleanup(self):
         raise NotImplementedError()
 
+    def wait_until_mounted(self):
+        raise NotImplementedError()
+
+    @contextmanager
+    def mounted(self):
+        """
+        A context manager, from an initially unmounted state, to mount
+        this, yield, and then unmount and clean up.
+        """
+        self.mount()
+        self.wait_until_mounted()
+        try:
+            yield
+        finally:
+            self.umount_wait()
+
     def create_files(self):
         assert(self.is_mounted())
 
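The new `CephFSMount.mounted()` context manager guarantees the unmount happens even if the client workload inside the `with` block raises. A self-contained illustration of the same shape; the `FakeMount` class is invented for the example:

from contextlib import contextmanager


class FakeMount(object):
    def mount(self):
        print("mount")

    def wait_until_mounted(self):
        print("wait until mounted")

    def umount_wait(self):
        print("umount and wait")

    @contextmanager
    def mounted(self):
        # Mount, hand control to the caller, then always unmount.
        self.mount()
        self.wait_until_mounted()
        try:
            yield
        finally:
            self.umount_wait()


with FakeMount().mounted():
    print("client workload runs here")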
@@ -4,7 +4,6 @@ import logging
 from teuthology import misc
 
 from teuthology.task.ceph import write_conf
-from teuthology.task.ceph_fuse import task as ceph_fuse_ctx
 from teuthology.task.cephfs.filesystem import Filesystem
 
 log = logging.getLogger(__name__)
@@ -28,6 +27,13 @@ def task(ctx, config):
           client: client.0
 
     """
+    if not hasattr(ctx, 'ceph'):
+        raise RuntimeError("This task must be nested in 'ceph' task")
+
+    if not hasattr(ctx, 'mounts'):
+        raise RuntimeError("This task must be nested inside 'kclient' or 'ceph_fuse' task")
+
+    # Determine which client we will use
     if config and 'client' in config:
         # Use client specified in config
         client_role = config['client']
@@ -43,17 +49,13 @@ def task(ctx, config):
             client_id = client_list[0]
         except IndexError:
             raise RuntimeError("This task requires at least one client")
-        else:
-            client_role = "client.{0}".format(client_id)
 
     fs = Filesystem(ctx, config)
+    ctx.fs = fs
     old_journal_version = JOURNAL_FORMAT_LEGACY
     new_journal_version = JOURNAL_FORMAT_RESILIENT
 
     # Set config so that journal will be created in older format
-    if not hasattr(ctx, 'ceph'):
-        raise RuntimeError("This task must be nested in 'ceph' task")
-
     if 'mds' not in ctx.ceph.conf:
         ctx.ceph.conf['mds'] = {}
     ctx.ceph.conf['mds']['mds journal format'] = old_journal_version
@@ -61,13 +63,15 @@ def task(ctx, config):
     # used a different config path this won't work.
 
     # Create a filesystem using the older journal format.
+    for mount in ctx.mounts.values():
+        mount.umount_wait()
     fs.mds_stop()
     fs.reset()
     fs.mds_restart()
 
     # Do some client work so that the log is populated with something.
-    with ceph_fuse_ctx(ctx, [client_role]) as client_mounts:
-        mount = client_mounts[client_id]
+    mount = ctx.mounts[client_id]
+    with mount.mounted():
         mount.create_files()
         mount.check_files() # sanity, this should always pass
@@ -76,12 +80,15 @@ def task(ctx, config):
     write_conf(ctx)
 
     # Restart the MDS.
-    fs.mds_restart()
+    fs.mds_fail_restart()
+    fs.wait_for_daemons()
+
+    # This ensures that all daemons come up into a valid state
+    fs.wait_for_daemons()
 
     # Check that files created in the initial client workload are still visible
     # in a client mount.
-    with ceph_fuse_ctx(ctx, [client_role]) as client_mounts:
-        mount = client_mounts[client_id]
+    with mount.mounted():
         mount.check_files()
 
     # Verify that the journal really has been rewritten.
@@ -91,4 +98,8 @@ def task(ctx, config):
         new_journal_version, journal_version()
     ))
 
+    # Leave all MDSs and clients running for any child tasks
+    for mount in ctx.mounts.values():
+        mount.mount()
+
     yield
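The switch from `fs.mds_restart()` to `fs.mds_fail_restart()` followed by `fs.wait_for_daemons()` matters because a health check taken too early can be satisfied by the monitor's pre-restart state. A toy illustration of that ordering argument; this is not teuthology code, the dict stands in for the monitor's MDS map, and 'failed' is a placeholder state string:

# Monitor's view of the MDS before the restart: still healthy.
mds_map = {'a': 'up:active'}

HEALTHY = ('up:active', 'up:standby', 'up:standby-replay')


def are_daemons_healthy():
    return all(state in HEALTHY for state in mds_map.values())


# Restart only: the daemon is bouncing, but the map has not changed yet,
# so a naive "wait until healthy" check passes immediately on stale state.
print(are_daemons_healthy())   # True, but based on pre-restart state

# Fail + restart: marking the daemon failed first means the check cannot
# pass until the daemon has really gone down and come back up.
mds_map['a'] = 'failed'
print(are_daemons_healthy())   # False until it re-registers
mds_map['a'] = 'up:active'     # daemon back up and active again
print(are_daemons_healthy())   # True, now based on post-restart state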
@@ -5,12 +5,15 @@ import logging
 import pipes
 import os
 
-from teuthology import misc as teuthology
+from teuthology import misc
+from teuthology.orchestra.run import CommandFailedError
 from teuthology.parallel import parallel
 from ..orchestra import run
 
 log = logging.getLogger(__name__)
 
+CLIENT_PREFIX = 'client.'
+
 
 def task(ctx, config):
     """
@@ -63,7 +66,7 @@ def task(ctx, config):
         'configuration must contain a dictionary of clients'
 
     overrides = ctx.config.get('overrides', {})
-    teuthology.deep_merge(config, overrides.get('workunit', {}))
+    misc.deep_merge(config, overrides.get('workunit', {}))
 
     refspec = config.get('branch')
     if refspec is None:
@@ -77,46 +80,42 @@ def task(ctx, config):
 
     log.info('Pulling workunits from ref %s', refspec)
 
-    created_dir_dict = {}
+    created_mountpoint = {}
 
     if config.get('env') is not None:
         assert isinstance(config['env'], dict), 'env must be a dictionary'
     clients = config['clients']
 
+    # Create scratch dirs for any non-all workunits
    log.info('Making a separate scratch dir for every client...')
     for role in clients.iterkeys():
         assert isinstance(role, basestring)
         if role == "all":
             continue
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
-        created_dir_dict[role] = created_mnt_dir
 
-    all_spec = False #is there an all grouping?
+        assert role.startswith(CLIENT_PREFIX)
+        created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
+        created_mountpoint[role] = created_mnt_dir
+
+    # Execute any non-all workunits
     with parallel() as p:
         for role, tests in clients.iteritems():
             if role != "all":
                 p.spawn(_run_tests, ctx, refspec, role, tests,
                         config.get('env'), timeout=timeout)
-            else:
-                all_spec = True
 
-    if all_spec:
+    # Clean up dirs from any non-all workunits
+    for role, created in created_mountpoint.items():
+        _delete_dir(ctx, role, created)
+
+    # Execute any 'all' workunits
+    if 'all' in clients:
         all_tasks = clients["all"]
         _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
                               config.get('subdir'), timeout=timeout)
 
-    for role in clients.iterkeys():
-        assert isinstance(role, basestring)
-        if role == "all":
-            continue
-        PREFIX = 'client.'
-        assert role.startswith(PREFIX)
-        if created_dir_dict[role]:
-            _delete_dir(ctx, role)
 
-def _delete_dir(ctx, role):
+def _delete_dir(ctx, role, created_mountpoint):
     """
     Delete file used by this role, and delete the directory that this
     role appeared in.
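The reorganised task body above has three phases: make scratch dirs, run every non-"all" client's workunits in parallel, then clean up before any "all" workunits run. A standalone sketch of the parallel phase using `concurrent.futures` in place of teuthology's `parallel()`; the role names and workunit paths are sample data, not taken from the commit:

from concurrent.futures import ThreadPoolExecutor


def run_tests(role, tests):
    # Stand-in for _run_tests(ctx, refspec, role, tests, env, timeout=...)
    print("running {0} workunit(s) on {1}".format(len(tests), role))


clients = {
    'client.0': ['suites/fsstress.sh'],        # sample data only
    'client.1': ['direct_io/test_sync_io'],
    'all': ['kernel_untar_build.sh'],
}

with ThreadPoolExecutor() as pool:
    futures = [pool.submit(run_tests, role, tests)
               for role, tests in clients.items() if role != 'all']
    for f in futures:
        f.result()   # surface any per-client failure, as parallel() does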
@@ -124,37 +123,35 @@ def _delete_dir(ctx, role):
     :param ctx: Context
     :param role: "role.#" where # is used for the role id.
     """
-    PREFIX = 'client.'
-    testdir = teuthology.get_testdir(ctx)
-    id_ = role[len(PREFIX):]
+    testdir = misc.get_testdir(ctx)
+    id_ = role[len(CLIENT_PREFIX):]
     (remote,) = ctx.cluster.only(role).remotes.iterkeys()
     mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
     # Is there any reason why this is not: join(mnt, role) ?
     client = os.path.join(mnt, 'client.{id}'.format(id=id_))
-    try:
-        remote.run(
-            args=[
-                'rm',
-                '-rf',
-                '--',
-                client,
-            ],
-        )
-        log.info("Deleted dir {dir}".format(dir=client))
-    except Exception:
-        log.exception("Caught an exception deleting dir {dir}".format(dir=client))
 
-    try:
+    # Remove the directory inside the mount where the workunit ran
+    remote.run(
+        args=[
+            'rm',
+            '-rf',
+            '--',
+            client,
+        ],
+    )
+    log.info("Deleted dir {dir}".format(dir=mnt))
+
+    # If the mount was an artificially created dir, delete that too
+    if created_mountpoint:
         remote.run(
             args=[
                 'rmdir',
                 '--',
                 mnt,
             ],
         )
-        log.info("Deleted dir {dir}".format(dir=mnt))
-    except Exception:
-        log.exception("Caught an exception deleting dir {dir}".format(dir=mnt))
+        log.info("Deleted artificial mount point {dir}".format(dir=client))
 
 
 def _make_scratch_dir(ctx, role, subdir):
     """
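`_delete_dir()` now takes `created_mountpoint` so it only removes the mount point itself when the scratch-dir step actually created it; the per-role directory inside the mount is always removed. A local-filesystem sketch of that split, where the names and the tempfile setup are just for the example:

import os
import shutil
import tempfile


def delete_dir(mnt, role, created_mountpoint):
    # Always remove the directory inside the mount where the work ran...
    shutil.rmtree(os.path.join(mnt, role))
    # ...but only remove the mount point itself if it was artificially created.
    if created_mountpoint:
        os.rmdir(mnt)


mnt = tempfile.mkdtemp(prefix='mnt.0.')
os.mkdir(os.path.join(mnt, 'client.0'))
delete_dir(mnt, 'client.0', created_mountpoint=True)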
@@ -165,13 +162,12 @@ def _make_scratch_dir(ctx, role, subdir):
     :param role: "role.#" where # is used for the role id.
     :param subdir: use this subdir (False if not used)
     """
-    retVal = False
-    PREFIX = 'client.'
-    id_ = role[len(PREFIX):]
+    created_mountpoint = False
+    id_ = role[len(CLIENT_PREFIX):]
     log.debug("getting remote for {id} role {role_}".format(id=id_, role_=role))
     (remote,) = ctx.cluster.only(role).remotes.iterkeys()
     dir_owner = remote.user
-    mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
+    mnt = os.path.join(misc.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
     # if neither kclient nor ceph-fuse are required for a workunit,
     # mnt may not exist. Stat and create the directory if it doesn't.
     try:
|
|||||||
'stat',
|
'stat',
|
||||||
'--',
|
'--',
|
||||||
mnt,
|
mnt,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
log.info('Did not need to create dir {dir}'.format(dir=mnt))
|
log.info('Did not need to create dir {dir}'.format(dir=mnt))
|
||||||
except Exception:
|
except CommandFailedError:
|
||||||
remote.run(
|
remote.run(
|
||||||
args=[
|
args=[
|
||||||
'mkdir',
|
'mkdir',
|
||||||
'--',
|
'--',
|
||||||
mnt,
|
mnt,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
log.info('Created dir {dir}'.format(dir=mnt))
|
log.info('Created dir {dir}'.format(dir=mnt))
|
||||||
retVal = True
|
created_mountpoint = True
|
||||||
|
|
||||||
if not subdir: subdir = 'client.{id}'.format(id=id_)
|
if not subdir:
|
||||||
if retVal:
|
subdir = 'client.{id}'.format(id=id_)
|
||||||
|
|
||||||
|
if created_mountpoint:
|
||||||
remote.run(
|
remote.run(
|
||||||
args=[
|
args=[
|
||||||
'cd',
|
'cd',
|
||||||
@ -205,8 +203,8 @@ def _make_scratch_dir(ctx, role, subdir):
|
|||||||
'mkdir',
|
'mkdir',
|
||||||
'--',
|
'--',
|
||||||
subdir,
|
subdir,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
remote.run(
|
remote.run(
|
||||||
args=[
|
args=[
|
||||||
@ -224,10 +222,10 @@ def _make_scratch_dir(ctx, role, subdir):
|
|||||||
'--owner={user}'.format(user=dir_owner),
|
'--owner={user}'.format(user=dir_owner),
|
||||||
'--',
|
'--',
|
||||||
subdir,
|
subdir,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
return retVal
|
return created_mountpoint
|
||||||
|
|
||||||
|
|
||||||
def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
|
def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
|
||||||
@@ -237,12 +235,14 @@ def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
 
     See run_tests() for parameter documentation.
     """
-    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
+    client_generator = misc.all_roles_of_type(ctx.cluster, 'client')
     client_remotes = list()
+
+    created_mountpoint = {}
     for client in client_generator:
         (client_remote,) = ctx.cluster.only('client.{id}'.format(id=client)).remotes.iterkeys()
         client_remotes.append((client_remote, 'client.{id}'.format(id=client)))
-        _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir)
+        created_mountpoint[client] = _make_scratch_dir(ctx, "client.{id}".format(id=client), subdir)
 
     for unit in tests:
         with parallel() as p:
@@ -251,9 +251,9 @@ def _spawn_on_all_clients(ctx, refspec, tests, env, subdir, timeout=None):
                     timeout=timeout)
 
     # cleanup the generated client directories
-    client_generator = teuthology.all_roles_of_type(ctx.cluster, 'client')
+    client_generator = misc.all_roles_of_type(ctx.cluster, 'client')
     for client in client_generator:
-        _delete_dir(ctx, 'client.{id}'.format(id=client))
+        _delete_dir(ctx, 'client.{id}'.format(id=client), created_mountpoint[client])
 
 
 def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
@@ -274,11 +274,10 @@ def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
     """
-    testdir = teuthology.get_testdir(ctx)
+    testdir = misc.get_testdir(ctx)
     assert isinstance(role, basestring)
-    PREFIX = 'client.'
-    assert role.startswith(PREFIX)
-    id_ = role[len(PREFIX):]
+    assert role.startswith(CLIENT_PREFIX)
+    id_ = role[len(CLIENT_PREFIX):]
     (remote,) = ctx.cluster.only(role).remotes.iterkeys()
     mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
     # subdir so we can remove and recreate this a lot without sudo
@@ -309,12 +308,12 @@ def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
             run.Raw('&&'),
             'find', '-executable', '-type', 'f', '-printf', r'%P\0'.format(srcdir=srcdir),
             run.Raw('>{tdir}/workunits.list'.format(tdir=testdir)),
         ],
     )
 
-    workunits = sorted(teuthology.get_file(
+    workunits = sorted(misc.get_file(
        remote,
        '{tdir}/workunits.list'.format(tdir=testdir)).split('\0'))
     assert workunits
 
     try:
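The `find -executable -type f -printf '%P\0'` pipeline above builds the list of runnable workunit scripts as paths relative to the checkout. An equivalent pure-Python walk, for illustration only; the directory in the usage comment is hypothetical:

import os


def list_workunits(srcdir):
    """Collect executable regular files under srcdir as sorted relative paths."""
    found = []
    for root, _dirs, files in os.walk(srcdir):
        for name in files:
            path = os.path.join(root, name)
            if os.access(path, os.X_OK):
                found.append(os.path.relpath(path, srcdir))
    return sorted(found)


# workunits = list_workunits('qa/workunits')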
@@ -336,7 +335,7 @@ def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
                 run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                 run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                 run.Raw('CEPH_ID="{id}"'.format(id=id_)),
             ]
             if env is not None:
                 for var, val in env.iteritems():
                     quoted_val = pipes.quote(val)
@@ -352,21 +351,21 @@ def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None):
                     '{srcdir}/{workunit}'.format(
                         srcdir=srcdir,
                         workunit=workunit,
                     ),
                 ])
                 remote.run(
                     logger=log.getChild(role),
                     args=args,
                 )
                 remote.run(
                     logger=log.getChild(role),
                     args=['sudo', 'rm', '-rf', '--', scratch_tmp],
                 )
     finally:
-        log.info('Stopping %s on %s...', spec, role)
+        log.info('Stopping %s on %s...', tests, role)
         remote.run(
             logger=log.getChild(role),
             args=[
                 'rm', '-rf', '--', '{tdir}/workunits.list'.format(tdir=testdir), srcdir,
             ],
         )