qa/tasks: start DaemonWatchdog when ceph starts

* Start DaemonWatchdog when ceph starts
* Stop starting the DaemonWatchdog in mds_thrash.py
* Register the mds_thrash.py thrashers in the per-cluster ceph context

Fixes: http://tracker.ceph.com/issues/10369
Signed-off-by: Jos Collin <jcollin@redhat.com>
This commit is contained in:
Jos Collin 2019-06-06 16:50:18 +05:30
parent 146962dea1
commit 08b99eef27
No known key found for this signature in database
GPG Key ID: 10DA18C384692C82
2 changed files with 16 additions and 11 deletions

View File

@ -27,6 +27,7 @@ from teuthology import exceptions
from teuthology.orchestra import run
import ceph_client as cclient
from teuthology.orchestra.daemon import DaemonGroup
from tasks.daemonwatchdog import DaemonWatchdog
CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw']
DATA_PATH = '/var/lib/ceph/{type_}/{cluster}-{id_}'
@ -416,6 +417,12 @@ def cephfs_setup(ctx, config):
yield
@contextlib.contextmanager
def watchdog_setup(ctx, config):
    """
    Create and start a DaemonWatchdog for the cluster in config['cluster'].

    Two attributes are set on ``ctx.ceph[config['cluster']]``:
    ``thrashers``, a fresh empty list, and ``watchdog``, the
    DaemonWatchdog built from ``ctx``, ``config`` and that list.
    The watchdog is started before control is yielded to the caller.

    Nothing is stopped or joined in this function after the yield.
    """
    cluster_ctx = ctx.ceph[config['cluster']]

    # The list stored on the context is the same object handed to the
    # watchdog, so later appends to ``thrashers`` are visible to it.
    registered_thrashers = []
    cluster_ctx.thrashers = registered_thrashers

    daemon_watchdog = DaemonWatchdog(ctx, config, registered_thrashers)
    cluster_ctx.watchdog = daemon_watchdog
    daemon_watchdog.start()

    yield
def get_mons(roles, ips, cluster_name,
mon_bind_msgr2=False,
@ -1672,6 +1679,8 @@ def stop(ctx, config):
cluster, type_, id_ = teuthology.split_role(role)
ctx.daemons.get_daemon(type_, id_, cluster).stop()
ctx.ceph[config['cluster']].watchdog.stop()
ctx.ceph[config['cluster']].watchdog.join()
yield
@ -1909,6 +1918,7 @@ def task(ctx, config):
lambda: create_rbd_pool(ctx=ctx, config=config),
lambda: cephfs_setup(ctx=ctx, config=config),
lambda: run_daemon(ctx=ctx, config=config, type_='mds'),
lambda: watchdog_setup(ctx=ctx, config=config),
]
with contextutil.nested(*subtasks):

View File

@ -14,7 +14,6 @@ from gevent.event import Event
from teuthology import misc as teuthology
from tasks.cephfs.filesystem import MDSCluster, Filesystem
from tasks.daemonwatchdog import DaemonWatchdog
log = logging.getLogger(__name__)
@ -409,29 +408,25 @@ def task(ctx, config):
status = mds_cluster.status()
log.info('Ready to start thrashing')
thrashers = []
watchdog = DaemonWatchdog(ctx, manager, config, thrashers)
watchdog.start()
manager.wait_for_clean()
assert manager.is_clean()
if 'cluster' not in config:
config['cluster'] = 'ceph'
for fs in status.get_filesystems():
thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']), fs['mdsmap']['max_mds'])
thrasher.start()
thrashers.append(thrasher)
ctx.ceph[config['cluster']].thrashers.append(thrasher)
try:
log.debug('Yielding')
yield
finally:
log.info('joining mds_thrashers')
for thrasher in thrashers:
for thrasher in ctx.ceph[config['cluster']].thrashers:
thrasher.stop()
if thrasher.e:
raise RuntimeError('error during thrashing')
thrasher.join()
log.info('done joining')
watchdog.stop()
watchdog.join()