ceph/teuthology/task/mon_recovery.py
Warren Usui d693b3f895 Lines formerly of the form '(remote,) = ctx.cluster.only(role).remotes.keys()'
and '(remote,) = ctx.cluster.only(role).remotes.iterkeys()' would fail with
ValueError and no message if there were less than 0 or more than 1 key.
Now a new function, get_single_remote_value() is called which prints out
more understandable messages.

Fixes: 7510
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Signed-off-by: Warren Usui <warren.usui@inktank.com>
2014-03-26 18:43:48 -07:00

81 lines
2.2 KiB
Python

"""
Monitor recovery
"""
import logging
import ceph_manager
from teuthology import misc as teuthology
log = logging.getLogger(__name__)
def task(ctx, config):
"""
Test monitor recovery.
"""
if config is None:
config = {}
assert isinstance(config, dict), \
'task only accepts a dict for configuration'
first_mon = teuthology.get_first_mon(ctx, config)
mon = teuthology.get_single_remote_value(ctx, first_mon)
manager = ceph_manager.CephManager(
mon,
ctx=ctx,
logger=log.getChild('ceph_manager'),
)
mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
log.info("mon ids = %s" % mons)
manager.wait_for_mon_quorum_size(len(mons))
log.info('verifying all monitors are in the quorum')
for m in mons:
s = manager.get_mon_status(m)
assert s['state'] == 'leader' or s['state'] == 'peon'
assert len(s['quorum']) == len(mons)
log.info('restarting each monitor in turn')
for m in mons:
# stop a monitor
manager.kill_mon(m)
manager.wait_for_mon_quorum_size(len(mons) - 1)
# restart
manager.revive_mon(m)
manager.wait_for_mon_quorum_size(len(mons))
# in forward and reverse order,
rmons = mons
rmons.reverse()
for mons in mons, rmons:
log.info('stopping all monitors')
for m in mons:
manager.kill_mon(m)
log.info('forming a minimal quorum for %s, then adding monitors' % mons)
qnum = (len(mons) / 2) + 1
num = 0
for m in mons:
manager.revive_mon(m)
num += 1
if num >= qnum:
manager.wait_for_mon_quorum_size(num)
# on both leader and non-leader ranks...
for rank in [0, 1]:
# take one out
log.info('removing mon %s' % mons[rank])
manager.kill_mon(mons[rank])
manager.wait_for_mon_quorum_size(len(mons) - 1)
log.info('causing some monitor log activity')
m = 30
for n in range(1, m):
manager.raw_cluster_cmd('log', '%d of %d' % (n, m))
log.info('adding mon %s back in' % mons[rank])
manager.revive_mon(mons[rank])
manager.wait_for_mon_quorum_size(len(mons))