2013-10-12 08:28:27 +00:00
|
|
|
"""
|
|
|
|
Monitor recovery
|
|
|
|
"""
|
2011-11-09 06:06:43 +00:00
|
|
|
import logging
|
2020-03-24 08:33:22 +00:00
|
|
|
from tasks import ceph_manager
|
2011-11-09 06:06:43 +00:00
|
|
|
from teuthology import misc as teuthology
|
|
|
|
|
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
def task(ctx, config):
    """
    Test monitor recovery.

    The task drives the cluster's monitors through a series of stop/start
    scenarios and waits for the expected quorum size after each step:

    1. verify that every monitor is currently in the quorum;
    2. restart each monitor in turn;
    3. stop all monitors, then revive them one at a time (once in forward
       and once in reverse order), waiting for quorum as soon as a
       majority is running;
    4. take the first and then the second monitor out while generating
       cluster log traffic, and bring it back in.

    :param ctx: teuthology run context; used to locate the monitors.
    :param config: task configuration; must be a dict or None (None is
                   treated as an empty dict).
    :raises AssertionError: if config is not a dict, or if a monitor
                            reports an unexpected state or quorum size
                            during the initial verification.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
        )

    # keep only the second dotted component of each monitor name
    # (presumably names look like 'mon.<id>' -- confirm against
    # teuthology.get_mon_names)
    mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
    log.info("mon ids = %s", mons)

    manager.wait_for_mon_quorum_size(len(mons))

    log.info('verifying all monitors are in the quorum')
    for m in mons:
        s = manager.get_mon_status(m)
        assert s['state'] in ('leader', 'peon')
        assert len(s['quorum']) == len(mons)

    log.info('restarting each monitor in turn')
    for m in mons:
        # stop a monitor
        manager.kill_mon(m)
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        # restart
        manager.revive_mon(m)
        manager.wait_for_mon_quorum_size(len(mons))

    # in forward and reverse order,
    # NOTE: the reversed list must be an independent copy; reversing an
    # alias of `mons` in place would make both passes use the same order.
    # A separate loop name is used so `mons` keeps its original order for
    # the rank test further down.
    for order in (mons, list(reversed(mons))):
        log.info('stopping all monitors')
        for m in order:
            manager.kill_mon(m)

        log.info('forming a minimal quorum for %s, then adding monitors',
                 order)
        # smallest number of monitors that constitutes a majority
        qnum = (len(order) // 2) + 1
        for num, m in enumerate(order, start=1):
            manager.revive_mon(m)
            # below a majority no quorum can form, so only wait once
            # enough monitors are back up
            if num >= qnum:
                manager.wait_for_mon_quorum_size(num)

    # on both leader and non-leader ranks...
    for rank in [0, 1]:
        # take one out
        log.info('removing mon %s', mons[rank])
        manager.kill_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons) - 1)

        log.info('causing some monitor log activity')
        total = 30
        # inclusive upper bound so the final message reads "30 of 30"
        for n in range(1, total + 1):
            manager.raw_cluster_cmd('log', '%d of %d' % (n, total))

        log.info('adding mon %s back in', mons[rank])
        manager.revive_mon(mons[rank])
        manager.wait_for_mon_quorum_size(len(mons))
|