tasks: create ceph-mgr tests

Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
John Spray 2016-07-16 22:16:53 +01:00
parent a9df9e088d
commit 38c23c1841
3 changed files with 166 additions and 0 deletions

0
tasks/mgr/__init__.py Normal file
View File

View File

@@ -0,0 +1,85 @@
from unittest import case
import json
from teuthology import misc
from tasks.ceph_test_case import CephTestCase
# TODO move definition of CephCluster
from tasks.cephfs.filesystem import CephCluster
class MgrCluster(CephCluster):
    """
    Convenience wrapper around the ceph-mgr daemons in a teuthology
    cluster: start/stop/fail individual managers and inspect the mgrmap.

    Raises RuntimeError at construction time if the cluster has no
    'mgr' roles at all.
    """
    def __init__(self, ctx):
        super(MgrCluster, self).__init__(ctx)
        self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))

        if len(self.mgr_ids) == 0:
            raise RuntimeError(
                "This task requires at least one manager daemon")

        # Map of mgr id -> teuthology daemon handle, for stop/restart.
        self.mgr_daemons = dict(
            [(mgr_id, self._ctx.daemons.get_daemon('mgr', mgr_id)) for mgr_id
             in self.mgr_ids])

    @property
    def admin_remote(self):
        """Remote of the first mon, used for running admin commands."""
        first_mon = misc.get_first_mon(self._ctx, None)
        # BUGFIX: dict.iterkeys() is Python-2-only; .keys() unpacks fine on
        # both Python 2 (list) and Python 3 (view), and we expect exactly
        # one remote for the first mon role.
        (result,) = self._ctx.cluster.only(first_mon).remotes.keys()
        return result

    def mgr_stop(self, mgr_id):
        """Stop the daemon process for `mgr_id` (it stays in the map)."""
        self.mgr_daemons[mgr_id].stop()

    def mgr_fail(self, mgr_id):
        """Mark `mgr_id` failed via the mons ('ceph mgr fail <id>')."""
        self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id)

    def mgr_restart(self, mgr_id):
        """Restart the daemon process for `mgr_id`."""
        self.mgr_daemons[mgr_id].restart()

    def get_mgr_map(self):
        """Return the 'mgrmap' section of `ceph status` as a dict."""
        status = json.loads(
            self.mon_manager.raw_cluster_cmd("status", "--format=json-pretty"))

        return status["mgrmap"]

    def get_active_id(self):
        """Return the id of the currently active mgr ("" if none)."""
        return self.get_mgr_map()["active_name"]

    def get_standby_ids(self):
        """Return the list of standby mgr ids from the mgrmap."""
        return [s['name'] for s in self.get_mgr_map()["standbys"]]
class MgrTestCase(CephTestCase):
    """
    Base class for ceph-mgr tests: checks the required number of mgr
    daemons is present, then restarts them all to reach a clean state
    (one active, the rest standbys) before each test.
    """
    # Subclasses override this to demand more manager daemons.
    REQUIRE_MGRS = 1

    def setUp(self):
        super(MgrTestCase, self).setUp()

        # The test runner should have populated this
        assert self.mgr_cluster is not None

        cluster = self.mgr_cluster
        if len(cluster.mgr_ids) < self.REQUIRE_MGRS:
            raise case.SkipTest("Only have {0} manager daemons, "
                                "{1} are required".format(
                                    len(cluster.mgr_ids), self.REQUIRE_MGRS))

        # Restart all the daemons
        for d in cluster.mgr_daemons.values():
            d.stop()

        for mgr_id in cluster.mgr_ids:
            cluster.mgr_fail(mgr_id)

        for d in cluster.mgr_daemons.values():
            d.restart()

        # Wait for an active to come up
        self.wait_until_true(lambda: cluster.get_active_id() != "",
                             timeout=20)

        # Then wait until everyone else settles as a standby.
        expect_standbys = set(cluster.mgr_ids) - {cluster.get_active_id()}
        self.wait_until_true(
            lambda: set(cluster.get_standby_ids()) == expect_standbys,
            timeout=20)

View File

@@ -0,0 +1,81 @@
import logging
from tasks.mgr.mgr_test_case import MgrTestCase
log = logging.getLogger(__name__)
class TestFailover(MgrTestCase):
    """
    Exercise mgr active/standby transitions: beacon timeout of the
    active, explicit `ceph mgr fail`, and beacon timeout of a standby.
    """
    REQUIRE_MGRS = 2

    def test_timeout(self):
        """
        That when an active mgr stops responding, a standby is promoted
        after mon_mgr_beacon_grace.
        """
        # Query which mgr is active
        active_id = self.mgr_cluster.get_active_id()
        standby_ids = self.mgr_cluster.get_standby_ids()

        # Stop that daemon
        self.mgr_cluster.mgr_stop(active_id)

        # Assert that the other mgr becomes active
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in standby_ids,
            timeout=60
        )

        # Bring the old active back; it should rejoin as a standby.
        self.mgr_cluster.mgr_restart(active_id)
        self.wait_until_true(
            lambda: active_id in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_explicit_fail(self):
        """
        That when a user explicitly fails a daemon, a standby immediately
        replaces it.
        :return:
        """
        # Query which mgr is active
        active_id = self.mgr_cluster.get_active_id()
        standby_ids = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mgr_fail(active_id)

        # A standby should take over
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in standby_ids,
            timeout=60
        )

        # The one we failed should come back as a standby (he isn't
        # really dead)
        self.wait_until_true(
            lambda: active_id in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_standby_timeout(self):
        """
        That when a standby daemon stops sending beacons, it is
        removed from the list of standbys
        :return:
        """
        active_id = self.mgr_cluster.get_active_id()
        standby_ids = self.mgr_cluster.get_standby_ids()

        # Kill the first standby and expect only it to drop out.
        victim = standby_ids[0]
        self.mgr_cluster.mgr_stop(victim)
        survivors = set(standby_ids) - {victim}

        self.wait_until_true(
            lambda: set(self.mgr_cluster.get_standby_ids()) == survivors,
            timeout=60
        )

        # The active must be unaffected by losing a standby.
        self.assertEqual(self.mgr_cluster.get_active_id(), active_id)