Mirror of https://github.com/ceph/ceph, synced 2025-01-04 10:12:30 +00:00
tasks: create ceph-mgr tests
Signed-off-by: John Spray <john.spray@redhat.com>
This commit is contained in:
parent a9df9e088d
commit 38c23c1841
0
tasks/mgr/__init__.py
Normal file
85
tasks/mgr/mgr_test_case.py
Normal file
@@ -0,0 +1,85 @@
from unittest import case
import json

from teuthology import misc

from tasks.ceph_test_case import CephTestCase

# TODO move definition of CephCluster
from tasks.cephfs.filesystem import CephCluster


class MgrCluster(CephCluster):
    def __init__(self, ctx):
        super(MgrCluster, self).__init__(ctx)
        self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))

        if len(self.mgr_ids) == 0:
            raise RuntimeError(
                "This task requires at least one manager daemon")

        self.mgr_daemons = dict(
            [(mgr_id, self._ctx.daemons.get_daemon('mgr', mgr_id)) for mgr_id
             in self.mgr_ids])

    @property
    def admin_remote(self):
        first_mon = misc.get_first_mon(self._ctx, None)
        (result,) = self._ctx.cluster.only(first_mon).remotes.iterkeys()
        return result

    def mgr_stop(self, mgr_id):
        self.mgr_daemons[mgr_id].stop()

    def mgr_fail(self, mgr_id):
        self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id)

    def mgr_restart(self, mgr_id):
        self.mgr_daemons[mgr_id].restart()

    def get_mgr_map(self):
        status = json.loads(
            self.mon_manager.raw_cluster_cmd("status", "--format=json-pretty"))

        return status["mgrmap"]

    def get_active_id(self):
        return self.get_mgr_map()["active_name"]

    def get_standby_ids(self):
        return [s['name'] for s in self.get_mgr_map()["standbys"]]


class MgrTestCase(CephTestCase):
    REQUIRE_MGRS = 1

    def setUp(self):
        super(MgrTestCase, self).setUp()

        # The test runner should have populated this
        assert self.mgr_cluster is not None

        if len(self.mgr_cluster.mgr_ids) < self.REQUIRE_MGRS:
            raise case.SkipTest("Only have {0} manager daemons, "
                                "{1} are required".format(
                                    len(self.mgr_cluster.mgr_ids),
                                    self.REQUIRE_MGRS))

        # Restart all the daemons
        for daemon in self.mgr_cluster.mgr_daemons.values():
            daemon.stop()

        for mgr_id in self.mgr_cluster.mgr_ids:
            self.mgr_cluster.mgr_fail(mgr_id)

        for daemon in self.mgr_cluster.mgr_daemons.values():
            daemon.restart()

        # Wait for an active to come up
        self.wait_until_true(lambda: self.mgr_cluster.get_active_id() != "",
                             timeout=20)

        expect_standbys = set(self.mgr_cluster.mgr_ids) \
            - {self.mgr_cluster.get_active_id()}
        self.wait_until_true(
            lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys,
            timeout=20)
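For reference, a minimal sketch (not part of the commit) of the mgrmap shape that get_mgr_map(), get_active_id() and get_standby_ids() above rely on: only the "active_name" key and the "name" field of each "standbys" entry are read. The daemon names below are invented for illustration, and the real "ceph status" output carries many more keys.

import json

# Trimmed-down "ceph status" JSON payload; the helpers above only
# look at mgrmap["active_name"] and mgrmap["standbys"][*]["name"].
status_json = '''
{
    "mgrmap": {
        "active_name": "x",
        "standbys": [{"name": "y"}, {"name": "z"}]
    }
}
'''

mgr_map = json.loads(status_json)["mgrmap"]
assert mgr_map["active_name"] == "x"
assert [s["name"] for s in mgr_map["standbys"]] == ["y", "z"]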
81
tasks/mgr/test_failover.py
Normal file
@@ -0,0 +1,81 @@

import logging

from tasks.mgr.mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)


class TestFailover(MgrTestCase):
    REQUIRE_MGRS = 2

    def test_timeout(self):
        """
        That when an active mgr stops responding, a standby is promoted
        after mon_mgr_beacon_grace.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Stop that daemon
        self.mgr_cluster.mgr_stop(original_active)

        # Assert that the other mgr becomes active
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        self.mgr_cluster.mgr_restart(original_active)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_explicit_fail(self):
        """
        That when a user explicitly fails a daemon, a standby immediately
        replaces it.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mgr_fail(original_active)

        # A standby should take over
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        # The one we failed should come back as a standby (it isn't
        # really dead)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_standby_timeout(self):
        """
        That when a standby daemon stops sending beacons, it is
        removed from the list of standbys.
        """
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        victim = original_standbys[0]
        self.mgr_cluster.mgr_stop(victim)

        expect_standbys = set(original_standbys) - {victim}

        self.wait_until_true(
            lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys,
            timeout=60
        )
        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
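The same failover behaviour can be checked by hand against a running cluster. A hedged sketch, assuming a working "ceph" CLI on the path; the polling loop below stands in for wait_until_true() and its 60-second deadline mirrors the test's timeout, but none of this is part of the commit:

import json
import subprocess
import time


def mgr_map():
    # "ceph status --format=json" is the same query raw_cluster_cmd() wraps above
    out = subprocess.check_output(["ceph", "status", "--format=json"])
    return json.loads(out)["mgrmap"]


active = mgr_map()["active_name"]

# Explicitly fail the active mgr, as test_explicit_fail() does
subprocess.check_call(["ceph", "mgr", "fail", active])

# Poll until a standby has been promoted
deadline = time.time() + 60
while mgr_map()["active_name"] in ("", active):
    if time.time() >= deadline:
        raise RuntimeError("no standby was promoted within 60s")
    time.sleep(1)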