mirror of
https://github.com/ceph/ceph
synced 2025-01-13 06:22:51 +00:00
176e9ef267
A quick check that clients refuse to mount when daemons are laggy, and while we're at it, that the basics of failover work. It's a trivial test, but it's nice to have this kind of thing so that we don't have to wait for weird thrasher failures if something breaks. Signed-off-by: John Spray <john.spray@redhat.com>
82 lines
2.7 KiB
Python
82 lines
2.7 KiB
Python
|
|
from unittest import case
|
|
from cephfs_test_case import CephFSTestCase
|
|
from teuthology.exceptions import CommandFailedError
|
|
|
|
|
|
class TestFailover(CephFSTestCase):
|
|
CLIENTS_REQUIRED = 1
|
|
MDSS_REQUIRED = 2
|
|
|
|
def test_simple(self):
|
|
"""
|
|
That when the active MDS is killed, a standby MDS is promoted into
|
|
its rank after the grace period.
|
|
|
|
This is just a simple unit test, the harder cases are covered
|
|
in thrashing tests.
|
|
"""
|
|
|
|
(original_active, ) = self.fs.get_active_names()
|
|
original_standbys = self.fs.get_daemon_names("up:standby")
|
|
|
|
# Kill the rank 0 daemon's physical process
|
|
self.fs.mds_stop(original_active)
|
|
|
|
grace = int(self.fs.get_config("mds_beacon_grace", service_type="mon"))
|
|
|
|
# Wait until the monitor promotes his replacement
|
|
def promoted():
|
|
active = self.fs.get_active_names()
|
|
return active and active[0] in original_standbys
|
|
|
|
self.wait_until_true(
|
|
promoted,
|
|
timeout=grace*2)
|
|
|
|
# Start the original rank 0 daemon up again, see that he becomes a standby
|
|
self.fs.mds_restart(original_active)
|
|
self.wait_until_true(
|
|
lambda: original_active in self.fs.get_daemon_names("up:standby"),
|
|
timeout=60 # Approximately long enough for MDS to start and mon to notice
|
|
)
|
|
|
|
def test_client_abort(self):
|
|
"""
|
|
That a client will respect fuse_require_active_mds and error out
|
|
when the cluster appears to be unavailable.
|
|
"""
|
|
|
|
require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true"
|
|
if not require_active:
|
|
raise case.SkipTest("fuse_require_active_mds is not set")
|
|
|
|
grace = int(self.fs.get_config("mds_beacon_grace", service_type="mon"))
|
|
|
|
# Check it's not laggy to begin with
|
|
(original_active, ) = self.fs.get_active_names()
|
|
self.assertNotIn("laggy_since", self.fs.mon_manager.get_mds_status(original_active))
|
|
|
|
self.mounts[0].umount_wait()
|
|
|
|
# Control: that we can mount and unmount usually, while the cluster is healthy
|
|
self.mounts[0].mount()
|
|
self.mounts[0].wait_until_mounted()
|
|
self.mounts[0].umount_wait()
|
|
|
|
# Stop the daemon processes
|
|
self.fs.mds_stop()
|
|
|
|
# Wait for everyone to go laggy
|
|
def laggy():
|
|
mdsmap = self.fs.mon_manager.get_mds_status_all()
|
|
for info in mdsmap['info'].values():
|
|
if "laggy_since" not in info:
|
|
return False
|
|
|
|
return True
|
|
|
|
self.wait_until_true(laggy, grace * 2)
|
|
with self.assertRaises(CommandFailedError):
|
|
self.mounts[0].mount()
|