From bee7a113c7577484bdedc300d2f5cb69128d1187 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 19 Apr 2018 11:52:50 -0700 Subject: [PATCH 1/2] MDSMonitor: do not wipe old_max_mds when marked down twice Fixes: http://tracker.ceph.com/issues/23800 Signed-off-by: Patrick Donnelly --- src/mon/FSCommands.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 09c4892dd2d..efaff5bddb3 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -389,8 +389,10 @@ public: [is_down](std::shared_ptr fs) { if (is_down) { - fs->mds_map.set_old_max_mds(); - fs->mds_map.set_max_mds(0); + if (fs->mds_map.get_max_mds() > 0) { + fs->mds_map.set_old_max_mds(); + fs->mds_map.set_max_mds(0); + } /* else already down! */ } else { mds_rank_t oldmax = fs->mds_map.get_old_max_mds(); fs->mds_map.set_max_mds(oldmax ? oldmax : 1); From 378a6fcfe877dd5fe3799f368ac541b2ced8ce4a Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 19 Apr 2018 11:57:45 -0700 Subject: [PATCH 2/2] qa: check old_max_mds is not wiped when marked down twice Signed-off-by: Patrick Donnelly --- qa/tasks/cephfs/test_failover.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py index 2800f97911a..97f848ba1b9 100644 --- a/qa/tasks/cephfs/test_failover.py +++ b/qa/tasks/cephfs/test_failover.py @@ -88,18 +88,32 @@ class TestClusterResize(CephFSTestCase): self.shrink(2) self.wait_for_health_clear(30) + def test_down_twice(self): + """ + That marking a FS down twice does not wipe old_max_mds. + """ + + self.grow(2) + self.fs.set_down() + self.wait_for_health("MDS_ALL_DOWN", 30) + self.fs.set_down(False) + mdsmap = self.fs.get_mds_map() + self.assertTrue(mdsmap["max_mds"] == 2) + self.fs.wait_for_daemons(timeout=60) + def test_all_down(self): """ - That a health error is generated when FS has no active MDS. 
+ That a health error is generated when FS has no active MDS and cleared + when actives come back online. """ self.fs.set_down() self.wait_for_health("MDS_ALL_DOWN", 30) self.fs.set_down(False) self.wait_for_health_clear(30) - self.fs.set_down() + self.fs.set_down(True) self.wait_for_health("MDS_ALL_DOWN", 30) - self.grow(1) + self.grow(2) self.wait_for_health_clear(30) def test_hole(self):