mirror of
https://github.com/ceph/ceph
synced 2025-02-17 16:07:37 +00:00
Merge pull request #56066 from rishabh-d-dave/mds-fail-confirm
mon,cephfs: require confirmation flag to bring down unhealthy MDS Reviewed-by: Leonid Usov <leonid.usov@ibm.com> Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
commit
18c7799cce
@ -178,6 +178,11 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
|
||||
and it can be disabled by using:
|
||||
`ceph config set mgr mgr/volumes/snapshot_clone_no_wait false`
|
||||
|
||||
* CephFS: Commands "ceph mds fail" and "ceph fs fail" now require a
|
||||
confirmation flag when some MDSs exhibit health warning MDS_TRIM or
|
||||
MDS_CACHE_OVERSIZED. This prevents an accidental MDS failover from causing
|
||||
further delays in recovery.
|
||||
|
||||
>=18.0.0
|
||||
|
||||
* The RGW policy parser now rejects unknown principals by default. If you are
|
||||
|
@ -117,4 +117,11 @@ the following method.
|
||||
|
||||
$ sudo rm -rf /var/lib/ceph/mds/ceph-${id}
|
||||
|
||||
|
||||
.. note:: When an active MDS has either health warning MDS_TRIM or
|
||||
MDS_CACHE_OVERSIZED, the confirmation flag (--yes-i-really-mean-it)
|
||||
needs to be passed, else the command will fail. It is not recommended to
|
||||
restart an MDS which has these warnings since slow recovery at restart may
|
||||
lead to more problems.
|
||||
|
||||
.. _MDS Config Reference: ../mds-config-ref
|
||||
|
@ -193,7 +193,11 @@ file system and MDS daemons down, use the ``ceph fs fail`` command:
|
||||
|
||||
::
|
||||
|
||||
ceph fs fail <fs_name>
|
||||
ceph fs fail <fs_name> {--yes-i-really-mean-it}
|
||||
|
||||
.. note:: The confirmation flag is optional because it is only required
|
||||
when the MDS is active and has health warning MDS_TRIM or
|
||||
MDS_CACHE_OVERSIZED.
|
||||
|
||||
This command sets a file system flag to prevent standbys from
|
||||
activating on the file system (the ``joinable`` flag).
|
||||
@ -210,7 +214,11 @@ respawn as standbys. The file system will be left in a degraded state.
|
||||
::
|
||||
|
||||
# For all ranks, 0-N:
|
||||
ceph mds fail <fs_name>:<n>
|
||||
ceph mds fail <fs_name>:<n> {--yes-i-really-mean-it}
|
||||
|
||||
.. note:: The confirmation flag is optional because it is only required
|
||||
when the MDS is active and has health warning MDS_TRIM or
|
||||
MDS_CACHE_OVERSIZED.
|
||||
|
||||
Once all ranks are inactive, the file system may also be deleted or left in
|
||||
this state for other purposes (perhaps disaster recovery).
|
||||
|
@ -5,6 +5,8 @@ overrides:
|
||||
lockdep: true
|
||||
log-ignorelist:
|
||||
- missing required features
|
||||
- \(MDS_CACHE_OVERSIZED\)
|
||||
- \(MDS_TRIM\)
|
||||
tasks:
|
||||
- cephfs_test_runner:
|
||||
fail_on_skip: false
|
||||
|
@ -612,7 +612,12 @@ class FilesystemBase(MDSClusterBase):
|
||||
self.run_ceph_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it')
|
||||
|
||||
def fail(self):
    """
    Run "ceph fs fail" for this file system.

    The command is first issued without the confirmation flag. If that
    attempt raises CommandFailedError, it is retried once with
    "--yes-i-really-mean-it" appended; a failure of the retry propagates
    to the caller.
    """
    cmd = ["fs", "fail", str(self.name)]
    try:
        self.run_ceph_cmd(cmd)
    except CommandFailedError:
        # Retry with the confirmation flag instead of giving up.
        cmd.append("--yes-i-really-mean-it")
        self.run_ceph_cmd(cmd)
|
||||
|
||||
def set_flag(self, var, *args):
|
||||
a = map(lambda x: str(x).lower(), args)
|
||||
@ -1181,8 +1186,13 @@ class FilesystemBase(MDSClusterBase):
|
||||
def rank_repaired(self, rank):
|
||||
self.run_ceph_cmd("mds", "repaired", "{}:{}".format(self.id, rank))
|
||||
|
||||
def rank_fail(self, rank=0):
|
||||
self.run_ceph_cmd("mds", "fail", "{}:{}".format(self.id, rank))
|
||||
def rank_fail(self, rank=0, confirm=True):
    """
    Run "ceph mds fail <id>:<rank>" for the given rank of this file system.

    :param rank: rank to fail, combined with ``self.id`` to form the role.
    :param confirm: when True (the default), a failed first attempt is
                    retried once with "--yes-i-really-mean-it" appended.
                    When False, the failure of the first attempt is
                    re-raised unchanged.
    :raises CommandFailedError: if the command (or its confirmed retry)
                                fails.
    """
    cmd = f'mds fail {self.id}:{rank}'
    try:
        self.run_ceph_cmd(args=cmd)
    except CommandFailedError:
        if not confirm:
            raise
        # The flag must be spelled exactly as in the fail() helper and the
        # documentation; a misspelled flag makes the retry fail as well.
        cmd += ' --yes-i-really-mean-it'
        self.run_ceph_cmd(args=cmd)
|
||||
|
||||
def rank_is_running(self, rank=0, status=None):
|
||||
name = self.get_rank(rank=rank, status=status)['name']
|
||||
|
@ -91,6 +91,38 @@ class TestAdminCommands(CephFSTestCase):
|
||||
if overwrites:
|
||||
self.run_ceph_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true')
|
||||
|
||||
def _get_unhealthy_mds_id(self, health_report, health_warn):
|
||||
'''
|
||||
Return MDS ID for which health warning in "health_warn" has been
|
||||
generated.
|
||||
'''
|
||||
# variable "msg" should hold string something like this -
|
||||
# 'mds.b(mds.0): Behind on trimming (865/10) max_segments: 10,
|
||||
# num_segments: 86
|
||||
msg = health_report['checks'][health_warn]['detail'][0]['message']
|
||||
mds_id = msg.split('(')[0]
|
||||
mds_id = mds_id.replace('mds.', '')
|
||||
return mds_id
|
||||
|
||||
def wait_till_health_warn(self, health_warn, active_mds_id, sleep=3,
                          tries=10):
    '''
    Poll "ceph health detail" until "health_warn" is listed under its
    "checks", returning as soon as it is found.

    Each iteration first runs "tell mds.<active_mds_id> cache status"
    (its output is discarded) and then fetches the health report as JSON.
    The loop is paced by safe_while() using "sleep" and "tries".
    '''
    errmsg = (f'Expected health warning "{health_warn}" to eventually '
              'show up in output of command "ceph health detail". Tried '
              f'{tries} times with interval of {sleep} seconds but the '
              'health warning didn\'t turn up.')

    with safe_while(sleep=sleep, tries=tries, action=errmsg) as proceed:
        while proceed():
            # NOTE(review): presumably issued to make the MDS refresh its
            # cache statistics; the returned output is not used.
            self.get_ceph_cmd_stdout(
                f'tell mds.{active_mds_id} cache status')

            raw_report = self.get_ceph_cmd_stdout(
                'health detail --format json')
            current_checks = json.loads(raw_report)['checks']

            if health_warn in current_checks:
                return
|
||||
|
||||
|
||||
@classhook('_add_valid_tell')
|
||||
class TestValidTell(TestAdminCommands):
|
||||
@classmethod
|
||||
@ -2154,3 +2186,166 @@ class TestPermErrMsg(CephFSTestCase):
|
||||
args=(f'fs authorize {self.fs.name} {self.CLIENT_NAME} / '
|
||||
f'{wrong_perm}'), retval=self.EXPECTED_ERRNO,
|
||||
errmsgs=self.EXPECTED_ERRMSG)
|
||||
|
||||
|
||||
class TestFSFail(TestAdminCommands):
    '''
    Tests for "ceph fs fail" when an MDS has health warning MDS_TRIM or
    MDS_CACHE_OVERSIZED.
    '''

    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 1

    def _check_fs_fail_needs_confirmation(self, errmsg):
        '''
        Assert that "ceph fs fail" is rejected (return value 1, error
        message containing "errmsg") without the confirmation flag, then
        run it again with the confirmation flag.
        '''
        self.negtest_ceph_cmd(args=f'fs fail {self.fs.name}',
                              retval=1, errmsgs=errmsg)
        self.run_ceph_cmd(f'fs fail {self.fs.name} --yes-i-really-mean-it')

    def test_with_health_warn_oversize_cache(self):
        '''
        Test that, when health warning MDS_CACHE_OVERSIZED is present for
        an MDS, command "ceph fs fail" fails without confirmation flag and
        passes when confirmation flag is passed.
        '''
        health_warn = 'MDS_CACHE_OVERSIZED'
        self.config_set('mds', 'mds_cache_memory_limit', '1K')
        self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
        active_mds_id = self.fs.get_active_names()[0]

        # generate client load, then wait for the warning to show up
        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, active_mds_id)

        # actual testing begins now.
        self._check_fs_fail_needs_confirmation('mds_cache_oversized')

    def test_with_health_warn_trim(self):
        '''
        Test that, when health warning MDS_TRIM is present for an MDS,
        command "ceph fs fail" fails without confirmation flag and passes
        when confirmation flag is passed.
        '''
        health_warn = 'MDS_TRIM'
        # for generating health warning MDS_TRIM
        self.config_set('mds', 'mds_debug_subtrees', 'true')
        # this will really really slow the trimming, so that MDS_TRIM stays
        # for longer.
        self.config_set('mds', 'mds_log_trim_decay_rate', '60')
        self.config_set('mds', 'mds_log_trim_threshold', '1')
        active_mds_id = self.fs.get_active_names()[0]

        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, active_mds_id)

        # actual testing begins now.
        self._check_fs_fail_needs_confirmation('mds_trim')

    def test_with_health_warn_with_2_active_MDSs(self):
        '''
        Test that, when a CephFS has 2 active MDSs and one of them has
        either health warning MDS_TRIM or MDS_CACHE_OVERSIZED, running
        "ceph fs fail" fails without confirmation flag and passes when
        confirmation flag is passed.
        '''
        health_warn = 'MDS_CACHE_OVERSIZED'
        self.fs.set_max_mds(2)
        self.config_set('mds', 'mds_cache_memory_limit', '1K')
        self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
        self.fs.wait_for_daemons()
        # exactly two active MDSs are expected; only the first name is
        # needed for polling.
        first_mds_id, _ = self.fs.get_active_names()

        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, first_mds_id)

        # actual testing begins now.
        self._check_fs_fail_needs_confirmation('mds_cache_oversized')
|
||||
|
||||
|
||||
class TestMDSFail(TestAdminCommands):
    '''
    Tests for "ceph mds fail" when an MDS has health warning MDS_TRIM or
    MDS_CACHE_OVERSIZED.
    '''

    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 1

    def test_with_health_warn_oversize_cache(self):
        '''
        Test that, when health warning MDS_CACHE_OVERSIZED is present for
        an MDS, command "ceph mds fail" fails without confirmation flag and
        passes when confirmation flag is passed.
        '''
        health_warn = 'MDS_CACHE_OVERSIZED'
        self.config_set('mds', 'mds_cache_memory_limit', '1K')
        self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
        active_mds_id = self.fs.get_active_names()[0]

        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, active_mds_id)

        # actual testing begins now.
        errmsg = 'mds_cache_oversized'
        self.negtest_ceph_cmd(args=f'mds fail {active_mds_id}',
                              retval=1, errmsgs=errmsg)
        # "mds fail" takes an MDS role/GID/name, so the confirmed run must
        # target the same MDS as the rejected run, not the file system name.
        self.run_ceph_cmd(f'mds fail {active_mds_id} --yes-i-really-mean-it')

    def test_with_health_warn_trim(self):
        '''
        Test that, when health warning MDS_TRIM is present for an MDS,
        command "ceph mds fail" fails without confirmation flag and passes
        when confirmation flag is passed.
        '''
        health_warn = 'MDS_TRIM'
        # for generating health warning MDS_TRIM
        self.config_set('mds', 'mds_debug_subtrees', 'true')
        # this will really really slow the trimming, so that MDS_TRIM stays
        # for longer.
        self.config_set('mds', 'mds_log_trim_decay_rate', '60')
        self.config_set('mds', 'mds_log_trim_threshold', '1')
        active_mds_id = self.fs.get_active_names()[0]

        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, active_mds_id)

        # actual testing begins now...
        errmsg = 'mds_trim'
        self.negtest_ceph_cmd(args=f'mds fail {active_mds_id}',
                              retval=1, errmsgs=errmsg)
        # target the same MDS as above, not the file system name.
        self.run_ceph_cmd(f'mds fail {active_mds_id} --yes-i-really-mean-it')

    def test_with_health_warn_with_2_active_MDSs(self):
        '''
        Test that, when a CephFS has 2 active MDSs and one of them has
        either health warning MDS_TRIM or MDS_CACHE_OVERSIZED, running
        "ceph mds fail" fails for both MDSs without confirmation flag and
        passes for both when confirmation flag is passed.
        '''
        health_warn = 'MDS_CACHE_OVERSIZED'
        self.fs.set_max_mds(2)
        self.config_set('mds', 'mds_cache_memory_limit', '1K')
        self.config_set('mds', 'mds_health_cache_threshold', '1.00000')
        self.fs.wait_for_daemons()
        mds1_id, mds2_id = self.fs.get_active_names()

        self.mount_a.open_n_background('.', 400)
        self.wait_till_health_warn(health_warn, mds1_id)

        health_report = json.loads(self.get_ceph_cmd_stdout('health detail '
                                                            '--format json'))
        # MDS ID for which health warning has been generated.
        hw_mds_id = self._get_unhealthy_mds_id(health_report, health_warn)
        if mds1_id == hw_mds_id:
            non_hw_mds_id = mds2_id
        elif mds2_id == hw_mds_id:
            non_hw_mds_id = mds1_id
        else:
            raise RuntimeError('There are only 2 MDSs right now but apparently '
                               'health warning was raised for an MDS other '
                               'than these two. This is definitely an error.')

        # actual testing begins now...
        errmsg = 'mds_cache_oversized'
        self.negtest_ceph_cmd(args=f'mds fail {non_hw_mds_id}', retval=1,
                              errmsgs=errmsg)
        self.negtest_ceph_cmd(args=f'mds fail {hw_mds_id}', retval=1,
                              errmsgs=errmsg)
        # interpolate the actual MDS IDs; without the f-prefix the literal
        # text "mds1_id"/"mds2_id" would be sent to the monitor.
        self.run_ceph_cmd(f'mds fail {mds1_id} --yes-i-really-mean-it')
        self.run_ceph_cmd(f'mds fail {mds2_id} --yes-i-really-mean-it')
|
||||
|
@ -108,6 +108,15 @@ class FailHandler : public FileSystemCommandHandler
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
bool confirm = false;
|
||||
cmd_getval(cmdmap, "yes_i_really_mean_it", confirm);
|
||||
if (!confirm &&
|
||||
mon->mdsmon()->has_health_warnings({
|
||||
MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) {
|
||||
ss << errmsg_for_unhealthy_mds;
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
auto f = [](auto&& fs) {
|
||||
fs.get_mds_map().set_flag(CEPH_MDSMAP_NOT_JOINABLE);
|
||||
};
|
||||
|
@ -91,4 +91,12 @@ public:
|
||||
std::ostream &ss) = 0;
|
||||
};
|
||||
|
||||
|
||||
// Message written to the command's output stream when an MDS-failing
// command is issued without the confirmation flag while a health warning
// of type MDS_TRIM or MDS_CACHE_OVERSIZED is present.
static constexpr const char* errmsg_for_unhealthy_mds =
  "MDS has one of two health warnings which could extend recovery: "
  "MDS_TRIM or MDS_CACHE_OVERSIZED. MDS failover is not recommended "
  "since it might cause unexpected file system unavailability. If "
  "you wish to proceed, pass --yes-i-really-mean-it";
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -1491,6 +1491,23 @@ out:
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if any entry in pending_daemon_health carries at least one
// health metric whose type is listed in "warnings"; returns false otherwise.
// The check spans every daemon in pending_daemon_health, not one specific
// MDS.
bool MDSMonitor::has_health_warnings(vector<mds_metric_t> warnings)
{
  // metric.type is the type of health warning; a metric matches when its
  // type is one of those passed in through "warnings".
  const auto is_requested = [&warnings](const auto& metric) {
    return std::find(warnings.begin(), warnings.end(), metric.type)
      != warnings.end();
  };

  for (auto& daemon_entry : pending_daemon_health) {
    auto& metrics = daemon_entry.second.metrics;
    if (std::any_of(metrics.begin(), metrics.end(), is_requested)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
int MDSMonitor::filesystem_command(
|
||||
FSMap &fsmap,
|
||||
MonOpRequestRef op,
|
||||
@ -1528,6 +1545,8 @@ int MDSMonitor::filesystem_command(
|
||||
} else if (prefix == "mds fail") {
|
||||
string who;
|
||||
cmd_getval(cmdmap, "role_or_gid", who);
|
||||
bool confirm = false;
|
||||
cmd_getval(cmdmap, "yes_i_really_mean_it", confirm);
|
||||
|
||||
MDSMap::mds_info_t failed_info;
|
||||
mds_gid_t gid = gid_from_arg(fsmap, who, ss);
|
||||
@ -1547,6 +1566,12 @@ int MDSMonitor::filesystem_command(
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (!confirm &&
|
||||
has_health_warnings({MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) {
|
||||
ss << errmsg_for_unhealthy_mds;
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
r = fail_mds(fsmap, ss, who, &failed_info);
|
||||
if (r < 0 && r == -EAGAIN) {
|
||||
mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "include/types.h"
|
||||
#include "PaxosFSMap.h"
|
||||
@ -51,6 +52,7 @@ class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHand
|
||||
bool preprocess_query(MonOpRequestRef op) override; // true if processed.
|
||||
bool prepare_update(MonOpRequestRef op) override;
|
||||
bool should_propose(double& delay) override;
|
||||
bool has_health_warnings(std::vector<mds_metric_t> warnings);
|
||||
|
||||
bool should_print_status() const {
|
||||
auto& fs = get_fsmap();
|
||||
|
@ -319,7 +319,8 @@ COMMAND_WITH_FLAG("mds set_state "
|
||||
"name=gid,type=CephInt,range=0 "
|
||||
"name=state,type=CephInt,range=0|20",
|
||||
"set mds state of <gid> to <numeric-state>", "mds", "rw", FLAG(HIDDEN))
|
||||
COMMAND("mds fail name=role_or_gid,type=CephString",
|
||||
COMMAND("mds fail name=role_or_gid,type=CephString "
|
||||
"name=yes_i_really_mean_it,type=CephBool,req=false",
|
||||
"Mark MDS failed: trigger a failover if a standby is available",
|
||||
"mds", "rw")
|
||||
COMMAND("mds repaired name=role,type=CephString",
|
||||
@ -355,7 +356,8 @@ COMMAND("fs new "
|
||||
"make new filesystem using named pools <metadata> and <data>",
|
||||
"fs", "rw")
|
||||
COMMAND("fs fail "
|
||||
"name=fs_name,type=CephString ",
|
||||
"name=fs_name,type=CephString "
|
||||
"name=yes_i_really_mean_it,type=CephBool,req=false",
|
||||
"bring the file system down and all of its ranks",
|
||||
"fs", "rw")
|
||||
COMMAND("fs rm "
|
||||
|
Loading…
Reference in New Issue
Block a user