Merge pull request #41346 from adk3798/enough-mds-upgrade

mgr/cephadm: skip ok-to-stop for mds in upgrade if not enough mds daemons

Reviewed-by: Michael Fritch <mfritch@suse.com>
Reviewed-by: Sebastian Wagner <sewagner@redhat.com>
This commit is contained in:
Sebastian Wagner 2021-06-16 10:53:33 +02:00 committed by GitHub
commit 2381725447
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 2 deletions

View File

@ -7,7 +7,7 @@ from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
from cephadm import CephadmOrchestrator from cephadm import CephadmOrchestrator
from cephadm.upgrade import CephadmUpgrade from cephadm.upgrade import CephadmUpgrade
from cephadm.serve import CephadmServe from cephadm.serve import CephadmServe
from orchestrator import OrchestratorError from orchestrator import OrchestratorError, DaemonDescription
from .fixtures import _run_cephadm, wait, with_host, with_service from .fixtures import _run_cephadm, wait, with_host, with_service
@ -142,3 +142,23 @@ def test_enough_mons_for_ok_to_stop(check_mon_command, cephadm_module: CephadmOr
check_mon_command.return_value = ( check_mon_command.return_value = (
0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '') 0, '{"monmap": {"mons": [{"name": "mon.1"}, {"name": "mon.2"}, {"name": "mon.3"}]}}', '')
assert cephadm_module.upgrade._enough_mons_for_ok_to_stop() assert cephadm_module.upgrade._enough_mons_for_ok_to_stop()
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.module.HostCache.get_daemons_by_service")
@mock.patch("cephadm.CephadmOrchestrator.get")
def test_enough_mds_for_ok_to_stop(get, get_daemons_by_service, cephadm_module: CephadmOrchestrator):
    """Exercise ``_enough_mds_for_ok_to_stop`` against mocked fs maps.

    Each scenario feeds the upgrade helper one fs map (through the patched
    ``CephadmOrchestrator.get``) and one list of cached daemons (through the
    patched ``HostCache.get_daemons_by_service``), then checks whether the
    helper reports that more daemons are cached than ``max_mds`` asks for.
    """
    # (fs_name, max_mds, number of cached daemons, daemon_id, service_name, expected result)
    scenarios = [
        # as many daemons as max_mds -> not enough
        ('test', 1, 1, 'test.host1.gfknd', 'mds.test', False),
        # fs name containing a dot, still no daemon beyond max_mds -> not enough
        ('myfs.test', 2, 2, 'myfs.test.host1.gfknd', 'mds.myfs.test', False),
        # one daemon more than max_mds -> enough
        ('myfs.test', 1, 2, 'myfs.test.host1.gfknd', 'mds.myfs.test', True),
    ]

    for fs_name, max_mds, daemon_count, daemon_id, service_name, expected in scenarios:
        # single-element side_effect lists: each mock answers exactly one
        # call per scenario
        get.side_effect = [
            {'filesystems': [{'mdsmap': {'fs_name': fs_name, 'max_mds': max_mds}}]}]
        get_daemons_by_service.side_effect = [
            [DaemonDescription() for _ in range(daemon_count)]]

        mds = DaemonDescription(
            daemon_type='mds', daemon_id=daemon_id, service_name=service_name)
        enough = cephadm_module.upgrade._enough_mds_for_ok_to_stop(mds)
        assert bool(enough) is expected

View File

@ -433,6 +433,31 @@ class CephadmUpgrade:
mons = [m['name'] for m in j['monmap']['mons']] mons = [m['name'] for m in j['monmap']['mons']]
return len(mons) > 2 return len(mons) > 2
def _enough_mds_for_ok_to_stop(self, mds_daemon: 'DaemonDescription') -> bool:
    """Report whether the daemon's filesystem has more MDS daemons than ``max_mds``.

    The filesystem is taken to be everything after the first ``.`` of the
    daemon's service name (``mds.myfs.test`` -> ``myfs.test``), so fs names
    that themselves contain dots are handled. That name is looked up in the
    ``fs_map`` reported by the mgr, and the filesystem's ``max_mds`` is
    compared with the number of daemons the host cache lists for the
    service.

    :param mds_daemon: the MDS daemon under consideration; must have a
        ``daemon_id``.
    :return: ``True`` if the cache lists more daemons for the service than
        the filesystem's ``max_mds``, or if no filesystem in the fs map
        matches the service (including a service name with no ``.``);
        ``False`` if the matching filesystem has ``max_mds`` greater than
        or equal to the cached daemon count.
    :raises AssertionError: if ``mds_daemon.daemon_id`` is empty.
    """
    fsmap = self.mgr.get("fs_map")

    # Sanity check kept from the original implementation; done once up
    # front instead of once per filesystem.
    assert mds_daemon.daemon_id

    # Loop-invariant: resolve the service name and its fs part a single time.
    # partition() yields '' when there is no '.', so such a service simply
    # matches no filesystem instead of raising IndexError.
    service_name = mds_daemon.service_name()
    _, _, wanted_fs = service_name.partition('.')

    for entry in fsmap.get('filesystems', []):
        mdsmap = entry["mdsmap"]
        if mdsmap["fs_name"] != wanted_fs:
            # wrong fs for this mds daemon
            continue
        # number of mds daemons the cache knows for this service
        mds_count = len(self.mgr.cache.get_daemons_by_service(service_name))
        # more daemons than max_mds means there is at least one to spare
        return mdsmap["max_mds"] < mds_count

    return True  # if mds has no fs it should pass ok-to-stop
def _do_upgrade(self): def _do_upgrade(self):
# type: () -> None # type: () -> None
if not self.upgrade_state: if not self.upgrade_state:
@ -580,7 +605,7 @@ class CephadmUpgrade:
to_upgrade.append(d_entry) to_upgrade.append(d_entry)
continue continue
if d.daemon_type in ['osd', 'mds']: if d.daemon_type == 'osd':
# NOTE: known_ok_to_stop is an output argument for # NOTE: known_ok_to_stop is an output argument for
# _wait_for_ok_to_stop # _wait_for_ok_to_stop
if not self._wait_for_ok_to_stop(d, known_ok_to_stop): if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
@ -590,6 +615,10 @@ class CephadmUpgrade:
if not self._wait_for_ok_to_stop(d, known_ok_to_stop): if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
return return
if d.daemon_type == 'mds' and self._enough_mds_for_ok_to_stop(d):
if not self._wait_for_ok_to_stop(d, known_ok_to_stop):
return
to_upgrade.append(d_entry) to_upgrade.append(d_entry)
# if we don't have a list of others to consider, stop now # if we don't have a list of others to consider, stop now