Merge pull request #39613 from sebastian-philipp/DaemonDescriptionStatus

mgr/orch: Add DaemonDescriptionStatus

Reviewed-by: Daniel-Pivonka <dpivonka@redhat.com>
Reviewed-by: Varsha Rao <varao@redhat.com>
This commit is contained in:
Sebastian Wagner 2021-02-25 16:15:03 +01:00 committed by GitHub
commit 24ff1c0b6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 53 additions and 67 deletions

View File

@ -518,7 +518,7 @@ class HostCache():
def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription:
dd = copy(dd_orig)
if host in self.mgr.offline_hosts:
dd.status = -1
dd.status = orchestrator.DaemonDescriptionStatus.error
dd.status_desc = 'host is offline'
dd.events = self.mgr.events.get_for_daemon(dd.name())
return dd

View File

@ -34,7 +34,7 @@ from mgr_util import create_self_signed_cert
import secrets
import orchestrator
from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \
CLICommandMeta, DaemonDescription
CLICommandMeta, DaemonDescription, DaemonDescriptionStatus
from orchestrator._interface import GenericSpec
from orchestrator._interface import daemon_type_to_service
@ -1532,7 +1532,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
sm[n].rados_config_location = spec.rados_config_location()
else:
sm[n].size = 0
if dd.status == 1:
if dd.status == DaemonDescriptionStatus.running:
sm[n].running += 1
if not sm[n].last_refresh or not dd.last_refresh or dd.last_refresh < sm[n].last_refresh: # type: ignore
sm[n].last_refresh = dd.last_refresh

View File

@ -19,12 +19,12 @@ from ceph.deployment.service_spec import ServiceSpec, HostPlacementSpec, \
from ceph.utils import str_to_datetime, datetime_now
import orchestrator
from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent
from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \
DaemonDescriptionStatus, daemon_type_to_service, service_to_daemon_types
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
from cephadm.schedule import HostAssignment
from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
CephadmNoImage, CEPH_UPGRADE_ORDER, ContainerInspectInfo
from orchestrator._interface import daemon_type_to_service, service_to_daemon_types
if TYPE_CHECKING:
from cephadm.module import CephadmOrchestrator
@ -206,7 +206,7 @@ class CephadmServe:
health_changed = True
failed_daemons = []
for dd in self.mgr.cache.get_daemons():
if dd.status is not None and dd.status < 0:
if dd.status is not None and dd.status == DaemonDescriptionStatus.error:
failed_daemons.append('daemon %s on %s is in %s state' % (
dd.name(), dd.hostname, dd.status_desc
))
@ -277,10 +277,10 @@ class CephadmServe:
if 'state' in d:
sd.status_desc = d['state']
sd.status = {
'running': 1,
'stopped': 0,
'error': -1,
'unknown': -1,
'running': DaemonDescriptionStatus.running,
'stopped': DaemonDescriptionStatus.stopped,
'error': DaemonDescriptionStatus.error,
'unknown': DaemonDescriptionStatus.error,
}[d['state']]
else:
sd.status_desc = 'unknown'
@ -812,7 +812,8 @@ class CephadmServe:
if not code and daemon_spec.host in self.mgr.cache.daemons:
# prime cached service state with what we (should have)
# just created
sd = daemon_spec.to_daemon_description(1, 'starting')
sd = daemon_spec.to_daemon_description(
DaemonDescriptionStatus.running, 'starting')
self.mgr.cache.add_daemon(daemon_spec.host, sd)
if daemon_spec.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']:
self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
@ -833,7 +834,7 @@ class CephadmServe:
if not reconfig:
# we have to clean up the daemon. E.g. keyrings.
servict_type = daemon_type_to_service(daemon_spec.daemon_type)
dd = daemon_spec.to_daemon_description(-1, 'failed')
dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed')
self.mgr.cephadm_services[servict_type].post_remove(dd)
raise

View File

@ -11,7 +11,7 @@ from mgr_module import HandleCommandResult, MonCommandFailed
from ceph.deployment.service_spec import ServiceSpec, RGWSpec
from ceph.deployment.utils import is_ipv6, unwrap_ipv6
from orchestrator import OrchestratorError, DaemonDescription
from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
from orchestrator._interface import daemon_type_to_service
from cephadm import utils
from mgr_util import ServerConfigException, verify_tls
@ -76,7 +76,7 @@ class CephadmDaemonDeploySpec:
return files
def to_daemon_description(self, status: int, status_desc: str) -> DaemonDescription:
def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription:
return DaemonDescription(
daemon_type=self.daemon_type,
daemon_id=self.daemon_id,
@ -249,7 +249,9 @@ class CephadmService(metaclass=ABCMeta):
# Provides a warning about if it possible or not to stop <n> daemons in a service
names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids]
number_of_running_daemons = len(
[daemon for daemon in self.mgr.cache.get_daemons_by_type(daemon_type) if daemon.status == 1])
[daemon
for daemon in self.mgr.cache.get_daemons_by_type(daemon_type)
if daemon.status == DaemonDescriptionStatus.running])
if (number_of_running_daemons - len(daemon_ids)) >= low_limit:
return False, f'It is presumed safe to stop {names}'

View File

@ -1,4 +1,4 @@
from __future__ import absolute_import
# flake8: noqa
from .module import OrchestratorCli
@ -9,43 +9,12 @@ from ._interface import \
Orchestrator, OrchestratorClientMixin, \
OrchestratorValidationError, OrchestratorError, NoOrchestrator, \
ServiceDescription, InventoryFilter, HostSpec, \
DaemonDescription, \
DaemonDescription, DaemonDescriptionStatus, \
OrchestratorEvent, set_exception_subject, \
InventoryHost, DeviceLightLoc, \
UpgradeStatusSpec, daemon_type_to_service, service_to_daemon_types
__all__ = [
'OrchestratorCli',
'Completion',
'TrivialReadCompletion',
'raise_if_exception',
'ProgressReference',
'pretty_print',
'_Promise',
'CLICommand',
'_cli_write_command',
'_cli_read_command',
'CLICommandMeta',
'Orchestrator',
'OrchestratorClientMixin',
'OrchestratorValidationError',
'OrchestratorError',
'NoOrchestrator',
'ServiceDescription',
'InventoryFilter',
'HostSpec',
'DaemonDescription',
'OrchestratorEvent',
'set_exception_subject',
'InventoryHost',
'DeviceLightLoc',
'UpgradeStatusSpec',
'daemon_type_to_service',
'service_to_daemon_types',
]
import os
if 'UNITTEST' in os.environ:
import tests
__all__.append(tests.__name__)

View File

@ -7,6 +7,7 @@ Please see the ceph-mgr module developer's guide for more information.
import copy
import datetime
import enum
import errno
import logging
import pickle
@ -1255,6 +1256,12 @@ def handle_type_error(method: FuncT) -> FuncT:
return cast(FuncT, inner)
class DaemonDescriptionStatus(enum.IntEnum):
error = -1
stopped = 0
running = 1
class DaemonDescription(object):
"""
For responding to queries about the status of a particular daemon,
@ -1277,7 +1284,7 @@ class DaemonDescription(object):
container_image_name: Optional[str] = None,
container_image_digests: Optional[List[str]] = None,
version: Optional[str] = None,
status: Optional[int] = None,
status: Optional[DaemonDescriptionStatus] = None,
status_desc: Optional[str] = None,
last_refresh: Optional[datetime.datetime] = None,
created: Optional[datetime.datetime] = None,
@ -1316,7 +1323,7 @@ class DaemonDescription(object):
# Service status: -1 error, 0 stopped, 1 running
self.status = status
# Service status description when status == -1.
# Service status description when status == error.
self.status_desc = status_desc
# datetime when this info was last refreshed
@ -1416,7 +1423,7 @@ class DaemonDescription(object):
out['container_image_name'] = self.container_image_name
out['container_image_digests'] = self.container_image_digests
out['version'] = self.version
out['status'] = self.status
out['status'] = self.status.value if self.status is not None else None
out['status_desc'] = self.status_desc
if self.daemon_type == 'osd':
out['osdspec_affinity'] = self.osdspec_affinity
@ -1445,7 +1452,9 @@ class DaemonDescription(object):
if k in c:
c[k] = str_to_datetime(c[k])
events = [OrchestratorEvent.from_json(e) for e in event_strs]
return cls(events=events, **c)
status_int = c.pop('status', None)
status = DaemonDescriptionStatus(status_int) if status_int is not None else None
return cls(events=events, status=status, **c)
def __copy__(self) -> 'DaemonDescription':
# feel free to change this:

View File

@ -21,7 +21,7 @@ from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_comma
NoOrchestrator, OrchestratorValidationError, NFSServiceSpec, \
RGWSpec, InventoryFilter, InventoryHost, HostSpec, CLICommandMeta, \
ServiceDescription, DaemonDescription, IscsiServiceSpec, json_to_generic_spec, \
GenericSpec
GenericSpec, DaemonDescriptionStatus
def nice_delta(now: datetime.datetime, t: Optional[datetime.datetime], suffix: str = '') -> str:
@ -648,12 +648,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
status = s.status_desc
else:
status = {
-1: 'error',
0: 'stopped',
1: 'running',
DaemonDescriptionStatus.error: 'error',
DaemonDescriptionStatus.stopped: 'stopped',
DaemonDescriptionStatus.running: 'running',
None: '<unknown>'
}[s.status]
if s.status == 1 and s.started:
if s.status == DaemonDescriptionStatus.running and s.started:
status += ' (%s)' % to_pretty_timedelta(now - s.started)
table.add_row((

View File

@ -14,7 +14,7 @@ from test_orchestrator import TestOrchestrator as _TestOrchestrator
from tests import mock
from orchestrator import raise_if_exception, Completion, ProgressReference
from orchestrator import InventoryHost, DaemonDescription, ServiceDescription
from orchestrator import InventoryHost, DaemonDescription, ServiceDescription, DaemonDescriptionStatus
from orchestrator import OrchestratorValidationError
from orchestrator.module import to_format, Format, OrchestratorCli, preview_table_osd
@ -22,13 +22,14 @@ from orchestrator.module import to_format, Format, OrchestratorCli, preview_tabl
def _test_resource(data, resource_class, extra=None):
# ensure we can deserialize and serialize
rsc = resource_class.from_json(data)
rsc.to_json()
assert rsc.to_json() == resource_class.from_json(rsc.to_json()).to_json()
if extra:
# if there is an unexpected data provided
data.update(extra)
data_copy = data.copy()
data_copy.update(extra)
with pytest.raises(OrchestratorValidationError):
resource_class.from_json(data)
resource_class.from_json(data_copy)
def test_inventory():
@ -62,10 +63,14 @@ def test_daemon_description():
json_data = {
'hostname': 'test',
'daemon_type': 'mon',
'daemon_id': 'a'
'daemon_id': 'a',
'status': -1,
}
_test_resource(json_data, DaemonDescription, {'abc': False})
dd = DaemonDescription.from_json(json_data)
assert dd.status.value == DaemonDescriptionStatus.error.value
def test_raise():
c = Completion()

View File

@ -428,11 +428,11 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator):
sd.hostname = p['hostname']
sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '')
status = {
'Pending': -1,
'Running': 1,
'Succeeded': 0,
'Failed': -1,
'Unknown': -1,
'Pending': orchestrator.DaemonDescriptionStatus.error,
'Running': orchestrator.DaemonDescriptionStatus.running,
'Succeeded': orchestrator.DaemonDescriptionStatus.stopped,
'Failed': orchestrator.DaemonDescriptionStatus.error,
'Unknown': orchestrator.DaemonDescriptionStatus.error,
}[p['phase']]
sd.status = status
sd.status_desc = p['phase']