Merge pull request #39613 from sebastian-philipp/DaemonDescriptionStatus

mgr/orch: Add DaemonDescriptionStatus

Reviewed-by: Daniel-Pivonka <dpivonka@redhat.com>
Reviewed-by: Varsha Rao <varao@redhat.com>

commit 24ff1c0b6d

The change replaces the raw -1/0/1 daemon status integers with a
DaemonDescriptionStatus IntEnum across the cephadm module, the orchestrator
CLI, and the Rook orchestrator module.
@@ -518,7 +518,7 @@ class HostCache():
         def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription:
             dd = copy(dd_orig)
             if host in self.mgr.offline_hosts:
-                dd.status = -1
+                dd.status = orchestrator.DaemonDescriptionStatus.error
                 dd.status_desc = 'host is offline'
             dd.events = self.mgr.events.get_for_daemon(dd.name())
             return dd
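This first hunk swaps the magic -1 for a named enum member inside HostCache's
copy-and-override helper: the cached DaemonDescription is never mutated, and
callers get a shallow copy with volatile fields overridden. A minimal sketch
of that pattern, with Daemon as a hypothetical stand-in for the real
DaemonDescription:

    import copy
    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    class Daemon:
        # hypothetical stand-in for orchestrator.DaemonDescription
        def __init__(self, name: str, status: DaemonDescriptionStatus) -> None:
            self.name = name
            self.status = status
            self.status_desc = ''


    def alter(offline_hosts: set, host: str, original: Daemon) -> Daemon:
        d = copy.copy(original)  # shallow copy: the cached object stays intact
        if host in offline_hosts:
            d.status = DaemonDescriptionStatus.error
            d.status_desc = 'host is offline'
        return d


    cached = Daemon('mon.a', DaemonDescriptionStatus.running)
    view = alter({'host1'}, 'host1', cached)
    assert view.status is DaemonDescriptionStatus.error
    assert cached.status is DaemonDescriptionStatus.running  # original untouched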
@@ -34,7 +34,7 @@ from mgr_util import create_self_signed_cert
 import secrets
 import orchestrator
 from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \
-    CLICommandMeta, DaemonDescription
+    CLICommandMeta, DaemonDescription, DaemonDescriptionStatus
 from orchestrator._interface import GenericSpec
 from orchestrator._interface import daemon_type_to_service
@@ -1532,7 +1532,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
                     sm[n].rados_config_location = spec.rados_config_location()
             else:
                 sm[n].size = 0
-            if dd.status == 1:
+            if dd.status == DaemonDescriptionStatus.running:
                 sm[n].running += 1
             if not sm[n].last_refresh or not dd.last_refresh or dd.last_refresh < sm[n].last_refresh:  # type: ignore
                 sm[n].last_refresh = dd.last_refresh
@@ -19,12 +19,12 @@ from ceph.deployment.service_spec import ServiceSpec, HostPlacementSpec, \
 from ceph.utils import str_to_datetime, datetime_now

 import orchestrator
-from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent
+from orchestrator import OrchestratorError, set_exception_subject, OrchestratorEvent, \
+    DaemonDescriptionStatus, daemon_type_to_service, service_to_daemon_types
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.schedule import HostAssignment
 from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
     CephadmNoImage, CEPH_UPGRADE_ORDER, ContainerInspectInfo
-from orchestrator._interface import daemon_type_to_service, service_to_daemon_types

 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
@@ -206,7 +206,7 @@ class CephadmServe:
                 health_changed = True
         failed_daemons = []
         for dd in self.mgr.cache.get_daemons():
-            if dd.status is not None and dd.status < 0:
+            if dd.status is not None and dd.status == DaemonDescriptionStatus.error:
                 failed_daemons.append('daemon %s on %s is in %s state' % (
                     dd.name(), dd.hostname, dd.status_desc
                 ))
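Because DaemonDescriptionStatus is an IntEnum (its definition appears further
down in this diff), the old numeric test dd.status < 0 and the new named
comparison are equivalent for the current values; the enum form simply states
intent. A quick self-contained check of that compatibility:

    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    # IntEnum members are real ints, so ordering and equality against raw
    # integers keep working during the migration:
    assert DaemonDescriptionStatus.error < 0
    assert DaemonDescriptionStatus.error == -1
    assert DaemonDescriptionStatus(1) is DaemonDescriptionStatus.running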
@@ -277,10 +277,10 @@ class CephadmServe:
             if 'state' in d:
                 sd.status_desc = d['state']
                 sd.status = {
-                    'running': 1,
-                    'stopped': 0,
-                    'error': -1,
-                    'unknown': -1,
+                    'running': DaemonDescriptionStatus.running,
+                    'stopped': DaemonDescriptionStatus.stopped,
+                    'error': DaemonDescriptionStatus.error,
+                    'unknown': DaemonDescriptionStatus.error,
                 }[d['state']]
             else:
                 sd.status_desc = 'unknown'
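A caveat on the mapping above: an unrecognized state string raises KeyError.
A hedged alternative sketch, not what cephadm does, that routes anything
unexpected to error just like the 'unknown' entry:

    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    _STATE_MAP = {
        'running': DaemonDescriptionStatus.running,
        'stopped': DaemonDescriptionStatus.stopped,
        'error': DaemonDescriptionStatus.error,
        'unknown': DaemonDescriptionStatus.error,
    }


    def status_from_state(state: str) -> DaemonDescriptionStatus:
        # anything not in the map is treated like 'unknown', i.e. an error
        return _STATE_MAP.get(state, DaemonDescriptionStatus.error)


    assert status_from_state('running') is DaemonDescriptionStatus.running
    assert status_from_state('weird') is DaemonDescriptionStatus.error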
@@ -812,7 +812,8 @@ class CephadmServe:
         if not code and daemon_spec.host in self.mgr.cache.daemons:
             # prime cached service state with what we (should have)
             # just created
-            sd = daemon_spec.to_daemon_description(1, 'starting')
+            sd = daemon_spec.to_daemon_description(
+                DaemonDescriptionStatus.running, 'starting')
             self.mgr.cache.add_daemon(daemon_spec.host, sd)
             if daemon_spec.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']:
                 self.mgr.requires_post_actions.add(daemon_spec.daemon_type)
@@ -833,7 +834,7 @@ class CephadmServe:
             if not reconfig:
                 # we have to clean up the daemon. E.g. keyrings.
                 servict_type = daemon_type_to_service(daemon_spec.daemon_type)
-                dd = daemon_spec.to_daemon_description(-1, 'failed')
+                dd = daemon_spec.to_daemon_description(DaemonDescriptionStatus.error, 'failed')
                 self.mgr.cephadm_services[servict_type].post_remove(dd)
             raise
@@ -11,7 +11,7 @@ from mgr_module import HandleCommandResult, MonCommandFailed

 from ceph.deployment.service_spec import ServiceSpec, RGWSpec
 from ceph.deployment.utils import is_ipv6, unwrap_ipv6
-from orchestrator import OrchestratorError, DaemonDescription
+from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
 from orchestrator._interface import daemon_type_to_service
 from cephadm import utils
 from mgr_util import ServerConfigException, verify_tls
@@ -76,7 +76,7 @@ class CephadmDaemonDeploySpec:

         return files

-    def to_daemon_description(self, status: int, status_desc: str) -> DaemonDescription:
+    def to_daemon_description(self, status: DaemonDescriptionStatus, status_desc: str) -> DaemonDescription:
         return DaemonDescription(
             daemon_type=self.daemon_type,
             daemon_id=self.daemon_id,
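Beyond the call sites, tightening this annotation lets a static type checker
flag any remaining magic-number callers, such as the to_daemon_description(1,
...) and to_daemon_description(-1, ...) calls replaced earlier in this diff.
A small sketch of the effect, with a hypothetical standalone function:

    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    def to_daemon_description(status: DaemonDescriptionStatus, status_desc: str) -> str:
        # hypothetical body; only the signature matters for this point
        return f'{status.name}: {status_desc}'


    to_daemon_description(DaemonDescriptionStatus.running, 'starting')  # ok
    # to_daemon_description(1, 'starting')
    # mypy: argument 1 has incompatible type "int";
    # expected "DaemonDescriptionStatus" (runs at runtime, but the checker
    # catches the leftover magic number)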
@@ -249,7 +249,9 @@ class CephadmService(metaclass=ABCMeta):
         # Provides a warning about if it possible or not to stop <n> daemons in a service
         names = [f'{daemon_type}.{d_id}' for d_id in daemon_ids]
         number_of_running_daemons = len(
-            [daemon for daemon in self.mgr.cache.get_daemons_by_type(daemon_type) if daemon.status == 1])
+            [daemon
+             for daemon in self.mgr.cache.get_daemons_by_type(daemon_type)
+             if daemon.status == DaemonDescriptionStatus.running])
         if (number_of_running_daemons - len(daemon_ids)) >= low_limit:
             return False, f'It is presumed safe to stop {names}'
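The reflowed comprehension still builds a throwaway list only to take its
length; an equivalent count is available from sum() over a generator. An
editor's aside under that observation, not the committed code:

    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    def count_running(statuses) -> int:
        # no intermediate list is materialized
        return sum(1 for s in statuses if s == DaemonDescriptionStatus.running)


    assert count_running([DaemonDescriptionStatus.running,
                          DaemonDescriptionStatus.stopped,
                          DaemonDescriptionStatus.running]) == 2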
@@ -1,4 +1,4 @@
-from __future__ import absolute_import
+# flake8: noqa

 from .module import OrchestratorCli
@@ -9,43 +9,12 @@ from ._interface import \
     Orchestrator, OrchestratorClientMixin, \
     OrchestratorValidationError, OrchestratorError, NoOrchestrator, \
     ServiceDescription, InventoryFilter, HostSpec, \
-    DaemonDescription, \
+    DaemonDescription, DaemonDescriptionStatus, \
     OrchestratorEvent, set_exception_subject, \
     InventoryHost, DeviceLightLoc, \
     UpgradeStatusSpec, daemon_type_to_service, service_to_daemon_types

-__all__ = [
-    'OrchestratorCli',
-    'Completion',
-    'TrivialReadCompletion',
-    'raise_if_exception',
-    'ProgressReference',
-    'pretty_print',
-    '_Promise',
-    'CLICommand',
-    '_cli_write_command',
-    '_cli_read_command',
-    'CLICommandMeta',
-    'Orchestrator',
-    'OrchestratorClientMixin',
-    'OrchestratorValidationError',
-    'OrchestratorError',
-    'NoOrchestrator',
-    'ServiceDescription',
-    'InventoryFilter',
-    'HostSpec',
-    'DaemonDescription',
-    'OrchestratorEvent',
-    'set_exception_subject',
-    'InventoryHost',
-    'DeviceLightLoc',
-    'UpgradeStatusSpec',
-    'daemon_type_to_service',
-    'service_to_daemon_types',
-]
-
 import os
 if 'UNITTEST' in os.environ:
     import tests
-    __all__.append(tests.__name__)
@@ -7,6 +7,7 @@ Please see the ceph-mgr module developer's guide for more information.

 import copy
 import datetime
+import enum
 import errno
 import logging
 import pickle
@@ -1255,6 +1256,12 @@ def handle_type_error(method: FuncT) -> FuncT:
     return cast(FuncT, inner)


+class DaemonDescriptionStatus(enum.IntEnum):
+    error = -1
+    stopped = 0
+    running = 1
+
+
 class DaemonDescription(object):
     """
     For responding to queries about the status of a particular daemon,
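Basing the enum on enum.IntEnum rather than enum.Enum is the key
compatibility choice: members are real ints, so JSON payloads keep their
-1/0/1 encoding and numeric code keeps working, while Python callers gain
names. A standalone illustration (PlainStatus is a hypothetical contrast, not
part of the change):

    import enum
    import json


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    # json serializes IntEnum members as plain integers:
    assert json.dumps({'status': DaemonDescriptionStatus.running}) == '{"status": 1}'

    # a plain Enum would not compare equal to ints:
    class PlainStatus(enum.Enum):
        running = 1

    assert DaemonDescriptionStatus.running == 1
    assert PlainStatus.running != 1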
@@ -1277,7 +1284,7 @@ class DaemonDescription(object):
                  container_image_name: Optional[str] = None,
                  container_image_digests: Optional[List[str]] = None,
                  version: Optional[str] = None,
-                 status: Optional[int] = None,
+                 status: Optional[DaemonDescriptionStatus] = None,
                  status_desc: Optional[str] = None,
                  last_refresh: Optional[datetime.datetime] = None,
                  created: Optional[datetime.datetime] = None,
@@ -1316,7 +1323,7 @@ class DaemonDescription(object):
         # Service status: -1 error, 0 stopped, 1 running
         self.status = status

-        # Service status description when status == -1.
+        # Service status description when status == error.
         self.status_desc = status_desc

         # datetime when this info was last refreshed
@@ -1416,7 +1423,7 @@ class DaemonDescription(object):
         out['container_image_name'] = self.container_image_name
         out['container_image_digests'] = self.container_image_digests
         out['version'] = self.version
-        out['status'] = self.status
+        out['status'] = self.status.value if self.status is not None else None
         out['status_desc'] = self.status_desc
         if self.daemon_type == 'osd':
             out['osdspec_affinity'] = self.osdspec_affinity
@@ -1445,7 +1452,9 @@ class DaemonDescription(object):
             if k in c:
                 c[k] = str_to_datetime(c[k])
         events = [OrchestratorEvent.from_json(e) for e in event_strs]
-        return cls(events=events, **c)
+        status_int = c.pop('status', None)
+        status = DaemonDescriptionStatus(status_int) if status_int is not None else None
+        return cls(events=events, status=status, **c)

     def __copy__(self) -> 'DaemonDescription':
         # feel free to change this:
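Taken together, the to_json() change above and this from_json() change keep
the wire format unchanged: the integer .value goes out, and the enum is
rebuilt on the way in. A minimal round-trip sketch using a simplified
stand-in class, not the real DaemonDescription:

    import enum
    from typing import Optional


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    class Desc:
        def __init__(self, status: Optional[DaemonDescriptionStatus] = None) -> None:
            self.status = status

        def to_json(self) -> dict:
            # the wire format stays a plain integer (or None)
            return {'status': self.status.value if self.status is not None else None}

        @classmethod
        def from_json(cls, data: dict) -> 'Desc':
            # pop() mutates the passed dict, mirroring the c.pop() above
            status_int = data.pop('status', None)
            status = DaemonDescriptionStatus(status_int) if status_int is not None else None
            return cls(status=status)


    d = Desc.from_json(Desc(DaemonDescriptionStatus.error).to_json())
    assert d.status is DaemonDescriptionStatus.error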
@@ -21,7 +21,7 @@ from ._interface import OrchestratorClientMixin, DeviceLightLoc, _cli_read_comma
     NoOrchestrator, OrchestratorValidationError, NFSServiceSpec, \
     RGWSpec, InventoryFilter, InventoryHost, HostSpec, CLICommandMeta, \
     ServiceDescription, DaemonDescription, IscsiServiceSpec, json_to_generic_spec, \
-    GenericSpec
+    GenericSpec, DaemonDescriptionStatus


 def nice_delta(now: datetime.datetime, t: Optional[datetime.datetime], suffix: str = '') -> str:
@@ -648,12 +648,12 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
                     status = s.status_desc
                 else:
                     status = {
-                        -1: 'error',
-                        0: 'stopped',
-                        1: 'running',
+                        DaemonDescriptionStatus.error: 'error',
+                        DaemonDescriptionStatus.stopped: 'stopped',
+                        DaemonDescriptionStatus.running: 'running',
                         None: '<unknown>'
                     }[s.status]
-                if s.status == 1 and s.started:
+                if s.status == DaemonDescriptionStatus.running and s.started:
                     status += ' (%s)' % to_pretty_timedelta(now - s.started)

                 table.add_row((
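One detail worth noting about the mapping above: IntEnum members hash and
compare like their integer values, so a dict keyed by enum members also
resolves raw-int lookups from code paths that still pass the old codes. For
example:

    import enum


    class DaemonDescriptionStatus(enum.IntEnum):
        error = -1
        stopped = 0
        running = 1


    labels = {
        DaemonDescriptionStatus.error: 'error',
        DaemonDescriptionStatus.stopped: 'stopped',
        DaemonDescriptionStatus.running: 'running',
        None: '<unknown>',
    }

    assert labels[DaemonDescriptionStatus.running] == 'running'
    assert labels[1] == 'running'   # a raw int key hits the same entry
    assert labels[None] == '<unknown>'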
@@ -14,7 +14,7 @@ from test_orchestrator import TestOrchestrator as _TestOrchestrator
 from tests import mock

 from orchestrator import raise_if_exception, Completion, ProgressReference
-from orchestrator import InventoryHost, DaemonDescription, ServiceDescription
+from orchestrator import InventoryHost, DaemonDescription, ServiceDescription, DaemonDescriptionStatus
 from orchestrator import OrchestratorValidationError
 from orchestrator.module import to_format, Format, OrchestratorCli, preview_table_osd
@@ -22,13 +22,14 @@ from orchestrator.module import to_format, Format, OrchestratorCli, preview_tabl
 def _test_resource(data, resource_class, extra=None):
     # ensure we can deserialize and serialize
     rsc = resource_class.from_json(data)
-    rsc.to_json()
+    assert rsc.to_json() == resource_class.from_json(rsc.to_json()).to_json()

     if extra:
         # if there is an unexpected data provided
-        data.update(extra)
+        data_copy = data.copy()
+        data_copy.update(extra)
         with pytest.raises(OrchestratorValidationError):
-            resource_class.from_json(data)
+            resource_class.from_json(data_copy)


 def test_inventory():
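The data.copy() fix matters because the old code mutated the caller's fixture
dict; test_daemon_description below reuses json_data for a follow-up
from_json() assertion, which the injected 'abc' key would corrupt. A minimal
demonstration of the hazard:

    # old behavior: the helper mutated the shared fixture
    data = {'daemon_id': 'a'}
    data.update({'abc': False})
    assert 'abc' in data              # fixture polluted for later assertions

    # new behavior: mutate a copy, leave the fixture intact
    data = {'daemon_id': 'a'}
    data_copy = data.copy()
    data_copy.update({'abc': False})
    assert 'abc' not in data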
@@ -62,10 +63,14 @@ def test_daemon_description():
     json_data = {
         'hostname': 'test',
         'daemon_type': 'mon',
-        'daemon_id': 'a'
+        'daemon_id': 'a',
+        'status': -1,
     }
     _test_resource(json_data, DaemonDescription, {'abc': False})

+    dd = DaemonDescription.from_json(json_data)
+    assert dd.status.value == DaemonDescriptionStatus.error.value
+

 def test_raise():
     c = Completion()
@@ -428,11 +428,11 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator):
             sd.hostname = p['hostname']
             sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '')
             status = {
-                'Pending': -1,
-                'Running': 1,
-                'Succeeded': 0,
-                'Failed': -1,
-                'Unknown': -1,
+                'Pending': orchestrator.DaemonDescriptionStatus.error,
+                'Running': orchestrator.DaemonDescriptionStatus.running,
+                'Succeeded': orchestrator.DaemonDescriptionStatus.stopped,
+                'Failed': orchestrator.DaemonDescriptionStatus.error,
+                'Unknown': orchestrator.DaemonDescriptionStatus.error,
             }[p['phase']]
             sd.status = status
             sd.status_desc = p['phase']