mirror of
https://github.com/ceph/ceph
synced 2025-02-19 17:08:05 +00:00
Merge pull request #26768 from sebastian-philipp/upstream-pull-26684
mgr/orchestrator: device lights Reviewed-by: Ernesto Puerta <epuertat@redhat.com> Reviewed-by: Sage Weil <sage@redhat.com> Reviewed-by: Volker Theile <vtheile@suse.com>
This commit is contained in:
commit
3feda32916
@ -182,10 +182,19 @@ Example::
|
|||||||
^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^
|
||||||
::
|
::
|
||||||
|
|
||||||
ceph orchestrator device ident-on <host> <devname>
|
ceph orchestrator device ident-on <dev_id>
|
||||||
ceph orchestrator device ident-off <host> <devname>
|
ceph orchestrator device ident-on <dev_name> <host>
|
||||||
ceph orchestrator device fault-on <host> <devname>
|
ceph orchestrator device fault-on <dev_id>
|
||||||
ceph orchestrator device fault-off <host> <devname>
|
ceph orchestrator device fault-on <dev_name> <host>
|
||||||
|
|
||||||
|
ceph orchestrator device ident-off <dev_id> [--force=true]
|
||||||
|
ceph orchestrator device ident-off <dev_name> <host> [--force=true]
|
||||||
|
ceph orchestrator device fault-off <dev_id> [--force=true]
|
||||||
|
ceph orchestrator device fault-off <dev_name> <host> [--force=true]
|
||||||
|
|
||||||
|
where ``dev_id`` is the device id as listed in ``osd metadata``,
|
||||||
|
``dev_name`` is the name of the device on the system and ``host`` is the host as
|
||||||
|
returned by ``orchestrator host ls``
|
||||||
|
|
||||||
ceph orchestrator osd ident-on {primary,journal,db,wal,all} <osd-id>
|
ceph orchestrator osd ident-on {primary,journal,db,wal,all} <osd-id>
|
||||||
ceph orchestrator osd ident-off {primary,journal,db,wal,all} <osd-id>
|
ceph orchestrator osd ident-off {primary,journal,db,wal,all} <osd-id>
|
||||||
|
@ -260,6 +260,9 @@ OSD management
|
|||||||
|
|
||||||
.. py:currentmodule:: orchestrator
|
.. py:currentmodule:: orchestrator
|
||||||
|
|
||||||
|
.. automethod:: Orchestrator.blink_device_light
|
||||||
|
.. autoclass:: DeviceLightLoc
|
||||||
|
|
||||||
.. _orchestrator-osd-replace:
|
.. _orchestrator-osd-replace:
|
||||||
|
|
||||||
OSD Replacement
|
OSD Replacement
|
||||||
|
@ -8,8 +8,8 @@ tasks:
|
|||||||
log-whitelist:
|
log-whitelist:
|
||||||
- overall HEALTH_
|
- overall HEALTH_
|
||||||
- \(MGR_DOWN\)
|
- \(MGR_DOWN\)
|
||||||
- \(MGR_INSIGHTS_WARNING\)
|
- \(DEVICE_IDENT_ON\)
|
||||||
- \(insights_health_check
|
- \(DEVICE_FAULT_ON\)
|
||||||
- \(PG_
|
- \(PG_
|
||||||
- replacing it with standby
|
- replacing it with standby
|
||||||
- No standby daemons available
|
- No standby daemons available
|
||||||
|
@ -14,8 +14,11 @@ log = logging.getLogger(__name__)
|
|||||||
class TestOrchestratorCli(MgrTestCase):
|
class TestOrchestratorCli(MgrTestCase):
|
||||||
MGRS_REQUIRED = 1
|
MGRS_REQUIRED = 1
|
||||||
|
|
||||||
|
def _cmd(self, module, *args):
|
||||||
|
return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args)
|
||||||
|
|
||||||
def _orch_cmd(self, *args):
|
def _orch_cmd(self, *args):
|
||||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
|
return self._cmd("orchestrator", *args)
|
||||||
|
|
||||||
def _progress_cmd(self, *args):
|
def _progress_cmd(self, *args):
|
||||||
return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args)
|
return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args)
|
||||||
@ -93,6 +96,30 @@ class TestOrchestratorCli(MgrTestCase):
|
|||||||
with self.assertRaises(CommandFailedError):
|
with self.assertRaises(CommandFailedError):
|
||||||
self._orch_cmd("osd", "create", "notfound:device")
|
self._orch_cmd("osd", "create", "notfound:device")
|
||||||
|
|
||||||
|
def test_blink_device_light(self):
    """Switch the ident and fault light of one device on and off again.

    For each light type the device must not be listed by
    ``device ls-lights`` beforehand, must be listed after switching the
    light on (and the matching health check must show up), and must be
    gone again after switching the light off.
    """
    # health check expected while a light of the given type is on
    health_checks = {
        'ident': 'DEVICE_IDENT_ON',
        'fault': 'DEVICE_FAULT_ON',
    }

    def _ls_lights(what):
        return json.loads(self._cmd("device", "ls-lights"))[what]

    osd_metadata = json.loads(self._cmd("osd", "metadata"))
    device_ids = [osd["device_ids"] for osd in osd_metadata]
    # take the first entry of the form "<name>=<id>"
    name_id_pairs = []
    for entry in device_ids:
        parts = entry.split('=')
        if len(parts) == 2:
            name_id_pairs.append(parts)
    _, dev_id = name_id_pairs[0]

    for light in ["ident", "fault"]:
        self.assertNotIn(dev_id, _ls_lights(light))
        self._cmd("device", "light", "on", dev_id, light)
        self.assertIn(dev_id, _ls_lights(light))

        self.wait_for_health(health_checks[light], 30)

        self._cmd("device", "light", "off", dev_id, light)
        self.assertNotIn(dev_id, _ls_lights(light))

    self.wait_for_health_clear(30)
|
||||||
|
|
||||||
def test_mds_add(self):
|
def test_mds_add(self):
|
||||||
self._orch_cmd("mds", "add", "service_name")
|
self._orch_cmd("mds", "add", "service_name")
|
||||||
|
|
||||||
|
@ -4,17 +4,15 @@ ceph-mgr orchestrator interface
|
|||||||
|
|
||||||
Please see the ceph-mgr module developer's guide for more information.
|
Please see the ceph-mgr module developer's guide for more information.
|
||||||
"""
|
"""
|
||||||
import copy
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import fnmatch
|
from collections import namedtuple
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
import uuid
|
import uuid
|
||||||
import string
|
import string
|
||||||
import random
|
import random
|
||||||
import datetime
|
import datetime
|
||||||
|
import copy
|
||||||
import six
|
|
||||||
|
|
||||||
from mgr_module import MgrModule, PersistentStoreDict
|
from mgr_module import MgrModule, PersistentStoreDict
|
||||||
from mgr_util import format_bytes
|
from mgr_util import format_bytes
|
||||||
@ -449,6 +447,17 @@ class Orchestrator(object):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def blink_device_light(self, ident_fault, on, locations):
    # type: (str, bool, List[DeviceLightLoc]) -> WriteCompletion
    """
    Ask the orchestrator to switch either the ident or the fault LED of
    the given devices on or off.

    The implementation here only raises ``NotImplementedError``.

    :param ident_fault: which LED to act on: either ``"ident"`` or ``"fault"``
    :param on: ``True`` = on.
    :param locations: the devices to act on.
        See :class:`orchestrator.DeviceLightLoc`
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
|
||||||
|
|
||||||
def update_mgrs(self, num, hosts):
|
def update_mgrs(self, num, hosts):
|
||||||
# type: (int, List[str]) -> WriteCompletion
|
# type: (int, List[str]) -> WriteCompletion
|
||||||
"""
|
"""
|
||||||
@ -947,6 +956,19 @@ class InventoryNode(object):
|
|||||||
return [cls(item[0], devs(item[1].data)) for item in hosts]
|
return [cls(item[0], devs(item[1].data)) for item in hosts]
|
||||||
|
|
||||||
|
|
||||||
|
class DeviceLightLoc(namedtuple('DeviceLightLoc', ['host', 'dev'])):
    """
    Location of one device on one host. Used to address the device whose
    LEDs are to be enabled or disabled.

    ``host``: hostname as in :func:`orchestrator.Orchestrator.get_hosts`

    ``dev``: device_id, e.g. ``ABC1234DEF567-1R1234_ABC8DE0Q``.
    See ``ceph osd metadata | jq '.[].device_ids'``
    """
    # no per-instance attributes beyond the two tuple fields
    __slots__ = ()
|
||||||
|
|
||||||
|
|
||||||
def _mk_orch_methods(cls):
|
def _mk_orch_methods(cls):
|
||||||
# Needs to be defined outside of for.
|
# Needs to be defined outside of for.
|
||||||
# Otherwise meth is always bound to last key
|
# Otherwise meth is always bound to last key
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
import errno
|
import errno
|
||||||
import json
|
import json
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
from prettytable import PrettyTable
|
from prettytable import PrettyTable
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from typing import Dict, List
|
from typing import List, Set, Optional
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass # just for type checking.
|
pass # just for type checking.
|
||||||
|
|
||||||
from functools import wraps
|
|
||||||
|
|
||||||
from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
|
from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError, \
|
||||||
DeviceSelection
|
DeviceSelection
|
||||||
@ -46,6 +46,120 @@ class OrchestratorCli(orchestrator.OrchestratorClientMixin, MgrModule):
|
|||||||
{'name': 'orchestrator'}
|
{'name': 'orchestrator'}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Set up the light bookkeeping, then load stored state and publish health checks."""
    super(OrchestratorCli, self).__init__(*args, **kwargs)

    # Device ids whose ident / fault light is recorded as switched on.
    # Both start out empty; _load() replaces them if a stored state exists.
    self.ident = set()  # type: Set[str]
    self.fault = set()  # type: Set[str]

    self._load()
    self._refresh_health()
|
||||||
|
|
||||||
|
def _load(self):
|
||||||
|
active = self.get_store('active_devices')
|
||||||
|
if active:
|
||||||
|
decoded = json.loads(active)
|
||||||
|
self.ident = set(decoded.get('ident', []))
|
||||||
|
self.fault = set(decoded.get('fault', []))
|
||||||
|
self.log.debug('ident {}, fault {}'.format(self.ident, self.fault))
|
||||||
|
|
||||||
|
def _save(self):
|
||||||
|
encoded = json.dumps({
|
||||||
|
'ident': list(self.ident),
|
||||||
|
'fault': list(self.fault),
|
||||||
|
})
|
||||||
|
self.set_store('active_devices', encoded)
|
||||||
|
|
||||||
|
def _refresh_health(self):
|
||||||
|
h = {}
|
||||||
|
if self.ident:
|
||||||
|
h['DEVICE_IDENT_ON'] = {
|
||||||
|
'severity': 'warning',
|
||||||
|
'summary': '%d devices have ident light turned on' % len(
|
||||||
|
self.ident),
|
||||||
|
'detail': ['{} ident light enabled'.format(d) for d in self.ident]
|
||||||
|
}
|
||||||
|
if self.fault:
|
||||||
|
h['DEVICE_FAULT_ON'] = {
|
||||||
|
'severity': 'warning',
|
||||||
|
'summary': '%d devices have fault light turned on' % len(
|
||||||
|
self.fault),
|
||||||
|
'detail': ['{} fault light enabled'.format(d) for d in self.ident]
|
||||||
|
}
|
||||||
|
self.set_health_checks(h)
|
||||||
|
|
||||||
|
def _get_device_locations(self, dev_id):
    # type: (str) -> List[orchestrator.DeviceLightLoc]
    """Return every known location of the device with id ``dev_id``.

    Looks through ``self.get('devices')['devices']`` for entries whose
    ``devid`` equals ``dev_id`` and turns each item of their ``location``
    list into a :class:`orchestrator.DeviceLightLoc`.

    :param dev_id: device id to look up
    :return: list of locations; empty (never ``None``) when no device matches
    """
    # A nested comprehension flattens the per-device location lists in one
    # pass instead of concatenating lists with sum(), which is quadratic.
    return [
        orchestrator.DeviceLightLoc(**loc)
        for device in self.get('devices')['devices']
        if device['devid'] == dev_id
        for loc in device['location']
    ]
|
||||||
|
|
||||||
|
@_read_cli(prefix='device ls-lights',
           desc='List currently active device indicator lights')
def _device_ls(self):
    """Report the devices whose ident / fault light is recorded as on, as indented JSON."""
    lights = {
        'ident': list(self.ident),
        'fault': list(self.fault)
    }
    return HandleCommandResult(stdout=json.dumps(lights, indent=4))
|
||||||
|
|
||||||
|
def light_on(self, fault_ident, devid):
    # type: (str, str) -> HandleCommandResult
    """Switch the given light of a device on and record it as active.

    The device is added to the matching set, the state is saved and the
    health checks are refreshed before the orchestrator is asked to
    switch the light.

    :param fault_ident: which light to switch: ``"fault"`` or ``"ident"``
    :param devid: device id to look up via ``_get_device_locations``
    :return: the completion's result as stdout, or an ``ENOENT`` error
        result when no location is known for ``devid``
    """
    assert fault_ident in ("fault", "ident")
    locs = self._get_device_locations(devid)
    # _get_device_locations() returns a (possibly empty) list, never None,
    # so "not found" has to be detected by emptiness.
    if not locs:
        return HandleCommandResult(stderr='device {} not found'.format(devid),
                                   retval=-errno.ENOENT)

    getattr(self, fault_ident).add(devid)
    self._save()
    self._refresh_health()
    completion = self.blink_device_light(fault_ident, True, locs)
    self._orchestrator_wait([completion])
    return HandleCommandResult(stdout=str(completion.result))
|
||||||
|
|
||||||
|
def light_off(self, fault_ident, devid, force):
    # type: (str, str, bool) -> HandleCommandResult
    """Switch the given light of a device off and drop it from the active set.

    :param fault_ident: which light to switch: ``"fault"`` or ``"ident"``
    :param devid: device id to look up via ``_get_device_locations``
    :param force: also forget the device when it cannot be found or the
        orchestrator call fails
    :return: the completion's result as stdout, or an ``ENOENT`` error
        result when no location is known for ``devid``
    :raises Exception: whatever the orchestrator call raised; with
        ``force`` the device has been forgotten before re-raising
    """
    assert fault_ident in ("fault", "ident")
    active = getattr(self, fault_ident)  # set tracking this light type

    def _forget():
        # Drop the device from the tracked set and publish the new state.
        if devid in active:
            active.remove(devid)
            self._save()
            self._refresh_health()

    locs = self._get_device_locations(devid)
    # _get_device_locations() returns a (possibly empty) list, never None,
    # so "not found" has to be detected by emptiness.
    if not locs:
        if force:
            # The device is no longer known to Ceph; --force still has to
            # be able to clear the stale entry.
            _forget()
        return HandleCommandResult(stderr='device {} not found'.format(devid),
                                   retval=-errno.ENOENT)

    try:
        completion = self.blink_device_light(fault_ident, False, locs)
        self._orchestrator_wait([completion])
        _forget()
        return HandleCommandResult(stdout=str(completion.result))
    except Exception:
        # There are several reasons the try: block might fail:
        # 1. the device no longer exist
        # 2. the device is no longer known to Ceph
        # 3. the host is not reachable
        if force:
            _forget()
        raise
|
||||||
|
|
||||||
|
@_write_cli(prefix='device light',
            cmd_args='name=enable,type=CephChoices,strings=on|off '
                     'name=devid,type=CephString '
                     'name=light_type,type=CephChoices,strings=ident|fault,req=false '
                     'name=force,type=CephBool,req=false',
            desc='Enable or disable the device light. Default type is `ident`\n'
                 'Usage: device light (on|off) <devid> [ident|fault] [--force]')
def _device_light(self, enable, devid, light_type=None, force=False):
    # type: (str, str, Optional[str], bool) -> HandleCommandResult
    """Dispatch ``device light on|off`` to ``light_on`` / ``light_off``.

    :param enable: ``'on'`` switches the light on; anything else switches it off
    :param devid: device id the light belongs to
    :param light_type: ``'ident'`` or ``'fault'``; falls back to ``'ident'``
        when not given
    :param force: only passed on when switching off
    """
    if not light_type:
        light_type = 'ident'
    if enable == 'on':
        return self.light_on(light_type, devid)
    return self.light_off(light_type, devid, force)
|
||||||
|
|
||||||
def _select_orchestrator(self):
|
def _select_orchestrator(self):
|
||||||
return self.get_module_option("orchestrator")
|
return self.get_module_option("orchestrator")
|
||||||
|
|
||||||
|
@ -246,6 +246,12 @@ class TestOrchestrator(MgrModule, orchestrator.Orchestrator):
|
|||||||
def remove_osds(self, osd_ids, destroy=False):
|
def remove_osds(self, osd_ids, destroy=False):
|
||||||
assert isinstance(osd_ids, list)
|
assert isinstance(osd_ids, list)
|
||||||
|
|
||||||
|
@deferred_write("blink_device_light")
def blink_device_light(self, ident_fault, on, locations):
    """Test stand-in: check the arguments and return an empty string.

    Asserts that ``ident_fault`` is ``"ident"`` or ``"fault"`` and that
    ``locations`` is not empty; no light is touched.
    """
    assert ident_fault in ("ident", "fault")
    assert len(locations)
    result = ''
    return result
|
||||||
|
|
||||||
@deferred_write("service_action")
|
@deferred_write("service_action")
|
||||||
def service_action(self, action, service_type, service_name=None, service_id=None):
|
def service_action(self, action, service_type, service_name=None, service_id=None):
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user